In [1]:
# jupyter-notebookのcellの幅を広げる
from IPython.core.display import display,HTML
display(HTML("<style>.container{width:100%!important;}</style>"))

In [2]:
import numpy as np
import pandas as pd
import os
import sys
import re
import matplotlib.pyplot as plt
import seaborn as sns

# 音声解析用のライブラリ
import scipy.signal as ss
import librosa
import librosa.display
import IPython.display as ipd #jupyter-notebook上で音声再生import matplotlib.pyplot as plt
import pickle

# ユーティリティメソッドの読み込み 
from Util import NormalizeHorizontalDirection 

# jupyter-notebook上に図を出力するマジックコマンド
%matplotlib inline

Using TensorFlow backend.


In [3]:
# 作業ディレクトリの設定
os.chdir("/home/taichi/DataAnalysis/05_NTT_corevo")

In [4]:
anno = pd.read_csv("01_input/ntt_corevo/class_train.tsv",
                  delimiter = "\t",
                  names = ("filename","class"))

In [5]:
anno.head()

Unnamed: 0,filename,class
0,0002f1cd968ca78ada9e1c7037224773,MA_CH
1,0003747ec9268461d4cbb9e1b86e9663,FE_AD
2,0003b32f378b001f0f73bf0981da8773,MA_CH
3,0004ab975bf8b59e1b19f2b7b6d1548b,MA_CH
4,0005678b57ca265a65f8ef0cc7481277,MA_AD


In [6]:
# データのサイズの確認
# 約15万発話のデータセットを作成できる。
# 2018/07/08現在、youtubeのリンク切れなどでダウンロードできないデータが存在するため、142063発話のデータセットを作成した。
print("the number of speeches : {}".format(anno.shape[0]))
#print("the number of speakers : {}".format(anno["speaker"].drop_duplicates().shape[0]) )

the number of speeches : 37806


In [7]:
# filepath変数を作成
anno["filepath"] = "01_input/ntt_corevo/train/" + anno["filename"] + ".wav"
#
anno["savepath"] = "03_work/spectrogram/" + anno["filename"] + ".pickle"

In [8]:
anno.head()

Unnamed: 0,filename,class,filepath,savepath
0,0002f1cd968ca78ada9e1c7037224773,MA_CH,01_input/ntt_corevo/train/0002f1cd968ca78ada9e...,03_work/spectrogram/0002f1cd968ca78ada9e1c7037...
1,0003747ec9268461d4cbb9e1b86e9663,FE_AD,01_input/ntt_corevo/train/0003747ec9268461d4cb...,03_work/spectrogram/0003747ec9268461d4cbb9e1b8...
2,0003b32f378b001f0f73bf0981da8773,MA_CH,01_input/ntt_corevo/train/0003b32f378b001f0f73...,03_work/spectrogram/0003b32f378b001f0f73bf0981...
3,0004ab975bf8b59e1b19f2b7b6d1548b,MA_CH,01_input/ntt_corevo/train/0004ab975bf8b59e1b19...,03_work/spectrogram/0004ab975bf8b59e1b19f2b7b6...
4,0005678b57ca265a65f8ef0cc7481277,MA_AD,01_input/ntt_corevo/train/0005678b57ca265a65f8...,03_work/spectrogram/0005678b57ca265a65f8ef0cc7...


In [19]:
def ReadData(path):
    x,sr = librosa.load(path)
    return(x,sr)

def stfft(extract_x,sr):
    frame_bin = int(sr * (10 ** (-3)))
    f, t, Zxx = ss.stft(extract_x,fs = sr,
                    window = ss.get_window("hamming",frame_bin * 25),# 25ms
                    nperseg = frame_bin * 25, 
                    noverlap = frame_bin * 15,# 15msずつoverlapしていることになる    
                    nfft = 1023 # ここが1024で動くのがよくわからん。
                   )
    spectral = np.abs(Zxx)
    #周波数方向に正規化
    #Zxx_norm = calc_zscore(spectral)
    #正規化は秒数で抽出してから。
    return((f,t,spectral))

In [11]:

#for i,v in TestMeta.iterrows():
def Read_stFFT_save(v):
    filepath = v["filepath"]
    savepath = v["savepath"]
    print(filepath)
    if os.path.isfile(filepath):
        #ファイル名を設定
#        FileName = v["speaker"] + "-" + re.sub(".*/(.*)","\\1",v["segment"])
        #既にstFFTしたデータが存在する場合は、なにも返さない。
        if os.path.isfile(savepath):
            pass
            #print("File exist")
        else:
            #音声データの読み込み
            x,sr = ReadData(filepath)
            #一応、サンプリングレートが一致かどうか確認
            if sr !=22050:
                print("Diff")
#            start_index = int(v["start"] * sr)
#            end_index = int(v["end"] * sr)
            extract_x = x#[start_index:end_index]
            #短時間フーリエ変換
            fr,t,spectral = stfft(extract_x,sr)
            #output = (f,t,Zxx_norm)
            #ファイル名を設定
            #FileName = v["speaker"] + "-" + re.sub(".*/(.*)","\\1",v["segment"])
            with open(savepath,"wb") as f:
                pickle.dump((fr,t,spectral),f)
        
        #f,t,Zxx_normの保存
        
    #break


In [23]:
# スペクトログラムを確認
#for i,v in anno.head(5).iterrows():
#    break

In [24]:
#sr = 22050
#x,r = ReadData(v["filepath"])
#fr,t,spectral = stfft(x,sr)

In [25]:
spectral.shape

(512, 313)

In [26]:
#切り出したデータセットの作成
#周波数方向のNormlizationはしておらず。
from joblib import Parallel, delayed
from time import time

#音声の抽出を並列実行。
NJOBS = 12
Parallel(n_jobs=NJOBS)( [delayed(Read_stFFT_save)(v) for i,v in anno.iterrows()] )

[None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,
 None,

In [21]:
#with open('03_work/spectrogram/0002f1cd968ca78ada9e1c7037224773.pickle', mode='rb') as f:
#    a = pickle.load(f)

In [22]:
# FIXME : testデータのスペクトログラムを計算するスクリプトが消えちゃった。
test_info = pd.read_csv("03_work/test_time_distribution.csv")

In [23]:
test_info["savepath"] = "03_work/test_spectrogram/" + test_info["filename"] + ".pickle"

In [24]:
NJOBS = 12
Parallel(n_jobs=NJOBS)( [delayed(Read_stFFT_save)(v) for i,v in test_info.iterrows()] )

[None, None, None, None, None]