In [1]:
# Fine-tune the model that was pretrained on VoxCeleb.
# Its weights are updated using the new (NTT corevo) data.

In [1]:
# ライブラリの読み込み
## 標準
import os
import re
import glob
import pandas as pd
import numpy as np
import pickle

# keras
import keras
#from keras.datasets import mnist
from keras.models import Sequential,load_model,Model
from keras.layers import Dense, Dropout, Flatten,Activation
from keras.layers import Conv2D, MaxPooling2D,BatchNormalization
from keras.optimizers import Adam,SGD
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from keras.callbacks import ModelCheckpoint,EarlyStopping
from keras.utils import to_categorical
from keras.callbacks import LearningRateScheduler


Using TensorFlow backend.


In [2]:
# ユーティリティ関数の読み込み
from Util import NormalizeHorizontalDirection
from Util import DefineModel
#from Util import SpectrogramDataGenerator

from project_utility import f1,f1_loss,SpectrogramGenerator

# resnet構築に必要な関数、クラスを読み込み
# resnetの構築に必要なライブラリを読み込み
from resnet import compose,ResNetConv2D,ResnetBuilder,shortcut,basic_block,residual_blocks,bottleneck

In [3]:
# Set the working directory so that the relative paths used below
# ("01_input/...", "03_work/...") resolve.
# The location can be overridden with the NTT_COREVO_WORKDIR environment
# variable; when it is unset, the original project directory is used.
os.chdir(os.environ.get("NTT_COREVO_WORKDIR",
                        "/home/taichi/DataAnalysis/05_NTT_corevo/"))

In [4]:
# Fix the seed of NumPy's global random generator so that NumPy-driven
# randomness is repeatable between runs.
np.random.seed(seed=20190203)

In [5]:
# Load the training annotations (tab-separated; the two columns are named
# "filename" and "label" here) and derive the relative path of each clip's
# .wav file from its file name.
train_info = pd.read_csv(
    "01_input/ntt_corevo/class_train.tsv",
    sep="\t",
    names=["filename", "label"],
).assign(
    filepath=lambda frame: "01_input/ntt_corevo/train/" + frame["filename"] + ".wav"
)

In [6]:
# Preview the first five rows of the training table.
train_info.head(n=5)

Unnamed: 0,filename,label,filepath
0,0002f1cd968ca78ada9e1c7037224773,MA_CH,01_input/ntt_corevo/train/0002f1cd968ca78ada9e...
1,0003747ec9268461d4cbb9e1b86e9663,FE_AD,01_input/ntt_corevo/train/0003747ec9268461d4cb...
2,0003b32f378b001f0f73bf0981da8773,MA_CH,01_input/ntt_corevo/train/0003b32f378b001f0f73...
3,0004ab975bf8b59e1b19f2b7b6d1548b,MA_CH,01_input/ntt_corevo/train/0004ab975bf8b59e1b19...
4,0005678b57ca265a65f8ef0cc7481277,MA_AD,01_input/ntt_corevo/train/0005678b57ca265a65f8...


In [7]:
# Load the per-file table of the test clips (it carries at least a "filename"
# column; "time" is presumably the clip duration -- TODO confirm).
test_info = pd.read_csv("03_work/test_time_distribution.csv")
# Build the relative path of each test clip's .wav file. The "test/"
# sub-directory must be part of the prefix, mirroring "train/" for the
# training clips; without it the paths do not point into the test folder.
test_info["filepath"] = "01_input/ntt_corevo/test/" + test_info["filename"] + ".wav"

In [8]:
# Preview the first five rows of the test table.
test_info.head(n=5)

Unnamed: 0,filepath,filename,time
0,01_input/ntt_corevo/test/cd3bcd8851fe5104f8733...,cd3bcd8851fe5104f8733f6a986dff93,9.04
1,01_input/ntt_corevo/test/6b8ef99a30325b32c5eea...,6b8ef99a30325b32c5eea1203fc5faa7,2.250023
2,01_input/ntt_corevo/test/81ec9938d1cb7a7e3040d...,81ec9938d1cb7a7e3040df1d61a85ca1,10.34
3,01_input/ntt_corevo/test/d42bce65590d7aa68a959...,d42bce65590d7aa68a95932cd9a7c1ef,6.530023
4,01_input/ntt_corevo/test/55a0aac23227acb984dc3...,55a0aac23227acb984dc3db409c1416a,3.270023


In [9]:
# Dictionary that converts a class label into an integer id. The ids follow
# the (sorted) order in which np.unique returns the distinct labels.
class_labels = np.unique(train_info["label"])
label2int = dict(zip(class_labels, range(len(class_labels))))

# Number of distinct class labels.
num_classes = len(class_labels)
 

In [10]:
# 学習データの分割
# データを学習データと評価用データに分割
#X_train,X_val,y_train,y_val = train_test_split(np.array(train_info["filepath"]),
 #                                                np.array(train_info["label"]),
  #                                                          test_size=0.1,
   #                                             random_state = 1234)

In [11]:
# スペクトログラム(入力データ)のサイズ
#input_shape = (512,300,1)

# モデルを定義
###model = DefineModel(input_shape,num_classes)
#model = ResnetBuilder.build_resnet_50(input_shape, num_classes)

In [12]:
# Restore the saved VoxCeleb model from disk. The project's custom `f1`
# function is handed to Keras through custom_objects (presumably because the
# saved model references it as a metric -- verify against the training code).
voxceleb_trained_model = load_model(
    "03_work/models/resnet_voxceleb/resnet_voxceleb_model_time_500.h5",
    custom_objects=dict(f1=f1),
)

In [13]:
# Count the layers of the pretrained network and show the result.
num_layer = len(voxceleb_trained_model.layers)
print(num_layer)

171


In [14]:
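# Fine-tuning: the pretrained parameters are NOT frozen, so every layer stays trainable.
# (For plain transfer learning, freeze the parameters by enabling the loop below.)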
#for i in range(num_layer):
#    voxceleb_trained_model.layers[i].trainable = False

In [15]:
# Take the output of the second-to-last layer of the pretrained network
# (i.e. skip its final layer) and attach a new softmax layer with one unit
# per target class.
penultimate_output = voxceleb_trained_model.layers[num_layer - 2].output
output = Dense(units=num_classes, activation="softmax")(penultimate_output)

In [16]:
# Assemble the fine-tuning model: same input as the pretrained network,
# ending in the newly attached softmax output.
model = Model(outputs=output, inputs=voxceleb_trained_model.input)

In [17]:
# Print the layer-by-layer structure of the fine-tuning model.
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 500, 1)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 256, 250, 64) 3200        input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 256, 250, 64) 256         conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 256, 250, 64) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________________
max_poolin

In [18]:
# Compile the fine-tuning model.
LEARNING_RATE = 0.002
DECAY = 1e-6

# SGD with Nesterov momentum; clipnorm=5 clips the gradient norm.
# (Alternative kept for reference: optimizer=Adam())
sgd_optimizer = SGD(
    lr=LEARNING_RATE,
    momentum=0.9,
    decay=DECAY,
    nesterov=True,
    clipnorm=5,
)

model.compile(
    optimizer=sgd_optimizer,
    loss="categorical_crossentropy",
    metrics=[f1, "accuracy"],
)

In [19]:
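# Legacy data generator, kept for reference only: the class below is wrapped in a
# string literal, so it is never defined (training uses project_utility.SpectrogramGenerator).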
"""
class SpectrogramDataGenerator(object):
    def __init__(self):
        self.reset()
#        self.num_classes = num_classes

    def reset(self):
        self.spectrograms = []
        self.labels = []
        
    def zero_padding(self,spectral,thre):
        n_col = spectral.shape[1]
        if n_col >= thre :
            spectral_pad = spectral[:,0:thre]
        else:
            # lbをランダムに設定
           # lb = np.random.randint(0,thre - n_col + 1)
            lb = 0
            ub = thre - n_col - lb
            spectral_pad = np.pad(spectral,((0,0),(lb,ub)),"constant")
        return(spectral_pad)
        
    def GenerateBatch(self,X,y,speaker_dict,time_len = 300,batch_size = 10,shuffle = True,margin_ms = 0):
        num_classes = len(speaker_dict.keys())
        while True:
            if shuffle:
                indexes = np.random.permutation(len(X))
                X = X[indexes]
                y = y[indexes]

            for tmp_X,tmp_y in zip(X,y):
                with open(tmp_X,"rb") as f:
                    fr,t,spectral = pickle.load(f)
                if (spectral.shape[1] - time_len) <= 0:
                    lb_ms = 0
                    ub_ms = spectral.shape[1]#
                    #np.random.randint(margin_ms,spectral.shape[1])
                else:
                    lb_ms = np.random.randint(margin_ms,spectral.shape[1] - time_len)
                    ub_ms = lb_ms + time_len

                #周波数方向に正規化
                spectral_norm = NormalizeHorizontalDirection(spectral[:,lb_ms:ub_ms])
                # thre 以下の場合はpadding
                spectral_norm = self.zero_padding(spectral_norm,time_len)
                target = to_categorical(speaker_dict[tmp_y],num_classes=num_classes)
                
                self.spectrograms.append(spectral_norm)
                self.labels.append(target)

                if len(self.labels) == batch_size:
                    inputs = np.asarray(self.spectrograms, dtype=np.float32).reshape(batch_size,
                                                                                   spectral_norm.shape[0],
                                                                                    spectral_norm.shape[1],1)
                    targets = np.asarray(self.labels, dtype=np.float32)
                    self.reset()
                    yield (inputs,targets)
"""

'\nclass SpectrogramDataGenerator(object):\n    def __init__(self):\n        self.reset()\n#        self.num_classes = num_classes\n\n    def reset(self):\n        self.spectrograms = []\n        self.labels = []\n        \n    def zero_padding(self,spectral,thre):\n        n_col = spectral.shape[1]\n        if n_col >= thre :\n            spectral_pad = spectral[:,0:thre]\n        else:\n            # lbをランダムに設定\n           # lb = np.random.randint(0,thre - n_col + 1)\n            lb = 0\n            ub = thre - n_col - lb\n            spectral_pad = np.pad(spectral,((0,0),(lb,ub)),"constant")\n        return(spectral_pad)\n        \n    def GenerateBatch(self,X,y,speaker_dict,time_len = 300,batch_size = 10,shuffle = True,margin_ms = 0):\n        num_classes = len(speaker_dict.keys())\n        while True:\n            if shuffle:\n                indexes = np.random.permutation(len(X))\n                X = X[indexes]\n                y = y[indexes]\n\n            for tmp_X,tmp_y in zi

In [20]:
# Settings handed to the batch generator.
lb, ub = 0, 500
speaker_dict = label2int
BATCH_SIZE = 8
# 500 matches the last spatial dimension of the model input shown by
# model.summary() (None, 512, 500, 1) -- presumably the number of time frames.
time_len = 500
milestones = [-1, -1, -1, -1, 100]

# Create the generator.
# To check that training works on a small sample, pass train_info.head(n)
# instead of train_info (e.g. n = 95).
train_datagen = SpectrogramGenerator(
    lb,
    ub,
    train_info,
    speaker_dict,
    BATCH_SIZE,
    time_len,
    test_info,
    milestones,
)

# Number of epochs.
NUM_EPOCHS = 200
# NUM_EPOCHS = 1

In [None]:
# Early-stopping patience: how many epochs without val_loss improvement are
# tolerated before training is stopped.
# FIXME: revisit this value. The original note said a patience of 15 may be
# too large and that around 8 may be enough; the value in effect is 1.
patience = 1

save_dir = "03_work/models/fine_tuning/"
# Make sure the checkpoint directory exists before training starts, so the
# first checkpoint write cannot fail on a missing directory.
os.makedirs(save_dir, exist_ok=True)

# Learning-rate schedule. Disabled: it is kept inside a string literal and the
# LearningRateScheduler below is commented out.
"""
def step_decay(epoch):
    x = 0.04
    if epoch <= 1: 
        x = 0.01
    elif epoch <= 2:
        x = 0.01
    elif epoch <= 3:
        x = 0.01
    elif epoch <= 4:
        x = 0.01
    elif epoch <= 5:
        x = 0.01
    elif epoch <= 6:
        x = 0.01   
    return x
    
"""

#lr_decay = LearningRateScheduler(step_decay)

# Stop training once val_loss has not improved for `patience` epochs.
es = EarlyStopping(monitor = "val_loss",
                   patience = patience)

# Write the model to disk whenever val_loss improves (best model only).
mc = ModelCheckpoint(save_dir + "fine_tuning_model.h5",
                    monitor = "val_loss",
                    save_best_only = True,
                    verbose = 1)

# Number of batches per training / validation pass, as reported by the generator.
steps_per_epoch = train_datagen.train_step_per_epoch
validation_steps = train_datagen.val_step_per_epoch

#steps_per_epoch = 10
#validation_steps = 10

# FIXME: shorten the optimization time, e.g. by using momentum.
hist = model.fit_generator(generator = train_datagen.next_train(),
                           epochs = NUM_EPOCHS,
                           steps_per_epoch = steps_per_epoch,
                           validation_data = train_datagen.next_val(),
                           validation_steps = validation_steps,
                           callbacks = [mc,es],
                           verbose = 1
                          )

Epoch 1/200
  521/11342 [>.............................] - ETA: 54:34 - loss: 0.9896 - f1: 0.2393 - acc: 0.6558

In [23]:
# Save the training log: one text file per entry of hist.history, holding the
# values recorded for that entry.
log_dir = os.path.join(save_dir, "logs")
# np.savetxt does not create directories; create the folder first so the
# history of a long training run is not lost to a missing-directory error.
os.makedirs(log_dir, exist_ok=True)
for key in hist.history.keys():
    np.savetxt(os.path.join(log_dir, "{0}.txt".format(key)),
               hist.history[key],
               delimiter=",")