In [1]:
# kaggleの以下のURLを参考に、metric learningを実装
# https://www.kaggle.com/bestaar/deep-metric-learning-with-pretrained-keras-models

In [7]:
#  triplet networks can explicitely learn similarity between inputs:
#  -> Hoffer, Elad, and Nir Ailon. "Deep metric learning using triplet network." International Workshop on Similarity-Based Pattern Recognition. Springer, Cham, 2015.

import os
import numpy as np
import pandas as pd
import pickle
import math

# Here is a script for loading pretrained models in keras to finetune them in a triplet network setting
from keras.layers import Input,Lambda,subtract,GlobalMaxPooling2D,Dense,GlobalAveragePooling2D,concatenate,Activation,Flatten
from keras.applications.xception import Xception as Net
from keras.preprocessing import image
from keras.applications.xception import preprocess_input
from keras.models import Model,load_model
from keras.optimizers import Adam,SGD
from keras.callbacks import ModelCheckpoint,EarlyStopping

# sklearn
from sklearn.model_selection import train_test_split

#import cv2
import matplotlib.pyplot as plt


In [8]:
from Util import NormalizeHorizontalDirection

from project_utility import f1,f1_loss,SpectrogramGenerator, change_pitch,change_speed,change_speed_and_pitch,add_noise,slightly_timeshift,stfft,zero_padding,calc_spectrogram

from resnet import compose,ResNetConv2D,ResnetBuilder,shortcut,basic_block,residual_blocks,bottleneck

In [9]:
# 作業ディレクトリの設定
os.chdir("/home/taichi/DataAnalysis/05_NTT_corevo")

In [10]:
# 乱数のseedを固定
np.random.seed(1234)

In [11]:
def create_model(input_shape):
    ## Input shape
    d1 = input_shape[0]
    d2 = input_shape[1]
        
    # The triplet network takes 3 input images: 2 of the same class and 1 out-of-class sample
    input_tensor1 = Input(shape=(d1, d2, 1))
    input_tensor2 = Input(shape=(d1, d2, 1))
    input_tensor3 = Input(shape=(d1, d2, 1))
    
    # load a pretrained model (try, except block because the kernel would not let me download the weights for the network)
    try:
#        model_path = "03_work/models/resnet_voxceleb/resnet_voxceleb_model_time_500.h5"
        model_path = "03_work/models/fine_tuning/fine_tuning_model.h5"
       
        trained_model = load_model(model_path,
                                           custom_objects={"f1":f1})
    
#        trained_model = ResnetBuilder.build_resnet_50([d1,d2,1], 6)
        
        layer_name = 'activation_49'
        base_model = Model(inputs=trained_model.input, 
                                   outputs=trained_model.get_layer(layer_name).output)
        
#        base_model = Net(input_shape=(d1,d2,3),weights='imagenet',include_top=False)

    except:
        print('Could not download weights. Using random initialization...')
       # base_model = Net(input_shape=(d1,d2,3),weights=None,include_top=False)
    
    # predefine a summation layer for calculating the distances:
    # the weights of this layer will be set to ones and fixed  (since they
    # are shared we could also leave them trainable to get a weighted sum)
    summation = Dense(1,activation='linear',kernel_initializer='ones',bias_initializer='zeros',name='summation')
    # feed all 3 inputs into the pretrained keras model
    x1 = base_model(input_tensor1)
    x2 = base_model(input_tensor2)
    x3 = base_model(input_tensor3)
    # flatten/summarize the models output:
    # (here we could also use GlobalAveragePooling or simply Flatten everything)
    
    x1 = GlobalMaxPooling2D()(x1)
    x2 = GlobalMaxPooling2D()(x2)
    x3 = GlobalMaxPooling2D()(x3)
#    x1 = Flatten()(x1)
 #   x2 = Flatten()(x2)
  #  x3 = Flatten()(x3)
    
    # calculate something proportional to the euclidean distance
    #   a-b
    d1 = subtract([x1,x2])
    d2 = subtract([x1,x3])
    #   (a-b)**2
    d1 = Lambda(lambda val: val**2)(d1)
    d2 = Lambda(lambda val: val**2)(d2)
    # sum((a-b)**2)
    d1 = summation(d1)
    d2 = summation(d2)
    #  concatenate both distances and apply softmax so we get values from 0-1
    d = concatenate([d1,d2])
    d = Activation('softmax')(d)
    # build the model and show a summary
    model = Model(inputs=[input_tensor1,input_tensor2,input_tensor3], outputs=d)
    # a second model that can be used as metric between input 1 and input 2
    metric = Model(inputs=[input_tensor1,input_tensor2], outputs=d1)

    # draw the network (it looks quite nice)
    try:
        from keras.utils.vis_utils import plot_model as plot
        plot(model, to_file = 'Triplet_Dense121.png')
    except ImportError:
        print('It seems like the dependencies for drawing the model (pydot, graphviz) are not installed')
    # fix the weights of the summation layer (since the weight of this layer
    # are shared we could also leave them trainable to get a weighted sum)
    for l in model.layers:
        if l.name == 'summation':
            print('fixing weights of summation layer')
            l.trainable=False
    # compile model
    LEARNING_RATE = 0.02
    DECAY = 1e-6
    
    sgd = SGD(lr=LEARNING_RATE, decay=DECAY, momentum=0.9, nesterov=True, clipnorm=5)
    
    
    ## base_modelは同じデータを参照している。 (ポインタ的な)
    model.summary()
    
    model.compile(optimizer=sgd, 
                  loss='categorical_crossentropy')
        
    return model, metric

In [12]:
input_shape = (512,500,1)
model, metric = create_model(input_shape)

fixing weights of summation layer


In [13]:
# metric learning用のGeneratorを作成。
class SpectrogramGeneratorMetricLearning():
    
    def __init__(self,lb,ub,train_info,speaker_dict,batch_size,time_len,test_info,milestone):
        self.reset()

        self.lb = lb
        self.ub = ub
        #self.train_info = train_info
        # 学習する際は__init__の時点でcalc_spectrogram_funcを設定する必要はない。
        self.calc_spectrogram_func = lambda filepath: calc_spectrogram(filepath,
                                                               lb_ms=lb,ub_ms=ub,
                                                               speed=False,pitch=False,
                                                               time_shift=False,noise=False)
        
        X_train,X_val,y_train,y_val = train_test_split(np.array(train_info["filepath"]),
                                                       np.array(train_info["label"]),
                                                       test_size=0.1,
                                                       random_state = 1234)
        self.X_train = X_train
        self.y_train = y_train
        
        self.X_val = X_val
        self.y_val = y_val        
                
        self.speaker_dict = speaker_dict
        self.num_classes = len(speaker_dict)
        
        self.batch_size = batch_size
        self.time_len = time_len        
        # step_per_epoch
        
        self.train_step_per_epoch = int(np.ceil(len(X_train) / batch_size))
        self.val_step_per_epoch = int(np.ceil(len(X_val) / batch_size))
        
        # テストデータ
        X_test = np.array(test_info["filepath"])
        self.X_test = X_test
        # y_testはダミーを加えておく
        self.y_test = np.repeat("FE_AD",len(test_info["filepath"]))
        self.test_step_per_epoch = int(np.ceil(len(X_test) / batch_size))        
        
        ## カリキュラム学習
        self.milestones = milestones
        
        ## iterationの回数
        self.train_cur_index = 0
        self.val_cur_index = 0
                
    def reset(self):
        self.batch1 = []
        self.batch2 = []
        self.batch3 = []
        self.output= []
    
    def get_batch(self,X,y):
        
        # metric learningでは、3つのデータの距離d1,d2を出力にする
        target_labels = np.random.choice(list(self.speaker_dict.keys()), 
                                 size = 2, replace = False)
        
        # 3つのデータのうち2つは共通のラベルで、1つは異なるラベルにする
        match_label = target_labels[0]
        diff_label = target_labels[1]
#        print(match_label)
        
        
        # データをpandas dataframeからランダムに抽出
        match_data = np.random.choice(X[y == match_label],
                                      size = 2,
                                      replace = False)

        diff_data = np.random.choice(X[y == diff_label],
                                     size = 1,
                                     replace = False)
        
        # データのpathをマージして、スペクトログラムを計算
        merge_data = np.array([self.calc_spectrogram_func(path) for path in np.array([match_data[0],match_data[1],diff_data[0]])])
        
        # データのラベルをマージ
#        merge_label = np.array([match_label,match_label,diff_label])
        
        distance = [0,1]
                        
        # データの順番をシャッフル ->　base_modelは共通なので、shuffleは必要ない。
#        indexes = np.random.permutation(3)
        
 #       merge_data = merge_data[indexes]
  #      merge_label = merge_label[indexes]
        
        # d1, d2を計算
#        if merge_label[0] == merge_label[1]:
#           d1 = 0
#      else:
 #           d1 = 1

#        if merge_label[1] == merge_label[2]:
 #           d2 = 0
  #      else:
    #        d2 = 1
    
            
        # distanceを計算 0-1に値が抑えるようする
        
        #f d1 == d2:
            # d1:1, d2:1の場合しかない。
         #   distance = [0.5,0.5]
        #elif d1 > d2:
         #   distance = [1,0]
        #else:
         #   distance = [0,1]
                
        return merge_data, distance

    def next_train(self):
        while True:
            merge_input, distance = self.get_batch(self.X_train,self.y_train)
            
            self.batch1.append(merge_input[0])
            self.batch2.append(merge_input[1])                                                                        
            self.batch3.append(merge_input[2])
                                                                      
            self.output.append(distance)
            
            if len(self.output) == self.batch_size:
                input1 = np.asarray(self.batch1, dtype=np.float32).reshape(self.batch_size,
                                                                                   merge_input.shape[1],
                                                                                    merge_input.shape[2],1)
                
                input2 = np.asarray(self.batch1, dtype=np.float32).reshape(self.batch_size,
                                                                                   merge_input.shape[1],
                                                                                    merge_input.shape[2],1)
                
                input3 = np.asarray(self.batch1, dtype=np.float32).reshape(self.batch_size,
                                                                                   merge_input.shape[1],
                                                                                    merge_input.shape[2],1)
                
                inputs = [input1,input2,input3]
                                                                          
                targets = np.asarray(self.output,dtype = np.float32)
                                
                self.reset()
                
                yield (inputs,targets)

        
    def next_val(self):
        while True:
            merge_input, distance = self.get_batch(self.X_val,self.y_val)
            
            self.batch1.append(merge_input[0])
            self.batch2.append(merge_input[1])                                                                        
            self.batch3.append(merge_input[2])
                                                                      
            self.output.append(distance)
            
            if len(self.output) == self.batch_size:
                input1 = np.asarray(self.batch1, dtype=np.float32).reshape(self.batch_size,
                                                                                   merge_input.shape[1],
                                                                                    merge_input.shape[2],1)
                
                input2 = np.asarray(self.batch1, dtype=np.float32).reshape(self.batch_size,
                                                                                   merge_input.shape[1],
                                                                                    merge_input.shape[2],1)
                
                input3 = np.asarray(self.batch1, dtype=np.float32).reshape(self.batch_size,
                                                                                   merge_input.shape[1],
                                                                                    merge_input.shape[2],1)
                
                inputs = [input1,input2,input3]
                                                                          
                targets = np.asarray(self.output,dtype = np.float32)
                                
                self.reset()
                
                yield (inputs,targets)
    
    def test(self):
        ## テストも同様かな。
        pass
    
    def on_train_begin(self):
        pass
    
    def on_epoch_begin(self):
        # metric learningではカリキュラム学習やdata augmentaioは必要ないかも
        pass


In [14]:
# データの読み込み
with open("03_work/label2int.pickle",mode = "rb") as f:
    label2int = pickle.load(f)

In [15]:
# 学習データの情報を読み込み
train_info = pd.read_csv("01_input/ntt_corevo/class_train.tsv",
                  delimiter = "\t",
                  names = ("filename","label"))

# filepath変数を作成
train_info["filepath"] = "01_input/ntt_corevo/train/" + train_info["filename"] + ".wav"

In [16]:
# test dataの読み込み
test_info = pd.read_csv("03_work/test_time_distribution.csv")
test_info["filepath"] = "01_input/ntt_corevo/" + test_info["filename"] + ".wav"

In [17]:
lb = 0
ub = 500
batch_size = 2
time_len = 500
milestones = [1,2,3,4,5]

data_gen = SpectrogramGeneratorMetricLearning(lb,ub,train_info,label2int,batch_size,time_len,test_info,milestones)

In [18]:

"""
for i,v in enumerate(data_gen.next_train()):
#    print(v[1])
# metricではなく、distanceで距離を測定する必要がある?
    print(model.predict([v[0][0],v[0][1],v[0][2]]))
    print(metric.predict([v[0][0],v[0][1]]))
    if i == 100:
        break
"""
    

'\nfor i,v in enumerate(data_gen.next_train()):\n#    print(v[1])\n# metricではなく、distanceで距離を測定する必要がある?\n    print(model.predict([v[0][0],v[0][1],v[0][2]]))\n    print(metric.predict([v[0][0],v[0][1]]))\n    if i == 100:\n        break\n'

In [19]:
# ネットワーク構造の可視化
from IPython.display import SVG
from keras.utils.vis_utils import model_to_dot

def plot_keras_model(model, show_shapes=True,        show_layer_names=True):
    return SVG(model_to_dot(model, show_shapes=show_shapes,         show_layer_names=show_layer_names).create(prog='dot',format='svg'))

#plot_keras_model(model, show_shapes=True, show_layer_names=False)

In [23]:
NUM_EPOCHS = 200
BATCH_SIZE = 2
PATIENCE = 3

save_dir = "03_work/models/metric_learning/"
es = EarlyStopping(monitor = "val_loss",
                   patience = PATIENCE)

mc = ModelCheckpoint(save_dir + "metric_learning.h5",
                    monitor = "val_loss",
                    save_best_only = True,
                    verbose = 1)

# epochsの決め方が難しい。nC2*nC1　* 6??
# とりあえず、１００００

STEPS_PER_EPOCH = 10000
VAL_STEPS_PER_EPOCH = 0.1 * STEPS_PER_EPOCH 

In [21]:
# 学習
hist = model.fit_generator(data_gen.next_train(),
                          steps_per_epoch = STEPS_PER_EPOCH,
                           epochs= NUM_EPOCHS,
                           validation_data = data_gen.next_val(),
                           validation_steps =VAL_STEPS_PER_EPOCH,
                           callbacks = [mc,es],
                           verbose = 1)

Epoch 1/3

Epoch 00001: val_loss improved from inf to 0.79105, saving model to 03_work/models/metric_learning/metric_learning.h5
Epoch 2/3

Epoch 00002: val_loss improved from 0.79105 to 0.79065, saving model to 03_work/models/metric_learning/metric_learning.h5
Epoch 3/3

Epoch 00003: val_loss improved from 0.79065 to 0.79027, saving model to 03_work/models/metric_learning/metric_learning.h5


In [22]:
# 学習ログの保存
for key in hist.history.keys():
    np.savetxt(save_dir + "/logs/{0}.txt".format(key),
               hist.history[key],
               delimiter=",")