In [1]:
from keras.models import Model
import tensorflow as tf
from keras.layers import *

from Audio_CRNN import *
from Lyric_NN import *
from Training_Callbacks import *

Using TensorFlow backend.


In [2]:
import warnings
from keras.utils import multi_gpu_model
from keras.models import model_from_json

In [3]:
import os 

In [4]:
class DATASET:
    New_x_train = 'New_x_train.json'
    New_y_train = 'Old_y_train.json'
    New_x_test = 'New_x_test.json'
    New_y_test = 'Old_y_test.json'
    New_x_val = 'New_x_val.json'
    New_y_val = 'Old_y_val.json'

In [5]:
import json 
import codecs
import numpy as np
def read_json(filename):
    with codecs.open(filename,'r',encoding = 'utf8') as infile:
        return np.array(json.load(infile))

In [6]:
from keras import backend as K
def auc(y_true, y_pred):
    auc = tf.metrics.auc(y_true,  K.round(y_pred))[1]
    K.get_session().run(tf.local_variables_initializer())
    return auc

In [7]:
def STL(Input_Layers, Feature_Layers, _num_tags):
    if len(Feature_Layers) > 1 :
        Features = concatenate(Feature_Layers)
    else:
        Features = Feature_Layers[0]
    FC = Dense(_num_tags,kernel_regularizer=regularizers.l2(0.001), activation='sigmoid')(Features)
    return Model(inputs=Input_Layers,  outputs=FC)

In [8]:
def setModel(model_type, audio_method, lyric_method, Audio_kwargs, CRNN_kwargs, Lyric_kwargs, Lyric_CNN, Lyric_RNN, _num_Tags):
    if audio_method != '':
        audio_input, audio_feature = Audio_NN[audio_method](**Audio_kwargs,**CRNN_kwargs)
        #Audio_Model = STL([audio_input], [audio_feature], _num_Tags)
    if lyric_method != '':
        lyric_input, lyric_feature = Lyric_NN[lyric_method](**Lyric_kwargs, **Lyric_CNN, **Lyric_RNN)
        #Lyric_Model = STL([lyric_input], [lyric_feature], _num_Tags)
    
    if model_type == 'Both':
        return_Model = STL([audio_input, lyric_input],[audio_feature, lyric_feature], _num_Tags)
        exp_name = '_'.join([model_type, 'Audio', audio_method, 'Lyric', lyric_method])

    elif audio_method != '':    #audio only
        return_Model = STL([audio_input], [audio_feature], _num_Tags)
        exp_name = '_'.join([model_type, 'Audio', audio_method])
    else:
        return_Model = STL([lyric_input], [lyric_feature], _num_Tags)
        exp_name = '_'.join([model_type, 'Lyric', lyric_method])
    return return_Model, exp_name

In [9]:
def setDataGenerator(DG_Type, batchSize, Audio_Collection, Lyric_Collection):
    x_train = read_json(DATASET.New_x_train)#[:32]
    y_train = read_json(DATASET.New_y_train)#[:32]
    x_test = read_json(DATASET.New_x_test)#[:32]
    y_test = read_json(DATASET.New_y_test)#[:32]
    x_val = read_json(DATASET.New_x_val)#[:32]
    y_val = read_json(DATASET.New_y_val)#[:32]
    
    train_len = (len(x_train)//batchSize)*batchSize
    test_len = (len(x_test)//batchSize)*batchSize
    val_len = (len(x_val)//batchSize)*batchSize
    
    if DG_Type == 'Both':
        DG_train = Data_Generator[DG_Type](x_train[:train_len], y_train[:train_len], batchSize, Audio_Collection, Lyric_Collection)
        DG_test = Data_Generator[DG_Type](x_test[:test_len], y_test[:test_len], batchSize, Audio_Collection, Lyric_Collection)
        DG_val = Data_Generator[DG_Type](x_val[:val_len], y_val[:val_len], batchSize, Audio_Collection, Lyric_Collection)
    elif DG_Type == 'Audio':
        DG_train = Data_Generator[DG_Type](x_train[:train_len], y_train[:train_len], batchSize, Audio_Collection)
        DG_test = Data_Generator[DG_Type](x_test[:test_len], y_test[:test_len], batchSize, Audio_Collection)
        DG_val = Data_Generator[DG_Type](x_val[:val_len], y_val[:val_len], batchSize, Audio_Collection)
    elif DG_Type == 'Lyric':
        DG_train = Data_Generator[DG_Type](x_train[:train_len], y_train[:train_len], batchSize, Lyric_Collection)
        DG_test = Data_Generator[DG_Type](x_test[:test_len], y_test[:test_len], batchSize, Lyric_Collection)
        DG_val = Data_Generator[DG_Type](x_val[:val_len], y_val[:val_len], batchSize, Lyric_Collection)
    
    DG = {
        'train': DG_train,
        'test': DG_test,
        'val': DG_val
    }
    
    return DG

In [10]:
def setDataGenerator(DG_Type, batchSize, Audio_Collection, Lyric_Collection):
    x_train = read_json(DATASET.New_x_train)#[:32]
    y_train = read_json(DATASET.New_y_train)#[:32]
    x_test = read_json(DATASET.New_x_test)#[:32]
    y_test = read_json(DATASET.New_y_test)#[:32]
    x_val = read_json(DATASET.New_x_val)#[:32]
    y_val = read_json(DATASET.New_y_val)#[:32]
    
    train_len = (len(x_train)//batchSize)*batchSize
    test_len = (len(x_test)//batchSize)*batchSize
    val_len = (len(x_val)//batchSize)*batchSize
    
    if DG_Type == 'Both':
        DG_train = Data_Generator[DG_Type](x_train[:train_len], y_train[:train_len], batchSize, Audio_Collection, Lyric_Collection)
        DG_test = Data_Generator[DG_Type](x_test[:test_len], y_test[:test_len], batchSize, Audio_Collection, Lyric_Collection)
        DG_val = Data_Generator[DG_Type](x_val[:val_len], y_val[:val_len], batchSize, Audio_Collection, Lyric_Collection)
    elif DG_Type == 'Audio':
        DG_train = Data_Generator[DG_Type](x_train[:train_len], y_train[:train_len], batchSize, Audio_Collection)
        DG_test = Data_Generator[DG_Type](x_test[:test_len], y_test[:test_len], batchSize, Audio_Collection)
        DG_val = Data_Generator[DG_Type](x_val[:val_len], y_val[:val_len], batchSize, Audio_Collection)
    elif DG_Type == 'Lyric':
        DG_train = Data_Generator[DG_Type](x_train[:train_len], y_train[:train_len], batchSize, Lyric_Collection)
        DG_test = Data_Generator[DG_Type](x_test[:test_len], y_test[:test_len], batchSize, Lyric_Collection)
        DG_val = Data_Generator[DG_Type](x_val[:val_len], y_val[:val_len], batchSize, Lyric_Collection)
    
    DG = {
        'train': DG_train,
        'test': DG_test,
        'val': DG_val
    }
    
    return DG

In [11]:
def setCallbacks(eva_when_train, test_AUC, early_stop, timer, checkpointer, exp_name, batchSize, stopEpcho, DG_test):
    Return_Callbacks = []
    
    y_test = read_json(DATASET.New_y_test)#[:32]
    test_len = (len(y_test)//batchSize)*batchSize

    
    if eva_when_train == 1:
        Eva = AUC_Evalu(y_test[:test_len], DG_test, batchSize)
        Return_Callbacks.append(Eva)
    if test_AUC == 1:
        Test_AUC = BestAUC_callback_TF(y_test[:test_len], DG_test, batchSize)
        Return_Callbacks.append(Test_AUC)
    if early_stop == 1:
        early_stopping = EarlyStopping(monitor='AUC_test', mode='max', patience=stopEpcho, verbose=1)
        Return_Callbacks.append(early_stopping)
    if timer == 1:
        time_callback = TimeHistory()
        Return_Callbacks.append(time_callback)
    if checkpointer == 1:
        check_callback_1 = ModelCheckpoint(filepath=os.path.join(exp_name,exp_name+"_{epoch:02d}-{AUC_Best:.2f}.hdf5"), mode = 'max', monitor='AUC_Best', verbose=1, save_best_only=True, )
        check_callback_2 = ModelCheckpoint(filepath=os.path.join(exp_name,exp_name+"_{epoch:02d}-{AUC_test:.2f}.hdf5"), mode = 'max', monitor='AUC_test', verbose=1, save_best_only=True, )
    
        Return_Callbacks.append(check_callback_1)
        Return_Callbacks.append(check_callback_2)

    return Return_Callbacks

In [12]:
##read Data Base
from pymongo import MongoClient
from bson.objectid import ObjectId
uri = "mongodb://localhost:27017/database" #mongodb://<user_name>:<user_password>@ds<xxxxxx>.mlab.com:<xxxxx>/<database_name>
conn = MongoClient(uri)
db = conn.PaperData

In [13]:
db.collection_names()

  """Entry point for launching an IPython kernel.


['W2V_Re_80_25_100', 'W2V_Pre_80_25_100']

In [14]:
if __name__ == "__main__":
    Audio_kwargs = {
        "_num_mel_scale": 96,
        "_num_time_len": 1366,
        "_num_channel": 1
    }
    CRNN_kwargs = {
        "_CL_1_kernal": 169,
        "_CL_2_kernal": 339,
        "_CL_3_kernal": 339,
        "_CL_4_kernal": 339,
        "_RNN_1_kernal": 169,
        "_RNN_2_kernal": 169

    }
    Lyric_kwargs = {
        "_num_lines": 80,
        "_num_words": 25,
        "_num_WEdim": 100
    }
    Lyric_CNN = {
        "_num_LyricCNN_kernalSize": 3, # #CNN Size start from 3
        "_num_LyricCNN_kernalnum": 100 # for each size of kernal, how many different kernals 
    }
    Lyric_RNN = {
        "_num_WGRU": 100, # #CNN Size start from 3
        "_num_LGRU": 200 # for each size of kernal, how many different kernals 
    }
    Model_Setting = {
        "model_type": 'Lyric',
        "audio_method": '',
        "lyric_method": 'RNN',
        "Audio_kwargs": Audio_kwargs,
        "CRNN_kwargs": CRNN_kwargs,
        "Lyric_kwargs": Lyric_kwargs,
        "Lyric_CNN": Lyric_CNN,
        "Lyric_RNN": Lyric_RNN,
        "_num_Tags": 50
    }
    
    _epchos = 100
    _batchSize = 32
    
    Spectrogram_Collection  =  db.get_collection('MSD_1366')
    LineCNN_Collection  =  db.get_collection('Glove_80_25_100')

    model, exp_name = setModel(**Model_Setting)
    
    exp_addition_description = 'Test'
    
    exp_name = exp_name + exp_addition_description
    
    if not os.path.exists(exp_name):
        os.mkdir(exp_name)
    
    DG_Setting = {
        "DG_Type": Model_Setting['model_type'], 
        "batchSize": _batchSize,
        "Audio_Collection": db.get_collection('MSD_1366'),
        "Lyric_Collection": db.get_collection('W2V_Re_80_25_100')
    }
    DG = setDataGenerator(**DG_Setting)
    
    
    Callback_Setting = {
        "eva_when_train": 1,
        "test_AUC": 1,
        "early_stop": 1,
        "timer": 1,
        "checkpointer": 1,
        "exp_name": exp_name,
        'batchSize':_batchSize,
        "stopEpcho": 10,
        "DG_test": DG['test']
    }
    
    callbacks = setCallbacks(**Callback_Setting)

In [15]:
    x,y = DG['train'].getitem(0)

In [16]:
x.shape

(32, 80, 25, 100)

In [17]:
y.shape

(32, 50)

In [13]:
    warnings.filterwarnings('ignore')

In [14]:
    parallel_model = multi_gpu_model(model, gpus=2)
    parallel_model.compile(loss="binary_crossentropy",
                          optimizer='adam',
                          metrics=[auc])

In [15]:
    History = parallel_model.fit_generator(
                    generator=DG['train'],
                    steps_per_epoch=DG['train'].step,
                    epochs=_epchos,
                    verbose=1,
                    validation_data=DG['val'],
                    validation_steps=DG['val'].step,
                    workers=12, 
                    use_multiprocessing=True,
                    callbacks=callbacks
                    )

Epoch 1/100

Process ForkPoolWorker-9:
Process ForkPoolWorker-20:
Process ForkPoolWorker-24:
Process ForkPoolWorker-7:
Process ForkPoolWorker-10:
Process ForkPoolWorker-1:
Traceback (most recent call last):
Process ForkPoolWorker-2:
Process ForkPoolWorker-22:
Traceback (most recent call last):
Traceback (most recent call last):
Process ForkPoolWorker-6:
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Process ForkPoolWorker-8:
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
Process ForkPoolWorker-21:
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
Traceback (most recent call last):
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
Process ForkP

KeyboardInterrupt
  File "/usr/local/lib/python3.6/dist-packages/pymongo/cursor.py", line 1104, in _refresh
    self.__send_message(q)
  File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 335, in get
    res = self._reader.recv_bytes()
  File "/usr/local/lib/python3.6/dist-packages/pymongo/cursor.py", line 1104, in _refresh
    self.__send_message(q)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.6/multiprocessing/connection.py", line 216, in recv_bytes
    buf = self._recv_bytes(maxlength)
  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
Pr

  File "/usr/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
KeyboardInterrupt
  File "/root/ReformCode/Training_Callbacks.py", line 139, in __getitem__
    {"Filename":filename})['Spectrogram']).reshape((_mel_scale,_time_len,_channels))
KeyboardInterrupt
  File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/usr/local/lib/python3.6/dist-packages/pymongo/pool.py", line 745, in _raise_connection_failure
    raise error
  File "/usr/local/lib/python3.6/dist-packages/pymongo/collection.py", line 1262, in find_one
    for result in cursor.limit(-1):
  File "/usr/local/lib/python3.6/dist-packages/pymongo/pool.py", line 610, in receive_message
    self.max_message_size)
  File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/usr/lib/p

KeyboardInterrupt
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()


Traceback (most recent call last):
  File "/usr/local/lib/python3.6/dist-packages/IPython/core/interactiveshell.py", line 3267, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-15-ce2116327564>", line 10, in <module>
    callbacks=callbacks
  File "/usr/local/lib/python3.6/dist-packages/keras/legacy/interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.6/dist-packages/keras/engine/training.py", line 1418, in fit_generator
    initial_epoch=initial_epoch)
  File "/usr/local/lib/python3.6/dist-packages/keras/engine/training_generator.py", line 217, in fit_generator
    class_weight=class_weight)
  File "/usr/local/lib/python3.6/dist-packages/keras/engine/training.py", line 1217, in train_on_batch
    outputs = self.train_function(ins)
  File "/usr/local/lib/python3.6/dist-packages/keras/backend/tensorflow_backend.py", line 2715, in __call__
    return self._call(inputs)
  File "/usr/local/lib/python3.6/

  File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
  File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt

KeyboardInterrupt

  File "/usr/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/usr/lib/python3.6/multiprocessing/pool.py", line 108, in worker
    task = get()
  File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
  File "/usr/lib/python3.6/multiprocessing/queues.py", line 334, in get
    with self._rlock:
  File "/usr/lib/python3.6/multiprocessing/synchronize.py", line 95, in __enter__
    return self._semlock.__enter__()
KeyboardInterrupt
KeyboardInterrupt
Process ForkPoolWorker-34:
Process ForkPoolWorker-33:
Process ForkPoolWorker-35:
Traceback (most recent call last):
Tr

In [None]:

    
    json_string = parallel_model.to_json()
    with codecs.open(os.path.join(exp_name, exp_name+'.json'),'w', encoding='utf8') as outfile:
        json.dump(json_string,outfile)
    with codecs.open(os.path.join(exp_name, exp_name+'_History.json'),'w', encoding='utf8') as outfile:
        json.dump(History.history,outfile)