**Architecture**

<img src="http://drive.google.com/uc?export=view&id=12JomC2IswVbNGdE0IIvPpUk8vPjP-MBQ"  alt="architecture">


---



(1) Import dependencies

In [1]:
import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, Flatten,\
                         Conv1D, MaxPooling1D
from keras.layers.normalization import BatchNormalization

import pandas as pd
import librosa
from tqdm import tqdm_notebook as tqdm
import os

import numpy as np
np.random.seed(1000)

import matplotlib.pyplot as plt
% matplotlib inline

Using TensorFlow backend.


(2) Get Data

In [2]:
base_data_path = '/data/private/SU/bbchip13/brains_on_beats/magnatagatune/'

### Make Y data
# The annotation CSV is read positionally: column 0 supplies the dictionary key,
# column 1 is skipped, and columns 2 onward are the label columns.
annotations_filename = base_data_path+'annotations_final_top_50_ver_1.csv'
df = pd.read_csv(annotations_filename)
y_names = df.columns[2:]

# Explicit .iloc avoids relying on integer keys falling back to positional
# access on a label-indexed Series, and casting the label columns once is
# cheaper than converting them row by row through iterrows().
# Each dictionary value is one int32 row of `tag_matrix`.
tag_matrix = df.iloc[:, 2:].values.astype(np.int32)
y_dict = dict(zip(df.iloc[:, 0], tag_matrix))
y_dict

{2: array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0], dtype=int32),
 6: array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
        0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0], dtype=int32),
 10: array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0], dtype=int32),
 11: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0], dtype=int32),
 12: array([0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0], dtype=int32),
 14: array([0, 0, 0, 0, 0, 0, 0, 0, 0

In [3]:
def crop_random_5sec(filename, crop_range=24, duration=6, sr=16000):
    """Load one randomly positioned excerpt of an audio file.

    Despite the historical name, the default excerpt length is 6 seconds
    (`duration=6`), not 5.

    Parameters
    ----------
    filename : str
        Path of the audio file, passed straight to `librosa.load`.
    crop_range : int, default 24
        Exclusive upper bound for the random start offset; the offset is an
        integer number of seconds drawn from `np.random.randint(crop_range)`.
    duration : int or float, default 6
        Length of the excerpt in seconds.
    sr : int, default 16000
        Sample rate requested from `librosa.load`.

    Returns
    -------
    numpy.ndarray
        The loaded samples with a trailing axis of length 1 inserted at
        position 1.

    Notes
    -----
    The offset comes from NumPy's global random state, so results depend on
    `np.random.seed` and on how many draws were made before this call.
    """
    offset = np.random.randint(crop_range)
    wav_crop, _ = librosa.load(filename, offset=offset, duration=duration, sr=sr)
    # Add the channel axis expected by the 1-D convolution input.
    return np.expand_dims(wav_crop, axis=1)

def load_wavs_and_crop(filenames):
    """Crop every file with `crop_random_5sec` and stack the results.

    Files are processed in the given order, with a tqdm progress bar, and the
    collected crops are returned as a single array via `np.asarray`.
    """
    crops = []
    for path in tqdm(filenames):
        crops.append(crop_random_5sec(path))
    return np.asarray(crops)
    
def find_y_by_filename(filename, y_dict):
    """Look up the label entry for an audio file.

    The file's base name without its extension is converted to `int` and used
    as the key into `y_dict`.

    Raises
    ------
    ValueError
        If the base name is not an integer literal.
    KeyError
        If the integer is not a key of `y_dict`.
    """
    stem, _extension = os.path.splitext(os.path.basename(filename))
    return y_dict[int(stem)]

def make_y_by_filenames(filenames, y_dict):
    """Collect the label entry of every file, in order, into one array.

    Each entry comes from `find_y_by_filename(filename, y_dict)`; the list of
    entries is converted with `np.asarray`.
    """
    labels = []
    for path in filenames:
        labels.append(find_y_by_filename(path, y_dict))
    return np.asarray(labels)

def make_train_data(filenames, y_dict):
    """Build an `(x, y)` pair for the given files.

    `x` is produced by `load_wavs_and_crop(filenames)` and `y` by
    `make_y_by_filenames(filenames, y_dict)`; the inputs are loaded first.
    """
    inputs = load_wavs_and_crop(filenames)
    targets = make_y_by_filenames(filenames, y_dict)
    return inputs, targets
    
def make_patch_data(wav, sr, i):
    """Cut the window that starts at second `i` and spans 6 seconds.

    The window covers samples `[i*sr, (i+6)*sr)` of `wav`; ordinary slicing
    applies, so a window reaching past the end of `wav` is simply shorter.
    A new axis of length 1 is inserted at position 1 of the result.
    """
    first_sample = i * sr
    end_sample = (i + 6) * sr
    return np.expand_dims(wav[first_sample:end_sample], axis=1)
    
def make_patch_by_filename(filename):
    """Load a file at 16 kHz and slice it into 24 windows.

    Window `k` (k = 0..23) is `make_patch_data(wav, sr, k)`, i.e. consecutive
    windows start one second apart. The list of windows is wrapped in an
    outer list before conversion, so the returned array has a leading axis of
    length 1.
    """
    samples, rate = librosa.load(filename, sr=16000)
    n_patches = 24
    patches = []
    for start_sec in range(n_patches):
        patches.append(make_patch_data(samples, rate, start_sec))
    return np.asarray([patches])

def load_test_wavs(filenames):
    """Patch every file and flatten all patches into one batch axis.

    Each file is expanded by `make_patch_by_filename` (with a tqdm progress
    bar); the stacked result is reshaped so that only its last two axes are
    kept and every leading axis is merged into the first one.
    """
    per_file = [make_patch_by_filename(path) for path in tqdm(filenames)]
    stacked = np.asarray(per_file)
    trailing_shape = stacked.shape[-2:]
    return stacked.reshape((-1,) + trailing_shape)
#    return np.concatenate(patch_list, axis=None)

def make_test_data(filenames, y_dict):
    """Build an `(x, y)` pair in which each file contributes several patches.

    `x` comes from `load_test_wavs(filenames)`. The per-file labels from
    `make_y_by_filenames` are repeated along axis 0 so that every patch of a
    file gets that file's label row.

    Parameters
    ----------
    filenames : sequence of str
        Audio file paths; must be non-empty.
    y_dict : dict
        Label lookup forwarded to `make_y_by_filenames`.

    Returns
    -------
    tuple of numpy.ndarray
        `(x_test, y_test)`.
    """
    x_test = load_test_wavs(filenames)
    # Floor division keeps the repeat count an int: `/` produces a float,
    # which np.repeat refuses to cast to an integer count.
    repeat_size = x_test.shape[0] // len(filenames)
    y_test = np.repeat(make_y_by_filenames(filenames, y_dict), repeat_size, axis=0)
    return x_test, y_test

In [None]:
base_mp3_path = base_data_path+'split_mp3/'
train_dir = base_mp3_path+'train/'
val_dir = base_mp3_path+'val/'
test_dir = base_mp3_path+'test/'

def list_mp3_files(directory):
    """Return the paths of the '.mp3' entries of `directory`, sorted by name.

    `directory` must end with a path separator because paths are built by
    plain concatenation. os.listdir returns entries in arbitrary order, so the
    listing is sorted to keep the sample order (and therefore the sequence of
    seeded random crops) the same from run to run.
    """
    return [directory+filename for filename in sorted(os.listdir(directory))
            if filename.endswith('.mp3')]

print('Make train data.......')
x_train_wav_filenames = list_mp3_files(train_dir)
x_train, y_train = make_train_data(x_train_wav_filenames, y_dict)

# Validation data goes through the same random-crop loader as training data.
print('Make validation data.......')
x_val_wav_filenames = list_mp3_files(val_dir)
x_val, y_val = make_train_data(x_val_wav_filenames, y_dict)

# Test data uses the patch-based loader instead of a single random crop.
print('Make test data.......')
x_test_wav_filenames = list_mp3_files(test_dir)
x_test, y_test = make_test_data(x_test_wav_filenames, y_dict)

print(x_train.shape, y_train.shape, x_val.shape, y_val.shape, x_test.shape, y_test.shape)

Make train data.......


HBox(children=(IntProgress(value=0, max=18709), HTML(value='')))

(3) Create a sequential model

In [None]:
### Define Parametric Softplus

def ParametricSoftplus(alpha=0.2, beta=5.0):
    """Build the activation f(x) = alpha * log(1 + exp(beta * x)).

    Parameters
    ----------
    alpha : float, default 0.2
        Output scale applied to the softplus value.
    beta : float, default 5.0
        Input scale applied before the softplus.

    Returns
    -------
    callable
        A one-argument function computing
        `alpha * keras.activations.softplus(beta * x)`.

    Notes
    -----
    A named inner function is returned instead of a lambda so the callable
    carries the descriptive `__name__` 'parametric_softplus' rather than
    '<lambda>' wherever the function name is shown or recorded.
    """
    def parametric_softplus(x):
        return alpha * keras.activations.softplus(beta * x)

    return parametric_softplus

In [None]:
def _softplus_activation():
    """Return a fresh Activation layer using the notebook's softplus settings."""
    return Activation(ParametricSoftplus(alpha=0.2, beta=0.5))

# The whole stack is declared as one ordered list; Sequential adds the layers
# in list order, so the topology matches a chain of model.add(...) calls.
model = Sequential([
    # 1st Convolutional Layer (conv1); the input shape is one training sample
    Conv1D(kernel_size=121, filters=48, strides=16, padding='same',
           input_shape=x_train.shape[1:]),
    _softplus_activation(),
    BatchNormalization(),

    # Pooling (pool1)
    MaxPooling1D(pool_size=9, strides=4, padding='same'),

    # 2nd Convolutional Layer (conv2)
    Conv1D(kernel_size=25, filters=128, padding='same'),
    _softplus_activation(),
    BatchNormalization(),

    # Pooling (pool2)
    MaxPooling1D(pool_size=9, strides=4, padding='same'),

    # 3rd Convolutional Layer (conv3)
    Conv1D(kernel_size=9, filters=192, padding='same'),
    _softplus_activation(),

    # 4th Convolutional Layer (conv4)
    Conv1D(kernel_size=9, filters=192, padding='same'),
    _softplus_activation(),

    # 5th Convolutional Layer (conv5)
    Conv1D(kernel_size=9, filters=128, padding='same'),
    _softplus_activation(),

    # Pooling (pool5)
    MaxPooling1D(pool_size=9, strides=4, padding='same'),

    # 1st Dense Layer (full6)
    Flatten(),
    Dense(4096),
    _softplus_activation(),
    Dropout(0.5),  # Drop-out value is not specified in the paper

    # 2nd Dense Layer (full7)
    Dense(4096),
    _softplus_activation(),
    Dropout(0.5),

    # Output Layer (full8): one sigmoid unit per tag
    Dense(50),
    Activation('sigmoid'),
])

model.summary()

(4) Compile 

In [None]:
# Adam hyper-parameters gathered in one place.
# NOTE(review): beta_1=0.1 is far below Adam's usual 0.9 — confirm it is intentional.
adam_settings = dict(lr=0.0002, beta_1=0.1, beta_2=0.999, epsilon=1e-8)
adam_with_params = keras.optimizers.Adam(**adam_settings)

# Sigmoid outputs are trained with binary cross-entropy.
model.compile(optimizer=adam_with_params,
              loss='binary_crossentropy',
              metrics=['accuracy'])

(5) Train

In [None]:
# validation_data is passed as the documented (x_val, y_val) tuple; a list can
# be interpreted as a list of inputs by some Keras versions.
model.fit(x_train, y_train, batch_size=36, epochs=100, validation_data=(x_val, y_val))

In [None]:
# evaluate() returns the loss followed by the compiled metrics (accuracy only).
test_scores = model.evaluate(x_test, y_test)
loss, accuracy = test_scores
print('Loss:', loss, 'Accuracy:', accuracy)

In [None]:
pred = model.predict(x_test)

# Per-tag accuracy: the share of test samples whose thresholded prediction
# matches the ground truth. Equality is used rather than `&` so that correct
# negatives count as hits too; `&` would count only true positives.
pred_labels = pred > 0.5
true_labels = y_test.astype(bool)
accu_table = pred_labels == true_labels
accu_arr = accu_table.mean(axis=0)
for tag, accu in zip(y_names, accu_arr):
    print(tag, ':', accu)

In [None]:
# Bar chart of the per-tag scores in accu_arr, one bar per tag in y_names.
plt.figure(figsize = (15, 6))
x = np.arange(accu_arr.shape[0])
plt.bar(x, accu_arr)
plt.xticks(x, y_names, rotation='vertical')
plt.xlabel('Tag')
# accu_arr is computed from predictions thresholded at 0.5, not from ROC
# curves, so the axis is labelled as accuracy rather than AUC.
plt.ylabel('Accuracy')

plt.show()

In [None]:
# Save the weights
model.save_weights('model_weights.h5')

# Save the model architecture
with open('model_architecture.json', 'w') as f:
    f.write(model.to_json())

In [None]:
# Optional: rebuild the saved model from disk (kept commented out; uncomment to use).
# NOTE(review): the Activation layers wrap a custom callable, so model_from_json
# presumably needs `custom_objects` to resolve it — confirm before relying on this.

# from keras.models import model_from_json

# # Model reconstruction from JSON file
# with open('model_architecture.json', 'r') as f:
#     model = model_from_json(f.read())

# # Load weights into the new model
# model.load_weights('model_weights.h5')