In [1]:
import sys
import os
import tensorflow as tf
from tensorflow.python.client import device_lib
from keras import backend as K
from keras.layers import Activation, Add, Conv1D, Dense, Dropout, Flatten, Input, LeakyReLU
from keras.losses import binary_crossentropy, kullback_leibler_divergence
from keras.metrics import binary_accuracy, categorical_accuracy
from custom_metric import rounded_all_or_nothing_acc as RAON_accuracy
from keras.models import Model
from generator import AudioGenerator, kltls, multilabelled_labels_to_ys, multilabelled_ys_to_labels, onehot_superclass_labels_to_ys, onehot_superclass_ys_to_labels, MULTI_LABEL, ONE_HOT, TIME_SEQUENCE, LINEAR_SPECTROGRAM, LOG_SPECTROGRAM
import pickle
import numpy as np
import datetime
import math
from keras.callbacks import TensorBoard
from time_callback import Time_Callback

# Allows me to import my modules
sys.path.append('./modules')
from audio_utils import *

Using TensorFlow backend.


Attempting to read settings file...
	Read successfully!


In [2]:
# Sanity check: list the compute devices (CPU/GPU) that TensorFlow can see.
# Left as the last expression so the notebook displays the result.
device_lib.list_local_devices()

[name: "/device:CPU:0"
 device_type: "CPU"
 memory_limit: 268435456
 locality {
 }
 incarnation: 3841035028629122539, name: "/device:GPU:0"
 device_type: "GPU"
 memory_limit: 577778483
 locality {
   bus_id: 1
   links {
   }
 }
 incarnation: 2150436216077318062
 physical_device_desc: "device: 0, name: GeForce GTX 650, pci bus id: 0000:01:00.0, compute capability: 3.0"]

In [3]:
# Configure the TensorFlow session that Keras will run on.
# Only a fifth of the GPU memory is claimed by this process.
gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.2)

config = tf.ConfigProto(
    gpu_options=gpu_options,
    device_count={'CPU': 1, 'GPU': 1},  # one CPU and one GPU device
    allow_soft_placement=True,          # fall back to another device when an op cannot be placed as requested
    log_device_placement=True,          # log which device each op is assigned to
)

# Hand the configured session to the Keras backend.
session = tf.Session(config=config)
K.set_session(session)

# Loss flags: integer keys into the LOSSES lookup table
BINARY_CROSSENTROPY, KL_DIVERGENCE = 0, 1
LOSSES = {
    BINARY_CROSSENTROPY: binary_crossentropy,
    KL_DIVERGENCE: kullback_leibler_divergence,
}

# Model config (edit these to change the experiment)
problem_type = ONE_HOT
input_type = TIME_SEQUENCE
loss = BINARY_CROSSENTROPY
optimizer = "adam"

# Human-readable fragments used to name the saved model and its log directory
problem_type_str = "MultiHot" if problem_type != ONE_HOT else "OneHot"
input_type_str = {
    TIME_SEQUENCE: "(1D)",
    LINEAR_SPECTROGRAM: "(linear 2D)",
    LOG_SPECTROGRAM: "(log 2D)",
}[input_type]
loss_str = "KLD" if loss != BINARY_CROSSENTROPY else "BCE"
optimizer_str = optimizer.upper()
# Timestamp makes every run's name unique
date_time_str = datetime.datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
model_name = "FinalModel-v5-{}-{}-{}-{}_{}".format(
    input_type_str,
    problem_type_str,
    loss_str,
    optimizer_str,
    date_time_str,
)

In [4]:
# Data generators: one AudioGenerator per data set split, plus the sample count of each split in N
batch_size = 50
generators = dict.fromkeys(["training", "validation", "test"])
N = {split: 0 for split in generators}
for data_type in generators:
    # Metadata entries expose at least the "filepath" and "labels" keys read below
    sample_metadata = get_file_classes(data_type)
    filenames = [entry["filepath"] for entry in sample_metadata]
    labels = [entry["labels"] for entry in sample_metadata]
    N[data_type] = len(sample_metadata)
    generators[data_type] = AudioGenerator(
        filenames,
        labels,
        data_type,
        batch_size,
        shuffle=True,
        problem_type=problem_type,
        input_type=input_type,
    )

In [5]:
# Pull the first batch from every generator and report its input/output shapes.
# The shapes of the last generator visited are kept for building the model later on.
batch_x_shape = batch_y_shape = None
for name, gen in generators.items():
    print("Gen", name)
    batch_0 = gen[0]
    batch_x_shape, batch_y_shape = batch_0[0].shape, batch_0[1].shape
    print("In shape:", batch_x_shape, "\nOut shape:", batch_y_shape)

Gen training
In shape: (50, 12000, 1) 
Out shape: (50, 87)
Gen validation
In shape: (50, 12000, 1) 
Out shape: (50, 87)
Gen test
In shape: (50, 12000, 1) 
Out shape: (50, 87)


In [6]:
# Test whether the generators are picklable (i.e. whether they can be handed to worker processes).
# Iterate over the generator OBJECTS: looping over the dict itself only yields its string keys,
# which always pickle and would make this check pass unconditionally.
use_multiprocessing = True
for gen in generators.values():
    try:
        pickle.dumps(gen)
    except Exception:
        # Report why pickling failed and fall back to non-multiprocess loading.
        print(sys.exc_info())
        use_multiprocessing = False
        break
print("Picklable:", use_multiprocessing)

Picklable: True


## Rationale

### Model structure

3 1D causal convolution layers (each with stride 2) which reduce the size of the sample space while increasing the size of the convolution/feature space.

Creates a feature space of 32 while downscaling the sample space to 1500. Compared to the 12000-sample input, the total tensor size goes from 12000 to 48000 values (1500 × 32), i.e. an increase in data.

After the convolution layers, LeakyReLU was used for activation because ReLU has been shown to perform well, and LeakyReLU additionally lets a small gradient through for negative values, which appear in the data. Because ReLU-style activations are used, He-normal initialisation was chosen for the convolution kernels, as it performs well with ReLU.

Then 5 lots of "DilatedDropoutSkipModule", which retain the size of the sample space while extracting more features, with skip connections preserving earlier features. Each module finishes with a dropout layer to aid generalisation during training.

Flattened and passed to a fully-connected (dense layer) which reshapes the network into the output shape.

Softmax activation layer for one-hot classification.

Techniques from lit review:
- LeNet: Convolutions with `stride > 1` to downscale sample space.
- Using 1x1 convolution layers to downscale feature space, instead of pooling layers. 
- Leaky ReLU & He kernel inits.
- Dropout for generalisation during training.
- ResNet for skip connections.

In [7]:
# Variables.
NMODULES = 5  # number of stacked DilatedDropoutSkipModule blocks
# One dropout rate per module, evenly spaced from 0.12 to 0.20 (a step of 0.02 when NMODULES == 5).
# np.linspace ties the number of rates to NMODULES, so DROP_RATES[i] is valid for every module index,
# and it avoids the end-point ambiguity of np.arange with a floating-point step.
DROP_RATES = np.around(np.linspace(0.12, 0.20, NMODULES), 2).tolist()
# Output activation is picked by the configured problem type.
final_activation = {ONE_HOT: "softmax", MULTI_LABEL: "sigmoid"}[problem_type]
leaky_gradient = 0.25  # value passed to every LeakyReLU layer (its negative-side slope)

# Reusable block: 1x1 bottleneck -> two dilated causal convolutions -> skip connection -> dropout
def DilatedDropoutSkipModule(og_model, drop_rate):
    """Attach one dilated-convolution skip block to `og_model` and return its output tensor.

    The branch first squeezes the input to a single channel with a 1x1
    convolution, then passes it through two causal convolutions (kernel size 3,
    dilation rate 2) with 16 and 32 filters, followed by LeakyReLU. The branch
    output is added to the untouched input and dropout is applied to the sum.

    og_model:  tensor the block is built on. It is summed with the 32-filter
               branch output, so its shape has to be compatible with that.
    drop_rate: rate given to the closing Dropout layer.
    """
    he_init = dict(kernel_initializer='he_normal')
    branch = Conv1D(filters=1, kernel_size=1, padding="valid", dilation_rate=1, **he_init)(og_model)
    for n_filters in (16, 32):
        branch = Conv1D(filters=n_filters, kernel_size=3, padding="causal", dilation_rate=2, **he_init)(branch)
    branch = LeakyReLU(leaky_gradient)(branch)
    # Skip connection: add the block input back onto the branch output
    merged = Add()([og_model, branch])
    return Dropout(rate=drop_rate)(merged)

# Structure
# The input shape comes from the generator batches (batch dimension dropped), in the same way the
# output size below comes from batch_y_shape, so the model follows the data instead of a hard-coded size.
data = Input(shape=batch_x_shape[1:])
cnn = data
# Three strided (stride 2) causal convolutions, each followed by LeakyReLU:
# every step halves the sample axis while the filter count grows 8 -> 16 -> 32.
for n_filters, kernel in ((8, 5), (16, 3), (32, 3)):
    cnn = Conv1D(filters=n_filters, kernel_size=kernel, strides=2, padding="causal", dilation_rate=1, kernel_initializer='he_normal')(cnn)
    cnn = LeakyReLU(leaky_gradient)(cnn)
cnn = Dropout(rate=0.1)(cnn)
# Stack of skip modules, each with its own dropout rate
for i in range(NMODULES):
    cnn = DilatedDropoutSkipModule(cnn, DROP_RATES[i])
# Classifier head: flatten, project to one unit per label, then apply the problem-specific activation
cnn = Flatten()(cnn)
cnn = Dense(batch_y_shape[1], kernel_initializer='he_normal')(cnn)
cnn = Activation(final_activation)(cnn)
model = Model(inputs=data, outputs=cnn)

for layer in model.layers:
    print(layer.name, layer.output_shape)

# The label specific metric, dependent on the problem type from custom settings.
problem_metric = {ONE_HOT: categorical_accuracy, MULTI_LABEL: binary_accuracy}[problem_type]

# Every loss that is NOT being optimised is tracked as an extra metric.
# Selecting by key avoids relying on the loss flags being exactly 0 and 1.
other_losses = [loss_fn for flag, loss_fn in LOSSES.items() if flag != loss]

# Compile with custom settings defined earlier
model.compile(optimizer=optimizer, loss=LOSSES[loss], metrics=[problem_metric, RAON_accuracy] + other_losses)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
input_1 (None, 12000, 1)
conv1d_1 (None, 6000, 8)
leaky_re_lu_1 (None, 6000, 8)
conv1d_2 (None, 3000, 16)
leaky_re_lu_2 (None, 3000, 16)
conv1d_3 (None, 1500, 32)
leaky_re_lu_3 (None, 1500, 32)
dropout_1 (None, 1500, 32)
conv1d_4 (None, 1500, 1)
conv1d_5 (None, 1500, 16)
conv1d_6 (None, 1500, 32)
leaky_re_lu_4 (None, 1500, 32)
add_1 (None, 1500, 32)
dropout_2 (None, 1500, 32)
conv1d_7 (None, 1500, 1)
conv1d_8 (None, 1500, 16)
conv1d_9 (None, 1500, 32)
leaky_re_lu_5 (None, 1500, 32)
add_2 (None, 1500, 32)
dropout_3 (None, 1500, 32)
conv1d_10 (None, 1500, 1)
conv1d_11 (None, 1500, 16)
conv1d_12 (None, 1500, 32)
leaky_re_lu_6 (None, 1500, 32)
add_3 (None, 1500, 32)
dropout_4 (None, 1500, 32)
conv1d_13 (None, 1500, 1)
conv1d_14 (None, 1500, 16)
conv1d_15 (None, 1500, 32)
leaky_re_lu_7 (None, 1500, 32)
add_

In [8]:
# Training logs: the TensorBoard log and the custom time log share one directory per run
log_dir = "logs/{}".format(model_name)
tb_log = TensorBoard(log_dir=log_dir)
time_log = Time_Callback(log_dir=log_dir)

# Train model
epochs = 10
dataset_perc = 1  # multiplier applied to each data set size when computing the step counts
# Whole batches per epoch for the training and validation sets
train_steps = int(N["training"]*dataset_perc) // batch_size
val_steps = int(N["validation"]*dataset_perc) // batch_size
training_history = model.fit_generator(
    generator=generators["training"],
    steps_per_epoch=train_steps,
    validation_data=generators["validation"],
    validation_steps=val_steps,
    epochs=epochs,
    callbacks=[tb_log, time_log],
    workers=4,
    use_multiprocessing=use_multiprocessing,
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


### Evaluation of model

After 10 epochs of training and validation with the training and validation data sets, a separate evaluation run calculated the final accuracy of the model using the test data set.

In [9]:
# Evaluate model on the test generator.
# The call is the cell's last expression, so its return value is displayed as the cell output.
test_steps = int(N["test"]*dataset_perc) // batch_size
model.evaluate_generator(
    generator=generators["test"],
    steps=test_steps,
    workers=4,
    use_multiprocessing=use_multiprocessing,
)

[0.009912935102543903,
 0.8685640803896464,
 0.8514871576466622,
 0.5719704684330514]

### Some examples of predictions using test data set

In [10]:
# Trigger the test generator's epoch-end hook (presumably a reshuffle, since it was built with
# shuffle=True -- confirm in AudioGenerator), then take its first batch for the examples below.
test_gen = generators["test"]
test_gen.on_epoch_end()
batch0_test = test_gen[0]

In [11]:
# Show predictions for the first few samples of the test batch next to their true labels,
# then compute the metrics over just those samples.
n_examples = 10
trues = list(batch0_test[1][:n_examples])
# One batched forward pass; each row is the prediction vector for one sample.
preds = list(model.predict(batch0_test[0][:n_examples]))
for i, (y, pred_y) in enumerate(zip(trues, preds)):
    print("Sample", i)
    # Predictions are shown rounded to 3 decimals, and rounded to 0/1 before being decoded into labels.
    print("Actual:\n\t{},\n\t{}\nPrediction:\n\t{},\n\t{}\n".format(y, onehot_superclass_ys_to_labels(y), [round(p_y, 3) for p_y in pred_y.tolist()], onehot_superclass_ys_to_labels([int(round(p_y)) for p_y in pred_y.tolist()])))

preds_tensor = K.variable(np.array(preds))
trues_tensor = K.variable(np.array(trues))
problem_acc = K.eval(K.mean(problem_metric(trues_tensor, preds_tensor)))
# Compare the flag by value (==), as the rest of the notebook does, not by object identity.
print("{} accuracy (avg): {}".format("Categorical" if problem_type == ONE_HOT else "Binary", problem_acc))
print("RAON accuracy: ", K.eval(RAON_accuracy(trues_tensor, preds_tensor)))

Sample 0
Actual:
	[1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.],
	{'hit_label': ['beater'], 'kit_label': ['bass_drum'], 'tech_label': ['normal']}
Prediction:
	[0.096, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.001, 0.031, 0.01, 0.0, 0.001, 0.715, 0.008, 0.021, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.062, 0.0, 0.0, 0.0, 0.001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.051, 0.001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0],
	{'hit_label': ['beater', 'stick'], 'kit_label': ['bass_drum', 'mid_tom'], 'tech_label': ['normal', 'normal']}

Sample 1
Actual:
	[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 

Categorical accuracy (avg): 0.800000011920929
RAON accuracy:  0.8


In [12]:
# Save model and weights to `/models` directory
save_dir = os.path.join(os.getcwd(), "models")
# exist_ok avoids the check-then-create race and is a no-op when the directory is already there.
os.makedirs(save_dir, exist_ok=True)
# serialize architecture to JSON
model_json = model.to_json()
with open(os.path.join(save_dir, "{}.json".format(model_name)), "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights(os.path.join(save_dir, "{}.h5".format(model_name)))
print("Saved model to disk")

Saved model to disk
