In [60]:
import sys
import tensorflow.keras.backend as K
from keras.utils import Sequence
from keras.layers import Activation, concatenate, Conv1D, Dense, Dropout, Flatten, Input, Lambda
from keras.models import Model
import pickle
import numpy as np
from time import time as timestamp
from keras.callbacks import TensorBoard

# Allows me to import my modules
sys.path.append('./modules')
from audio_utils import *

In [2]:
# Ordered "<kit_label>-<tech_label>" class names. The position of each entry is
# the index of the matching element in the vectors built by labels_to_ys.
kltls = [
    'bass_drum-normal',
    'hi_hat-normal',
    'hi_hat-open',
    'high_tom-normal',
    'ride-normal',
    'ride-bell',
    'crash-normal',
    'snare-normal',
    'low_tom-normal',
    'mid_tom-normal',
]

In [3]:
def labels_to_ys(labels):
    """Encode one sample's labels as a multi-hot vector aligned with ``kltls``.

    Args:
        labels: mapping with "hit_label", "kit_label" and "tech_label"
            sequences. The number of hits is taken from "hit_label", and the
            other two sequences are indexed in step with it.

    Returns:
        A float numpy array of length ``len(kltls)`` where element ``n`` is 1
        if any hit matches both the kit and the technique named by
        ``kltls[n]``, and 0 otherwise.
    """
    ys = np.zeros(len(kltls))
    for n, kltl in enumerate(kltls):
        kl, tl = kltl.split("-")
        # Build the full list of per-hit matches (no short-circuiting) so that
        # every hit index is still looked up, exactly as a plain loop would.
        matches = [
            kl in labels["kit_label"][hit_i] and tl in labels["tech_label"][hit_i]
            for hit_i in range(len(labels["hit_label"]))
        ]
        if any(matches):
            ys[n] = 1
    return ys

def ys_to_labels(ys, threshold=0.6):
    """Decode a score vector aligned with ``kltls`` back into label lists.

    Args:
        ys: indexable sequence of scores, one per entry of ``kltls``.
        threshold: a class is emitted only when its score is strictly greater
            than this value.

    Returns:
        A dict with "hit_label", "kit_label" and "tech_label" lists, holding
        one entry per class whose score exceeded the threshold. The hit label
        is "beater" for the bass drum and "stick" for everything else.
    """
    labels = {"hit_label": [], "kit_label": [], "tech_label": []}
    for n, kltl in enumerate(kltls):
        kl, tl = kltl.split("-")
        if not ys[n] > threshold:
            continue
        hl = "beater" if kl == "bass_drum" else "stick"
        for key, value in (("hit_label", hl), ("kit_label", kl), ("tech_label", tl)):
            labels[key].append(value)
    return labels

In [64]:
class AudioGenerator(Sequence):
    """Keras ``Sequence`` yielding batches of audio samples and multi-hot labels.

    Each batch is read from disk on demand: the files named in ``filenames``
    are loaded with ``np.loadtxt`` and the matching ``labels`` entries are
    encoded with ``labels_to_ys``.

    Args:
        filenames: sequence of paths readable by ``np.loadtxt``.
        labels: sequence of label mappings, parallel to ``filenames``.
        data_type: key into ``SETTINGS.data``; its "batch_size" entry sets the
            batch size. ``SETTINGS`` is not defined in this notebook
            (presumably it comes from the ``audio_utils`` star import).
    """

    # Number of values expected in every audio file (the model's input length).
    SAMPLE_LENGTH = 12000

    def get_ys(self, labels):
        """Encode one label mapping as a multi-hot vector."""
        return labels_to_ys(labels)

    def __init__(self, filenames, labels, data_type):
        self.filenames, self.labels = filenames, labels
        self.batch_size = SETTINGS.data[data_type]["batch_size"]

    def __len__(self):
        """Number of complete batches; a trailing partial batch is not counted."""
        return int(np.floor(len(self.filenames) / float(self.batch_size)))

    def __getitem__(self, idx):
        """Load batch ``idx`` and return ``(inputs, targets)`` as numpy arrays.

        ``inputs`` has shape ``(batch, SAMPLE_LENGTH, 1)`` and ``targets`` has
        one ``labels_to_ys`` row per sample.
        """
        start = idx * self.batch_size
        stop = start + self.batch_size
        batch_x = self.filenames[start:stop]
        batch_y = self.labels[start:stop]

        samples = np.array([np.loadtxt(file_name) for file_name in batch_x])
        # Size the batch axis from the files actually loaded instead of a
        # hard-coded 100, so any configured batch size works. The sample
        # length stays explicit so a file of the wrong size still fails loudly.
        batch_inputs = samples.reshape(len(batch_x), self.SAMPLE_LENGTH, 1)
        batch_targets = np.array([self.get_ys(sample_labels) for sample_labels in batch_y])
        return batch_inputs, batch_targets

In [65]:
# Data generators: one AudioGenerator per data split.
# get_file_classes is not defined in this notebook (presumably provided by
# audio_utils); each entry it returns is read for its "filepath" and "labels".
generators = dict.fromkeys(("training", "test"))
for data_type in list(generators):
    sample_metadata = get_file_classes(data_type)
    filenames = [entry["filepath"] for entry in sample_metadata]
    labels = [entry["labels"] for entry in sample_metadata]
    generators[data_type] = AudioGenerator(filenames, labels, data_type)

In [66]:
# Sanity check: fetch the first training batch and show its input/target shapes.
batch_0 = generators["training"][0]
batch_xs, batch_ys = batch_0[0], batch_0[1]
print("In shape:", batch_xs.shape, "\nOut shape:", batch_ys.shape)

In shape: (100, 12000, 1) 
Out shape: (100, 10)


In [6]:
# Test whether the generator objects are picklable (whether they can be multiprocessed)
use_multiprocessing = True
# Iterate over the dict's items: looping over the dict itself yields only its
# string keys, which always pickle and would make this check pass vacuously.
for gen_name, gen in generators.items():
    try:
        pickle.dumps(gen)
    except Exception:
        # Report which generator failed and why, then stop at the first failure.
        print(gen_name, sys.exc_info())
        use_multiprocessing = False
        break
print("Picklable:", use_multiprocessing)

Picklable: True


In [67]:
# Adapted from https://keras.io/layers/writing-your-own-keras-layers/
def InceptionModule(model):
    """Build an inception-style block of parallel 1D convolution towers.

    Args:
        model: the input tensor that every tower is applied to.

    Returns:
        The concatenation along axis 2 of, in order: the unmodified input
        (skip connection), a size-1 convolution tower, and size-3, size-5 and
        size-7 causal convolution towers. Each tower has 128 filters and a
        tanh activation.
    """
    # Skip connection: an identity Lambda so the raw input joins the concat
    towers = [Lambda(lambda x: x)(model)]
    # Size 1 kernel conv of the input (with tanh activation)
    towers.append(
        Conv1D(filters=128, kernel_size=1, strides=1, padding="valid", activation="tanh")(model)
    )
    # Size 1 -> size k kernel convs: a single-filter size-1 conv feeds each
    # wider causal conv (with tanh activation)
    for tower_kernel in (3, 5, 7):
        bottleneck = Conv1D(filters=1, kernel_size=1, strides=1, padding="valid")(model)
        towers.append(
            Conv1D(filters=128, kernel_size=tower_kernel, strides=1, padding="causal", activation="tanh")(bottleneck)
        )
    # Concatenate all activation images
    return concatenate(towers, axis=2)

In [68]:
# Reusable dilated convolution / inception module / dropout layer
def DilatedInceptionModuleLayer(model, drop_rate):
    """Apply a 128-filter size-1 convolution, an inception module, then dropout.

    Args:
        model: the input tensor.
        drop_rate: rate passed to the trailing ``Dropout`` layer.

    Returns:
        The output tensor of the dropout layer.
    """
    # NOTE(review): with kernel_size=1 the kernel has a single tap, so
    # dilation_rate=2 appears to have no effect here - confirm whether a wider
    # kernel was intended.
    projected = Conv1D(filters=128, kernel_size=1, padding="causal", dilation_rate=2)(model)
    inception_out = InceptionModule(projected)
    dropout = Dropout(rate=drop_rate)
    return dropout(inception_out)

# Dropout rate for each successive DilatedInceptionModuleLayer
dim_rates = [0.1, 0.15, 0.2, 0.25, 0.3]

# Structure
"""
Rationale: 

3 "CausalConvAct" convolution layers which reduce the size of the sample space while increasing the size of the convolution space.
- Providing downscaling
(Feature extraction, while preserving temporal relationships)

Then "DilatedInceptionModule" layers which retain the size of the sample space while extracting more features.

- Using Convolutions to downsample from LeNet (?)
- Dropout paper
- ResNet for skip connections
- Inception module adapted from GoogLeNet
- Causal convolutions from WaveNet
"""
data = Input(shape=(12000, 1))
# Strided causal convolutions: strides 3, 2, 2 take 12000 steps down to 1000
cnn = Conv1D(filters=32, kernel_size=7, strides=3, padding="causal", dilation_rate=1, activation="tanh")(data)
cnn = Conv1D(filters=64, kernel_size=7, strides=2, padding="causal", dilation_rate=1, activation="tanh")(cnn)
cnn = Conv1D(filters=128, kernel_size=5, strides=2, padding="causal", dilation_rate=1, activation="tanh")(cnn)
cnn = Dropout(rate=0.1)(cnn)
# Stack one dilated inception layer per dropout rate
for drop_rate in dim_rates:
    cnn = DilatedInceptionModuleLayer(cnn, drop_rate)
cnn = Flatten()(cnn)
# One sigmoid unit per class in kltls, so the output width always matches the
# vectors produced by labels_to_ys instead of relying on a hard-coded 10.
cnn = Dense(len(kltls), activation='sigmoid')(cnn)
model = Model(inputs=data, outputs=cnn)

# List every layer with its output shape as a quick architecture check
for layer in model.layers:
    print(layer.name, layer.output_shape)

# Tensorboard logs (a fresh timestamped directory per run)
tb_logs = TensorBoard(log_dir="logs/{}".format(timestamp()))

# Compile with stochastic gradient descent and mean squared error loss (same as multilabelled paper)
model.compile(optimizer="sgd", loss="mean_squared_error")

input_12 (None, 12000, 1)
conv1d_432 (None, 4000, 32)
conv1d_433 (None, 2000, 64)
conv1d_434 (None, 1000, 128)
dropout_61 (None, 1000, 128)
conv1d_435 (None, 1000, 128)
conv1d_437 (None, 1000, 1)
conv1d_439 (None, 1000, 1)
conv1d_441 (None, 1000, 1)
lambda_51 (None, 1000, 128)
conv1d_436 (None, 1000, 128)
conv1d_438 (None, 1000, 128)
conv1d_440 (None, 1000, 128)
conv1d_442 (None, 1000, 128)
concatenate_51 (None, 1000, 640)
dropout_62 (None, 1000, 640)
conv1d_443 (None, 1000, 128)
conv1d_445 (None, 1000, 1)
conv1d_447 (None, 1000, 1)
conv1d_449 (None, 1000, 1)
lambda_52 (None, 1000, 128)
conv1d_444 (None, 1000, 128)
conv1d_446 (None, 1000, 128)
conv1d_448 (None, 1000, 128)
conv1d_450 (None, 1000, 128)
concatenate_52 (None, 1000, 640)
dropout_63 (None, 1000, 640)
conv1d_451 (None, 1000, 128)
conv1d_453 (None, 1000, 1)
conv1d_455 (None, 1000, 1)
conv1d_457 (None, 1000, 1)
lambda_53 (None, 1000, 128)
conv1d_452 (None, 1000, 128)
conv1d_454 (None, 1000, 128)
conv1d_456 (None, 1000, 128)
con

In [69]:
# Train the model on the "training" generator, validating against the "test"
# generator and logging to TensorBoard. No epoch count is passed.
# Multiprocessing is currently switched off; the disabled arguments were:
#   use_multiprocessing=use_multiprocessing,
#   workers=6,
model.fit_generator(
    generator=generators["training"],
    validation_data=generators["test"],
    callbacks=[tb_logs],
)

Epoch 1/1


<keras.callbacks.History at 0x12f75922f98>

In [70]:
# Serialize the architecture to JSON (built before the file is opened, so a
# failure in to_json() leaves no empty file behind)
model_json = model.to_json()
with open("modelA.json", "w") as architecture_file:
    architecture_file.write(model_json)

# Serialize weights to HDF5
model.save_weights("modelA.h5")

print("Saved model to disk")

Saved model to disk
