In [None]:
# https://keras.io/examples/timeseries/timeseries_classification_from_scratch/
# https://keras.io/examples/timeseries/timeseries_classification_transformer/

In [1]:
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [2]:
h = pd.read_json('C:/Users/Simon/Documents/projects/MusicLabel/data/h_time_series.json')
s = pd.read_json('C:/Users/Simon/Documents/projects/MusicLabel/data/s_time_series.json')
df_raw = pd.concat([h, s], ignore_index=True)

In [4]:
# turn list of list into multiple time series list
# so that each mfcc/d1/d2 (13*3) is a column
mfcc = df_raw['mfcc']
mfcc_d1 = df_raw['mfcc_d1']
mfcc_d2 = df_raw['mfcc_d2']

df = pd.concat([pd.DataFrame.from_records(mfcc, columns=["mfcc_" + str(i) for i in range(13)]),
               pd.DataFrame.from_records(mfcc_d1, columns=["mfcc_d1_" + str(i) for i in range(13)]),
               pd.DataFrame.from_records(mfcc_d2, columns=["mfcc_d2_" + str(i) for i in range(13)]),
               df_raw['label']], axis=1)

df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 308 entries, 0 to 307
Data columns (total 40 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   mfcc_0      308 non-null    object
 1   mfcc_1      308 non-null    object
 2   mfcc_2      308 non-null    object
 3   mfcc_3      308 non-null    object
 4   mfcc_4      308 non-null    object
 5   mfcc_5      308 non-null    object
 6   mfcc_6      308 non-null    object
 7   mfcc_7      308 non-null    object
 8   mfcc_8      308 non-null    object
 9   mfcc_9      308 non-null    object
 10  mfcc_10     308 non-null    object
 11  mfcc_11     308 non-null    object
 12  mfcc_12     308 non-null    object
 13  mfcc_d1_0   308 non-null    object
 14  mfcc_d1_1   308 non-null    object
 15  mfcc_d1_2   308 non-null    object
 16  mfcc_d1_3   308 non-null    object
 17  mfcc_d1_4   308 non-null    object
 18  mfcc_d1_5   308 non-null    object
 19  mfcc_d1_6   308 non-null    object
 20  mfcc_d1_7 

In [6]:
# minimum length of music
# need to make it the same length
# experiment with cutting from the middle or from the start
# if worried about short songs gets to finish vs long - can also excerpt first minute or so
min_length = df.loc[:, 'mfcc_0'].str.len().min()

# separate out the features and labels - feature is to be cut
df_x = df.loc[:, df.columns != 'label']
df_y = df.loc[:, df.columns == 'label'].values

In [44]:
def cut_from_middle(x, min_length):
    midp = round(len(x)/2)
    startp = round(midp - (min_length/2))
    endp = round(midp + (min_length/2))
    
    return x[startp:endp]

In [75]:
# Cut the music from the middle
df_mid_x = df_x.applymap(lambda x: cut_from_middle(x, min_length))

# Verify the length - number of records not meeting the length
df_mid_x[df_mid_x['mfcc_0'].str.len() != min_length].shape[0]

0

In [204]:
# Convert the data into (nobs, each time series feature, n feature)
l = []
for col in df_mid_x.columns:
    t = df_mid_x.loc[:, col].to_numpy()
    l.append(np.concatenate(t).reshape(df_mid_x.shape[0], len(df_mid_x.iloc[0, 0])))
    
np_mid_x = np.dstack(l)
np_mid_x.shape

(308, 8512, 39)

In [253]:
# split the data into train/test
x_mid_train, x_mid_test, y_train, y_test = train_test_split(np_mid_x, df_y, test_size=0.2, random_state=363)

# random sort the data for validation
idx = np.random.permutation(len(x_mid_train))
x_mid_train = x_mid_train[idx]
y_train = y_train[idx]

# define number of labels
num_classes = len(np.unique(y_train))

In [215]:
def make_model(input_shape):
    input_layer = keras.layers.Input(input_shape)

    conv1 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(input_layer)
    conv1 = keras.layers.BatchNormalization()(conv1)
    conv1 = keras.layers.ReLU()(conv1)

    conv2 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(conv1)
    conv2 = keras.layers.BatchNormalization()(conv2)
    conv2 = keras.layers.ReLU()(conv2)

    conv3 = keras.layers.Conv1D(filters=64, kernel_size=3, padding="same")(conv2)
    conv3 = keras.layers.BatchNormalization()(conv3)
    conv3 = keras.layers.ReLU()(conv3)

    gap = keras.layers.GlobalAveragePooling1D()(conv3)

    output_layer = keras.layers.Dense(num_classes, activation="softmax")(gap)

    return keras.models.Model(inputs=input_layer, outputs=output_layer)


model = make_model(input_shape=x_train.shape[1:])

('You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) ', 'for plot_model/model_to_dot to work.')


In [254]:
epochs = 200
batch_size = 32

callbacks = [
    keras.callbacks.ModelCheckpoint(
        "cut_from_mid_conv", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001 
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
]
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
history = model.fit(
    x_mid_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/200
INFO:tensorflow:Assets written to: cut_from_mid_conv\assets
Epoch 2/200
Epoch 3/200
INFO:tensorflow:Assets written to: cut_from_mid_conv\assets
Epoch 4/200
Epoch 5/200
Epoch 6/200
INFO:tensorflow:Assets written to: cut_from_mid_conv\assets
Epoch 7/200
Epoch 8/200
INFO:tensorflow:Assets written to: cut_from_mid_conv\assets
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 00058: ea

In [255]:
model = keras.models.load_model("cut_from_mid_conv")

test_loss, test_acc = model.evaluate(x_mid_test, y_test)

print("Test accuracy", test_acc)
print("Test loss", test_loss)

Test accuracy 0.7096773982048035
Test loss 0.9743765592575073


In [7]:
# Cut from the start
df_start_x = df_x.applymap(lambda x: x[0:min_length])

# Verify the length - number of records not meeting the length
df_start_x[df_start_x['mfcc_0'].str.len() != min_length].shape[0]

0

In [8]:
# Convert the data into (nobs, each time series feature, n feature)
l = []
for col in df_start_x.columns:
    t = df_start_x.loc[:, col].to_numpy()
    l.append(np.concatenate(t).reshape(df_start_x.shape[0], len(df_start_x.iloc[0, 0])))
    
np_start_x = np.dstack(l)
np_start_x.shape

(308, 8512, 39)

In [9]:
# split the data into train/test
x_start_train, x_start_test, y_train, y_test = train_test_split(np_start_x, df_y, test_size=0.2, random_state=363)

# random sort the data for validation
idx = np.random.permutation(len(x_start_train))
x_start_train = x_start_train[idx]
y_train = y_train[idx]

# define number of labels
num_classes = len(np.unique(y_train))

In [251]:
epochs = 200
batch_size = 32

callbacks = [
    keras.callbacks.ModelCheckpoint(
        "cut_from_start_conv", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.ReduceLROnPlateau(
        monitor="val_loss", factor=0.5, patience=20, min_lr=0.0001
    ),
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=50, verbose=1),
]

model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["sparse_categorical_accuracy"],
)
history = model.fit(
    x_start_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    callbacks=callbacks,
    validation_split=0.2,
    verbose=1,
)

Epoch 1/200
INFO:tensorflow:Assets written to: cut_from_start_conv\assets
Epoch 2/200
INFO:tensorflow:Assets written to: cut_from_start_conv\assets
Epoch 3/200
Epoch 4/200
Epoch 5/200
INFO:tensorflow:Assets written to: cut_from_start_conv\assets
Epoch 6/200
Epoch 7/200
INFO:tensorflow:Assets written to: cut_from_start_conv\assets
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
INFO:tensorflow:Assets written to: cut_from_start_conv\assets
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
INFO:tensorflow:Assets written to: cut_from_start_conv\assets
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200


In [252]:
model = keras.models.load_model("cut_from_start_conv")

test_loss, test_acc = model.evaluate(x_start_test, y_test)

print("Test accuracy", test_acc)
print("Test loss", test_loss)

Test accuracy 0.8064516186714172
Test loss 0.8593730330467224


In [None]:
# from the start is a much superior choice (10% accurarcy improvement)
# try with transformer

In [10]:
def transformer_encoder(inputs, head_size, num_heads, ff_dim, dropout=0):
    # Normalization and Attention
    x = layers.LayerNormalization(epsilon=1e-6)(inputs)
    x = layers.MultiHeadAttention(
        key_dim=head_size, num_heads=num_heads, dropout=dropout
    )(x, x)
    x = layers.Dropout(dropout)(x)
    res = x + inputs

    # Feed Forward Part
    x = layers.LayerNormalization(epsilon=1e-6)(res)
    x = layers.Conv1D(filters=ff_dim, kernel_size=1, activation="relu")(x)
    x = layers.Dropout(dropout)(x)
    x = layers.Conv1D(filters=inputs.shape[-1], kernel_size=1)(x)
    return x + res

In [11]:
def build_model(
    input_shape,
    head_size,
    num_heads,
    ff_dim,
    num_transformer_blocks,
    mlp_units,
    dropout=0,
    mlp_dropout=0,
):
    inputs = keras.Input(shape=input_shape)
    x = inputs
    for _ in range(num_transformer_blocks):
        x = transformer_encoder(x, head_size, num_heads, ff_dim, dropout)

    x = layers.GlobalAveragePooling1D(data_format="channels_first")(x)
    for dim in mlp_units:
        x = layers.Dense(dim, activation="relu")(x)
        x = layers.Dropout(mlp_dropout)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    return keras.Model(inputs, outputs)

In [19]:
input_shape = x_start_train.shape[1:]

model = build_model(
    input_shape,
    head_size=16, #256
    num_heads=2, #4
    ff_dim=2, #4
    num_transformer_blocks=2, #4
    mlp_units=[8], #128
    mlp_dropout=0.4,
    dropout=0.25,
)

model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-4),
    metrics=["sparse_categorical_accuracy"],
)

callbacks = [
    keras.callbacks.ModelCheckpoint(
        "cut_from_start_trans", save_best_only=True, monitor="val_loss"
    ),
    keras.callbacks.EarlyStopping(patience=10, restore_best_weights=True)
]

model.fit(
    x_start_train,
    y_train,
    validation_split=0.2,
    epochs=200,
    batch_size=32, #64
    callbacks=callbacks,
)

Epoch 1/200


ResourceExhaustedError:  OOM when allocating tensor with shape[32,2,8512,8512] and type float on /job:localhost/replica:0/task:0/device:CPU:0 by allocator cpu
	 [[node model_5/multi_head_attention_16/einsum/Einsum (defined at <ipython-input-19-49ae9f45d730>:27) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_26773]

Function call stack:
train_function


In [None]:
# OOM w/ local machine, and I was happy w/ 80% accuracy for the moment.