In [3]:
import matplotlib.pyplot as plt
import pandas as pd
from keras.callbacks import EarlyStopping
import seaborn as sns
from IPython.core.display_functions import display
from IPython.display import Audio
from livelossplot import PlotLossesKeras

import keras
from keras import layers
from keras.models import Sequential
from keras_preprocessing.image import ImageDataGenerator

from keras_preprocessing.image import img_to_array

import tensorflow as tf
from keras.layers import *

import os
from tqdm.notebook import tqdm
from pathlib import Path
import shutil


from music_plots import *
import numpy as np

In [4]:
# Sort the directory listing: os.listdir() returns entries in arbitrary order,
# and everything derived from it (track order, genre -> id mapping) would
# otherwise be able to change between machines and runs.
files = sorted(os.listdir('data/spectrograms'))
# `load` is provided by the star-import of music_plots.
tracks_df = load("data/tracks.csv")
genres_df = load("data/genres.csv")

In [None]:
# Extract the numeric track id from each spectrogram file name
# (e.g. "1234.png" -> 1234). Entries that are not "<digits>.png" (hidden
# files, checkpoint folders, ...) are skipped instead of crashing int().
track_ids = []
for file in tqdm(files):
    pre, ext = os.path.splitext(file)
    if ext.lower() == ".png" and pre.isdecimal():
        track_ids.append(int(pre))

# Report counts rather than dumping the whole directory listing.
print(f"Found {len(track_ids)} spectrogram images among {len(files)} directory entries")

In [None]:
# Take the ('track', 'genre_top') column and keep only the rows whose track id
# has a spectrogram on disk.
genre_top_by_track = tracks_df.xs('track', level=0, axis=1)['genre_top']
filtered_tracks = genre_top_by_track.loc[track_ids]
filtered_tracks

In [None]:
# Give every distinct top-level genre present in the filtered tracks a
# consecutive integer id, in order of first appearance.
genre_names = filtered_tracks.unique()
genres = dict(zip(genre_names, range(len(genre_names))))
tmp_x = len(genres)

print(genres)
# Show the root genres (rows whose parent id is 0) for comparison.
genres_df[genres_df['parent']==0]

In [None]:
#Split dataset into chunks
train_ds = filtered_tracks.sample(frac = 0.8)

tmp_ds = filtered_tracks.drop(train_ds.index)
validation_ds = tmp_ds.sample(frac = 0.5)
test_ds = tmp_ds.drop(validation_ds.index)

print(f"Training has {len(train_ds)}, Validation has {len(validation_ds)}, Testing has {len(test_ds)}")

In [None]:
import keras


def _spectrogram_frame(labels):
    """Build the (Filename, label) frame consumed by `flow_from_dataframe`.

    labels: Series indexed by track id whose values are genre names.

    Returns a DataFrame with the same index, a 'Filename' column pointing at
    data/spectrograms/<track id>.png and a 'label' column holding the integer
    id looked up in `genres`. The frame is built in one shot; growing it row
    by row with `.loc` re-allocates on every insert and leaves object-dtype
    columns behind.

    Raises KeyError if a genre name is missing from `genres`.
    """
    return pd.DataFrame(
        {
            'Filename': ["data/spectrograms/" + str(track_id) + ".png" for track_id in labels.index],
            'label': [genres[genre] for genre in labels],
        },
        index=labels.index,
        columns=['Filename', 'label'],
    )


tf_train_data = _spectrogram_frame(train_ds)
tf_validation_data = _spectrogram_frame(validation_ds)
tf_test_data = _spectrogram_frame(test_ds)

In [None]:
# Inspect the validation frame: one row per track id with its image path and integer genre label.
tf_validation_data

In [None]:
# Spot-check a single training row. Positional access is used because the
# split is random, so a fixed track id (such as 2) is not guaranteed to have
# landed in the training set and `.loc[2]` could raise KeyError.
tf_train_data.iloc[0]

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented.
train_generator = ImageDataGenerator(
    rescale=1 / 255.,           # normalize pixel values between 0-1
    vertical_flip=False,        # no vertical flip
    horizontal_flip=True,       # random horizontal flip
    rotation_range=0,
    height_shift_range=0.3,     # shift the height of the image by up to 30%
    brightness_range=[0.1, 0.9] # range of the random brightness factor
)

# Validation and test images are only rescaled. Random augmentation here
# would make the monitored metrics noisy and not representative of the
# images the model is actually evaluated on.
validation_generator = ImageDataGenerator(rescale=1 / 255.)
test_generator = ImageDataGenerator(rescale=1 / 255.)

BATCH_SIZE = 32
print(list(genres.keys()))

# Options shared by the three iterators. class_mode='raw' passes the integer
# labels through unchanged; target_size is (height, width).
_FLOW_KWARGS = dict(
    class_mode='raw',
    x_col='Filename',
    y_col='label',
    batch_size=BATCH_SIZE,
    target_size=(174, 484),
    seed=42,
)

# Only the training stream is shuffled.
traingen = train_generator.flow_from_dataframe(tf_train_data, shuffle=True, **_FLOW_KWARGS)

validationgen = validation_generator.flow_from_dataframe(tf_validation_data, shuffle=False, **_FLOW_KWARGS)

# The test iterator must keep a fixed order: predictions and true labels are
# read from it in separate passes and are compared row by row.
testgen = test_generator.flow_from_dataframe(tf_test_data, shuffle=False, **_FLOW_KWARGS)

In [None]:
import keras_tuner as kt

def model2hp(hp):
    """Hypermodel builder for KerasTuner: a four-stage CNN on 174x484 RGB input.

    hp: the tuner's hyperparameter container. This function registers
        'size' (side of the square resize), 'units1' (hidden dense width),
        'dropout1' (dropout rate) and 'learning_rate' (Adam step size) on it.

    Returns the compiled model (sparse categorical cross-entropy, accuracy).
    """
    md = keras.Sequential()
    md.add(keras.Input(shape=(174, 484, 3)))

    # Images are resized to a tunable square before the convolutions.
    side = hp.Int('size', min_value=128, max_value=512, step=128)
    md.add(layers.Resizing(side, side))

    # Four identical conv -> batch-norm -> max-pool stages; only the filter count varies.
    for n_filters in (64, 64, 128, 128):
        md.add(layers.Conv2D(n_filters, kernel_size=(5, 5), activation="relu"))
        md.add(layers.BatchNormalization())
        md.add(layers.MaxPooling2D(pool_size=(2, 2)))

    # Classifier head.
    md.add(layers.Flatten())
    dense_units = hp.Int('units1', min_value=32, max_value=512, step=32)
    md.add(layers.Dense(dense_units, activation='relu'))
    drop_rate = hp.Float('dropout1', min_value=0.1, max_value=0.9, step=0.1)
    md.add(layers.Dropout(drop_rate))
    md.add(layers.Dense(8, activation="softmax"))

    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    md.compile(
        optimizer=keras.optimizers.Adam(learning_rate=step_size),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return md

# Hyperband search over the `model2hp` search space, ranked by validation accuracy.
tuner = kt.Hyperband(model2hp,
                     objective='val_accuracy',
                     max_epochs=100,
                     factor=3,
                     directory='my_dir_2',
                     project_name='intro_to_kt')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
# Fit each trial on the training generator and score it on the validation
# generator. Passing the validation generator for both roles would train on
# the very data used to rank the trials.
tuner.search(traingen, validation_data=validationgen, epochs=50, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
Units: {best_hps.get('units1')}
Dropout:{best_hps.get('dropout1')}
Learning Rate: {best_hps.get('learning_rate')}
Size: {best_hps.get('size')}
""")

In [None]:
def model2(width, height, depth, classes):
    """Assemble the fixed (non-tuned) four-stage CNN classifier.

    width, height, depth: input image dimensions; the input layer is declared
        as (height, width, depth).
    classes: number of units in the final softmax layer.

    Returns an un-compiled keras.Sequential model.
    """
    stack = [
        keras.Input(shape=(height, width, depth)),
        layers.Resizing(128, 128),
    ]

    # Four conv -> batch-norm -> max-pool stages differing only in filter count.
    for n_filters in (64, 64, 128, 128):
        stack.append(layers.Conv2D(n_filters, kernel_size=(5, 5), activation="relu"))
        stack.append(layers.BatchNormalization())
        stack.append(layers.MaxPooling2D(pool_size=(2, 2)))

    # Dense head with light dropout in front of the softmax output.
    stack.extend([
        layers.Flatten(),
        layers.Dense(100, activation='relu'),
        layers.Dropout(0.2),
        layers.Dense(classes, activation="softmax"),
    ])
    return keras.Sequential(stack)
# Instantiate the CNN for 484x174 RGB spectrograms with one output per genre.
model = model2(
    width=484, height=174,
    depth=3, classes=len(genres))


model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would add a stray "None" line).
model.summary()
keras.utils.plot_model(model, show_shapes=True)

In [None]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input

In [None]:
# ImageNet-pretrained VGG16 without its dense top, wired to a 174x484 RGB
# input tensor and frozen so its weights are not trainable.
new_input = Input(shape=(174, 484, 3))
base_model = VGG16(
    include_top=False,
    weights="imagenet",
    input_tensor=new_input,
)
base_model.trainable = False

In [None]:
from keras import Model
from keras.models import Sequential

def create_model(input_shape, n_classes, fine_tune=0):
    """
    Build (without compiling) a classifier on top of ImageNet-pretrained VGG16.

    input_shape: tuple - forwarded to VGG16 as its ``input_shape``
    n_classes: int - number of units in the final softmax layer
    fine_tune: int - number of trailing VGG16 layers left trainable.
                If 0 (or negative), every VGG16 layer is frozen.

    Returns the un-compiled keras Model.
    """
    # include_top=False drops VGG16's own fully-connected layers.
    conv_base = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=input_shape)

    # Freeze everything except the last `fine_tune` layers (or everything).
    frozen_layers = conv_base.layers[:-fine_tune] if fine_tune > 0 else conv_base.layers
    for frozen in frozen_layers:
        frozen.trainable = False

    # New fully-connected head stacked on the convolutional features.
    head = Flatten(name="flatten")(conv_base.output)
    for dense_width in (4096, 1072):
        head = Dense(dense_width, activation='relu')(head)
    head = Dropout(0.2)(head)
    predictions = Dense(n_classes, activation='softmax')(head)

    return Model(inputs=conv_base.input, outputs=predictions)

# Size the output layer from the genre mapping (as the plain CNN does)
# instead of hard-coding the class count.
model_vgg = create_model((174,484,3), len(genres))

print("==============NEW MODEL===============")
# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line.
model_vgg.summary()

model_vgg.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
import keras_tuner as kt
from keras.applications.vgg16 import VGG16
from keras import Model

def create_model_kt(hp):
    """
    Hypermodel builder for KerasTuner: VGG16 base plus a tunable dense head.

    hp: the tuner's hyperparameter container. Registers 'included_layers'
        (number of trailing VGG16 layers left trainable), 'units1'/'dropout1'
        and 'units2'/'dropout2' (the two dense stages of the head) and
        'learning_rate' (Adam step size).

    Returns the compiled model (sparse categorical cross-entropy, accuracy).
    """
    # ImageNet weights, no fully-connected top, fixed 174x484 RGB input.
    conv_base = VGG16(weights='imagenet',
                      include_top=False,
                      input_shape=(174,484,3))

    # Freeze everything except the last `n_unfrozen` layers (or everything when it is 0).
    n_unfrozen = hp.Int('included_layers', min_value=0, max_value=10, step=1)
    frozen_layers = conv_base.layers[:-n_unfrozen] if n_unfrozen > 0 else conv_base.layers
    for frozen in frozen_layers:
        frozen.trainable = False

    # New fully-connected head: two dense + dropout stages, then softmax.
    head = Flatten(name="flatten")(conv_base.output)

    first_units = hp.Int('units1', min_value=100, max_value=4096, step=100)
    head = Dense(first_units, activation='relu')(head)
    first_rate = hp.Float('dropout1', min_value=0.1, max_value=0.9, step=0.1)
    head = Dropout(first_rate)(head)

    second_units = hp.Int('units2', min_value=100, max_value=1000, step=100)
    head = Dense(second_units, activation='relu')(head)
    second_rate = hp.Float('dropout2', min_value=0.1, max_value=0.9, step=0.1)
    head = Dropout(second_rate)(head)

    predictions = Dense(8, activation='softmax')(head)
    md = Model(inputs=conv_base.input, outputs=predictions)

    step_size = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    md.compile(
        optimizer=keras.optimizers.Adam(learning_rate=step_size),
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return md


# Hyperband search over the VGG16-based hypermodel, ranked by validation accuracy.
tuner = kt.Hyperband(create_model_kt,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir_vgg',
                     project_name='intro_to_kt')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
# Fit each trial on the training generator and score it on the validation
# generator; using the validation generator for both would train on the data
# used to rank the trials.
tuner.search(traingen, validation_data=validationgen, epochs=10, callbacks=[stop_early])

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

# Only hyperparameters that create_model_kt registers are reported; it has no
# 'size' entry, so asking for one would fail.
print(f"""
Included: {best_hps.get('included_layers')}
Units1: {best_hps.get('units1')}
Dropout1:{best_hps.get('dropout1')}
Units2: {best_hps.get('units2')}
Dropout2:{best_hps.get('dropout2')}
Learning Rate: {best_hps.get('learning_rate')}
""")

model_spec = tuner.hypermodel.build(best_hps)
# summary() prints the table itself and returns None.
model_spec.summary()
keras.utils.plot_model(model_spec, show_shapes=True)

In [None]:
# Define the callback in this cell so it also works on a fresh kernel run
# top to bottom, rather than relying on a definition made further down.
early_stop = EarlyStopping(monitor='val_loss',
                           patience=10,
                           restore_best_weights=True,
                           mode='min')

# The generators already yield batches of BATCH_SIZE images, so no batch_size
# argument is passed to fit().
history = model_vgg.fit(traingen, validation_data=validationgen, epochs=10, callbacks=[early_stop, PlotLossesKeras()])

In [None]:
import sklearn.metrics

# Run the VGG-based model on two hand-picked spectrograms.
sample_paths = ["dataset/Rock/87121.png", "dataset/Electronic/20667.png"]

# Load at the model's input size (height 174, width 484), the same
# target_size the data generators use, then rescale to [0, 1] as they do.
grr = [
    img_to_array(tf.keras.preprocessing.image.load_img(path, target_size=(174, 484)))
    for path in sample_paths
]
grr = np.array(grr, dtype="float") / 255.0

print(genres)

prediction = model_vgg.predict(grr)

display(Audio('data/tracks_wav//020667.wav', rate=22500))
# One row per sample image, one probability column per genre.
pd.DataFrame(prediction, columns=list(genres.keys()), index=sample_paths)

In [None]:
# Class-probability predictions for the batches yielded by the test generator.
# NOTE(review): rows only line up with labels read from `testgen` afterwards if
# the iterator keeps a fixed order — confirm it is built with shuffle=False.
pred = model_vgg.predict(testgen)


In [None]:
from sklearn.metrics import confusion_matrix
import math


# Number of batches needed to walk the whole test set once (last one may be partial).
number_of_examples = len(testgen.filenames)
number_of_generator_calls = math.ceil(number_of_examples / BATCH_SIZE)

# Collect the true labels batch by batch; element [1] of a batch is its label array.
test_labels = [
    label
    for batch_index in range(number_of_generator_calls)
    for label in np.array(testgen[batch_index][1])
]
# Rows are true labels, columns are the arg-max predicted class.
cm = confusion_matrix(test_labels, pred.argmax(axis=1))

Confusion Matrix

In [None]:
import seaborn as sns

# Annotated heat map of the confusion matrix with genre names on both axes.
f, ax = plt.subplots(figsize=(15, 15))

sns.heatmap(cm, annot=True, fmt='g', ax=ax)
ax.set(xlabel='Predicted labels', ylabel='True labels', title='Confusion Matrix')
ax.xaxis.set_label_position('top')

# Predicted-label ticks go on top, next to their axis label.
genre_names = list(genres.keys())
ax.xaxis.set_ticklabels(genre_names)
ax.xaxis.set_ticks_position('top')
ax.yaxis.set_ticklabels(genre_names);

In [None]:
def _chroma_frame(labels):
    """Build the (Filename, label) frame for the chroma images of a split.

    labels: Series indexed by track id whose values are genre names.

    Returns a DataFrame with the same index, a 'Filename' column pointing at
    data/chroma/<track id>.png and a 'label' column holding the integer id
    looked up in `genres`. Built in one shot rather than row by row, and with
    a single path format for all three splits.

    Raises KeyError if a genre name is missing from `genres`.
    """
    return pd.DataFrame(
        {
            'Filename': ["data/chroma/" + str(track_id) + ".png" for track_id in labels.index],
            'label': [genres[genre] for genre in labels],
        },
        index=labels.index,
        columns=['Filename', 'label'],
    )


tf_train_data_chroma = _chroma_frame(train_ds)
tf_validation_data_chroma = _chroma_frame(validation_ds)
tf_test_data_chroma = _chroma_frame(test_ds)

In [None]:
# Augmenting generator used for the training pairs.
generator = ImageDataGenerator(
    rescale=1 / 255.,           # normalize pixel values between 0-1
    vertical_flip=False,        # no vertical flip
    horizontal_flip=True,       # random horizontal flip
    rotation_range=0,
    height_shift_range=0.3,     # shift the height of the image by up to 30%
    brightness_range=[0.1, 0.9] # range of the random brightness factor
)

# Validation pairs are only rescaled: random augmentation would make the
# monitored validation metrics noisy.
plain_generator = ImageDataGenerator(rescale=1 / 255.)


def _ordered_flow(datagen, frame):
    """Return an unshuffled iterator over `frame` using `datagen`.

    shuffle=False keeps the spectrogram and chroma iterators of a split in the
    same row order, so batch k of one matches batch k of the other.
    target_size is (height, width).
    """
    return datagen.flow_from_dataframe(frame, class_mode='raw',
                                       x_col='Filename',
                                       y_col='label',
                                       shuffle=False,
                                       batch_size=BATCH_SIZE,
                                       target_size=(174,484),
                                       seed=42)


# X1 = spectrogram images, X2 = chroma images of the same tracks.
genX1 = _ordered_flow(generator, tf_train_data)
genX2 = _ordered_flow(generator, tf_train_data_chroma)

genX1val = _ordered_flow(plain_generator, tf_validation_data)
genX2val = _ordered_flow(plain_generator, tf_validation_data_chroma)

def format_gen_outputs(gen1,gen2):
    """Merge one batch from each of two generators into a two-input sample.

    gen1, gen2: indexable batches; element 0 is the input data and, for
        gen1, element 1 is the label array.

    Returns ([inputs_from_gen1, inputs_from_gen2], labels_from_gen1); the
    labels of gen2 are ignored.
    """
    first_inputs, labels = gen1[0], gen1[1]
    second_inputs = gen2[0]
    return [first_inputs, second_inputs], labels

# Pair the spectrogram and chroma iterators batch by batch: each item becomes
# ([spectrogram_batch, chroma_batch], labels). `map` is lazy and single-pass,
# so every consumer of these objects advances the same underlying iterators.
traingen2 = map(format_gen_outputs, genX1, genX2)
validationgen2 = map(format_gen_outputs, genX1val, genX2val)


In [None]:
from keras import Model

def create_multiinput_model(width, height, depth, classes):
    """Build an un-compiled two-input classifier with one CNN branch per image.

    width, height, depth: dimensions of each input image; both inputs are
        declared as (height, width, depth).
    classes: number of units in the final softmax layer.

    Returns a keras Model taking [inputA, inputB] and producing class probabilities.
    """
    def _branch(image_input, conv_name=None):
        # resize -> conv -> batch-norm -> max-pool -> flatten -> dense -> dropout
        t = Resizing(128, 128)(image_input)
        t = Conv2D(64, kernel_size=(5, 5), activation="relu", name=conv_name)(t)
        t = BatchNormalization()(t)
        t = MaxPooling2D(pool_size=(2, 2))(t)
        t = Flatten()(t)
        t = Dense(100)(t)
        t = Dropout(0.2)(t)
        return Model(inputs=image_input, outputs=t)

    inputA = Input(shape=(height,width,depth))
    inputB = Input(shape=(height,width,depth))

    # One branch per input; only the first branch's convolution is explicitly named.
    x = _branch(inputA, conv_name="X")
    y = _branch(inputB)

    # Join the branch outputs, then a dense layer and the softmax classifier.
    merged = concatenate([x.output, y.output])
    z = Dense(100)(merged)
    z = Dense(classes, activation="softmax")(z)

    return Model(inputs=[x.input, y.input], outputs=z)

# Size the output layer from the genre mapping (as the plain CNN does)
# instead of hard-coding the class count.
multi_model = create_multiinput_model(484, 174, 3, len(genres))

multi_model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# summary() prints the table itself and returns None; wrapping it in print()
# would add a stray "None" line.
multi_model.summary()
keras.utils.plot_model(multi_model, show_shapes=True)

In [None]:
import keras_tuner as kt
from keras.applications.vgg16 import VGG16
from keras import Model

def create_multi_model_kt(hp):
    """Hypermodel builder for KerasTuner: tunable two-input CNN classifier.

    hp: the tuner's hyperparameter container. Registers 'size' (side of the
        square resize shared by both branches), 'units_x'/'dropout_x' and
        'units_y'/'dropout_y' (dense width and dropout rate of each branch),
        'units_z' (dense width after concatenation) and 'learning_rate'.

    Returns the compiled model (sparse categorical cross-entropy, accuracy).
    """
    # Inputs are (height, width, channels). The paired generators are created
    # with target_size=(174, 484), so the batches they yield are 174x484x3;
    # declaring (484, 174, 3) here would not match them.
    inputA = Input(shape=(174, 484, 3))
    inputB = Input(shape=(174, 484, 3))

    hp_size = hp.Int('size', min_value=128, max_value=512, step=128)

    # the first branch operates on the first input
    x = Resizing(hp_size, hp_size)(inputA)

    x = Conv2D(64, kernel_size=(5, 5), activation="relu", name="X")(x)
    x = BatchNormalization()(x)
    x = MaxPooling2D(pool_size=(2, 2))(x)

    x = Flatten()(x)
    hp_units_x= hp.Int('units_x', min_value=10, max_value=100, step=10)
    x = Dense(hp_units_x)(x)

    hp_dropout_x = hp.Float('dropout_x', min_value=0.1, max_value=0.9, step=0.1)
    x = Dropout(hp_dropout_x)(x)

    x = Model(inputs=inputA, outputs=x)
    # the second branch operates on the second input
    y = Resizing(hp_size, hp_size)(inputB)

    y = Conv2D(64, kernel_size=(5, 5), activation="relu")(y)
    y = BatchNormalization()(y)
    y = MaxPooling2D(pool_size=(2, 2))(y)

    y = Flatten()(y)
    hp_units_y= hp.Int('units_y', min_value=10, max_value=100, step=10)
    y = Dense(hp_units_y)(y)
    hp_dropout_y = hp.Float('dropout_y', min_value=0.1, max_value=0.9, step=0.1)
    y = Dropout(hp_dropout_y)(y)
    y = Model(inputs=inputB, outputs=y)
    # combine the output of the two branches
    combined = concatenate([x.output, y.output])
    # apply a fully-connected layer and then the softmax classifier on the
    # combined outputs
    hp_units_z= hp.Int('units_z', min_value=10, max_value=100, step=10)
    z = Dense(hp_units_z)(combined)
    z = Dense(8, activation="softmax")(z)
    # the model accepts the inputs of the two branches and outputs one
    # probability per class
    md = Model(inputs=[x.input, y.input], outputs=z)

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    md.compile(optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
                loss='sparse_categorical_crossentropy',
                metrics=['accuracy'])

    return md


# Hyperband search over the two-input hypermodel, ranked by validation accuracy.
tuner = kt.Hyperband(create_multi_model_kt,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir_multi',
                     project_name='intro_to_kt')

stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5)
# Fit each trial on the paired training stream (steps_per_epoch is already
# sized from the training frame) and score it on the paired validation
# stream. Feeding the validation stream to both roles would train on the data
# used to rank the trials and drain one shared iterator from two sides.
tuner.search(traingen2, validation_data=validationgen2, epochs=10, callbacks=[stop_early], steps_per_epoch=len(tf_train_data)//BATCH_SIZE, validation_steps=len(tf_validation_data)//BATCH_SIZE)

# Get the optimal hyperparameters
best_hps=tuner.get_best_hyperparameters(num_trials=1)[0]

print(f"""
UnitsX: {best_hps.get('units_x')}
DropoutX:{best_hps.get('dropout_x')}
UnitsY: {best_hps.get('units_y')}
DropoutY:{best_hps.get('dropout_y')}
UnitsZ: {best_hps.get('units_z')}
Learning Rate: {best_hps.get('learning_rate')}
Size: {best_hps.get('size')}
""")

multi_model_spec = tuner.hypermodel.build(best_hps)
# summary() prints the table itself and returns None.
multi_model_spec.summary()
keras.utils.plot_model(multi_model_spec, show_shapes=True)

In [None]:
# Pull one ([spectrogram_batch, chroma_batch], labels) item to inspect it.
# This consumes a batch from the shared validation stream.
next(validationgen2)

In [None]:
# Stop when validation loss has not improved for 10 epochs and roll back to
# the best weights seen.
early_stop = EarlyStopping(
    mode='min',
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)

# The paired streams have no length of their own, so the number of batches
# per epoch is stated explicitly (whole batches only).
train_steps = len(tf_train_data) // BATCH_SIZE
val_steps = len(tf_validation_data) // BATCH_SIZE

history = multi_model.fit(
    traingen2,
    validation_data=validationgen2,
    epochs=10,
    steps_per_epoch=train_steps,
    validation_steps=val_steps,
    callbacks=[early_stop, PlotLossesKeras()],
)

In [None]:
# Evaluate over one pass of whole validation batches, using the same step
# count as training, instead of a fixed number unrelated to the set size.
multi_model.evaluate(validationgen2, steps=len(tf_validation_data) // BATCH_SIZE)