## TEMPLATE CNN - TL (Transfer Learning)

In [None]:
# Mount Google Drive at /gdrive so the project files stored there are reachable
# (the google.colab module is only available on the Colab runtime).
from google.colab import drive
drive.mount('/gdrive')

In [None]:
# Make the project folder on the mounted Drive the working directory, so the
# relative paths used below (e.g. dataset_old/genres/...) resolve against it.
%cd /gdrive/MyDrive/polimi/NAML/NAML_proj/

In [None]:
import tensorflow as tf
from PIL import Image
import numpy as np
import os
import random
import seaborn as sns
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.metrics import confusion_matrix
import librosa

# Short aliases for the Keras namespaces used throughout the notebook.
tfk = tf.keras
tfkl = tf.keras.layers

# One seed shared by every random number generator the notebook touches, so a
# fresh "Restart & Run All" starts from the same random state each time.
# TensorFlow's global seed covers the ops that are not given an explicit seed.
seed = 42
random.seed(seed)
np.random.seed(seed)
tf.random.set_seed(seed)

In [None]:
# List of (mel spectrogram in dB, integer genre label) pairs, one per audio clip.
dataset = []

# Genre folder name -> integer class label.
genres = {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}
n_genres = len(genres)

for genre, genre_number in genres.items():
    genre_dir = f'dataset_old/genres/{genre}'
    # os.listdir returns entries in arbitrary order; sorting makes the order of
    # `dataset` (which the index-based split relies on) identical on every run.
    for filename in sorted(os.listdir(genre_dir)):
        songname = f'{genre_dir}/{filename}'
        # Load as mono and keep at most the first 29.7 seconds of each clip.
        y, sr = librosa.load(songname, mono=True, duration=29.7)
        # melspectrogram already returns a *power* spectrogram (default power=2.0),
        # which is what power_to_db expects; squaring it again would double the
        # dB values and distort the top_db clipping.
        ps = librosa.feature.melspectrogram(y=y, sr=sr, hop_length=256, n_fft=512)
        ps = librosa.power_to_db(ps)
        dataset.append((ps, genre_number))
    print(f'{genre} done')

In [None]:
# Seed NumPy's global generator so the per-genre shuffles below are repeatable.
np.random.seed(seed)

# Positions 0..99 of a clip inside its genre's slice of `dataset`.
order = np.arange(start=0, stop=100, step=1)

training, validation, test = [], [], []

# `dataset` is indexed as if genre i occupies positions i*100 .. i*100 + 99.
# Each genre is shuffled independently and cut 70 / 20 / 10 into the
# training / validation / test lists.
for i in range(n_genres):
    shuffle = np.random.permutation(order)
    offset = i * 100
    training.extend(dataset[offset + idx] for idx in shuffle[:70])
    validation.extend(dataset[offset + idx] for idx in shuffle[70:90])
    test.extend(dataset[offset + idx] for idx in shuffle[90:100])

In [None]:
# Every split is a list of (spectrogram, label) pairs; unzip each one into a
# tuple of spectrograms and a tuple of integer labels.
(X_train, Y_train), (X_valid, Y_valid), (X_test, Y_test) = (
    zip(*split) for split in (training, validation, test)
)

# Give every spectrogram an explicit trailing channel axis. The reshape raises
# if a spectrogram does not hold exactly 128 * 2559 values.
image_shape = (128, 2559, 1)
X_train = np.array([np.reshape(spec, image_shape) for spec in X_train])
X_valid = np.array([np.reshape(spec, image_shape) for spec in X_valid])
X_test = np.array([np.reshape(spec, image_shape) for spec in X_test])

# One-hot encode the integer labels over n_genres classes.
Y_train = np.array(tfk.utils.to_categorical(Y_train, num_classes=n_genres))
Y_valid = np.array(tfk.utils.to_categorical(Y_valid, num_classes=n_genres))
Y_test = np.array(tfk.utils.to_categorical(Y_test, num_classes=n_genres))

In [None]:
# ResNet50 with ImageNet weights and without its classification head
# (include_top=False). Its input is declared with 3 channels.
supernet_input_shape = (128, 2559, 3)
supernet = tfk.applications.ResNet50(
    weights="imagenet",
    include_top=False,
    input_shape=supernet_input_shape,
)

supernet.summary()

In [None]:
def build_model(input_shape, supernet, n_units):
    """Build and compile a classifier that puts a small head on top of `supernet`.

    Layer stack: Input -> Conv2D (3 filters, 3x3, same padding) -> `supernet`
    -> GlobalAveragePooling2D -> Dense(32, relu) -> Dropout(0.2)
    -> Dense(n_units, softmax).

    Args:
        input_shape: Shape of one input sample, without the batch axis.
        supernet: Callable Keras model/layer applied to the 3-channel output of
            the first convolution; its output is globally average-pooled.
        n_units: Number of units of the softmax output layer.

    Returns:
        A `tfk.Model` compiled with categorical cross-entropy loss, an Adam
        optimizer with default settings, and the accuracy metric.
    """
    input_layer = tfkl.Input(shape=input_shape, name='Input')

    # Learnable 3x3 convolution that turns the input into the 3 channels that
    # are fed to `supernet`. It gets the same seeded initializer as the other
    # trainable layers so that the initial weights are reproducible.
    add_channels = tfkl.Conv2D(
        filters=3,
        kernel_size=(3, 3),
        padding='same',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
    )(input_layer)

    resnet50 = supernet(add_channels)

    # NOTE(review): the layer name contains a typo ('Gloabl'); it is left
    # unchanged because layer names are part of the saved model.
    glob_pooling = tfkl.GlobalAveragePooling2D(name='GloablPooling')(resnet50)

    classifier_layer = tfkl.Dense(
        units=32,
        activation='relu',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        name='Classifier')(glob_pooling)
    classifier_layer = tfkl.Dropout(0.2, seed=seed, name='ClassifierDropout')(classifier_layer)

    output_layer = tfkl.Dense(
        units=n_units,
        activation='softmax',
        kernel_initializer=tfk.initializers.GlorotUniform(seed),
        name='Output')(classifier_layer)

    # Connect input and output through the Model class.
    model = tfk.Model(inputs=input_layer, outputs=output_layer, name='model')

    # `metrics` is passed as a list: a bare string is not accepted by every
    # Keras version.
    model.compile(
        loss=tfk.losses.CategoricalCrossentropy(),
        optimizer=tfk.optimizers.Adam(),
        metrics=['accuracy'],
    )

    return model

In [None]:
# Shape of one network input: 128 x 2559 with a single channel.
input_shape = (128, 2559, 1)

# Freeze the pretrained backbone before it is wired into the model, so that
# its weights are not updated during training.
supernet.trainable = False

model = build_model(input_shape=input_shape, supernet=supernet, n_units=n_genres)
model.summary()

In [None]:
# Stop once val_loss has not improved for 20 epochs and restore the best weights.
early_stopping = tfk.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=20,
    restore_best_weights=True,
)

# Halve the learning rate after 5 epochs without val_loss improvement,
# with 1e-4 as the lower bound.
adaptive_LR = tfk.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.5,
    patience=5,
    min_lr=1e-4,
)

# Train for at most 500 epochs; validation data drives both callbacks.
training_callbacks = [early_stopping, adaptive_LR]
standard_history = model.fit(
    X_train,
    Y_train,
    batch_size=64,
    epochs=500,
    validation_data=(X_valid, Y_valid),
    callbacks=training_callbacks,
)

In [None]:
# Class probabilities predicted for the test set.
predictions = model.predict(X_test)

# Collapse the one-hot targets and the predicted probabilities to class ids once,
# instead of recomputing the argmax for every metric.
true_classes = np.argmax(Y_test, axis=-1)
predicted_classes = np.argmax(predictions, axis=-1)

# Compute the confusion matrix. Passing every class id explicitly keeps it
# n_genres x n_genres (and aligned with the tick labels below) even if a class
# appears in neither vector.
class_ids = list(range(n_genres))
cm = confusion_matrix(true_classes, predicted_classes, labels=class_ids)

# Compute the classification metrics (macro average = unweighted mean over classes).
accuracy = accuracy_score(true_classes, predicted_classes)
precision = precision_score(true_classes, predicted_classes, average='macro')
recall = recall_score(true_classes, predicted_classes, average='macro')
f1 = f1_score(true_classes, predicted_classes, average='macro')

# The built-in round() works for both Python floats and NumPy scalars, whereas
# the `.round` method only exists on the latter.
print('Accuracy:', round(accuracy, 4))
print('Precision:', round(precision, 4))
print('Recall:', round(recall, 4))
print('F1:', round(f1, 4))

# Plot the confusion matrix. `cm` has the true classes on its rows, so the
# transpose puts them on the x axis and the predicted classes on the y axis.
genre_names = sorted(genres, key=genres.get)  # genre names ordered by class id
plt.figure(figsize=(10, 8))
sns.heatmap(cm.T, xticklabels=genre_names, yticklabels=genre_names)
plt.xlabel('True labels')
plt.ylabel('Predicted labels')
plt.show()

In [None]:
# Destination of the trained model on the mounted Drive.
# NOTE(review): '***' looks like a template placeholder for the model name —
# confirm and replace it before running this cell.
model_path = '/gdrive/MyDrive/polimi/NAML/NAML_proj/models/***'
model.save(model_path)