In [None]:
#Packages imported for the functions used

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import sklearn as skl
import sys
import os
import pickle
import librosa
import librosa.display
import IPython.display as ipd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import load_img
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.utils import to_categorical
from keras.preprocessing import image
from keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import datetime

import utils

In [None]:
#Setting the Directory which will be scanned i.e. the subset we will be using
AUDIO_DIR = "fma_small"

#Loading the Data from the FMA Dataset
#(utils.load is the project helper module imported above; the four tables are
#read in the same order as they are named on the left-hand side)
tracks, genres, features, echonest = [
    utils.load(csv_path)
    for csv_path in (
        "fma_metadata/tracks.csv",
        "fma_metadata/genres.csv",
        "fma_metadata/features.csv",
        "fma_metadata/echonest.csv",
    )
]

#Sanity checks: features and tracks must carry exactly the same index, and
#every echonest index value has to be present in the tracks index
np.testing.assert_array_equal(features.index, tracks.index)
echonest_in_tracks = echonest.index.isin(tracks.index)
assert echonest_in_tracks.all()

#Last expression of the cell: the shape of each loaded table
tuple(table.shape for table in (tracks, genres, features, echonest))

In [None]:
#Function that encodes the music pieces into their respective labels

#Lookup table: top-level genre id -> position inside the 16-slot label row
#i.e. 38 = [0], 15 = [1] etc. Ids that are not listed use the last slot (15).
_GENRE_SLOTS = {
    38: 0,
    15: 1,
    12: 2,
    1235: 3,
    10: 4,
    17: 5,
    21: 6,
    2: 7,
    4: 8,
    5: 9,
    9: 10,
    20: 11,
    3: 12,
    14: 13,
    8: 14,
}


def encodeData(train = "test"):
    """Set the label slots of the music piece that is currently being processed.

    Works entirely on notebook globals: for every genre id in ``genres_low``
    the matching row of ``gen`` is looked up (``gen`` has a ``genre_id``
    column and is indexed by top-level genre), its index value is translated
    into a slot number and that slot is set to 1 in the current label row.

    train -- "training" writes to ``train_labels[x]``, "validation" writes to
             ``validation_labels[x_val]``, any other value (default "test")
             writes to ``test_labels[x_test]``.

    The row must already exist and hold 16 entries (see filLists). Several
    slots can end up set when the genres map to different top-level genres.
    """
    for genre_id in genres_low:
        top_level = gen.loc[gen['genre_id'] == genre_id].index[0]
        slot = _GENRE_SLOTS.get(top_level, 15)

        #The label is encoded by switching the matching slot to 1
        if train == "training":
            train_labels[x][slot] = 1
        elif train == "validation":
            validation_labels[x_val][slot] = 1
        else:
            test_labels[x_test][slot] = 1

In [None]:
#Function used to fill the Lists of the labels with 0s before they are encoded
def filLists(train = "test"):
    """Open a new label row for the current music piece and zero it.

    An empty list is appended to the label list of the chosen dataset, then
    16 zeros are added to the row found at that dataset's global counter
    (``x``, ``x_val`` or ``x_test``). When the counter equals the number of
    rows stored so far, that row is the one that was just appended.

    train -- "training" uses ``train_labels``/``x``, 'validation' uses
             ``validation_labels``/``x_val``, anything else (default "test")
             uses ``test_labels``/``x_test``.
    """
    blank_row = [0] * 16

    if train == "training":
        train_labels.append([])
        train_labels[x].extend(blank_row)
    elif train == 'validation':
        validation_labels.append([])
        validation_labels[x_val].extend(blank_row)
    else:
        test_labels.append([])
        test_labels[x_test].extend(blank_row)

In [None]:
#These are the main lists which will contain the sub-lists of labels
#(one 16-slot row per music piece, created by filLists and filled by encodeData)
train_labels = []
validation_labels = []
test_labels = []

#Lists which will hold the ids of the music pieces
train_id = []
val_id = []
test_id = []
counter = 0

#Counters for each respective dataset i.e. training, validation, and testing.
#filLists and encodeData read them as globals to find the row being filled.
x = 0
x_val = 0
x_test = 0

pic_folder = "small"

#Everything below is the same for every audio file, so it is built once here
#instead of once per file inside the loop.
available_pics = set(os.listdir(pic_folder))    #set -> constant-time membership test
split_by_track = tracks['set', 'split']         #track_id -> dataset split
genres_by_track = tracks['track', 'genres']     #track_id -> genre ids of the track
#genre_id column indexed by top-level genre; encodeData reads this global
gen = pd.DataFrame(genres.index, index = genres['top_level'].values)

#Going through each individual folder (sorted so the row order of the
#resulting dataframes does not depend on the file system)
for folder in sorted(os.listdir(AUDIO_DIR)):
    full_folder_path = os.path.join(AUDIO_DIR, folder)

    #Only the sub-directories hold audio; skip the bookkeeping entries and any other stray file
    if folder == "checksums" or folder == "README.txt" or not os.path.isdir(full_folder_path):
        continue

    for file in sorted(os.listdir(full_folder_path)):
        counter += 1
        if counter % 1000 == 0:
            #Occasional progress report instead of one printed line per file
            print(counter)

        #Audio files are named after their numeric track id; anything else
        #(desktop.ini, hidden files, ...) is not a track and is skipped
        track_stem = file.split(".")[0]
        if not track_stem.isdecimal():
            continue

        #Finding the image name that belongs to this track
        pic_name = track_stem + ".png"
        pic = int(track_stem)

        if pic_name not in available_pics:
            continue

        #A track without a metadata row cannot be labelled
        if pic not in tracks.index:
            continue

        curr_split = split_by_track.loc[pic]
        #Read as a global by encodeData
        genres_low = genres_by_track.loc[pic]

        #Filling all the respective label and id lists
        if curr_split == 'training':
            train_id.append(pic_name)
            filLists("training")
            encodeData("training")
            x = x + 1
        elif curr_split == 'validation':
            val_id.append(pic_name)
            filLists("validation")
            encodeData("validation")
            x_val = x_val + 1
        else:
            test_id.append(pic_name)
            filLists("test")
            encodeData("test")
            x_test = x_test + 1

print(counter)

#Every image id must have exactly one label row
assert len(train_id) == len(train_labels) == x
assert len(val_id) == len(validation_labels) == x_val
assert len(test_id) == len(test_labels) == x_test


def _label_frame(image_ids, label_rows):
    """Return a dataframe with an 'Image' column followed by 'Label 1' .. 'Label 16'.

    image_ids  -- image file names, one per row
    label_rows -- 16-slot label lists in the same order as image_ids
    """
    columns = {'Image': image_ids}
    for slot in range(16):
        columns['Label ' + str(slot + 1)] = [row[slot] for row in label_rows]
    return pd.DataFrame(columns)


#After all the lists are populated, the dataframes are set with all the data
data_train = _label_frame(train_id, train_labels)
data_val = _label_frame(val_id, validation_labels)
data_test = _label_frame(test_id, test_labels)



In [None]:
#Height and width every image is resized to when it is loaded
h, w = 369, 496

#Names of the 16 label columns of the dataframes: 'Label 1' ... 'Label 16'
label_columns = ['Label ' + str(number) for number in range(1, 17)]


def _flow_images(generator, frame, shuffle):
    """Create the batch iterator of one dataset.

    generator -- ImageDataGenerator the images are drawn through
    frame     -- dataframe with an 'Image' column and the 16 label columns
    shuffle   -- whether the iterator shuffles the rows

    Images are read from the 'small' directory, resized to (h, w) and served
    in batches of 32 with the label columns as targets (class_mode='raw').
    """
    return generator.flow_from_dataframe(
        dataframe=frame,
        directory='small',
        x_col='Image',
        y_col=list(label_columns),
        target_size=(h, w),
        class_mode='raw',
        batch_size=32,
        shuffle=shuffle,
    )


#ImageDataGenerator objects in order to generate the images of the dataset
#(pixel values are multiplied by 1/255)
train_gen = ImageDataGenerator(rescale=1./255)
val_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)

#Images flow through the dataframes created before with a batch size of 32;
#only the training data is shuffled
train_data = _flow_images(train_gen, data_train, shuffle=True)
val_data = _flow_images(val_gen, data_val, shuffle=False)
test_data = _flow_images(test_gen, data_test, shuffle=False)

In [None]:
#Function where the CNN model is built
def compile_model(input_shape=None, num_classes=16, output_activation='sigmoid'):
    """Build the convolutional network used for genre classification.

    Despite its name the function does not call ``model.compile``; the
    returned model still has to be compiled before it is trained.

    Architecture: five Conv2D(3x3, relu, same padding) + MaxPooling2D(4x4,
    same padding) blocks with 8, 16, 32, 64 and 64 filters, then Flatten,
    Dense(128, relu), Dense(64, relu) and the output layer.

    input_shape       -- shape of one input image; ``None`` (default) uses
                         ``(h, w, 3)`` from the notebook globals.
    num_classes       -- number of output units (default 16, one per label column).
    output_activation -- activation of the output layer. The default is
                         'sigmoid': the notebook trains with binary
                         cross-entropy and the label rows may hold more than
                         one 1, which a softmax output (forced to sum to 1)
                         cannot represent. Pass 'softmax' to get the former
                         behaviour, e.g. together with a categorical loss.

    Returns the uncompiled ``keras.Sequential`` model.
    """
    if input_shape is None:
        input_shape = (h, w, 3)

    model = keras.Sequential()

    for block_number, filters in enumerate((8, 16, 32, 64, 64)):
        conv_options = {'activation': 'relu', 'padding': 'same'}
        if block_number == 0:
            #Only the first layer needs to be told the input shape
            conv_options['input_shape'] = input_shape
        model.add(keras.layers.Conv2D(filters, (3,3), **conv_options))
        model.add(keras.layers.MaxPooling2D((4,4), padding= 'same'))

    model.add(keras.layers.Flatten())

    model.add(keras.layers.Dense(128, activation= 'relu'))
    model.add(keras.layers.Dense(64, activation= 'relu'))
    model.add(keras.layers.Dense(num_classes, activation= output_activation))

    return model

model = compile_model()
model.summary()

In [None]:
#The Logs of the Training are saved for analysis and evaluation
#(each run gets its own directory named after the time the cell was started)
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

#Model is compiled and Trained
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=["accuracy"],
)

#30 epochs of 400 batches each, validated on val_data after every epoch
hist = model.fit(
    train_data,
    steps_per_epoch=400,
    epochs=30,
    validation_data=val_data,
    callbacks=[tensorboard_callback],
)

#The model is saved for future use
model.save("models/small_model/")

In [None]:
#The model is read back from the directory written by model.save in the training cell,
#replacing whatever object the name `model` pointed to before
model = keras.models.load_model('models/small_model/')
#The Model makes its predictions using the testing data
#(test_data was created with shuffle = False; presumably the rows of pred therefore
#follow the row order of data_test - the evaluation cell relies on that)
pred = model.predict(test_data)

In [None]:
#Preparing the data for evaluation
from sklearn import metrics

#Class index of every sample = position of the first 1 in its label row
final_x_labels = [label_row.index(1) for label_row in train_labels]
final_y_labels = [label_row.index(1) for label_row in test_labels]

#Predicted class = position of the highest score (the first one on ties)
final_pred_labels = [int(np.argmax(scores)) for scores in pred]

#Display names in label-slot order (same order as the slots written by encodeData).
#NOTE(review): slot 1 was spelled "Electric"; the FMA top-level genre is "Electronic".
genre_names = ["Experimental", "Electronic", "Rock", "Instrumental", "Pop", "Folk", "Hip-Hop", "International", "Jazz", "Classical", "Country", "Spoken", "Blues", "Soul", "Old-Time", "Easy-Listening"]

#Number of leading genre_names entries the selected subset is expected to use.
#Kept in its own name: the previous code stored it in `genres`, which overwrote
#the genres table loaded at the top of the notebook and broke re-running the
#label-building cell.
num_genres = 8 if AUDIO_DIR == "fma_small" else 16

#Names of the predicted genres in order of first appearance. The scan stops
#as soon as each of the first num_genres genres has been predicted at least once.
target_names = []
seen_labels = set()
finished = False

for predicted in final_pred_labels:
    if predicted not in seen_labels:
        seen_labels.add(predicted)
        target_names.append(genre_names[predicted])
        print(genre_names[predicted])

    finished = seen_labels.issuperset(range(num_genres))
    if finished:
        break

#Generating a confusion matrix for the model predictions.
#The classes are listed explicitly so rows/columns and tick names stay aligned.
present_labels = sorted(set(final_y_labels) | set(final_pred_labels))

confusion_matrix = metrics.confusion_matrix(final_y_labels, final_pred_labels, labels=present_labels)

cm_display = metrics.ConfusionMatrixDisplay(
    confusion_matrix = confusion_matrix,
    display_labels = [genre_names[label] for label in present_labels],
)

cm_display.plot(xticks_rotation=45)
plt.show()