In [None]:
#Packages imported for the functions used

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy
import sklearn as skl
import sys
import os
import pickle
import librosa
import librosa.display

import IPython.display as ipd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn import svm, datasets
import sklearn.model_selection as model_selection
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.ensemble import RandomForestClassifier

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import load_img
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.utils import to_categorical
from keras.preprocessing import image
from keras.callbacks import EarlyStopping
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import datetime

import utils

In [None]:
#Directory of the audio subset that the scan loop walks through
AUDIO_DIR = "fma_small"

#Read the four metadata tables, in this order, through the project helper `utils.load`
tracks, genres, features, echonest = (
    utils.load(csv_path)
    for csv_path in (
        "fma_metadata/tracks.csv",
        "fma_metadata/genres.csv",
        "fma_metadata/features.csv",
        "fma_metadata/echonest.csv",
    )
)

#Sanity checks: features and tracks must share the exact same index,
#and no echonest row may refer to an id that is absent from tracks
np.testing.assert_array_equal(features.index, tracks.index)
assert not (~echonest.index.isin(tracks.index)).any()

In [None]:
#Top-level genre id -> column of the 16-wide label row.
#Ids that are not listed here all share the last column (15).
#NOTE(review): the column order appears to follow the counters declared in the
#state cell (exp, elec, rock, ...) - confirm before relying on the names.
_TOP_GENRE_COLUMN = {
    38: 0,
    15: 1,
    12: 2,
    1235: 3,
    10: 4,
    17: 5,
    21: 6,
    2: 7,
    4: 8,
    5: 9,
    9: 10,
    20: 11,
    3: 12,
    14: 13,
    8: 14,
}
_FALLBACK_COLUMN = 15

#Function that encodes the music genre labels for each music piece
def encodeData(train = "test"):
    """Mark the genre columns of the current track's label row with 1.

    Reads the module-level names set by the scan loop:
    `genres_low` (genre ids of the current track) and `gen` (frame whose
    'genre_id' column holds a genre id and whose index label is used as the
    top-level genre id of that genre).

    The row that is updated depends on `train`:
        "training"    -> train_labels[x]
        "validation"  -> validation_labels[x_val]
        anything else -> test_labels[x_test]

    The row must already exist (see filLists). Rows are modified in place and
    nothing is returned.

    Raises:
        IndexError: if a genre id has no matching row in `gen`.
    """
    if train == "training":
        label_row = train_labels[x]
    elif train == "validation":
        label_row = validation_labels[x_val]
    else:
        label_row = test_labels[x_test]

    for genre_id in genres_low:
        #First row of `gen` that lists this genre; its index label is the top-level id
        ind = gen.loc[gen['genre_id'] == genre_id].index[0]
        label_row[_TOP_GENRE_COLUMN.get(ind, _FALLBACK_COLUMN)] = 1

In [None]:
#Function that prepares a list before Encoding it
def filLists(train = "test"):
    """Add an empty row to the label list of one split and put 16 zeros at the split counter.

    "training" uses train_labels / x, 'validation' uses validation_labels / x_val,
    and any other value uses test_labels / x_test (all module-level names).
    The zeros go into the row addressed by the counter, which is the freshly
    added row as long as the counter equals the previous length of the list.
    Nothing is returned.
    """
    if train == "training":
        rows, slot = train_labels, x
    elif train == 'validation':
        rows, slot = validation_labels, x_val
    else:
        rows, slot = test_labels, x_test

    rows.append([])
    rows[slot].extend([0] * 16)

In [None]:
#Function that extracts the different features to be used for the SVM and RF models
def feature_add(file, train = "test"):
    """Collect the 31 tabular features of one track and store them for the chosen split.

    For i = 1..12 the values of ('mfcc', 'mean', NN) and ('chroma_stft', 'mean', NN)
    are taken, and for i = 1..7 additionally ('spectral_contrast', 'std', NN),
    where NN is i zero-padded to two digits. The values are stored in that
    interleaved order (mfcc, chroma_stft, spectral_contrast per i).

    Args:
        file: id of the track; matched against the index of the module-level
            `features` frame (first matching row is used).
        train: "training" stores into features_train[x], "validation" into
            features_val[x_val], anything else into features_test[x_test].

    Raises:
        IndexError: if no row of `features` has this id.
    """
    #One scan of the index per track; every value is then read from this single row
    #instead of rebuilding a full-table lookup frame for each of the 31 values.
    track_row = features.loc[features.index == file].iloc[0]

    vector = []
    for i in range(1, 13):
        num = str(i).zfill(2)
        vector.append(track_row[('mfcc', 'mean', num)])
        vector.append(track_row[('chroma_stft', 'mean', num)])
        #Only the first seven spectral-contrast columns are used
        if i <= 7:
            vector.append(track_row[('spectral_contrast', 'std', num)])

    if train == "training":
        bucket, slot = features_train, x
    elif train == "validation":
        bucket, slot = features_val, x_val
    else:
        bucket, slot = features_test, x_test

    #Same storage pattern as the label helpers: new row first, then fill it via the split counter
    bucket.append([])
    bucket[slot].extend(vector)

In [None]:
#Sixteen zero-initialised counters (not used in the cells shown here;
#the names suggest one per genre - TODO confirm)
exp = elec = rock = instrum = pop = folk = hip = inter = 0
jazz = classic = count = spok = blue = soul = old = easy = 0

#Labels Lists
train_labels, validation_labels, test_labels = [], [], []

#Features Lists
features_train, features_val, features_test = [], [], []

#Id Lists
train_id, val_id, test_id = [], [], []
counter = 0

#Counters used for the specific sections i.e. training, validation, and testing
x = x_val = x_test = 0

#Setting the Folder to be used for the Mel-Spectrograms
#(any AUDIO_DIR other than the two listed ones maps to "full")
pic_folder = {'fma_small': "small", 'fma_medium': "medium"}.get(AUDIO_DIR, "full")

In [None]:
#Lookup frames used for every track. They do not depend on the file being
#processed, so they are built once here instead of once per audio file.
#Each frame is indexed by the value of interest and keeps the id in its only column.
splits = pd.DataFrame(tracks.index, index=tracks['set', 'split'].values)
all_genres = pd.DataFrame(tracks.index, index=tracks['track', 'genres'].values)
#`gen` is read by encodeData: index = top-level genre id, column 'genre_id' = genre id
gen = pd.DataFrame(genres.index, index = genres['top_level'].values)

#Names of the spectrogram images, listed once; set membership replaces
#a fresh directory listing for every audio file
available_pics = set(os.listdir(pic_folder))

#Loop that scans each and every folder in the directory
for folder in os.listdir(AUDIO_DIR):
    #Entries of the dataset root that are not audio folders
    if folder == "checksums" or folder == "README.txt":
        continue

    full_folder_path = os.path.join(AUDIO_DIR, folder)
    for file in os.listdir(full_folder_path):
        #Counts every directory entry, including the ones skipped below
        counter += 1
        if file == "desktop.ini":
            continue

        #Creating the image path name
        full_music_path = os.path.join(full_folder_path, file)
        track_stem = file.rsplit(".")[0]
        file_name = int(track_stem)
        pic_name = track_stem + '.png'

        #Tracks without a spectrogram image are left out of every split
        if pic_name not in available_pics:
            continue

        #Extracting the split and the genre ids of the music piece
        #(`genres_low` is read by encodeData, so it must be set before the call)
        curr_split = splits.loc[splits['track_id'] == file_name].index[0]
        genres_low = all_genres.loc[all_genres['track_id'] == file_name].index[0]

        #Updating the Training, Validation, or Testing Id and Label lists.
        #The helpers address rows through x / x_val / x_test, so each counter
        #is advanced only after all three helpers have run for the track.
        if curr_split == 'training':
            train_id.append(pic_name)
            filLists("training")
            encodeData("training")
            feature_add(file_name, "training")
            x = x + 1
        elif curr_split == 'validation':
            val_id.append(pic_name)
            filLists("validation")
            encodeData("validation")
            feature_add(file_name, "validation")
            x_val = x_val + 1
        else:
            test_id.append(pic_name)
            filLists("test")
            encodeData("test")
            feature_add(file_name, "test")
            x_test = x_test + 1
            


In [None]:
#Setting the three image dataframes
def _label_frame(image_names, label_rows):
    """Return a frame with an 'Image' column followed by 'Label 1' .. 'Label 16'.

    'Label k' holds element k-1 of every row in `label_rows`.
    """
    columns = {'Image': image_names}
    for position in range(16):
        columns['Label ' + str(position + 1)] = [row[position] for row in label_rows]
    return pd.DataFrame(columns)


data_train = _label_frame(train_id, train_labels)

data_val = _label_frame(val_id, validation_labels)

data_test = _label_frame(test_id, test_labels)


In [None]:
#Height and width passed to the generators as target_size
h,w = 369,496

#Names of the 16 label columns of data_train / data_val / data_test
label_columns = ['Label ' + str(n) for n in range(1, 17)]

#ImageDataGenerator objects to extract the images and rescale them
train_gen = ImageDataGenerator(rescale=1./255)
val_gen = ImageDataGenerator(rescale=1./255)
test_gen = ImageDataGenerator(rescale=1./255)


def _flow_in_row_order(generator, frame):
    """Flow the images listed in `frame` in batches of 32, keeping the frame's row order.

    directory: the images are read from `pic_folder`, the same folder in which
    the scan loop checked that each image exists. Reading from a different,
    hard-coded folder could silently drop rows whose image is missing there.

    shuffle: always False. All three iterators are only passed to `predict`,
    and the predictions are combined row by row with the label lists and the
    tabular features, so the sample order has to match the frame.
    NOTE(review): if one of these iterators is ever used for `fit`, create a
    separate shuffled iterator for that purpose.
    """
    return generator.flow_from_dataframe(
        dataframe=frame,
        directory=pic_folder,
        x_col='Image',
        y_col=label_columns,
        target_size=(h,w),
        class_mode='raw',
        batch_size=32,
        shuffle=False,
    )


#Flowing the images at a batch_size of 32 through the dataframes
train_data = _flow_in_row_order(train_gen, data_train)

val_data = _flow_in_row_order(val_gen, data_val)

test_data = _flow_in_row_order(test_gen, data_test)

In [None]:
#Setting the dataframes for the features
def _feature_frame(feature_rows):
    """Return a frame with columns 'Feature 1' .. 'Feature 31'.

    'Feature k' holds element k-1 of every row in `feature_rows`.
    """
    return pd.DataFrame({
        'Feature ' + str(position + 1): [row[position] for row in feature_rows]
        for position in range(31)
    })


feat_train = _feature_frame(features_train)

feat_val = _feature_frame(features_val)

feat_test = _feature_frame(features_test)


In [None]:
#Setting the labels for predictions and evaluation

#One integer per track: the position of the first 1 in its label row
#(a row without any 1 raises ValueError).
#x = training split, v = validation split, y = test split
final_x_labels = [row.index(1) for row in train_labels]

final_v_labels = [row.index(1) for row in validation_labels]

final_y_labels = [row.index(1) for row in test_labels]

In [None]:
#Using all three base models and their predictions for the meta-model
from sklearn.linear_model import LogisticRegression

#Base models: model1 is loaded from disk, model2 (SVM) and model3 (RF) are fitted
#here on the tabular training features
model1 = keras.models.load_model('models/medium_model/')
model2 = svm.SVC(kernel='poly', probability=True, degree=3, C=1)
model3 = RandomForestClassifier(n_estimators=100, random_state=42)

model2.fit(feat_train,final_x_labels)
model3.fit(feat_train,final_x_labels)

#Meta-features are taken from the validation split, not from the training split:
#model2 and model3 have already seen the training rows, so their outputs on those
#rows are optimistically biased and would teach the meta-model to over-trust them.
#val_data is created with shuffle = False, so its prediction order matches
#feat_val and final_v_labels row by row.
#predict_proba is used for the sklearn models (the SVC is built with probability=True):
#one column per class instead of a single class id treated as a number.
pred1 = model1.predict(val_data)
pred2 = model2.predict_proba(feat_val)
pred3 = model3.predict_proba(feat_val)

stacked_data = np.column_stack((pred1,pred2,pred3))

meta = LogisticRegression(solver="lbfgs", max_iter=200)
meta.fit(stacked_data, final_v_labels)

#Same feature layout for the test split (test_data is also unshuffled)
base1 = model1.predict(test_data)
base2 = model2.predict_proba(feat_test)
base3 = model3.predict_proba(feat_test)

stacked_test_data = np.column_stack((base1,base2,base3))

final_pred = meta.predict(stacked_test_data)

#final_y_labels holds the class ids of the test split
accuracy = accuracy_score(final_y_labels, final_pred)
print("Accuracy: ", accuracy)

In [None]:
#Generating a Confusion Matrix for the predictions

import matplotlib.pyplot as plt
import numpy
from sklearn import metrics

#Rows are the test-split class ids, columns the meta-model predictions
confusion_matrix = metrics.confusion_matrix(
    y_true=final_y_labels,
    y_pred=final_pred,
)

cm_display = metrics.ConfusionMatrixDisplay(confusion_matrix=confusion_matrix)
cm_display.plot()

plt.show()