# Modelisation - Second Phase

In this notebook, we're going to try different classification methods in order to select the one to be used at the end.

In [1]:
## Importing data

# Import libraries

import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd
import os
import statsmodels.api
from  sklearn.model_selection  import train_test_split
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
%matplotlib inline

sns.set()

# Extraction of a dataset with more features

The measures used for the features are extracted 12 times during the 30 seconds of each sound file. Previously these 12 measures were summarized by their mean only.
We want to see if a supplement of information provided by some additional features such as the standard deviation and median of
the variables could enhance the accuracy of our predictions. 

In [None]:
import librosa
import csv
# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# generating a dataset
header = 'filename tempo chroma_stft_mean chroma_stft_std chroma_stft_median rmse_mean rmse_std rmse_median \
          spectral_centroid_mean spectral_centroid_std spectral_centroid_median spectral_bandwidth_mean \
          spectral_bandwidth_std spectral_bandwidth_median rolloff_mean rolloff_std rolloff_median zero_crossing_rate_mean \
          zero_crossing_rate_std zero_crossing_rate_median'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

file = open('dataB.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    for filename in os.listdir(f'./genres/{g}'):
        songname = f'./genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        rmse = librosa.feature.rms(y=y)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        to_append = f'{filename} {tempo} {np.mean(chroma_stft)} {np.std(chroma_stft)} {np.median(chroma_stft)} \
                      {np.mean(rmse)} {np.std(rmse)} {np.median(rmse)} {np.mean(spec_cent)} {np.std(spec_cent)} \
                      {np.median(spec_cent)} {np.mean(spec_bw)} {np.std(spec_bw)} {np.median(spec_bw)} \
                      {np.mean(rolloff)} {np.std(rolloff)} {np.median(rolloff)} {np.mean(zcr)} {np.std(zcr)} \
                      {np.median(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('dataB.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())
            

In [2]:
# Reading data
music_df = pd.read_csv("dataB.csv")

In [4]:
# Removing the filename column
print("The filename column is not useful for the machine learning algorithm. So we decide to remove it.")
music_df = music_df.drop(["filename"], axis = 1)

The filename column is not useful for the machine learning algorithm. So we decide to remove it.


In [5]:
target = music_df.label
data = music_df.drop('label',axis=1)

## Preparing the features

### Suppressing the highly correlated features

In [6]:
# Function to remove highly correlated columns

def correlation(dataset, threshold):
    col_corr = set() # Set of all the names of deleted columns
    corr_matrix = dataset.corr()
    for i in range(len(corr_matrix.columns)):
        for j in range(i):
            if (corr_matrix.iloc[i, j] >= threshold) and (corr_matrix.columns[j] not in col_corr):
                colname = corr_matrix.columns[i] # getting the name of column
                col_corr.add(colname)
                if colname in dataset.columns:
                    del dataset[colname] # deleting the column from the dataset

In [7]:
# Removing the columns 

correlation(data, 0.85)
print(data.columns)

Index(['tempo', 'chroma_stft_mean', 'chroma_stft_std', 'rmse_mean', 'rmse_std',
       'spectral_centroid_mean', 'spectral_centroid_std',
       'spectral_bandwidth_std', 'zero_crossing_rate_std',
       'zero_crossing_rate_median', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4',
       'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11',
       'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18',
       'mfcc19', 'mfcc20'],
      dtype='object')


Although the distributions of many variables are not totally symetric, we can see that the median doesn't bring much additional information to the mean. So the suppression of highly correlated features only keeps one of them for each feature.

There are no missing values in our data set, so no other treatment is needed.

## Preparing the target column

In [8]:
# Encode the label classes using LabelEncoder
encoder = preprocessing.LabelEncoder()
target = encoder.fit_transform(target)

## Separation between train, validation and test sets

In [9]:
# Split for the train and test datasets
X_train, X_test, y_train, y_test = train_test_split (data, target,
                                                    test_size = 0.2 ,
                                                    random_state = 42 ,
                                                    stratify = target)

In [10]:
# Scale the X data using StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Trying the models with the new augmented dataset

## Logistic regression

In [15]:
from sklearn.linear_model import LogisticRegression
from sklearn import model_selection

# Creation of the clasifier lr  (LogisticRegression)
lr = LogisticRegression(max_iter = 1000, multi_class='auto')

# Parameters on which to train the lr model
parametres_lr = {'C' : [10**i for i in range(-2,5,1)],
                 'solver' : ['newton-cg', 'lbfgs', 'liblinear']}

# Creation of the grid to train the lr model
grid_lr = model_selection.GridSearchCV(lr, param_grid=parametres_lr,cv=10)

In [16]:
# Training of the grid_lr on X_train and y_train
grille = grid_lr.fit(X_train,y_train)



In [17]:
# Prediction using lr on X_test
y_pred_lr = grid_lr.predict(X_test)
pd.crosstab(y_test, y_pred_lr, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,0,1,2,3,4,5,6,7,8,9
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,13,0,1,1,0,1,1,0,2,1
1,0,19,0,0,0,1,0,0,0,0
2,2,0,13,0,0,2,0,0,1,2
3,0,1,0,10,4,0,0,2,1,2
4,0,0,0,0,14,1,1,1,3,0
5,2,1,0,1,0,16,0,0,0,0
6,1,0,0,0,2,0,17,0,0,0
7,0,0,0,2,0,0,0,16,2,0
8,0,0,2,0,1,0,0,1,16,0
9,1,0,7,4,0,0,0,0,3,5


In [18]:
print(classification_report(y_test,y_pred_lr))

              precision    recall  f1-score   support

           0       0.68      0.65      0.67        20
           1       0.90      0.95      0.93        20
           2       0.57      0.65      0.60        20
           3       0.56      0.50      0.53        20
           4       0.67      0.70      0.68        20
           5       0.76      0.80      0.78        20
           6       0.89      0.85      0.87        20
           7       0.80      0.80      0.80        20
           8       0.57      0.80      0.67        20
           9       0.50      0.25      0.33        20

    accuracy                           0.69       200
   macro avg       0.69      0.70      0.69       200
weighted avg       0.69      0.69      0.69       200



As there is a uniform distribution across the ten different classes, we'll base our evaluation of the models mostly on
the accuracy, and on the f1-score.

In [19]:
grid_lr.best_params_

{'C': 1, 'solver': 'newton-cg'}

## Decision Tree

In [19]:
from sklearn.tree import DecisionTreeClassifier
# Creation of the clasifier dt (DecisionTreeClassifier)
dt = DecisionTreeClassifier()
# Fit of dt on (X_train,y_train)
dt.fit(X_train, y_train)
# Prediction using dt on X_test
y_pred_dt = dt.predict(X_test)
pd.crosstab(y_test, y_pred_dt, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,0,1,2,3,4,5,6,7,8,9
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,11,1,0,1,1,0,2,0,2,2
1,1,12,2,0,0,5,0,0,0,0
2,2,1,5,0,1,3,0,3,3,2
3,2,0,1,6,0,0,0,3,4,4
4,0,0,0,1,7,2,1,3,4,2
5,2,1,4,3,0,9,0,0,1,0
6,0,0,0,0,4,0,14,0,1,1
7,1,0,1,1,3,1,0,11,0,2
8,2,0,2,1,3,1,0,1,6,4
9,4,0,0,3,2,2,0,3,1,5


In [21]:
print(classification_report(y_test,y_pred_dt))

              precision    recall  f1-score   support

           0       0.44      0.55      0.49        20
           1       0.80      0.60      0.69        20
           2       0.33      0.25      0.29        20
           3       0.38      0.30      0.33        20
           4       0.33      0.35      0.34        20
           5       0.39      0.45      0.42        20
           6       0.82      0.70      0.76        20
           7       0.46      0.55      0.50        20
           8       0.27      0.30      0.29        20
           9       0.23      0.25      0.24        20

    accuracy                           0.43       200
   macro avg       0.45      0.43      0.43       200
weighted avg       0.45      0.43      0.43       200



## SVM

In [22]:
from sklearn import svm

# Creation of a SVM classifier
clf = svm.SVC()

# Parameters to test
parametres = {'C' : [0.1,1,10], 'kernel':['rbf', 'linear','poly'],'gamma' : [0.001, 0.1, 0.5]}

# Creation of the grid to test the SVM model
grid_clf = model_selection.GridSearchCV(clf, param_grid=parametres,cv=5)

In [23]:
# Training of the grid_svm on X_train and y_train
grille_svm = grid_clf.fit(X_train,y_train)

In [24]:
# Prediction using grid_clf on X_test
y_pred_svm = grid_clf.predict(X_test)
pd.crosstab(y_test, y_pred_svm, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,0,1,2,3,4,5,6,7,8,9
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,14,0,0,0,2,1,2,0,0,1
1,1,17,1,0,0,1,0,0,0,0
2,0,0,14,2,0,0,0,0,0,4
3,0,0,0,12,1,0,1,0,0,6
4,0,0,1,1,14,0,0,1,3,0
5,0,2,3,1,0,13,0,0,1,0
6,1,0,0,1,1,0,17,0,0,0
7,0,0,2,3,0,0,0,15,0,0
8,0,0,1,2,4,1,0,1,11,0
9,0,0,1,7,0,2,0,0,1,9


In [25]:
print(classification_report(y_test,y_pred_svm))

              precision    recall  f1-score   support

           0       0.88      0.70      0.78        20
           1       0.89      0.85      0.87        20
           2       0.61      0.70      0.65        20
           3       0.41      0.60      0.49        20
           4       0.64      0.70      0.67        20
           5       0.72      0.65      0.68        20
           6       0.85      0.85      0.85        20
           7       0.88      0.75      0.81        20
           8       0.69      0.55      0.61        20
           9       0.45      0.45      0.45        20

    accuracy                           0.68       200
   macro avg       0.70      0.68      0.69       200
weighted avg       0.70      0.68      0.69       200



In [26]:
grid_clf.best_params_

{'C': 1, 'gamma': 0.1, 'kernel': 'rbf'}

## KNN

In [27]:
from sklearn import neighbors

# Creation of a KNN classifier
knn = neighbors.KNeighborsClassifier()

# Parameters to test
parametres_knn = {'n_neighbors' : [1,2,5,10,20,50,100], 'metric':['minkowski', 'manhattan']}

# Creation of the grid to test the KNN model
grid_knn = model_selection.GridSearchCV(knn, param_grid=parametres_knn,cv=10)

In [28]:
# Training of the grid_knn on X_train and y_train
grille_knn = grid_knn.fit(X_train,y_train)

In [29]:
# Prediction using grid_knn on X_test
y_pred_knn = grid_knn.predict(X_test)
pd.crosstab(y_test, y_pred_knn, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,0,1,2,3,4,5,6,7,8,9
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,15,0,1,0,0,0,2,0,0,2
1,1,19,0,0,0,0,0,0,0,0
2,0,2,14,0,0,2,0,1,0,1
3,1,0,0,14,1,0,0,0,1,3
4,0,0,2,3,10,0,0,2,1,2
5,0,3,3,1,0,11,0,2,0,0
6,2,0,1,1,1,0,14,0,0,1
7,0,0,2,3,0,0,0,14,0,1
8,0,0,2,1,4,1,0,1,11,0
9,0,0,1,7,0,3,0,0,0,9


In [30]:
print(classification_report(y_test,y_pred_knn))

              precision    recall  f1-score   support

           0       0.79      0.75      0.77        20
           1       0.79      0.95      0.86        20
           2       0.54      0.70      0.61        20
           3       0.47      0.70      0.56        20
           4       0.62      0.50      0.56        20
           5       0.65      0.55      0.59        20
           6       0.88      0.70      0.78        20
           7       0.70      0.70      0.70        20
           8       0.85      0.55      0.67        20
           9       0.47      0.45      0.46        20

    accuracy                           0.66       200
   macro avg       0.68      0.66      0.66       200
weighted avg       0.68      0.66      0.66       200



In [31]:
grid_knn.best_params_

{'metric': 'manhattan', 'n_neighbors': 5}

## Random Forest

In [32]:
from sklearn import ensemble

# Creation of a RF classifier
RF = ensemble.RandomForestClassifier(n_jobs = -1, random_state = 321)

# Parameters to test
parametres_RF = {'n_estimators' : [10,20,100,200,500], 'criterion':['gini', 'entropy']}

# Creation of the grid to test the KNN model 
grid_RF = model_selection.GridSearchCV(RF, param_grid=parametres_RF,cv=5)

In [33]:
# Training of the grid_RF on X_train and y_train
grille_RF = grid_RF.fit(X_train,y_train)

In [34]:
# Prediction using grid_RF on X_test
y_pred_RF = grid_RF.predict(X_test)
pd.crosstab(y_test, y_pred_RF, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,0,1,2,3,4,5,6,7,8,9
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,12,0,0,2,0,1,3,0,1,1
1,0,17,2,0,0,1,0,0,0,0
2,0,0,12,1,0,4,0,1,1,1
3,0,0,1,12,2,0,2,0,1,2
4,0,0,0,1,11,0,0,4,4,0
5,0,2,2,2,0,14,0,0,0,0
6,0,0,0,0,1,0,18,0,1,0
7,0,0,2,1,0,1,0,16,0,0
8,0,1,0,1,3,1,1,1,12,0
9,1,0,0,5,0,1,0,3,1,9


In [35]:
print(classification_report(y_test,y_pred_RF)) 

              precision    recall  f1-score   support

           0       0.92      0.60      0.73        20
           1       0.85      0.85      0.85        20
           2       0.63      0.60      0.62        20
           3       0.48      0.60      0.53        20
           4       0.65      0.55      0.59        20
           5       0.61      0.70      0.65        20
           6       0.75      0.90      0.82        20
           7       0.64      0.80      0.71        20
           8       0.57      0.60      0.59        20
           9       0.69      0.45      0.55        20

    accuracy                           0.67       200
   macro avg       0.68      0.66      0.66       200
weighted avg       0.68      0.67      0.66       200



In [36]:
grid_RF.best_params_

{'criterion': 'gini', 'n_estimators': 500}

## Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
# Creation of the clasifier gnb (GaussianNaivesBayes)
gnb = GaussianNB()
# Fit of gnb on (X_train,y_train)
gnb.fit(X_train, y_train)
# Prediction using gnb on X_test
y_pred_gnb = gnb.predict(X_test)
pd.crosstab(y_val, y_pred_gnb, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,blues,classical,country,disco,hiphop,jazz,metal,pop,reggae,rock
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
blues,6,0,1,1,0,3,4,0,0,1
classical,0,13,2,0,0,1,0,0,0,0
country,2,0,5,0,0,2,1,4,0,2
disco,0,0,3,2,0,0,4,4,1,2
hiphop,0,0,0,0,4,0,2,9,1,0
jazz,3,4,0,0,0,7,0,2,0,0
metal,0,0,0,0,2,0,14,0,0,0
pop,0,0,0,0,0,2,0,14,0,0
reggae,0,0,1,0,2,2,1,3,6,1
rock,0,0,2,0,0,1,6,5,1,1


In [None]:
print(classification_report(y_val,y_pred_gnb))

              precision    recall  f1-score   support

       blues       0.55      0.38      0.44        16
   classical       0.76      0.81      0.79        16
     country       0.36      0.31      0.33        16
       disco       0.67      0.12      0.21        16
      hiphop       0.50      0.25      0.33        16
        jazz       0.39      0.44      0.41        16
       metal       0.44      0.88      0.58        16
         pop       0.34      0.88      0.49        16
      reggae       0.67      0.38      0.48        16
        rock       0.14      0.06      0.09        16

    accuracy                           0.45       160
   macro avg       0.48      0.45      0.42       160
weighted avg       0.48      0.45      0.42       160



## Stochastic Gradient Descent

In [None]:
from sklearn.linear_model import SGDClassifier
# Creation of the clasifier sgdc (StohasticGradientDescentClassifier)
sgdc = SGDClassifier()
# Fit of sgdc on (X_train,y_train)
sgdc.fit(X_train, y_train)
# Prediction using sgdc on X_val
y_pred_sgdc = sgdc.predict(X_val)
pd.crosstab(y_val, y_pred_sgdc, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,blues,classical,country,disco,hiphop,jazz,metal,pop,reggae,rock
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
blues,7,0,0,1,1,2,3,0,0,2
classical,0,13,1,0,0,0,0,0,1,1
country,2,0,7,1,0,0,0,1,2,3
disco,0,0,2,5,1,0,0,0,1,7
hiphop,0,0,0,1,4,1,0,5,5,0
jazz,2,1,1,1,0,11,0,0,0,0
metal,0,0,1,0,1,1,13,0,0,0
pop,0,0,0,1,0,1,0,13,0,1
reggae,0,1,0,1,0,1,0,2,9,2
rock,0,0,2,4,0,1,1,0,1,7


In [None]:
print(classification_report(y_val,y_pred_sgdc))

              precision    recall  f1-score   support

       blues       0.64      0.44      0.52        16
   classical       0.87      0.81      0.84        16
     country       0.50      0.44      0.47        16
       disco       0.33      0.31      0.32        16
      hiphop       0.57      0.25      0.35        16
        jazz       0.61      0.69      0.65        16
       metal       0.76      0.81      0.79        16
         pop       0.62      0.81      0.70        16
      reggae       0.47      0.56      0.51        16
        rock       0.30      0.44      0.36        16

    accuracy                           0.56       160
   macro avg       0.57      0.56      0.55       160
weighted avg       0.57      0.56      0.55       160



## Artificial Neural Network

In [21]:
from sklearn.neural_network import MLPClassifier
# Creation of the clasifier ann  (MLPClassifier)
ann = MLPClassifier(max_iter = 1000)
# Fit of ann on (X_train,y_train)
ann.fit(X_train, y_train)
# Prediction using ann on X_val
y_pred_ann = ann.predict(X_test)
pd.crosstab(y_test, y_pred_ann, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,0,1,2,3,4,5,6,7,8,9
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,13,0,1,1,1,1,1,0,0,2
1,0,18,0,0,0,2,0,0,0,0
2,2,0,13,0,0,2,0,0,2,1
3,0,0,1,8,3,1,2,3,1,1
4,0,0,0,0,14,0,1,1,4,0
5,3,1,2,0,0,14,0,0,0,0
6,4,0,0,0,0,1,14,0,0,1
7,0,0,0,1,1,0,0,13,4,1
8,0,0,2,1,1,0,0,0,15,1
9,1,0,5,1,1,0,1,0,1,10


In [23]:
print(classification_report(y_test,y_pred_ann))

              precision    recall  f1-score   support

           0       0.57      0.65      0.60        20
           1       0.95      0.90      0.92        20
           2       0.54      0.65      0.59        20
           3       0.67      0.40      0.50        20
           4       0.67      0.70      0.68        20
           5       0.67      0.70      0.68        20
           6       0.74      0.70      0.72        20
           7       0.76      0.65      0.70        20
           8       0.56      0.75      0.64        20
           9       0.59      0.50      0.54        20

    accuracy                           0.66       200
   macro avg       0.67      0.66      0.66       200
weighted avg       0.67      0.66      0.66       200



## Algorithme de boosting AdaBoost avec Arbres de décision

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Creation of the decision tree classifier dtc  
dtc = DecisionTreeClassifier(max_depth = 5)
dtc.fit(X_train, y_train)

DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=5,
                       max_features=None, max_leaf_nodes=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, presort=False,
                       random_state=None, splitter='best')

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Creation of the clasifier ac  (AdaBoost Classifier)
ac = AdaBoostClassifier(base_estimator = dtc, n_estimators = 400)
# Training of the model
ac.fit(X_train,y_train)
ac.score(X_val,y_val)

0.6375

In [None]:
# Prediction using ac on X_val
y_pred_ac = ac.predict(X_val)
pd.crosstab(y_val, y_pred_ac, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,blues,classical,country,disco,hiphop,jazz,metal,pop,reggae,rock
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
blues,10,0,1,0,0,0,3,0,0,2
classical,0,12,2,0,0,2,0,0,0,0
country,0,0,9,0,0,0,0,0,4,3
disco,0,0,0,9,1,0,0,0,0,6
hiphop,0,0,0,4,8,0,0,0,4,0
jazz,1,0,1,1,0,13,0,0,0,0
metal,0,0,0,0,2,0,13,0,0,1
pop,0,0,3,3,0,0,0,10,0,0
reggae,0,0,1,3,1,1,0,0,9,1
rock,0,0,1,4,0,1,0,0,1,9


In [None]:
print(classification_report(y_val,y_pred_ac)) 

              precision    recall  f1-score   support

       blues       0.91      0.62      0.74        16
   classical       1.00      0.75      0.86        16
     country       0.50      0.56      0.53        16
       disco       0.38      0.56      0.45        16
      hiphop       0.67      0.50      0.57        16
        jazz       0.76      0.81      0.79        16
       metal       0.81      0.81      0.81        16
         pop       1.00      0.62      0.77        16
      reggae       0.50      0.56      0.53        16
        rock       0.41      0.56      0.47        16

    accuracy                           0.64       160
   macro avg       0.69      0.64      0.65       160
weighted avg       0.69      0.64      0.65       160



## Gradient Boosting Classification

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Here we choose arbitrary hyperparameters as a GridSearchCV optimization takes an extremely high amount of time 
# on our computers

gbc = GradientBoostingClassifier(n_estimators=100, max_depth = 5, learning_rate = 0.1)

In [None]:
# Training of the model
gbc.fit(X_train,y_train)

# Prediction on the validation set
y_pred_gbc = gbc.predict(X_val)
pd.crosstab(y_val, y_pred_gbc, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,blues,classical,country,disco,hiphop,jazz,metal,pop,reggae,rock
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
blues,11,0,0,1,0,0,1,0,1,2
classical,1,12,1,1,0,1,0,0,0,0
country,0,0,10,0,0,0,0,1,1,4
disco,0,0,1,11,2,0,0,0,1,1
hiphop,0,0,0,1,8,0,1,2,1,3
jazz,0,1,0,1,0,13,0,0,0,1
metal,2,0,0,0,1,1,12,0,0,0
pop,0,0,2,2,0,1,0,10,0,1
reggae,0,0,0,2,2,1,1,0,9,1
rock,0,1,1,4,2,1,0,0,1,6


In [None]:
print(classification_report(y_val,y_pred_gbc))  

              precision    recall  f1-score   support

       blues       0.79      0.69      0.73        16
   classical       0.86      0.75      0.80        16
     country       0.67      0.62      0.65        16
       disco       0.48      0.69      0.56        16
      hiphop       0.53      0.50      0.52        16
        jazz       0.72      0.81      0.76        16
       metal       0.80      0.75      0.77        16
         pop       0.77      0.62      0.69        16
      reggae       0.64      0.56      0.60        16
        rock       0.32      0.38      0.34        16

    accuracy                           0.64       160
   macro avg       0.66      0.64      0.64       160
weighted avg       0.66      0.64      0.64       160



# Gradient Boosting Classifier avec GridSearchCV

In [38]:
from sklearn.ensemble import GradientBoostingClassifier

In [39]:
gbc = GradientBoostingClassifier()
parameters = {
    "n_estimators":[5,50,250,500],
    "max_depth":[1,3,5,7,9],
    "learning_rate":[0.01,0.1,1,10,100]
}

In [40]:
from sklearn.model_selection import GridSearchCV

cv_gbc = GridSearchCV(gbc,parameters,cv=5)
cv_gbc.fit(X_train, y_train) 

GridSearchCV(cv=5, error_score=nan,
             estimator=GradientBoostingClassifier(ccp_alpha=0.0,
                                                  criterion='friedman_mse',
                                                  init=None, learning_rate=0.1,
                                                  loss='deviance', max_depth=3,
                                                  max_features=None,
                                                  max_leaf_nodes=None,
                                                  min_impurity_decrease=0.0,
                                                  min_impurity_split=None,
                                                  min_samples_leaf=1,
                                                  min_samples_split=2,
                                                  min_weight_fraction_leaf=0.0,
                                                  n_estimators=100,
                                                  n_iter_no_change=None,
         

In [41]:
cv_gbc.best_params_

{'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 250}

In [43]:
y_pred_cv_gbc = cv_gbc.predict(X_test)
pd.crosstab(y_test, y_pred_cv_gbc, rownames = ['Real class'], colnames=['Predicted class'])

Predicted class,0,1,2,3,4,5,6,7,8,9
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,12,0,0,3,0,1,2,0,1,1
1,0,17,1,0,0,1,0,0,0,1
2,0,1,12,1,0,2,0,0,1,3
3,0,0,2,12,2,0,0,0,1,3
4,0,0,1,2,10,0,2,2,2,1
5,0,2,2,0,0,14,0,0,1,1
6,1,0,0,0,1,1,17,0,0,0
7,0,0,2,2,0,1,0,13,0,2
8,0,1,0,2,3,1,0,0,13,0
9,1,1,1,5,2,0,0,0,1,9


In [44]:
print(classification_report(y_test,y_pred_cv_gbc))  

              precision    recall  f1-score   support

           0       0.86      0.60      0.71        20
           1       0.77      0.85      0.81        20
           2       0.57      0.60      0.59        20
           3       0.44      0.60      0.51        20
           4       0.56      0.50      0.53        20
           5       0.67      0.70      0.68        20
           6       0.81      0.85      0.83        20
           7       0.87      0.65      0.74        20
           8       0.65      0.65      0.65        20
           9       0.43      0.45      0.44        20

    accuracy                           0.65       200
   macro avg       0.66      0.65      0.65       200
weighted avg       0.66      0.65      0.65       200



# Tuning of the Artificial Neural Network (MLP) with a GridSearchCV

In [25]:
from sklearn.model_selection import GridSearchCV

from sklearn.neural_network import MLPClassifier
# Creation of the classifier ann  (MLPClassifier)
mlp = MLPClassifier(max_iter = 1000)

parameter_space = {
    'hidden_layer_sizes': [(50,50,50), (50,100,50), (100,)],
    'activation': ['tanh', 'relu'],
    'solver': ['sgd', 'adam'],
    'alpha': [0.0001, 0.05],
    'learning_rate': ['constant','adaptive'],
}

grid_mlp = GridSearchCV(mlp, parameter_space, n_jobs=-1, cv=3)
grid_mlp.fit(X_train, y_train)  




GridSearchCV(cv=3, error_score='raise-deprecating',
             estimator=MLPClassifier(activation='relu', alpha=0.0001,
                                     batch_size='auto', beta_1=0.9,
                                     beta_2=0.999, early_stopping=False,
                                     epsilon=1e-08, hidden_layer_sizes=(100,),
                                     learning_rate='constant',
                                     learning_rate_init=0.001, max_iter=1000,
                                     momentum=0.9, n_iter_no_change=10,
                                     nesterovs_momentum=True, power_t=0.5,
                                     random_st...
                                     solver='adam', tol=0.0001,
                                     validation_fraction=0.1, verbose=False,
                                     warm_start=False),
             iid='warn', n_jobs=-1,
             param_grid={'activation': ['tanh', 'relu'],
                         'alph

In [26]:
grid_mlp.best_params_

{'activation': 'tanh',
 'alpha': 0.0001,
 'hidden_layer_sizes': (100,),
 'learning_rate': 'constant',
 'solver': 'adam'}

In [27]:

# Prediction using ann on X_test
y_pred_mlp = grid_mlp.predict(X_test)
pd.crosstab(encoder.inverse_transform(y_test), encoder.inverse_transform(y_pred_mlp), rownames = ['Real class'], colnames=['Predicted class']) 

Predicted class,blues,classical,country,disco,hiphop,jazz,metal,pop,reggae,rock
Real class,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
blues,14,0,1,0,0,0,1,0,2,2
classical,0,20,0,0,0,0,0,0,0,0
country,2,0,14,0,0,0,0,1,2,1
disco,0,1,1,11,3,0,0,1,1,2
hiphop,0,0,0,1,14,0,1,0,3,1
jazz,2,2,0,0,0,15,0,0,1,0
metal,2,0,0,0,0,1,17,0,0,0
pop,0,0,0,2,1,1,0,12,4,0
reggae,0,0,1,0,2,0,0,1,14,2
rock,1,0,5,0,0,1,0,1,3,9


In [28]:
print(classification_report(encoder.inverse_transform(y_test), encoder.inverse_transform(y_pred_mlp))  )  

              precision    recall  f1-score   support

       blues       0.67      0.70      0.68        20
   classical       0.87      1.00      0.93        20
     country       0.64      0.70      0.67        20
       disco       0.79      0.55      0.65        20
      hiphop       0.70      0.70      0.70        20
        jazz       0.83      0.75      0.79        20
       metal       0.89      0.85      0.87        20
         pop       0.75      0.60      0.67        20
      reggae       0.47      0.70      0.56        20
        rock       0.53      0.45      0.49        20

    accuracy                           0.70       200
   macro avg       0.71      0.70      0.70       200
weighted avg       0.71      0.70      0.70       200



We can see that this latest model is the best, so we are going to save it for further utilization. Before saving, we train it again on the whole data (X_train + X_test) to benefit from a maximum of data.

In [30]:
# Creation of the clasifier ann  (MLPClassifier)
ann_final = MLPClassifier(max_iter = 1000, activation = 'tanh', alpha = 0.0001, hidden_layer_sizes = (100,),
                   learning_rate = 'constant', solver = 'adam') 
# Fit of ann on (X_train,y_train)
ann_final.fit(data, target)
# Sauvegarde du modèle

from joblib import dump, load

dump(ann_final, 'md.joblib')

['md.joblib']

# Using the saved model on songs to be classified

This is a first trial with a recent gothik rock song. It will be interesting if it is correctly classified.

In [80]:
import librosa

# 1. Get the file path to an included audio example

songname = "DRAB MAJESTY - The Demonstration - 09 Kissing The Ground.wav"


# 2. Load the audio as a waveform `y`
#    Store the sampling rate as `sr`
 
y, sr = librosa.load(songname, mono = True, duration=30)
tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
rmse = librosa.feature.rms(y=y)
spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
zcr = librosa.feature.zero_crossing_rate(y)
mfcc = librosa.feature.mfcc(y=y, sr=sr)
to_append = f'{tempo} {np.mean(chroma_stft)} {np.std(chroma_stft)} {np.median(chroma_stft)} \
                {np.mean(rmse)} {np.std(rmse)} {np.median(rmse)} {np.mean(spec_cent)} {np.std(spec_cent)} \
                {np.median(spec_cent)} {np.mean(spec_bw)} {np.std(spec_bw)} {np.median(spec_bw)} \
                {np.mean(rolloff)} {np.std(rolloff)} {np.median(rolloff)} {np.mean(zcr)} {np.std(zcr)} \
                {np.median(zcr)}'    
for e in mfcc:
            to_append += f' {np.mean(e)}'

X_song = pd.DataFrame(to_append.split())            

In [49]:
header = 'tempo chroma_stft_mean chroma_stft_std chroma_stft_median rmse_mean rmse_std rmse_median \
          spectral_centroid_mean spectral_centroid_std spectral_centroid_median spectral_bandwidth_mean \
          spectral_bandwidth_std spectral_bandwidth_median rolloff_mean rolloff_std rolloff_median zero_crossing_rate_mean \
          zero_crossing_rate_std zero_crossing_rate_median'
for i in range(1, 21):
    header += f' mfcc{i}'
header = header.split()
type(header)

list

In [81]:
X_song_array = np.array([to_append.split()]) 

In [62]:
X_song_array.shape

(1, 39)

In [82]:
X_song = pd.DataFrame(X_song_array, columns = header)

In [66]:
X_song.columns

Index(['tempo', 'chroma_stft_mean', 'chroma_stft_std', 'chroma_stft_median',
       'rmse_mean', 'rmse_std', 'rmse_median', 'spectral_centroid_mean',
       'spectral_centroid_std', 'spectral_centroid_median',
       'spectral_bandwidth_mean', 'spectral_bandwidth_std',
       'spectral_bandwidth_median', 'rolloff_mean', 'rolloff_std',
       'rolloff_median', 'zero_crossing_rate_mean', 'zero_crossing_rate_std',
       'zero_crossing_rate_median', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4',
       'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11',
       'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18',
       'mfcc19', 'mfcc20'],
      dtype='object')

In [83]:
X_song = X_song[['tempo', 'chroma_stft_mean', 'chroma_stft_std', 'rmse_mean', 'rmse_std',
       'spectral_centroid_mean', 'spectral_centroid_std',
       'spectral_bandwidth_std', 'zero_crossing_rate_std',
       'zero_crossing_rate_median', 'mfcc1', 'mfcc2', 'mfcc3', 'mfcc4',
       'mfcc5', 'mfcc6', 'mfcc7', 'mfcc8', 'mfcc9', 'mfcc10', 'mfcc11',
       'mfcc12', 'mfcc13', 'mfcc14', 'mfcc15', 'mfcc16', 'mfcc17', 'mfcc18',
       'mfcc19', 'mfcc20']]

In [84]:
loaded_model = load('md.joblib')
genre_pred = loaded_model.predict(X_song)
pred = encoder.inverse_transform(genre_pred)
pred

array(['rock'], dtype=object)

The song is correctly classified in the "rock category".

# Annex: Test of a Dense Neural Network 

In [59]:
import keras
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))

model.add(layers.Dense(128, activation='relu'))

model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(10, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])
              
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)
                    
# calculate accuracy
test_loss, test_acc = model.evaluate(X_test,y_test)
print('test_acc: ',test_acc)

# predictions
predictions = model.predict(X_test)
np.argmax(predictions[0])

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
test_acc:  0.6800000071525574


1

In [64]:
pred_classes = predictions.argmax(axis = 1)
print(classification_report(y_test,pred_classes)) 

              precision    recall  f1-score   support

           0       0.70      0.70      0.70        20
           1       0.90      0.90      0.90        20
           2       0.70      0.70      0.70        20
           3       0.50      0.75      0.60        20
           4       0.72      0.65      0.68        20
           5       0.68      0.65      0.67        20
           6       0.77      0.85      0.81        20
           7       0.68      0.85      0.76        20
           8       0.50      0.45      0.47        20
           9       0.75      0.30      0.43        20

    accuracy                           0.68       200
   macro avg       0.69      0.68      0.67       200
weighted avg       0.69      0.68      0.67       200



This model doesn't bring any improvement to our classification.