# Model training and Evaluation

In [76]:
import numpy as np
import pandas as pd

import librosa
import librosa.display

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

## Data loading

The preprocessed data from the previous notebook is loaded.

In [79]:
%store -r x_train 
%store -r x_test 
%store -r y_train 
%store -r y_test 
%store -r yy 
%store -r label_encoder

## 1. K-Nearest Neighbors

In the first place, a K-Nearest Neighbor algorithm is tested. In order to find k, GridSearch is used with a series of parameters to evaluate. Using GridSearch we can evaluate all the possible combinations of the hyperparameters values using cross-validation.

In [80]:
#Defining grid parameters for the algorithm to test

grid_params = {
    'n_neighbors': [3, 5, 7, 9, 11, 15],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

In [81]:
model_knn = GridSearchCV(KNeighborsClassifier(), grid_params, scoring='accuracy',cv=5)
model_knn.fit(x_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': [3, 5, 7, 9, 11, 15], 'metric': ['euclidean', 'manhattan'], 'weights': ['uniform', 'distance']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

We can check the accuracy of our model and also the confusion matrix

In [82]:
y_predict_knn = model_knn.predict(x_test)

#Confusion matrix
conf_matrix = confusion_matrix(y_predict_knn, y_test)
print(conf_matrix)

[[15  0  2  1  0  1  1  0  0  0]
 [ 1 12  0  0  0  2  0  0  0  1]
 [ 0  0 14  2  2  1  1  0  2  3]
 [ 0  0  2  9  2  1  0  2  3  2]
 [ 1  0  1  3  3  0  2  1  4  1]
 [ 1  1  3  0  0 13  0  1  2  0]
 [ 1  0  1  1  2  1 20  0  0  1]
 [ 0  0  1  1  0  0  0  6  1  1]
 [ 0  0  2  2  6  1  0  2  8  5]
 [ 1  0  1  2  0  2  1  1  3  7]]


In [83]:
#Best parameters found by GridSearch
model_knn.best_params_

{'metric': 'manhattan', 'n_neighbors': 5, 'weights': 'distance'}

In [84]:
print("Best score on validation set (accuracy) = {:.4f}".format(model_knn.best_score_))
print("Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, y_predict_knn)))

Best score on validation set (accuracy) = 0.5525
Best score on test set (accuracy) = 0.5350


It can be observed that the first model have an accuracy of 0.53 for the test set. This is quite far from promising, so we will try to improve. Let's try with another models before changing some parameters in the preprocessing notebook.

## 2. Decision Tree

We will use a simple Decision Tree classifier.

In [85]:
# Define parameters for a Grid Search

grid_params_tree = {
    "criterion": ["gini", "entropy"],
    "splitter": ["best", "random"],
}

# Train a decision tree model

model_tree = GridSearchCV(DecisionTreeClassifier(random_state=10),grid_params_tree, scoring='accuracy', cv=5)
model_tree.fit(x_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=10,
            splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'splitter': ['best', 'random'], 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

Now, we are going to predict the labels for our test data

In [86]:
y_predict_tree = model_tree.predict(x_test)

#class_rep_tree = classification_report(y_test, predict_labels_tree)
conf_matrix_tree = confusion_matrix(y_predict_tree, y_test)
print(conf_matrix_tree)

[[11  1  6  1  0  1  1  0  0  4]
 [ 0 11  3  0  0  0  0  0  0  0]
 [ 0  0  8  0  1  4  0  0  4  0]
 [ 1  0  2  5  2  1  1  2  2  3]
 [ 0  0  1  3  2  1  2  0  6  3]
 [ 3  1  2  0  0 10  0  0  2  3]
 [ 2  0  1  2  2  0 18  0  2  1]
 [ 0  0  0  1  2  0  0  8  1  0]
 [ 0  0  0  3  5  2  1  1  5  1]
 [ 3  0  4  6  1  3  2  2  1  6]]


In [87]:
print("Best score on validation set (accuracy) = {:.4f}".format(model_tree.best_score_))
print("Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, y_predict_tree)))

Best score on validation set (accuracy) = 0.4325
Best score on test set (accuracy) = 0.4200


We can observe that Accuracy has not improved in this case. We have a value of 0.42 for the test set. Let's try some additional models.

## 3. Random Forests

Let's see what can we do with a Random Forest model.

In [88]:
# Determine some parameters for a Grid Search

grid_params_forest = {
    "n_estimators": [100, 250, 500, 1000],
    "criterion": ["gini", "entropy"],
    "max_depth": [5, 7, None]
}

# Defining the model

model_forest = GridSearchCV(RandomForestClassifier(),grid_params_forest, scoring='accuracy', cv=5)

model_forest.fit(x_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_estimators': [100, 250, 500, 1000], 'criterion': ['gini', 'entropy'], 'max_depth': [5, 7, None]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [89]:
y_predict_forest = model_forest.predict(x_test)
conf_matrix = confusion_matrix(y_test, y_predict_forest)
print(conf_matrix)
#print(classification_report(y_test, y_predict_forest))

[[14  0  0  0  0  2  2  0  0  2]
 [ 0 13  0  0  0  0  0  0  0  0]
 [ 6  1  9  2  0  4  1  2  1  1]
 [ 1  0  0 12  1  0  0  2  1  4]
 [ 0  0  0  1  6  0  1  2  4  1]
 [ 4  0  2  1  0 13  1  0  1  0]
 [ 1  0  0  1  1  0 21  0  0  1]
 [ 0  0  1  1  0  1  0  8  2  0]
 [ 0  1  2  2  2  1  2  2  9  2]
 [ 2  1  2  3  1  0  1  2  1  8]]


In [90]:
print("Best score on validation set (accuracy) = {:.4f}".format(model_forest.best_score_))
print("Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, y_predict_forest)))

Best score on validation set (accuracy) = 0.5787
Best score on test set (accuracy) = 0.5650


We achieved a better accuracy using this model than the Decision Tree model. Random Forest is a more robust algorithm, so it's expected having better metrics than the previous model. Let's try another one.

## 4. Logistic Regression

Finally, we will compare the performance of our models to a Logistic Regression:

In [91]:
grid_params_log = {
    "penalty": ["l1", "l2"],
    "C": [0.5, 1, 2, 5],
    "max_iter": [500]
}

model_logreg = GridSearchCV(LogisticRegression(random_state=10),grid_params_log, scoring='accuracy', cv=5)
model_logreg.fit(x_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=10, solver='warn',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'penalty': ['l1', 'l2'], 'C': [0.5, 1, 2, 5], 'max_iter': [500]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [92]:
y_predict_logreg = model_logreg.predict(x_test)
#print(classification_report(y_test, y_predict_logreg))

conf_matrix = confusion_matrix(y_test, y_predict_logreg)
print(conf_matrix)

[[12  0  0  0  1  2  2  0  0  3]
 [ 0 13  0  0  0  0  0  0  0  0]
 [10  0  8  1  1  1  0  3  1  2]
 [ 2  0  0  8  4  0  0  2  3  2]
 [ 2  0  1  1  4  0  1  2  4  0]
 [ 4  1  3  2  0  7  0  0  5  0]
 [ 1  0  0  1  0  0 22  0  0  1]
 [ 0  0  1  1  0  0  0  9  2  0]
 [ 4  0  2  4  1  1  0  1  9  1]
 [ 7  0  1  2  1  2  1  2  1  4]]


In [93]:
print("Best score on validation set (accuracy) = {:.4f}".format(model_logreg.best_score_))
print("Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, y_predict_logreg)))

Best score on validation set (accuracy) = 0.5663
Best score on test set (accuracy) = 0.4800


Similar to the previous models, we have an accuracy of 0.48 for the test set. We can try to make some modifications to the preprocessing (more number of mel coefficients for example, as we are using only 12 and the default is 20) to see if we can achieve better metrics.