# Model training and Evaluation

In [21]:
import numpy as np
import pandas as pd

import librosa
import librosa.display

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

## Data loading

The preprocessed data from the previous notebook is loaded.

In [23]:
%store -r x_train 
%store -r x_test 
%store -r y_train 
%store -r y_test 
%store -r yy 
%store -r label_encoder

## 1. K-Nearest Neighbors

In the first place, a K-Nearest Neighbor algorithm is tested. In order to find k, GridSearch is used with a series of parameters to evaluate. Using GridSearch we can evaluate all the possible combinations of the hyperparameters values using cross-validation.

In [24]:
#Defining grid parameters for the algorithm to test

grid_params = {
    'n_neighbors': [3, 5, 7, 9, 11, 15],
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan']
}

In [25]:
model_knn = GridSearchCV(KNeighborsClassifier(), grid_params, scoring='accuracy',cv=5)
model_knn.fit(x_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=None, n_neighbors=5, p=2,
           weights='uniform'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_neighbors': [3, 5, 7, 9, 11, 15], 'metric': ['euclidean', 'manhattan'], 'weights': ['uniform', 'distance']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

We can check the accuracy of our model and also the confusion matrix.

In [26]:
y_predict_knn = model_knn.predict(x_test)

#Confusion matrix
conf_matrix = confusion_matrix(y_predict_knn, y_test)
print(conf_matrix)

[[15  0  0  0  0  2  0  0  0  1]
 [ 0 12  1  0  0  0  0  0  0  0]
 [ 0  0 20  2  3  2  0  1  1  3]
 [ 0  0  3 12  0  1  0  0  1  1]
 [ 0  0  1  2  8  0  2  3  7  1]
 [ 2  1  1  0  0 12  0  0  1  1]
 [ 1  0  0  0  0  1 21  0  0  0]
 [ 0  0  1  2  3  0  0  9  2  1]
 [ 0  0  0  2  1  1  0  0  9  2]
 [ 2  0  0  1  0  3  2  0  2 11]]


In [27]:
#Best parameters found by GridSearch
model_knn.best_params_

{'metric': 'euclidean', 'n_neighbors': 5, 'weights': 'distance'}

In [28]:
print("Best score on validation set (accuracy) = {:.4f}".format(model_knn.best_score_))
print("Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, y_predict_knn)))

Best score on validation set (accuracy) = 0.5813
Best score on test set (accuracy) = 0.6450


We have an improved accuracy of 0.64 here, in relation to the previous models. This is a good step on our model, and it's also the highest value achieved. Let's see the performance of the remaining models.

## 2. Decision Tree

We will use a simple Decision Tree classifier.

In [29]:
# Define parameters for a Grid Search

grid_params_tree = {
    "criterion": ["gini", "entropy"],
    "splitter": ["best", "random"],
}

# Train a decision tree model

model_tree = GridSearchCV(DecisionTreeClassifier(random_state=10),grid_params_tree, scoring='accuracy', cv=5)
model_tree.fit(x_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=None,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=10,
            splitter='best'),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'splitter': ['best', 'random'], 'criterion': ['gini', 'entropy']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

Now, we are going to predict the labels for our test data

In [30]:
y_predict_tree = model_tree.predict(x_test)

#class_rep_tree = classification_report(y_test, predict_labels_tree)
conf_matrix_tree = confusion_matrix(y_predict_tree, y_test)
print(conf_matrix_tree)

[[11  1  2  1  0  1  0  0  1  3]
 [ 0 10  1  0  1  2  0  1  1  0]
 [ 3  0 10  1  2  0  0  0  0  1]
 [ 1  0  1  6  3  2  2  1  2  2]
 [ 0  1  1  3  5  0  0  1  5  2]
 [ 2  1  7  0  0 13  0  0  2  1]
 [ 3  0  1  1  0  1 21  0  0  0]
 [ 0  0  1  1  0  1  0  8  2  2]
 [ 0  0  2  2  2  2  0  1  7  3]
 [ 0  0  1  6  2  0  2  1  3  7]]


In [31]:
print("Best score on validation set (accuracy) = {:.4f}".format(model_tree.best_score_))
print("Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, y_predict_tree)))

Best score on validation set (accuracy) = 0.4425
Best score on test set (accuracy) = 0.4900


In this case, the MinMaxScaler have null impact on the Decision Tree classifier. 

## 3. Random Forests

Let's see what can we do with a Random Forest model.

In [32]:
# Determine some parameters for a Grid Search

grid_params_forest = {
    "n_estimators": [100, 250, 500, 1000],
    "criterion": ["gini", "entropy"],
    "max_depth": [5, 7, None]
}

# Defining the model

model_forest = GridSearchCV(RandomForestClassifier(),grid_params_forest, scoring='accuracy', cv=5)

model_forest.fit(x_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators='warn', n_jobs=None,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'n_estimators': [100, 250, 500, 1000], 'criterion': ['gini', 'entropy'], 'max_depth': [5, 7, None]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [33]:
y_predict_forest = model_forest.predict(x_test)
conf_matrix = confusion_matrix(y_test, y_predict_forest)
print(conf_matrix)
#print(classification_report(y_test, y_predict_forest))

[[10  1  0  1  0  2  4  0  0  2]
 [ 0 13  0  0  0  0  0  0  0  0]
 [ 0  0 16  1  1  3  1  2  1  2]
 [ 0  0  0 12  3  0  0  3  2  1]
 [ 0  0  0  2  4  0  1  3  4  1]
 [ 1  0  1  0  0 15  1  0  2  2]
 [ 0  0  0  0  2  0 21  0  0  2]
 [ 0  0  1  1  0  0  0  8  2  1]
 [ 1  1  2  2  5  1  1  2  7  1]
 [ 2  0  3  4  1  0  0  1  2  8]]


In [34]:
print("Best score on validation set (accuracy) = {:.4f}".format(model_forest.best_score_))
print("Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, y_predict_forest)))

Best score on validation set (accuracy) = 0.6125
Best score on test set (accuracy) = 0.5700


Here, the accuracy of the model on the test set does not improve. The major improvement was seen on the previous version (v1.1).

## 4. Logistic Regression

Finally, we will compare the performance of our models to a Logistic Regression:

In [35]:
grid_params_log = {
    "penalty": ["l1", "l2"],
    "C": [0.5, 1, 2, 5],
    "max_iter": [500]
}

model_logreg = GridSearchCV(LogisticRegression(random_state=10),grid_params_log, scoring='accuracy', cv=5)
model_logreg.fit(x_train, y_train)



GridSearchCV(cv=5, error_score='raise-deprecating',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='warn',
          n_jobs=None, penalty='l2', random_state=10, solver='warn',
          tol=0.0001, verbose=0, warm_start=False),
       fit_params=None, iid='warn', n_jobs=None,
       param_grid={'penalty': ['l1', 'l2'], 'C': [0.5, 1, 2, 5], 'max_iter': [500]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score='warn',
       scoring='accuracy', verbose=0)

In [36]:
y_predict_logreg = model_logreg.predict(x_test)
#print(classification_report(y_test, y_predict_logreg))

conf_matrix = confusion_matrix(y_test, y_predict_logreg)
print(conf_matrix)

[[ 8  0  1  0  0  1  6  0  0  4]
 [ 0 13  0  0  0  0  0  0  0  0]
 [ 9  0 11  1  1  2  0  1  0  2]
 [ 3  0  0 10  2  0  0  1  4  1]
 [ 0  0  2  1  4  1  1  2  3  1]
 [ 2  2  2  1  1 14  0  0  0  0]
 [ 1  0  0  0  1  0 22  0  0  1]
 [ 0  0  0  1  1  0  0 10  0  1]
 [ 2  0  2  2  4  1  1  1  8  2]
 [ 5  0  2  4  2  1  1  2  1  3]]


In [37]:
print("Best score on validation set (accuracy) = {:.4f}".format(model_logreg.best_score_))
print("Best score on test set (accuracy) = {:.4f}".format(accuracy_score(y_test, y_predict_logreg)))

Best score on validation set (accuracy) = 0.5887
Best score on test set (accuracy) = 0.5150


Here we don't see an improvement in relation to the previous version.

Overall, using 20 Mel-Coefficients have a impact on the accuracy scored by some of the models. MinMaxScaler have an influence on some of the models. This scaling is intended for a Neural Network input, so let's try to analyse some models.

## 5. Neural Network