# 1- Importing the necessary libraries

In [None]:
import IPython.display as ipd
import os
import pandas as pd
import librosa
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, confusion_matrix, f1_score

In [None]:
import soundfile

# 2- Loading data

In [None]:
# Read the pre-extracted features/labels array from disk.
# NOTE(review): allow_pickle=True unpickles arbitrary Python objects, so this
# must only ever be pointed at a trusted file.
features_label = np.load(file='features_label.npy', allow_pickle=True)

In [None]:
# Join each record's five feature groups (positions 0-4) end to end, giving one
# long feature vector per record.
features = [
    np.concatenate([record[k] for k in range(5)], axis=0)
    for record in features_label
]

In [None]:
# The label of every record sits at position 5; collect them in record order
labels = [record[5] for record in features_label]

In [None]:
# Turn the Python lists into NumPy arrays: X holds the features, y the targets
X, y = np.array(features), np.array(labels)

# 3- Preprocessing

In [None]:
# Scaling the data using StandardScaler.
# The scaler is fit on a fresh array built from `features` rather than on `X`
# itself, so re-running this cell cannot re-fit it on already-standardised
# data. That would reset the learned mean/scale to ~0/1 and silently break
# `predict`, which applies this same scaler to raw features of new files.
# NOTE(review): the scaler is fit on the full dataset before the train/test
# split, so test-set statistics leak into preprocessing; consider fitting on
# the training split only.
scaler = StandardScaler()
X = scaler.fit_transform(np.array(features))

In [None]:
# Sanity check: display the shape of the scaled feature matrix
X.shape

(13125, 193)

In [None]:
# Encoding the class labels as integers.
# The encoder is fit on the original `labels` list rather than on `y` itself,
# so re-running this cell keeps lb.classes_ equal to the original label values
# instead of re-encoding already-encoded integers.
lb = LabelEncoder()
y = lb.fit_transform(np.array(labels))

In [None]:
# Sanity check: display the shape of the encoded label vector
y.shape

(13125,)

## Methods for predicting on new data

In [None]:
def extract_features(files):
    """Compute five time-averaged spectral feature vectors for one audio file.

    Parameters
    ----------
    files : path accepted by ``librosa.load``
        Location of the audio file to analyse.

    Returns
    -------
    tuple
        ``(mfccs, chroma, mel, contrast, tonnetz)``; each element is a 1-D
        array obtained by averaging the corresponding librosa feature over
        its time frames.
    """
    # Load the audio as a floating point time series. No `sr` is passed, so
    # librosa resamples to its default rate (22050 Hz).
    signal, sample_rate = librosa.load(files, res_type='kaiser_fast')

    # Mel-frequency cepstral coefficients (40 per frame), averaged over time
    mfccs = np.mean(librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=40).T, axis=0)

    # Magnitude of the short-time Fourier transform; shared by chroma and contrast
    stft = np.abs(librosa.stft(y=signal))

    # Chromagram computed from the magnitude spectrogram
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)

    # Mel-scaled spectrogram. The signal must be passed as `y=`: recent librosa
    # releases made this argument keyword-only, so a positional call raises
    # TypeError.
    mel = np.mean(librosa.feature.melspectrogram(y=signal, sr=sample_rate).T, axis=0)

    # Spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)

    # Tonal centroid features (tonnetz), computed on the harmonic component
    harmonic = librosa.effects.harmonic(y=signal)
    tonnetz = np.mean(librosa.feature.tonnetz(y=harmonic, sr=sample_rate).T, axis=0)

    return mfccs, chroma, mel, contrast, tonnetz

def predict(filename, model):
    """Classify a single audio file with an already-fitted model.

    The file's features are extracted with ``extract_features``, joined into
    one vector, scaled with the notebook-level ``scaler`` and passed to
    ``model.predict`` as a single-row batch.

    Parameters
    ----------
    filename : path of the audio file, forwarded to ``extract_features``.
    model : object exposing a ``predict`` method.

    Returns
    -------
    Whatever ``model.predict`` returns for the one-row input.
    """
    mfccs, chroma, mel, contrast, tonnetz = extract_features(filename)
    feature_vector = np.concatenate((mfccs, chroma, mel, contrast, tonnetz), axis=0)
    # The scaler expects a 2-D input, hence the one-element list
    scaled_row = scaler.transform([feature_vector])
    return model.predict(scaled_row)

In [None]:
# Hold out 20% of the samples for testing; the fixed seed keeps the split stable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=1
)

# Report how many samples ended up in each partition
n_train, n_test = len(X_train), len(X_test)
print("Train set -> " , n_train)
print("Test set -> " , n_test)

Train set ->  10500
Test set ->  2625


# 4- Training

## Building SVC model

In [None]:
from sklearn.svm import SVC

# Support-vector classifier with scikit-learn's default hyper-parameters
mod = SVC()
mod.fit(X=X_train, y=y_train)

In [None]:
# Predict once per split and reuse the result for every metric; the original
# called predict() three times per split.
svc_pred_train = mod.predict(X_train)
svc_pred_test = mod.predict(X_test)
# Accuracy
print("Accuracy Train ->", accuracy_score(y_train, svc_pred_train))
print("Accuracy Test ->", accuracy_score(y_test, svc_pred_test))
# F1-score
print("F1-score Train ->", f1_score(y_train, svc_pred_train))
print("F1-score Test ->", f1_score(y_test, svc_pred_test))
# Confusion Matrix
print("Confusion Matrix Train ->\n", confusion_matrix(y_train, svc_pred_train))
print("Confusion Matrix Test ->\n", confusion_matrix(y_test, svc_pred_test))

## Building LogisticRegression model

In [None]:
from sklearn.linear_model import LogisticRegression
# Logistic regression, allowing the solver up to 400 iterations
mod_LogisticRegression = LogisticRegression(max_iter=400)
mod_LogisticRegression.fit(X=X_train, y=y_train)

In [None]:
# Predict once per split and reuse the result for every metric; the original
# called predict() three times per split.
logreg_pred_train = mod_LogisticRegression.predict(X_train)
logreg_pred_test = mod_LogisticRegression.predict(X_test)
# Accuracy
print("Accuracy Train ->", accuracy_score(y_train, logreg_pred_train))
print("Accuracy Test ->", accuracy_score(y_test, logreg_pred_test))
# F1-score
print("F1-score Train ->", f1_score(y_train, logreg_pred_train))
print("F1-score Test ->", f1_score(y_test, logreg_pred_test))
# Confusion Matrix
print("Confusion Matrix Train ->\n", confusion_matrix(y_train, logreg_pred_train))
print("Confusion Matrix Test ->\n", confusion_matrix(y_test, logreg_pred_test))

## Building AdaBoostClassifier model

In [None]:
from sklearn.ensemble import AdaBoostClassifier

# Seed the ensemble so training (and the metrics computed from it) is
# reproducible across kernel restarts; 1 matches the seed used for
# train_test_split.
mod_AdaBoostClassifier = AdaBoostClassifier(random_state=1)

mod_AdaBoostClassifier.fit(X_train,y_train)

In [None]:
# Predict once per split and reuse the result for every metric; the original
# called predict() three times per split.
ada_pred_train = mod_AdaBoostClassifier.predict(X_train)
ada_pred_test = mod_AdaBoostClassifier.predict(X_test)
# Accuracy
print("Accuracy Train ->", accuracy_score(y_train, ada_pred_train))
print("Accuracy Test ->", accuracy_score(y_test, ada_pred_test))
# F1-score
print("F1-score Train ->", f1_score(y_train, ada_pred_train))
print("F1-score Test ->", f1_score(y_test, ada_pred_test))
# Confusion Matrix
print("Confusion Matrix Train ->\n", confusion_matrix(y_train, ada_pred_train))
print("Confusion Matrix Test ->\n", confusion_matrix(y_test, ada_pred_test))

## Building BaggingClassifier model

In [None]:
from sklearn.ensemble import BaggingClassifier

# Seed the ensemble so the bootstrap sampling (and the metrics computed from
# it) is reproducible across kernel restarts; 1 matches the seed used for
# train_test_split.
mod_BaggingClassifier = BaggingClassifier(random_state=1)

mod_BaggingClassifier.fit(X_train,y_train)

In [None]:
# Predict once per split and reuse the result for every metric; the original
# called predict() three times per split.
bagging_pred_train = mod_BaggingClassifier.predict(X_train)
bagging_pred_test = mod_BaggingClassifier.predict(X_test)
# Accuracy
print("Accuracy Train ->", accuracy_score(y_train, bagging_pred_train))
print("Accuracy Test ->", accuracy_score(y_test, bagging_pred_test))
# F1-score
print("F1-score Train ->", f1_score(y_train, bagging_pred_train))
print("F1-score Test ->", f1_score(y_test, bagging_pred_test))
# Confusion Matrix
print("Confusion Matrix Train ->\n", confusion_matrix(y_train, bagging_pred_train))
print("Confusion Matrix Test ->\n", confusion_matrix(y_test, bagging_pred_test))

## Building GradientBoostingClassifier model

In [None]:
from sklearn.ensemble import GradientBoostingClassifier

# Seed the ensemble so training (and the metrics computed from it) is
# reproducible across kernel restarts; 1 matches the seed used for
# train_test_split.
mod_GradientBoostingClassifier = GradientBoostingClassifier(random_state=1)

mod_GradientBoostingClassifier.fit(X_train,y_train)

In [None]:
# Predict once per split and reuse the result for every metric; the original
# called predict() three times per split.
gb_pred_train = mod_GradientBoostingClassifier.predict(X_train)
gb_pred_test = mod_GradientBoostingClassifier.predict(X_test)
# Accuracy
print("Accuracy Train ->", accuracy_score(y_train, gb_pred_train))
print("Accuracy Test ->", accuracy_score(y_test, gb_pred_test))
# F1-score
print("F1-score Train ->", f1_score(y_train, gb_pred_train))
print("F1-score Test ->", f1_score(y_test, gb_pred_test))
# Confusion Matrix
print("Confusion Matrix Train ->\n", confusion_matrix(y_train, gb_pred_train))
print("Confusion Matrix Test ->\n", confusion_matrix(y_test, gb_pred_test))

## Building RandomForestClassifier model

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Seed the forest so training (and the model that is later pickled) is
# reproducible across kernel restarts; 1 matches the seed used for
# train_test_split.
# NOTE(review): outputs recorded in this notebook before the seed was added
# will not match exactly after a re-run.
mod_RandomForestClassifier = RandomForestClassifier(random_state=1)

mod_RandomForestClassifier.fit(X_train,y_train)

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='gini', max_depth=None, max_features='auto',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=1, min_samples_split=2,
                       min_weight_fraction_leaf=0.0, n_estimators=100,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [None]:
# Predict once per split and reuse the result for every metric; the original
# called predict() three times per split.
rf_pred_train = mod_RandomForestClassifier.predict(X_train)
rf_pred_test = mod_RandomForestClassifier.predict(X_test)
# Accuracy
print("Accuracy Train ->", accuracy_score(y_train, rf_pred_train))
print("Accuracy Test ->", accuracy_score(y_test, rf_pred_test))
# F1-score
print("F1-score Train ->", f1_score(y_train, rf_pred_train))
print("F1-score Test ->", f1_score(y_test, rf_pred_test))
# Confusion Matrix
print("Confusion Matrix Train ->\n", confusion_matrix(y_train, rf_pred_train))
print("Confusion Matrix Test ->\n", confusion_matrix(y_test, rf_pred_test))

Accuracy Train -> 1.0
Accuracy Test -> 0.9900952380952381
F1-score Train -> 1.0
F1-score Test -> 0.9897314375987362
Confusion Matrix Train ->
 [[5215    0]
 [   0 5285]]
Confusion Matrix Test ->
 [[1346    9]
 [  17 1253]]


# 5- Importing pickle and saving the best-performing model

In [None]:
import pickle

# Persist the trained random-forest model. The context manager closes the file
# handle even if pickling fails; the original never closed the handle returned
# by open().
# NOTE(review): `predict` also relies on the fitted `scaler` (and `lb` is needed
# to decode labels); only the model is saved here.
with open('randomforest_model.pkl', 'wb') as model_file:
    pickle.dump(mod_RandomForestClassifier, model_file)