# Benchmarking our model

In [0]:
import numpy as np
import pandas as pd
import random
from scipy.signal import spectrogram, stft
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import class_weight
from sklearn.metrics import matthews_corrcoef, accuracy_score, f1_score, balanced_accuracy_score, confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.mixture import GaussianMixture

import librosa
import xgboost as xgb

In [0]:
SAMPLE_RATE = 32000
SEED = 42
VAL_SPLIT = 0.1
TEST_SPLIT = 0.2
NFTT = 256
N_TREES = 100

## Load and prepare data

In [0]:
x_niko = np.load("data/raw/x_niko.npy")
x_adrian = np.load("data/raw/x_adrian.npy")
x_toni = np.load("data/raw/x_toni.npy")
x_adrian2 = np.load("data/raw/x_adrian2.npy")
x_adrian3 = np.load("data/raw/x_adrian3.npy")
x_raw = np.concatenate((x_adrian, x_niko,x_toni,x_adrian2,x_adrian3), axis=0)

In [0]:
y_niko = np.load("data/raw/y_niko.npy")
y_adrian = np.load("data/raw/y_adrian.npy")
y_toni = np.load("data/raw/y_toni.npy")
y_adrian2 = np.load("data/raw/y_adrian2.npy")
y_adrian3 = np.load("data/raw/y_adrian3.npy")
y_raw = np.concatenate((y_adrian, y_niko,y_toni,np.squeeze(y_adrian2),y_adrian3))

In [0]:
#fourier transformation
x_four = np.load("x_four.npz")

#log scale 
x_log = 10. * np.log10(x_four+np.finfo(float).eps) # from plt.spectrogram
x_log = x_log[:,:,:-1]

In [0]:
features = np.hstack([np.mean(x_log, axis=2),
                      np.std(x_log, axis=2),
                      np.median(x_log, axis=2),
                      np.min(x_log, axis=2),
                      np.max(x_log, axis=2)])

print(len(features[0]) + "Features created!")

In [0]:
x_train, x_test, y_train, y_test = train_test_split(features, 
                                                    y_raw, 
                                                    test_size=TEST_SPLIT, 
                                                    stratify=y_raw,
                                                    random_state=SEED)

## Define models

In [0]:
xgb_model = xgb.XGBClassifier(objective="binary:logistic", random_state=SEED)
clf = RandomForestClassifier(random_state=SEED)
svc = SVC()
nb = GaussianNB()
model_list = [xgb_model, clf, svc, nb]

In [0]:
xgb_model = xgb.XGBClassifier(n_estimators=N_TREES,
                              objective="binary:logistic", random_state=SEED)
clf = RandomForestClassifier(n_estimators=N_TREES,
                             random_state=SEED)
svc = SVC()
nb = GaussianNB()
model_list = [xgb_model, clf, svc, nb]

In [0]:
res_list = []
for i, model in enumerate(model_list):
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)
    res = [type(model).__name__,
           balanced_accuracy_score(y_test, y_pred)*100,
           precision_score(y_test, y_pred)*100,
           recall_score(y_test, y_pred)*100,
           f1_score(y_test, y_pred)*100,
           matthews_corrcoef(y_test, y_pred)*100]
    print(res)
    res_list.append(res)

['XGBClassifier', 96.2482024733966, 96.32107023411372, 94.42622950819673, 95.36423841059603, 92.93201738615686]
['RandomForestClassifier', 94.40322116767328, 98.55595667870037, 89.50819672131148, 93.81443298969072, 90.97903859169058]




['SVC', 84.26229508196721, 100.0, 68.52459016393443, 81.32295719844359, 76.5814414606606]
['GaussianNB', 58.34483750359506, 39.12483912483912, 99.672131147541, 56.19223659889094, 25.218060375107747]


## Create metrics

In [0]:
df = pd.DataFrame(res_list, columns=["Model", "Balanced ", "Precision", "Recall", "F1", "MCC"]).round(decimals=2)
df

Unnamed: 0,Model,Balanced,Precision,Recall,F1,MCC
0,XGBClassifier,96.25,96.32,94.43,95.36,92.93
1,RandomForestClassifier,94.4,98.56,89.51,93.81,90.98
2,SVC,84.26,100.0,68.52,81.32,76.58
3,GaussianNB,58.34,39.12,99.67,56.19,25.22


In [0]:
df.round(decimals=2).to_latex(index=False)

'\\begin{tabular}{lrrrrr}\n\\toprule\n                  Model &  Balanced  &  Precision &  Recall &     F1 &    MCC \\\\\n\\midrule\n          XGBClassifier &      96.25 &      96.32 &   94.43 &  95.36 &  92.93 \\\\\n RandomForestClassifier &      94.40 &      98.56 &   89.51 &  93.81 &  90.98 \\\\\n                    SVC &      84.26 &     100.00 &   68.52 &  81.32 &  76.58 \\\\\n             GaussianNB &      58.34 &      39.12 &   99.67 &  56.19 &  25.22 \\\\\n\\bottomrule\n\\end{tabular}\n'