In [23]:
# import all libraries
import os
import re
import numpy as np
import pandas as pd
import librosa
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import classification_report
from sklearn.metrics import log_loss
from sklearn.metrics import accuracy_score
from sklearn import metrics
import scipy.io.wavfile
import spafe.utils.vis as vis
from spafe.features.gfcc import gfcc
import warnings
warnings.filterwarnings('ignore')

In [16]:
# Read the train and test split tables. Later cells use their
# 'file_name' and 'label' columns.
train_csv, test_csv = map(
    pd.read_csv,
    ("../../dataset/train.csv", "../../dataset/test.csv"),
)

## USER must re-run this cell to reset these lists whenever the feature extraction method is changed

In [33]:
# Empty containers for feature vectors (x_*) and labels (y_*).
# The feature-extraction cells append to these, so re-run this cell
# before switching to a different extraction method.
x_train, x_test = [], []
y_train, y_test = [], []

## USER INPUT: You can change sampling rate here. Please input 8000 or 16000.

In [18]:
# Target sampling rate in Hz used by the feature-extraction cells.
# Supported options: 8000 or 16000.
sr = 16_000

# OPTION: USER has to run manually from here to change feature extraction methods
### 4 OPTIONS : 1) MFCC  2) MEL SPECTROGRAM  3) LOG-MEL SPECTROGRAM  4)GTCC
## Run the corresponding cell.

## OPTION 1) Run this cell for MFCC

In [32]:
# OPTION 1: MFCC features (40 coefficients), flattened to one vector per clip.

# Training split
train_labels = train_csv['label']
for row, name in enumerate(train_csv['file_name']):
    waveform, sr = librosa.load(f'../../dataset/audio/{name}', sr=sr)  # load at the configured rate
    coeffs = librosa.feature.mfcc(y=waveform, sr=sr, n_mfcc=40)
    x_train.append(coeffs.flatten())
    y_train.append(train_labels.iloc[row])  # label on the same CSV row as the file

# Test split
test_labels = test_csv['label']
for row, name in enumerate(test_csv['file_name']):
    waveform, sr = librosa.load(f'../../dataset/audio/{name}', sr=sr)  # load at the configured rate
    coeffs = librosa.feature.mfcc(y=waveform, sr=sr, n_mfcc=40)
    x_test.append(coeffs.flatten())
    y_test.append(test_labels.iloc[row])  # label on the same CSV row as the file

KeyboardInterrupt: 

## OPTION 2) Run this cell for Mel Spectrogram

In [None]:
# OPTION 2: mel-spectrogram features, flattened to one vector per clip.

def _mel_vector(file_name):
    """Load one clip at the configured rate `sr` and return its flattened mel spectrogram."""
    signal, _ = librosa.load(f'../../dataset/audio/{file_name}', sr=sr)
    # `y` and `sr` are passed by keyword: librosa >= 0.10 made them keyword-only,
    # so the positional form melspectrogram(audio, sr) raises TypeError there.
    return librosa.feature.melspectrogram(y=signal, sr=sr).flatten()


for idx, file in enumerate(train_csv['file_name']):
    x_train.append(_mel_vector(file))
    y_train.append(train_csv['label'].iloc[idx])  # label on the same CSV row as the file

for idx, file in enumerate(test_csv['file_name']):
    x_test.append(_mel_vector(file))
    y_test.append(test_csv['label'].iloc[idx])  # label on the same CSV row as the file

## OPTION 3) Run this cell for Log-Mel Spectrogram

In [11]:
# OPTION 3: log-mel-spectrogram features, flattened to one vector per clip.

def _log_mel_vector(file_name):
    """Load one clip at the configured rate `sr` and return its flattened log-mel spectrogram."""
    signal, _ = librosa.load(f'../../dataset/audio/{file_name}', sr=sr)
    # `y` and `sr` are passed by keyword: librosa >= 0.10 made them keyword-only,
    # so the positional form melspectrogram(audio, sr) raises TypeError there.
    mel_power = librosa.feature.melspectrogram(y=signal, sr=sr)
    # Convert the power spectrogram to decibels (the "log" in log-mel).
    return librosa.power_to_db(mel_power).flatten()


for idx, file in enumerate(train_csv['file_name']):
    x_train.append(_log_mel_vector(file))
    y_train.append(train_csv['label'].iloc[idx])  # label on the same CSV row as the file

for idx, file in enumerate(test_csv['file_name']):
    x_test.append(_log_mel_vector(file))
    y_test.append(test_csv['label'].iloc[idx])  # label on the same CSV row as the file

## OPTION 4) Run this cell for GTCC

In [12]:
# OPTION 4: GTCC (gammatone cepstral coefficient) features via spafe's gfcc,
# flattened to one vector per clip.

def _gtcc_vector(file_name):
    """Read one WAV clip and return its flattened GFCC matrix computed at rate `sr`."""
    fs, sig = scipy.io.wavfile.read(f'../../dataset/audio/{file_name}')
    if fs != sr:
        # wavfile.read never resamples. Without this step gfcc would be told the
        # samples are at `sr` while they are really at `fs`, giving a wrong
        # filterbank. NOTE(review): assumes mono clips (1-D `sig`) -- confirm.
        sig = librosa.resample(sig.astype(np.float32), orig_sr=fs, target_sr=sr)
    gfccs = gfcc(sig, num_ceps=200, nfilts=200, fs=sr)
    # Flatten directly; reshaping to (frames, ceps, 1) first does not change the result.
    return np.array(gfccs).flatten()


for idx, file in enumerate(train_csv['file_name']):
    x_train.append(_gtcc_vector(file))
    y_train.append(train_csv['label'].iloc[idx])  # label on the same CSV row as the file

for idx, file in enumerate(test_csv['file_name']):
    x_test.append(_gtcc_vector(file))
    y_test.append(test_csv['label'].iloc[idx])  # label on the same CSV row as the file

Feature extraction OPTION END

In [20]:
# Standardise features: statistics are learned from the training set only
# and then applied unchanged to the test set.
scaler = StandardScaler()
scaler.fit(x_train)

# Training features and labels are wrapped in DataFrames for the model cells;
# the test features stay as the array returned by the scaler.
x_train = pd.DataFrame(scaler.transform(x_train))
x_test = scaler.transform(x_test)
y_train = pd.DataFrame(y_train)

# OPTION: USER has to run manually from here to change models
### 4 OPTIONS : 1) SVM 2) MLP 3) KNN 4)GNB
## Run the corresponding cell.

## OPTION 1) SVM

In [21]:
# Create an RBF-kernel SVM classifier.
# probability=True enables predict_proba, which the evaluation cell calls.
# random_state is fixed so the probability calibration is repeatable across runs.
model = svm.SVC(kernel='rbf', C=1, verbose=True, probability=True, random_state=42)
# Train on the training set. y_train is a one-column DataFrame, so it is
# flattened to the 1-D label vector scikit-learn expects.
model = model.fit(x_train, np.ravel(y_train))

[LibSVM]

## OPTION 2) MLP

In [25]:
# Create an MLP classifier: one hidden layer of 10 logistic units trained with
# mini-batch SGD. early_stopping=True stops training once the validation score
# stops improving.
# random_state is fixed so weight initialisation, batching and the validation
# split are repeatable across runs.
model = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic',
                      solver='sgd', alpha=0.01, batch_size=10,
                      learning_rate_init=0.001, max_iter=70,
                      early_stopping=True, verbose=True, random_state=42)
# Train on the training set. y_train is a one-column DataFrame, so it is
# flattened to the 1-D label vector scikit-learn expects.
model = model.fit(x_train, np.ravel(y_train))

Iteration 1, loss = 0.52808969
Validation score: 0.834783
Iteration 2, loss = 0.41441382
Validation score: 0.865217
Iteration 3, loss = 0.35805459
Validation score: 0.878261
Iteration 4, loss = 0.32066045
Validation score: 0.865217
Iteration 5, loss = 0.29370508
Validation score: 0.869565
Iteration 6, loss = 0.27120280
Validation score: 0.856522
Iteration 7, loss = 0.25205152
Validation score: 0.865217
Iteration 8, loss = 0.23472639
Validation score: 0.856522
Iteration 9, loss = 0.21944806
Validation score: 0.865217
Iteration 10, loss = 0.20571244
Validation score: 0.856522
Iteration 11, loss = 0.19320501
Validation score: 0.852174
Iteration 12, loss = 0.18146357
Validation score: 0.852174
Iteration 13, loss = 0.17169639
Validation score: 0.847826
Iteration 14, loss = 0.16240627
Validation score: 0.852174
Validation score did not improve more than tol=0.000100 for 10 consecutive epochs. Stopping.


## OPTION 3) KNN

In [27]:
# Create a k-nearest-neighbours classifier that votes over the 5 nearest samples.
model = KNeighborsClassifier(n_neighbors=5)
# Train on the training set. y_train is a one-column DataFrame, so it is
# flattened to the 1-D label vector scikit-learn expects.
model = model.fit(x_train, np.ravel(y_train))

## OPTION 4) GNB

In [29]:
# Create a Gaussian naive Bayes classifier with default settings.
model = GaussianNB()
# Train on the training set. y_train is a one-column DataFrame, so it is
# flattened to the 1-D label vector scikit-learn expects.
model = model.fit(x_train, np.ravel(y_train))

# Evaluation Metrics

In [30]:
# Class predictions and per-class probabilities for the test set,
# from whichever model cell was run last.
y_pred = model.predict(x_test)
y_prob = model.predict_proba(x_test)

# ROC curve built from the second probability column, then the area under it.
# NOTE(review): presumably a binary task whose positive class is the second class -- confirm.
positive_scores = y_prob[:, 1]
fpr, tpr, thresholds = metrics.roc_curve(y_test, positive_scores)
AUC = metrics.auc(fpr, tpr)

# Report accuracy, AUC and the precision/recall/F1 table.
print('accuracy : ', accuracy_score(y_test, y_pred))
print('AUC : ', AUC)
print(classification_report(y_test, y_pred))

acc :  0.8015665796344648
AUC :  0.8453714511956204
CEL :  6.15587084097439
              precision    recall  f1-score   support

           0       0.81      0.77      0.79       369
           1       0.80      0.83      0.81       397

    accuracy                           0.80       766
   macro avg       0.80      0.80      0.80       766
weighted avg       0.80      0.80      0.80       766

