In [1]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import os
import pickle
import math

from classification.datasets import Dataset_augmented
from classification.utils.audio_student import AudioUtil, Feature_vector_DS
from classification.utils.plots import (
    plot_decision_boundaries,
    plot_specgram,
    show_confusion_matrix,
)
from classification.utils.utils import accuracy

In [2]:
# Hyperparameters other than those of the model
Nft = 512 # number of samples in the Fourier transform
nmel = 20 # number of mel bands
pca = int(math.ceil(11025/Nft) * nmel / 2) # number of components in PCA
# threshold = 0.05 # threshold under which we discard the input

In [3]:
# Loading the dataset and splitting it into training and test sets

# Load the dataset
dataset = Dataset_augmented()
classnames = dataset.list_classes() #['chainsaw', 'fire', 'fireworks', 'gun']

# Splitting the dataset by hand, we will use a 70/30 split
# we have 280 audio files for each class except for gun which has 272
# so we will generate 196 random indexes for each class (except gun) and use them as the training set

random_indexes = [[], [], [], []]
random_indexes[0] = np.random.choice(280, 196, replace=False)
random_indexes[1] = np.random.choice(280, 196, replace=False)
random_indexes[2] = np.random.choice(280, 196, replace=False)
random_indexes[3] = np.random.choice(272, 190, replace=False)

# we will use the remaining indexes as the test set
test_indexes = [[], [], [], []]
test_indexes[0] = np.setdiff1d(np.arange(280), random_indexes[0])
test_indexes[1] = np.setdiff1d(np.arange(280), random_indexes[1])
test_indexes[2] = np.setdiff1d(np.arange(280), random_indexes[2])
test_indexes[3] = np.setdiff1d(np.arange(272), random_indexes[3])

In [4]:
# Computing the mel spectrogram of each audio file and saving in a folder
folder_path = "src/classification/datasets/melspectrograms/"
number_audio_files = [280, 280, 280, 272]
n_win_files = np.zeros(280*3 + 272)

for class_index in range (len(classnames)):
    for audio_index in range(number_audio_files[class_index]):
        current_sound = dataset[classnames[class_index], audio_index]
        current_audio = AudioUtil.open(current_sound)
        current_audio = AudioUtil.resample(current_audio, 11025)
        
        # we will split the audio into 1 second window, and compute the mel spectrogram of each clip
        n_win = (len(current_audio[0]) // 11025) + 1
        n_win_files[class_index * 280 + audio_index] = n_win
        for window in range(n_win):
            sub_aud = (current_audio[0][window * 11025 :], current_audio[1])
            sub_aud = AudioUtil.pad_trunc(sub_aud, 950)
            sgram = AudioUtil.melspectrogram(sub_aud, Nmel=nmel, Nft=Nft)
            ncol = int(11025 / Nft)
            sgram = sgram[:, :ncol]
            fv = sgram.reshape(-1)
            # saving the mel spectrogram in .npy format
            np.save(folder_path + classnames[class_index] + str(audio_index) + "_" + str(window) + ".npy", fv)

fv_len = len(fv)

In [5]:
# Creating the training set and computing the PCA
total_number_window_training = np.sum(n_win_files[random_indexes[0]]) + np.sum(n_win_files[random_indexes[1]]) + np.sum(n_win_files[random_indexes[2]]) + np.sum(n_win_files[random_indexes[3]])
X_train = np.zeros((int(total_number_window_training), int(fv_len)))
y_train = np.zeros(int(total_number_window_training))

# we will use the indexes to load the mel spectrograms and compute the PCA
index = 0
for class_index in range (len(classnames)):
    for audio_index in random_indexes[class_index]:
        for window in range(int(n_win_files[class_index * 280 + audio_index])):
            X_train[index, :] = np.load(folder_path + classnames[class_index] + str(audio_index) + "_" + str(window) + ".npy")
            y_train[index] = class_index
            index += 1
            
# we will compute the PCA
from sklearn.decomposition import PCA
pca = PCA(n_components=pca)
X_train = pca.fit_transform(X_train)
X_train = X_train / np.linalg.norm(X_train, axis=0)

In [6]:
# Model training
# we will test three models, CNN, SVM and Random Forest
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
# TO DO : implement the CNN model

# 5-fold cross validation
from sklearn.model_selection import KFold
kf = KFold(n_splits=5)

# Random Forest
n_estimators = [10, 50, 100, 200]
max_depth = [5, 10, 20, 50, 100]
min_samples_split = [2, 5, 10, 15]

results_RF = pd.DataFrame(columns=["model", "n_estimators", "max_depth", "min_samples_split", "accuracy"])

for n in n_estimators:
    for d in max_depth:
        for s in min_samples_split:
            clf = RandomForestClassifier(n_estimators=n, max_depth=d, min_samples_split=s)
            acc = 0
            for train_index, test_index in kf.split(X_train):
                X_train_kf, X_test_kf = X_train[train_index], X_train[test_index]
                y_train_kf, y_test_kf = y_train[train_index], y_train[test_index]
                clf.fit(X_train_kf, y_train_kf)
                acc += accuracy(y_test_kf, clf.predict(X_test_kf))
            acc /= 5
            results_RF = results_RF._append({"model": "Random Forest", "n_estimators": n, "max_depth": d, "min_samples_split": s, "accuracy": acc}, ignore_index=True)
        print(results_RF.loc[results_RF["accuracy"].idxmax()])
        
# save the results in a .csv file
results_RF.to_csv("results_RF.csv")


# SVM
kernel = ["rbf"]
C = [0.001, 0.01, 0.1, 1000]
gamma = ["scale", "auto", 0.1, 0.5, 1, 5, 10]

# the results will be saved in a pandas dataframe
results_SVM = pd.DataFrame(columns=["model", "kernel", "C", "gamma", "accuracy"])

for k in kernel:
    for c in C:
        for g in gamma:
            clf = SVC(kernel=k, C=c, gamma=g)
            acc = 0
            for train_index, test_index in kf.split(X_train):
                X_train_kf, X_test_kf = X_train[train_index], X_train[test_index]
                y_train_kf, y_test_kf = y_train[train_index], y_train[test_index]
                clf.fit(X_train_kf, y_train_kf)
                acc += accuracy(y_test_kf, clf.predict(X_test_kf))
            acc /= 5
            results_SVM = results_SVM._append({"model": "SVM", "kernel": k, "C": c, "gamma": g, "accuracy": acc}, ignore_index=True)
        print(results_SVM.loc[results_SVM["accuracy"].idxmax()])

# save the results in a .csv file
results_SVM.to_csv("results_SVM.csv")

  results_RF = results_RF._append({"model": "Random Forest", "n_estimators": n, "max_depth": d, "min_samples_split": s, "accuracy": acc}, ignore_index=True)


model                Random Forest
n_estimators                    10
max_depth                        5
min_samples_split                2
accuracy                  0.506209
Name: 0, dtype: object
model                Random Forest
n_estimators                    10
max_depth                       10
min_samples_split                5
accuracy                  0.632123
Name: 5, dtype: object
model                Random Forest
n_estimators                    10
max_depth                       10
min_samples_split                5
accuracy                  0.632123
Name: 5, dtype: object
model                Random Forest
n_estimators                    10
max_depth                       10
min_samples_split                5
accuracy                  0.632123
Name: 5, dtype: object
model                Random Forest
n_estimators                    10
max_depth                       10
min_samples_split                5
accuracy                  0.632123
Name: 5, dtype: object
model     

  results_SVM = results_SVM._append({"model": "SVM", "kernel": k, "C": c, "gamma": g, "accuracy": acc}, ignore_index=True)


model            SVM
kernel           rbf
C              0.001
gamma          scale
accuracy    0.477298
Name: 0, dtype: object
model            SVM
kernel           rbf
C              0.001
gamma          scale
accuracy    0.477298
Name: 0, dtype: object
model            SVM
kernel           rbf
C              0.001
gamma          scale
accuracy    0.477298
Name: 0, dtype: object
model            SVM
kernel           rbf
C             1000.0
gamma             10
accuracy    0.619546
Name: 27, dtype: object
