In [1]:
# Required modules

import os
import wave
import glob
import tqdm
import numpy as np
import pandas as pd
import seaborn as sns
import scipy.signal as sps

from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import GridSearchCV

In [2]:
# Load the 30 sec data features

features30 = pd.read_csv("./Data/features_30_sec.csv")
features30.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.wav,661794,0.350088,0.088757,0.130228,0.002827,1784.16585,129774.064525,2002.44906,85882.761315,...,52.42091,-1.690215,36.524071,-0.408979,41.597103,-2.303523,55.062923,1.221291,46.936035,blues
1,blues.00001.wav,661794,0.340914,0.09498,0.095948,0.002373,1530.176679,375850.073649,2039.036516,213843.755497,...,55.356403,-0.731125,60.314529,0.295073,48.120598,-0.283518,51.10619,0.531217,45.786282,blues
2,blues.00002.wav,661794,0.363637,0.085275,0.17557,0.002746,1552.811865,156467.643368,1747.702312,76254.192257,...,40.598766,-7.729093,47.639427,-1.816407,52.382141,-3.43972,46.63966,-2.231258,30.573025,blues
3,blues.00003.wav,661794,0.404785,0.093999,0.141093,0.006346,1070.106615,184355.942417,1596.412872,166441.494769,...,44.427753,-3.319597,50.206673,0.636965,37.31913,-0.619121,37.259739,-3.407448,31.949339,blues
4,blues.00004.wav,661794,0.308526,0.087841,0.091529,0.002303,1835.004266,343399.939274,1748.172116,88445.209036,...,86.099236,-5.454034,75.269707,-0.916874,53.613918,-4.404827,62.910812,-11.703234,55.19516,blues


In [3]:
# Load the 3 sec data features

features3 = pd.read_csv("./Data/features_3_sec.csv")
features3.head()

Unnamed: 0,filename,length,chroma_stft_mean,chroma_stft_var,rms_mean,rms_var,spectral_centroid_mean,spectral_centroid_var,spectral_bandwidth_mean,spectral_bandwidth_var,...,mfcc16_var,mfcc17_mean,mfcc17_var,mfcc18_mean,mfcc18_var,mfcc19_mean,mfcc19_var,mfcc20_mean,mfcc20_var,label
0,blues.00000.0.wav,66149,0.335406,0.091048,0.130405,0.003521,1773.065032,167541.630869,1972.744388,117335.771563,...,39.687145,-3.24128,36.488243,0.722209,38.099152,-5.050335,33.618073,-0.243027,43.771767,blues
1,blues.00000.1.wav,66149,0.343065,0.086147,0.112699,0.00145,1816.693777,90525.690866,2010.051501,65671.875673,...,64.748276,-6.055294,40.677654,0.159015,51.264091,-2.837699,97.03083,5.784063,59.943081,blues
2,blues.00000.2.wav,66149,0.346815,0.092243,0.132003,0.00462,1788.539719,111407.437613,2084.565132,75124.921716,...,67.336563,-1.76861,28.348579,2.378768,45.717648,-1.938424,53.050835,2.517375,33.105122,blues
3,blues.00000.3.wav,66149,0.363639,0.086856,0.132565,0.002448,1655.289045,111952.284517,1960.039988,82913.639269,...,47.739452,-3.841155,28.337118,1.218588,34.770935,-3.580352,50.836224,3.630866,32.023678,blues
4,blues.00000.4.wav,66149,0.335579,0.088129,0.143289,0.001701,1630.656199,79667.267654,1948.503884,60204.020268,...,30.336359,0.664582,45.880913,1.689446,51.363583,-3.392489,26.738789,0.536961,29.146694,blues


In [4]:
# Seperating features and labels

X = features3.iloc[:, 2:-1]
y = features3["label"]

In [5]:
# Split the data into features and target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=88)

In [6]:
# Encode the target

label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)

In [7]:
# Standardize the data

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)

In [8]:
# Train the model

model = RandomForestClassifier()
model.fit(X_train, y_train)

In [9]:
# Test the model

X_test = scaler.transform(X_test)
y_test = label_encoder.transform(y_test)

y_pred = model.predict(X_test)

In [10]:
# Evaluate the model

accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")
cr = classification_report(y_test, y_pred)
print(cr)

Accuracy: 0.8743743743743744
              precision    recall  f1-score   support

           0       0.86      0.90      0.88       195
           1       0.91      0.98      0.94       208
           2       0.76      0.89      0.82       187
           3       0.86      0.86      0.86       204
           4       0.94      0.82      0.88       205
           5       0.90      0.94      0.92       191
           6       0.89      0.95      0.92       194
           7       0.91      0.90      0.90       188
           8       0.82      0.87      0.84       200
           9       0.92      0.67      0.78       226

    accuracy                           0.87      1998
   macro avg       0.88      0.88      0.87      1998
weighted avg       0.88      0.87      0.87      1998



In [12]:
# Optimize the model using GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 150, 200, 250],
    'max_depth': [5, 10, 15, None],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, n_jobs=4, verbose=2)
grid_search.fit(X_train, y_train)

print(grid_search.best_params_)
print(grid_search.best_score_)

Fitting 5 folds for each of 180 candidates, totalling 900 fits
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=2, n_estimators=50; total time=   1.5s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=2, n_estimators=50; total time=   1.5s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   3.0s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=2, n_estimators=100; total time=   3.0s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=2, n_estimators=150; total time=   4.5s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=50; total time=   1.5s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   3.0s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=150; total time=   4.5s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=150; total time=   4.5s
[CV] END max_depth=15, min_samp

[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   3.0s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=100; total time=   3.1s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=5, n_estimators=150; total time=   4.5s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   1.5s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   2.9s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=150; total time=   4.4s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total time=   2.8s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total tim

[CV] END max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=150; total time=   2.3s
[CV] END max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=200; total time=   3.1s
[CV] END max_depth=5, min_samples_leaf=4, min_samples_split=10, n_estimators=250; total time=   3.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.3s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=50; total time=   1.3s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.6s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=100; total time=   2.6s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=150; total time=   3.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=200; total time=   5.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=2, n_estimators=250; total time=

[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=50; total time=   1.5s[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=200; total time=   5.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=250; total time=   6.5s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=100; total time=   2.6s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=100; total time=   2.6s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=150; total time=   3.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=200; total time=   5.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=250; total time=   6.5s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=50; total time=   1.3s
[CV] END max_depth=10, min_samples_leaf=2, min_samples_split=2, n_estimators=50; total t

[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=100; total time=   2.6s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=150; total time=   3.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=200; total time=   5.2s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=5, n_estimators=250; total time=   6.5s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=50; total time=   1.3s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=50; total time=   1.3s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=50; total time=   1.3s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=100; total time=   2.6s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=150; total time=   3.9s
[CV] END max_depth=10, min_samples_leaf=1, min_samples_split=10, n_estimators=150; total 

[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   2.9s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=100; total time=   2.9s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=150; total time=   4.4s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=200; total time=   5.8s
[CV] END max_depth=15, min_samples_leaf=2, min_samples_split=10, n_estimators=250; total time=   7.3s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=50; total time=   1.4s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total time=   2.8s
[CV] END max_depth=15, min_samples_leaf=4, min_samples_split=2, n_estimators=100; total t