In [73]:
import matplotlib.pyplot as plt
import numpy as np
import librosa as rs
from tqdm import tqdm
import pandas as pd
import librosa.display
import IPython.display as ipd

from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import train_test_split , KFold ,  cross_val_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.preprocessing import StandardScaler




In [56]:
train_df = pd.read_csv('train.csv')
test_df = pd.read_csv('val.csv')

train_df = train_df.drop(train_df.index[4101]) 

In [85]:
features = []
labels = []

def parser_mfcc(df , path):
    for i in tqdm(range(df.shape[0])):
        try:
            file_name = f'{path}/{df["Name"][i]}'
            data, sr = librosa.load(file_name) 
            n_fft = min(2048, len(data))
            mfcc_mean = np.mean(librosa.feature.mfcc(y=data, sr=sr, n_fft=n_fft), axis=1)  
            mfcc_std = np.std(librosa.feature.mfcc(y=data, sr=sr, n_fft=n_fft), axis=1)
            mfcc_max = np.max(librosa.feature.mfcc(y=data, sr=sr, n_fft=n_fft), axis=1)
            mfcc_min = np.min(librosa.feature.mfcc(y=data, sr=sr, n_fft=n_fft), axis=1)    
            zcr = np.mean(librosa.feature.zero_crossing_rate(data))
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=data, sr=sr))  
            features.append(np.concatenate([mfcc_mean, mfcc_std, mfcc_max, mfcc_min, [zcr], [spectral_centroid] ]))
            labels.append(df["ClassId"][i])
        except Exception as e:
            print(f"Error loading {file_name}: {e}")
            continue

    return features, labels


In [86]:
x_train, y_train = parser_mfcc(train_df , 'train')
x_test , y_test = parser_mfcc(test_df , 'val')

  data, sr = librosa.load(file_name)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)
 84%|████████▍ | 4104/4860 [03:00<00:26, 28.09it/s]

Error loading train/Laughter/Laughter_67.wav: 4101


100%|██████████| 4860/4860 [03:38<00:00, 22.23it/s]
100%|██████████| 1209/1209 [00:53<00:00, 22.65it/s]


In [87]:
scaler = StandardScaler()
x_train = scaler.fit_transform(x_train)
x_test = scaler.transform(x_test)


In [91]:
kfold = KFold(n_splits=5, shuffle=True, random_state=42)
x_total = np.concatenate((x_train, x_test), axis=0)
y_total = np.concatenate((y_train, y_test), axis=0)
model = GaussianNB()
cv_results = cross_val_score(model, x_total, y_total, cv=kfold, scoring='accuracy')
np.mean(cv_results)

np.float64(0.6582894659137422)

In [88]:
model = GaussianNB()
model.fit(x_train , y_train)
y_pred = model.predict(x_test)

print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.59      0.54      0.56       363
           1       0.61      0.71      0.66       265
           2       0.94      0.70      0.81       213
           3       0.95      0.55      0.70       700
           4       0.65      0.78      0.71       429
           5       0.84      0.85      0.84       561
           6       0.39      0.52      0.44       217
           7       0.44      0.83      0.58       209
           8       0.79      0.63      0.70       684
           9       0.65      0.72      0.68       560
          10       0.69      0.49      0.57       800
          11       0.46      0.59      0.52       368
          12       0.62      0.80      0.70       699

    accuracy                           0.66      6068
   macro avg       0.66      0.67      0.65      6068
weighted avg       0.70      0.66      0.66      6068

