In [4]:
import os
import csv
import time
import numpy as np
import librosa
import matplotlib.pyplot as plt
from scipy.io import wavfile
from tqdm import tqdm
from sklearn import preprocessing, neighbors, model_selection
from sklearn.model_selection import GridSearchCV, cross_val_score

In [5]:
train_frames = []
for i in range(12000):
    #./train/train/train_{i} based on where my files are located on my laptop
    _, frames = wavfile.read(f'./train/train/train_{i}.wav')
    train_frames.append(frames)
train_x = np.array(train_frames)

test_frames = []
for i in range(3000):
    #./test/test/test_{i} based on where my files are located on my laptop
    _, frames = wavfile.read(f'./test/test/test_{i}.wav')
    test_frames.append(frames)
test_x = np.array(test_frames)

with open('train.csv', 'r') as g:
    reader = csv.reader(g)
    ls = list(reader)
    train_y = np.array(ls[1:], dtype=int)[:, 1]

In [15]:
train_x.shape, train_y.shape, test_x.shape

((12000, 24000), (12000,), (3000, 24000))

In [7]:
train_x_split, test_x_split, train_y_split, test_y_split = model_selection.train_test_split(
    train_x, train_y, stratify=train_y, test_size=0.25, random_state=42)

In [8]:
def extract_mfcc_features(y, sr=44100, n_mfcc=13):
    return librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc).mean(axis=1)

In [9]:
train_x_split_mfcc = np.array([extract_mfcc_features(sample.astype(float)) for sample in tqdm(train_x_split)])
train_x_split_mfcc = preprocessing.StandardScaler().fit_transform(train_x_split_mfcc)
test_x_split_mfcc = np.array([extract_mfcc_features(sample.astype(float)) for sample in tqdm(test_x_split)])
test_x_split_mfcc = preprocessing.StandardScaler().fit_transform(test_x_split_mfcc)

print(f"train_x_split_mfcc shape: {train_x_split_mfcc.shape}")

Extracting MFCC features...


100%|█████████████████████████████████████████████████████████████████████████████| 9000/9000 [01:28<00:00, 101.97it/s]
100%|█████████████████████████████████████████████████████████████████████████████| 3000/3000 [00:26<00:00, 114.56it/s]

train_x_split_mfcc shape: (9000, 13)





In [10]:
param_grid = {
    'n_neighbors': [3, 5, 10, 20],
    'weights': ['uniform', 'distance'],
}
knn = neighbors.KNeighborsClassifier()
grid_search = GridSearchCV(knn, param_grid, cv=5, scoring='accuracy', verbose=1, n_jobs=-1)
grid_search.fit(train_x_split_mfcc, train_y_split)

Performing Grid Search...
Fitting 5 folds for each of 8 candidates, totalling 40 fits


In [11]:
print(f"Best parameters: {grid_search.best_params_}")
best_knn = grid_search.best_estimator_

train_accuracy = best_knn.score(train_x_split_mfcc, train_y_split)
test_accuracy = best_knn.score(test_x_split_mfcc, test_y_split)
print(f"Training accuracy: {train_accuracy:.4f}")
print(f"Validation accuracy: {test_accuracy:.4f}")

Best parameters: {'n_neighbors': 5, 'weights': 'distance'}
Training accuracy: 1.0000
Validation accuracy: 0.7567


In [12]:
cv_scores = cross_val_score(best_knn, train_x_split_mfcc, train_y_split, cv=5)
print(f"Mean CV Accuracy: {cv_scores.mean():.4f}")

Mean CV Accuracy: 0.7342


In [13]:
final_test_mfcc = np.array([extract_mfcc_features(sample.astype(float)) for sample in tqdm(test_x)])
final_test_mfcc = preprocessing.StandardScaler().fit_transform(final_test_mfcc)

predictions = best_knn.predict(final_test_mfcc)

Preparing test data...


100%|██████████████████████████████████████████████████████████████████████████████| 3000/3000 [00:30<00:00, 97.41it/s]


In [14]:
submission = [[i, pred] for i, pred in enumerate(predictions)]
with open("submission.csv", mode="w", newline="") as file:
    writer = csv.writer(file)
    writer.writerow(["# ID", "Label"])
    writer.writerows(submission)

print("Submission file created.")

Submission file created.
