In [9]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

def extract_features(file_path):
    # Load the audio file
    audio, sample_rate = librosa.load(file_path)
    # Extract MFCCs
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    # Average MFCCs across time
    mfccs_processed = np.mean(mfccs.T,axis=0)
    return mfccs_processed

def load_data_and_labels(audio_dir):
    features = []
    labels = []
    for label in os.listdir(audio_dir):
        # label is a directory
        path = os.path.join(audio_dir, label)
        if os.path.isdir(path):
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                # Extract features for each audio file
                extracted_features = extract_features(file_path)
                features.append(extracted_features)
                labels.append(label)
    return np.array(features), np.array(labels)

# Directory containing the dataset
audio_dir = '../data/recorded_audio'

# Load data and labels
features, labels = load_data_and_labels(audio_dir)

# Normalize features
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)

# Encode the labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)


  audio, sample_rate = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(normalized_features, encoded_labels, test_size=0.2, random_state=74)

# Initialize the kNN model
# You might want to experiment with different values of 'k'
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model
knn.fit(X_train, y_train)

# Predict on the test set
y_pred = knn.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.90625
Confusion Matrix:
 [[ 7  1  0]
 [ 2  9  0]
 [ 0  0 13]]
Classification Report:
               precision    recall  f1-score   support

           0       0.78      0.88      0.82         8
           1       0.90      0.82      0.86        11
           2       1.00      1.00      1.00        13

    accuracy                           0.91        32
   macro avg       0.89      0.90      0.89        32
weighted avg       0.91      0.91      0.91        32



In [13]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid
param_grid = {
    'n_neighbors': [3, 7, 11, 13, 15, 18],  # Different values for k
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
    'algorithm': ['ball_tree', 'kd_tree', 'brute', 'auto']
}

# Initialize the kNN model
knn = KNeighborsClassifier()

# Initialize the GridSearch with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5, verbose=1, scoring='accuracy')

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Best parameters found
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

# Evaluate the best model on the test set
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)
print("Test Set Accuracy:", accuracy_score(y_test, y_pred))


Fitting 5 folds for each of 96 candidates, totalling 480 fits
Best Parameters: {'algorithm': 'ball_tree', 'metric': 'manhattan', 'n_neighbors': 15, 'weights': 'distance'}
Best Cross-Validation Score: 0.8587692307692307
Test Set Accuracy: 0.875


## More features

In [29]:
import os
import librosa
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

def extract_features(file_path):
    # Load the audio file
    audio, sample_rate = librosa.load(file_path)
    
    # Extracting different types of features
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
    mel = librosa.feature.melspectrogram(y=audio, sr=sample_rate)
    contrast = librosa.feature.spectral_contrast(y=audio, sr=sample_rate)
    zero_crossing_rate = librosa.feature.zero_crossing_rate(audio)

    # Averaging across time
    mfccs_processed = np.mean(mfccs.T, axis=0)
    chroma_processed = np.mean(chroma.T, axis=0)
    mel_processed = np.mean(mel.T, axis=0)
    contrast_processed = np.mean(contrast.T, axis=0)
    zero_crossing_rate_processed = np.mean(zero_crossing_rate)

    # Concatenating all features
    return np.hstack([mfccs_processed, chroma_processed, mel_processed, contrast_processed, zero_crossing_rate_processed])


def load_data_and_labels(audio_dir):
    features = []
    labels = []
    for label in os.listdir(audio_dir):
        # label is a directory
        path = os.path.join(audio_dir, label)
        if os.path.isdir(path):
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                # Extract features for each audio file
                extracted_features = extract_features(file_path)
                features.append(extracted_features)
                labels.append(label)
    return np.array(features), np.array(labels)

# Directory containing the dataset
audio_dir = '../data/recorded_audio'

# Load data and labels
features, labels = load_data_and_labels(audio_dir)

# Normalize features
scaler = StandardScaler()
normalized_features = scaler.fit_transform(features)

# Encode the labels
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(labels)


  audio, sample_rate = librosa.load(file_path)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


In [23]:
normalized_features.shape

(159, 176)

In [30]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
from sklearn.model_selection import GridSearchCV

# Splitting the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(normalized_features, encoded_labels, test_size=0.2, random_state=74)

# Define the parameter grid
param_grid = {
    'n_neighbors': [3, 5, 7, 9, 11, 13, 15],  # Different values for k
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
    'algorithm': ['ball_tree', 'kd_tree', 'brute', 'auto']
}

# Initialize the kNN model
knn = KNeighborsClassifier()

# Initialize the GridSearch with cross-validation
grid_search = GridSearchCV(knn, param_grid, cv=5, verbose=1, scoring='accuracy')

# Fit the grid search to the data
grid_search.fit(X_train, y_train)

# Best parameters found
best_params = grid_search.best_params_
best_score = grid_search.best_score_

print("Best Parameters:", best_params)
print("Best Cross-Validation Score:", best_score)

# Evaluate the best model on the test set
best_knn = grid_search.best_estimator_
y_pred = best_knn.predict(X_test)
print("Test Set Accuracy:", accuracy_score(y_test, y_pred))

Fitting 5 folds for each of 112 candidates, totalling 560 fits
Best Parameters: {'algorithm': 'ball_tree', 'metric': 'euclidean', 'n_neighbors': 11, 'weights': 'uniform'}
Best Cross-Validation Score: 0.8812307692307693
Test Set Accuracy: 0.90625


In [17]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# X_train, X_test, y_train, y_test are already defined
original_model = KNeighborsClassifier(n_neighbors=5)
original_model.fit(X_train, y_train)
original_accuracy = accuracy_score(y_test, original_model.predict(X_test))

# Define the size of each feature group
feature_sizes = {
    'MFCCs': 40,       # 40 MFCC features
    'Chroma': 12,      # 12 Chroma features
    'Mel': 128,        # 128 Mel features
    'Contrast': 7,     # 7 Contrast features
    'Zero Crossing Rate': 1  # 1 feature for Zero Crossing Rate
}

feature_accuracies = []
start_index = 0

for feature_name, size in feature_sizes.items():
    # Create a new training and testing set excluding one feature group
    X_train_reduced = np.delete(X_train, slice(start_index, start_index + size), axis=1)
    X_test_reduced = np.delete(X_test, slice(start_index, start_index + size), axis=1)

    # Train and evaluate the model
    model = KNeighborsClassifier(n_neighbors=5)
    model.fit(X_train_reduced, y_train)
    accuracy = accuracy_score(y_test, model.predict(X_test_reduced))
    feature_accuracies.append((feature_name, accuracy))

    # Update the start index for the next feature group
    start_index += size

# Compare accuracies
print("Original Accuracy:", original_accuracy)
for feature, accuracy in feature_accuracies:
    print(f"Accuracy without {feature}: {accuracy}")


Original Accuracy: 0.90625
Accuracy without MFCCs: 0.8125
Accuracy without Chroma: 0.9375
Accuracy without Mel: 0.9375
Accuracy without Contrast: 0.90625
Accuracy without Zero Crossing Rate: 0.90625


## Saving the model

In [40]:
from joblib import dump

dump(best_knn, '../app/models/knn_model.joblib')
dump(scaler, '../app/models/knn_scaler.joblib')


['../app/models/knn_scaler.joblib']