In [1]:
# Importing necessary libraries
import os
import pickle
import numpy as np
import scipy.io.wavfile as wav
from python_speech_features import mfcc
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
# RandomizedSearchCV
# GridSearchCV

In [2]:
def preprocessAudioFiles(directory, outputFile):
    """
    Preprocess audio files to extract MFCC features and save to a file.
    This function iterates through all the files in the specified directory,
    extracts their MFCC features, and saves these features along with covariance
    and mean matrix for each file.
    """
    
    
    with open(outputFile, 'wb') as f:
        for i, folder in enumerate(filter(lambda f: f != ".DS_Store", os.listdir(directory)), start=1):
            if i == 11:
                break
            for file in filter(lambda f: f != ".DS_Store", os.listdir(os.path.join(directory, folder))):

                # Read the WAV file.
                rate, sig = wav.read(os.path.join(directory, folder, file))
                # Calculate the number of samples per 10-second chunk.
                samples_per_chunk = 10 * rate
                for j in range(0, len(sig), samples_per_chunk):
                    # Get the current chunk of audio.
                    chunk = sig[j:j + samples_per_chunk]
                    if len(chunk) == samples_per_chunk:
                            # Compute MFCC features from the audio chunk.
                            mfcc_feat = mfcc(chunk, rate, winlen=0.025, appendEnergy=False, nfft=1200)
                            # Skip covariance calculation if there are not enough frames.
                            # if mfcc_feat.shape[0] > mfcc_feat.shape[1]:
                            # Calculate the covariance matrix of the MFCC features.
                            covariance = np.cov(mfcc_feat.T)
                            inverse_covariance = np.linalg.inv(covariance)
                            # Calculate the mean of the MFCC features.
                            mean_vector = mfcc_feat.mean(axis=0)
                            # Store the features and the genre label.
                            feature = (mean_vector, covariance, inverse_covariance, i)
                            print(f'The folder: {i}')
                            pickle.dump(feature, f)


In [3]:
def load_dataset(filename):
    """
    Load and process a dataset from a pickle file.
    Each entry in the file is a tuple containing a mean vector, a covariance matrix,
    and a genre identifier. This function combines these elements into a format
    suitable for use with machine learning models.
    """

    mean_data, cov_data, cov_inv_data, genre_data= [], [], [], []

    # Load data from the file
    with open(filename, 'rb') as f:
        while True:
            try:
                mean, cov, cov_inv, genre = pickle.load(f)
            except EOFError:
                break
            cov_data.append(cov.flatten())
            mean_data.append(mean)
            genre_data.append(genre)
            cov_inv_data.append(cov_inv.flatten())
            
            
     
     
    # Convert the lists to NumPy arrays and combine covariance data, their inverse, and mean data.
    flattened_cov_data = np.array(cov_data)
    flattened_cov_inv_data = np.array(cov_inv_data)
    mean_data_array = np.array(mean_data)
    
    combined_data = np.column_stack([flattened_cov_data, flattened_cov_inv_data, mean_data_array])

    return combined_data, np.array(genre_data)

In [4]:
def compute_modified_mahalanobis_distance(X1_flat,X2_flat, n_features=13):
    """
    Compute a modified Mahalanobis distance between two instances.
    This is a custom distance function used for KNN classification.
    """

        
    # Reshape the first part of the instances into covariance matrices.
    cov1, cov2 = X1_flat[:n_features**2].reshape(-1, n_features), X2_flat[:n_features**2].reshape(-1, n_features)
        # Reshape the first part of the instances into the inverses of the covariance matrices.
    inv_cov1, inv_cov2 = X1_flat[n_features**2:2*n_features**2].reshape(-1, n_features), X2_flat[n_features**2:2*n_features**2].reshape(-1, n_features)
    # Extract the mean vectors from the instances.
    mean1, mean2 = X1_flat[-n_features:], X2_flat[-n_features:]




    # Compute the distance using the modified Mahalanobis formula
    trace_term = np.trace(np.dot(inv_cov2, cov1)) + np.trace(np.dot(inv_cov1, cov2))
    quadratic_term2 = np.dot((mean1 - mean2).T, np.dot(inv_cov1, mean1 - mean2))
    quadratic_term1 = np.dot((mean2 - mean1).T, np.dot(inv_cov2, mean2 - mean1))
    # log_det_term = np.log(np.linalg.det(cov2)) - np.log(np.linalg.det(cov1)) + np.log(np.linalg.det(cov1)) - np.log(np.linalg.det(cov2))
    
    dist = trace_term + quadratic_term1 + quadratic_term2
    # + log_det_term 

    return dist



In [5]:
## Use RandomizedSearchCV or GridSearchCV to do hyper parameter tuning

# def main():
#     # Directory containing the audio files and output file name
#     directory = "./Data/genres_original/"
#     output_file = "my_mfcc_Mahalanobis.dat"

#     # Preprocess the audio files and extract features.
#     preprocessAudioFiles(directory, output_file)
    
#     # Load the dataset from the output file
#     cov_mean_data, genre_data = load_dataset(output_file)
    

    
#     # Define parameters for grid search
#     # param_grid = {'n_neighbors': range(1, 7),
#     #               'metric': [compute_modified_mahalanobis_distance]
#     #               }
#     param_dist = {'n_neighbors': range(1, 7),
#                   }
    
#     # knn = KNeighborsClassifier()
#     knn = KNeighborsClassifier(metric=compute_modified_mahalanobis_distance)
    
#     # grid_search = GridSearchCV(estimator=knn, param_grid=param_grid, cv=5, scoring='accuracy')
#     random_search = RandomizedSearchCV(estimator=knn, param_distributions=param_dist, n_iter=100, cv=5, scoring='accuracy', random_state=43)

#     # Perform grid search to find the best KNN parameters
#     # grid_search.fit(cov_mean_data, genre_data)
#     random_search.fit(cov_mean_data, genre_data)
    
#     # Get the best parameters and model from the grid search
#     # best_k = grid_search.best_params_['n_neighbors']
#     # best_model = grid_search.best_estimator_
#     best_k = random_search.best_params_['n_neighbors']
#     best_model = random_search.best_estimator_

#     # Split the dataset into training and testing sets
#     train_X, test_X, train_y, test_y = train_test_split(cov_mean_data, genre_data, test_size=0.2, random_state=130)

#     # Train the best model on the training data
#     best_model.fit(train_X, train_y)

#     # Evaluate the model on the test data
#     accuracy = best_model.score(test_X, test_y)

#     # Print the results
#     print(f'Best K: {best_k}')
#     print(f'Test Accuracy: {accuracy}')


In [10]:
# Do not use RandomizedSearchCV or GridSearchCV

def main():
    # Directory containing the audio files and output file name
    directory = "./Data/genres_original/"
    output_file = "my_mfcc_Mahalanobis_chopped_inverse.dat"


    # preprocessAudioFiles(directory, output_file)
    # print("Finished preprocessing.")
    
    # Check if the output file already exists
    if not os.path.exists(output_file):
        # If the file does not exist, preprocess the audio files to extract features
        print("Preprocessing audio files and extracting features...")
        preprocessAudioFiles(directory, output_file)
        
    else:
        # If the file exists, skip preprocessing and use the existing file
        print("Found existing processed file, loading data...")

    
    # Load the dataset from the output file
    cov_data, genre_data = load_dataset(output_file)
    print("Finished loading data.")
    



    for n_neighbors in range(1, 11):
        # Split the dataset into training and testing sets
        train_X, test_X, train_y, test_y, = train_test_split(
            cov_data, genre_data, test_size=0.2
            # , random_state=130
            )

        
        knn = KNeighborsClassifier(
            n_neighbors=n_neighbors, 
            metric=compute_modified_mahalanobis_distance)        

        # Train the best model on the training data
        knn.fit(train_X, train_y)

        print("knn fit begins")
        # Evaluate the model on the test data
        accuracy = knn.score(test_X, test_y)
        print("finish evaluating the model on the test data")


        # Print the results
        print(f'K Value: {n_neighbors}')
        print(f'Test Accuracy: {accuracy}')
        print()

In [11]:
if __name__ == "__main__":
    main()

Found existing processed file, loading data...
Finished loading data.
knn fit begins
knn fit ends
finish evaluating the model on the test data
K Value: 1
Test Accuracy: 0.9482470784641068

knn fit begins
knn fit ends
finish evaluating the model on the test data
K Value: 2
Test Accuracy: 0.8564273789649416

knn fit begins
knn fit ends
finish evaluating the model on the test data
K Value: 3
Test Accuracy: 0.8464106844741235

knn fit begins
knn fit ends
finish evaluating the model on the test data
K Value: 4
Test Accuracy: 0.8330550918196995

knn fit begins
knn fit ends
finish evaluating the model on the test data
K Value: 5
Test Accuracy: 0.8080133555926544

knn fit begins
knn fit ends
finish evaluating the model on the test data
K Value: 6
Test Accuracy: 0.7813021702838063

knn fit begins
knn fit ends
finish evaluating the model on the test data
K Value: 7
Test Accuracy: 0.8030050083472454

knn fit begins
knn fit ends
finish evaluating the model on the test data
K Value: 8
Test Accuracy