In [37]:
# Importing necessary libraries
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import numpy as np
import os
import pickle
import operator
from sklearn.model_selection import train_test_split

In [38]:
# Function to calculate a modified Mahalanobis distance between two instances
def distance(instance1, instance2, k):
    """Return a covariance-aware dissimilarity from ``instance1`` to ``instance2``.

    Each instance is indexed as ``(mean_vector, covariance_matrix, ...)``;
    only elements 0 and 1 are read. The score is::

        trace(inv(cm2) @ cm1)
        + (mm2 - mm1)^T @ inv(cm2) @ (mm2 - mm1)
        + log(det(cm2)) - log(det(cm1))
        - k

    The measure is not symmetric in its two instances. It has the same form
    as twice the Kullback-Leibler divergence between two Gaussians when ``k``
    equals the feature dimension.

    Args:
        instance1: Sequence whose first two items are a mean vector and a
            square covariance matrix (NumPy arrays).
        instance2: Same layout as ``instance1``; its covariance is inverted.
        k: Constant subtracted from the score. ``getNeighbors`` in this file
            passes its neighbour count here; being a constant offset it does
            not change the ranking of candidates.

    Returns:
        The scalar dissimilarity score.

    Raises:
        numpy.linalg.LinAlgError: If ``cm2`` is singular and cannot be inverted.
    """
    mm1, cm1 = instance1[0], instance1[1]
    mm2, cm2 = instance2[0], instance2[1]

    # Invert once and reuse: the inverse appears in both the trace term and
    # the quadratic (Mahalanobis-style) term.
    inv_cm2 = np.linalg.inv(cm2)
    mean_diff = mm2 - mm1

    dist = np.trace(np.dot(inv_cm2, cm1))
    dist += np.dot(np.dot(mean_diff.T, inv_cm2), mean_diff)
    # slogdet returns (sign, log|det|). Taking the log-determinant directly
    # avoids det() underflowing to 0 (or overflowing to inf) before the log,
    # which would otherwise produce -inf/inf/nan scores.
    dist += np.linalg.slogdet(cm2)[1] - np.linalg.slogdet(cm1)[1]
    dist -= k
    return dist




In [39]:
# Function to find the k nearest neighbors of an instance
def getNeighbors(trainingSet, instance, k):
    """Return the class labels of the ``k`` training instances nearest to ``instance``.

    Nearness is the symmetrised score
    ``distance(train, instance, k) + distance(instance, train, k)``.

    Args:
        trainingSet: Iterable of instances; element 2 of each is read as its
            class label and the whole instance is passed to ``distance``.
        instance: The query instance, in the layout ``distance`` expects.
        k: Number of neighbours to return. It is also forwarded to
            ``distance`` as that function's constant offset.

    Returns:
        A list of up to ``k`` labels ordered from nearest to farthest. If the
        training set holds fewer than ``k`` instances, all of their labels are
        returned instead of raising ``IndexError``.
    """
    scored = [
        (candidate[2], distance(candidate, instance, k) + distance(instance, candidate, k))
        for candidate in trainingSet
    ]
    scored.sort(key=operator.itemgetter(1))
    # Slice rather than index range(k) so an oversized k is tolerated;
    # max(k, 0) keeps a non-positive k returning an empty list.
    return [label for label, _ in scored[:max(k, 0)]]

In [40]:
# Function to determine the most frequent class in the neighbors
def nearestClass(neighbors):
    """Return the label that occurs most often in ``neighbors``.

    When several labels share the highest count, the one that appeared first
    in ``neighbors`` is returned (the sort is stable over first-seen order).

    Args:
        neighbors: Iterable of hashable class labels.

    Returns:
        The majority label.

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    tally = {}
    for label in neighbors:
        if label in tally:
            tally[label] += 1
        else:
            tally[label] = 1

    # Highest vote count first; equal counts keep their insertion order.
    ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
    winner = ranked[0]
    return winner[0]


In [41]:
# Preprocessing to extract features from audio files and save to a file
def preprocessAudioFiles(directory, outputFile, maxClasses=10):
    """Extract MFCC summary features from class sub-folders and pickle them.

    Every sub-folder of ``directory`` is treated as one class. For each audio
    file in it, the MFCC matrix is reduced to its mean over axis 0 and the
    covariance of its columns, and the tuple ``(mean, covariance, label)`` is
    appended to ``outputFile`` as its own pickle record.

    Args:
        directory: Path containing one sub-folder per class.
        outputFile: Path of the binary file to (over)write.
        maxClasses: Maximum number of class folders to process; defaults to
            10, the limit that was previously hard-coded.

    Notes:
        * Folder and file names are processed in sorted order. ``os.listdir``
          returns entries in arbitrary order, so sorting makes the 1-based
          label assigned to each folder reproducible across runs and machines.
        * Entries named ``.DS_Store`` are skipped at both levels.
        * The output file is closed even if reading or feature extraction
          raises part-way through.
    """
    def _entries(path):
        # Sorted listing of `path` without macOS ".DS_Store" metadata entries.
        return sorted(name for name in os.listdir(path) if name != ".DS_Store")

    with open(outputFile, 'wb') as out:
        for label, folder in enumerate(_entries(directory)[:maxClasses], start=1):
            folder_path = os.path.join(directory, folder)
            for file_name in _entries(folder_path):
                rate, sig = wav.read(os.path.join(folder_path, file_name))
                # Presumably one row per analysis frame and one column per
                # cepstral coefficient; confirm against python_speech_features.
                mfcc_feat = mfcc(sig, rate, winlen=0.020, appendEnergy=False, nfft=1024)
                covariance = np.cov(mfcc_feat.T)
                mean_matrix = mfcc_feat.mean(axis=0)
                pickle.dump((mean_matrix, covariance, label), out)

In [42]:
# Function to load dataset and split into training and test sets
def loadDataset(filename, split):
    """Read every pickled record from ``filename`` and split into train/test.

    Args:
        filename: Path to a file holding consecutive pickle records.
        split: Forwarded to ``train_test_split`` as ``test_size``.

    Returns:
        The ``train_test_split`` result for the loaded records: the training
        part followed by the test part. The split is shuffled with a fixed
        ``random_state`` of 321.

    Note:
        ``pickle.load`` can execute arbitrary code; only use this on files
        from a trusted source.
    """
    records = []
    with open(filename, 'rb') as handle:
        try:
            # Records were dumped back to back, so keep loading until the
            # unpickler reports end of file.
            while True:
                records.append(pickle.load(handle))
        except EOFError:
            pass  # normal termination: no more records
    return train_test_split(records, test_size=split, shuffle=True, random_state=321)

In [43]:
# Function to calculate the accuracy of predictions
def getAccuracy(testSet, predictions):
    """Return the fraction of test instances whose label was predicted correctly.

    Args:
        testSet: Sequence of instances; the last element of each is its true
            label.
        predictions: Sequence of predicted labels, indexed in step with
            ``testSet``.

    Returns:
        Number of matches divided by ``len(testSet)``.

    Raises:
        ZeroDivisionError: If ``testSet`` is empty.
        IndexError: If ``predictions`` is shorter than ``testSet``.
    """
    hits = 0
    for idx, sample in enumerate(testSet):
        if sample[-1] == predictions[idx]:
            hits += 1
    return hits / len(testSet)

In [44]:
# Main execution block
if __name__ == "__main__":
    # Step 1: extract features from the raw genre folders into "my.dat".
    # This runs on every execution and rewrites the file.
    directory = "../datasets/raw_data/GTZAN_Dataset/genres_original/"
    preprocessAudioFiles(directory, "my.dat")

    # Step 2: reload the features and hold out 15% of them for testing.
    trainingSet, testSet = loadDataset("my.dat", 0.15)

    # Step 3: classify each held-out instance by majority vote among its
    # 5 nearest training neighbours.
    predictions = []
    for testInstance in testSet:
        predictions.append(nearestClass(getNeighbors(trainingSet, testInstance, 5)))

    # Step 4: report the share of correct predictions.
    accuracy = getAccuracy(testSet, predictions)
    print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.67
