In [22]:
from python_speech_features import mfcc # Used to classify these speech features (library that supports the speech features) and the most common is Mel Frequency Cepstral Coefficients -> Used to extract the low frequency and time related features within the wav files that we can use to train the model
import scipy.io.wavfile as wav # Used to extract and read the .wav files
import numpy as np

from tempfile import TemporaryFile # Used to create temporary files and directories

import os # Portable way of using Operating System Dependent functionalities
import pickle # Pickle model implements binary protocols for serializing and de-serializing a python obejct structure
import random
import operator

import math

In [23]:
# function to get the distance between feature vectors and find neighbors (Calculates the distance between all the points in the dataset)
def getNeighbors(trainingSet, instance, k): #instance is the input that we get and k is the hyperparameter (number of nearest neighbours)
    distances = []
    for x in range(len(trainingSet)):
        dist = distance(trainingSet[x], instance, k) + distance(instance, trainingSet[x], k) # To get the distance between two neighbours
        distances.append((trainingSet[x][2], dist)) # Appending them to distances list for storing them all

    distances.sort(key=operator.itemgetter(1)) # Sorting the data (1st value will be nearest neighbour, 2nd one will be next nearest neighbour)
    neighbors = []
    for x in range(k): # We are only interested in the K nearest neighbour hence this loop is running upto k
        neighbors.append(distances[x][0]) # Appends the same to the new list neighbours[]
    
    return neighbors # Returning the list back

In [24]:
# Identify the class of the instance(Neighbours) (Those having the maximum frequency count)
def nearestClass(neighbors):
    classVote = {}

# We will be having k classes, one for each neighbour

    for x in range(len(neighbors)):
        response = neighbors[x]
        if response in classVote:
            classVote[response] += 1
        else:
            classVote[response] = 1
# The class with the maximum count will be selected and will move up in the sorted arraay

    sorter = sorted(classVote.items(), key = operator.itemgetter(1), reverse=True)

    return sorter[0][0] # Will be chosen as the identified class for this instance

In [25]:
# Function to evaluate the model(getting the accuracy i.e those which were correct)
def getAccuracy(testSet, prediction):
    correct = 0
    for x in range(len(testSet)):
        if testSet[x][-1] == predictions[x]:
            correct += 1
    
    return (1.0 * correct) / len(testSet)

In [26]:
# Directory that holds the wav files
directory = "D:/deep-learning-music-genre-classification/Data/genres_original/"

In [27]:
# Binary file where we will collect all the features extracted using mfcc (Mel Frequency Cepstral Coefficients)
f = open("my.dat", 'wb')

# The .wav files were read and was getting converted into a data file called as my.dat, which captures the features that we need
i = 0

for folder in os.listdir(directory):
    i += 1
    if i == 11:
        break
    for file in os.listdir(directory+folder):  # For each file within a given directory      
        try:
            # Going inside the folder and trying to identify the rate and the signature
            (rate, sig) = wav.read(directory+folder+"/"+file)
# Using the Mel Frequency Cepstral Coefficients function with the signature and rate to identify the specific features based on which we will be categorising the files
            mfcc_feat = mfcc(sig, rate, winlen=0.020, appendEnergy=False)
            covariance = np.cov(np.matrix.transpose(mfcc_feat))
            mean_matrix = mfcc_feat.mean(0)
            feature = (mean_matrix, covariance, i)
# For each file we are dumping the features into the my.dat datafile
            pickle.dump(feature, f)
        except Exception as e:
            print('Got an exception: ', e, ' in folder: ', folder, ' filename: ', file)        

f.close()
# We will have the collection of all the features of all the different files irrespective of the genre in the my.dat data file

In [28]:
# Split the dataset into training and testing sets respectively

dataset = []

# As the Data file is now created we will load the Data as this it the data which will be used to train the model

def loadDataset(filename, split, trSet, teSet): # Loading th dataset 
    with open('my.dat', 'rb') as f:
        while True:
            try:
                dataset.append(pickle.load(f))
            except EOFError:
                f.close()
                break

# Splitting it on a random basis into a training set and a testing set, Ratio is passed as a value to the dataset
# 66% Training Data and rest 34% Testing Data

    for x in range(len(dataset)):
        if random.random() < split:
            trSet.append(dataset[x])
        else:
            teSet.append(dataset[x])
trainingSet = []
testSet = []
loadDataset('my.dat', 0.66, trainingSet, testSet)


In [29]:
# Function to get the distance between neighbours
def distance(instance1 , instance2 , k ):
    distance =0 
    mm1 = instance1[0] 
    cm1 = instance1[1]
    mm2 = instance2[0]
    cm2 = instance2[1]
    distance = np.trace(np.dot(np.linalg.inv(cm2), cm1)) 
    distance+=(np.dot(np.dot((mm2-mm1).transpose() , np.linalg.inv(cm2)) , mm2-mm1 )) 
    distance+= np.log(np.linalg.det(cm2)) - np.log(np.linalg.det(cm1))
    distance-= k
    return distance

In [30]:
# Making predictions using KNN

leng = len(testSet)
predictions = []
for x in range(leng):
    predictions.append(nearestClass(getNeighbors(trainingSet, testSet[x], 5))) # Nearest class function is called which in turn calls the distance function, Nested functions play an important role here

accuracy1 = getAccuracy(testSet, predictions)
print(accuracy1)

0.7017543859649122


In [31]:
# For each of the different genre based on the folder name a new key value pair is created where the id of the genre will be linked to the genre name

from collections import defaultdict 
results = defaultdict(int) #For Storing the key value pair

i=1
for folder in os.listdir(directory):
    results[i] = folder
    i += 1

print(results)

defaultdict(<class 'int'>, {1: 'blues', 2: 'classical', 3: 'country', 4: 'disco', 5: 'hiphop', 6: 'jazz', 7: 'metal', 8: 'pop', 9: 'reggae', 10: 'rock'})


In [49]:
# testing the code with external samples
# URL: https://uweb.engr.arizona.edu/~429rns/audiofiles/audiofiles.html

test_dir = "D:/deep-learning-music-genre-classification/Test/"
test_file = test_dir + "test.wav"
# test_file = test_dir + "test2.wav"
# test_file = test_dir + "test4.wav"

In [50]:
# Extracting the feature form the test file and use the nearest class prediction function to predict the class
(rate, sig) = wav.read(test_file)
mfcc_feat = mfcc(sig, rate, winlen=0.020, appendEnergy=False)
covariance = np.cov(np.matrix.transpose(mfcc_feat))
mean_matrix = mfcc_feat.mean(0)
feature = (mean_matrix, covariance, i)



In [51]:
pred = nearestClass(getNeighbors(dataset, feature, 5))
print(results[pred])

classical
