In [1]:
import numpy as np
import pandas as pd
import scipy.io.wavfile as wav
from python_speech_features import mfcc
from tempfile import TemporaryFile
from pydub import AudioSegment
import os
import math
import pickle
import random
import operator

In [2]:
def distance(instance1, instance2, k):
    """Directed Gaussian divergence between two feature instances.

    Each instance is a sequence whose first element is a mean vector and whose
    second element is the matching covariance matrix. Any further elements
    (for example a class label) are ignored.

    The returned value is::

        trace(inv(cm2) @ cm1)
        + (mm2 - mm1)^T @ inv(cm2) @ (mm2 - mm1)
        + log|det(cm2)| - log|det(cm1)|
        - k

    Args:
        instance1: ``(mean, covariance, ...)`` of the first instance.
        instance2: ``(mean, covariance, ...)`` of the second instance.
        k: Constant subtracted from the result. ``getNeighbors`` passes its
            neighbour count here, so the offset is identical for every pair
            compared in one query and does not change their ordering.

    Returns:
        The divergence as a float. It is not symmetric in its arguments.
    """
    mm1, cm1 = instance1[0], instance1[1]
    mm2, cm2 = instance2[0], instance2[1]

    # Invert once and reuse for both the trace term and the quadratic term.
    inv_cm2 = np.linalg.inv(cm2)
    mean_diff = mm2 - mm1

    result = np.trace(np.dot(inv_cm2, cm1))
    result += np.dot(np.dot(mean_diff.transpose(), inv_cm2), mean_diff)
    # slogdet works in log space, so a determinant that would underflow to 0
    # (or overflow to inf) no longer turns this term into nan/inf.
    result += np.linalg.slogdet(cm2)[1] - np.linalg.slogdet(cm1)[1]
    result -= k
    return result

In [3]:
def getNeighbors(trainingset, instance, k):
    """Return the labels of the ``k`` training records closest to ``instance``.

    Closeness is the symmetrised divergence
    ``distance(t, instance, k) + distance(instance, t, k)`` for each training
    record ``t``.

    Args:
        trainingset: Sequence of ``(mean, covariance, label)`` records.
        instance: Record to classify. Only its first two elements (mean and
            covariance) are read, so a label-less 2-tuple is accepted.
        k: Number of neighbours to return; also forwarded to ``distance``.

    Returns:
        List of labels (the third element of each training record), nearest
        first. Holds fewer than ``k`` entries when ``trainingset`` has fewer
        than ``k`` records, and is empty when ``k`` is not positive.
    """
    distances = [
        (candidate[2],
         distance(candidate, instance, k) + distance(instance, candidate, k))
        for candidate in trainingset
    ]
    distances.sort(key=operator.itemgetter(1))
    # A bounded slice (instead of indexing range(k)) avoids IndexError on an
    # undersized training set; max() keeps a negative k from slicing off the end.
    return [label for label, _ in distances[:max(k, 0)]]

In [4]:
def nearestclass(neighbors):
    """Pick the majority label among ``neighbors``.

    Args:
        neighbors: Sequence of hashable class labels, one per neighbour.

    Returns:
        The label that occurs most often. When several labels tie, the one
        that appears first in ``neighbors`` wins (the sort is stable).

    Raises:
        IndexError: If ``neighbors`` is empty.
    """
    tally = {}
    for label in neighbors:
        tally[label] = tally.get(label, 0) + 1

    ranked = sorted(tally.items(), key=operator.itemgetter(1), reverse=True)
    winner, _ = ranked[0]
    return winner

In [5]:
def getAccuracy(testSet, prediction):
    """Fraction of test records whose true label equals the predicted label.

    Args:
        testSet: Sequence of records; the last element of each is its label.
        prediction: Sequence of predicted labels, aligned by index with
            ``testSet``.

    Returns:
        Number of matches divided by ``len(testSet)``, as a float.

    Raises:
        ZeroDivisionError: If ``testSet`` is empty.
    """
    total = len(testSet)
    hits = sum(1 for idx in range(total) if testSet[idx][-1] == prediction[idx])
    return 1.0 * hits / total

In [6]:
# Root folder of the GTZAN data: one sub-folder per genre, each holding audio clips.
# Change this to wherever genres_original lives on your machine.
directory = "C:\\Users\\omega\\Desktop\\MusicData\\genres_original"

# Features are stored as consecutive pickled (mean, covariance, label) tuples in
# mydataset.dat, resolved against the current working directory (normally the
# folder containing this notebook).
# The with-block closes the file even when something outside the try below raises.
with open("mydataset.dat", "wb") as f:
    i = 0
    for folder in os.listdir(directory):
        # Labels are 1-based positions in os.listdir(directory) order; only the
        # first 10 entries are processed. Any cell that maps labels back to
        # folder names must enumerate the directory in the same order.
        i += 1
        if i == 11:
            break
        genre_dir = os.path.join(directory, folder)
        if not os.path.isdir(genre_dir):
            # A stray file in the root still consumes its label number (keeping
            # the numbering aligned with a plain listing) but holds no clips.
            continue
        for file in os.listdir(genre_dir):
            try:
                (rate, sig) = wav.read(os.path.join(genre_dir, file))
                mfcc_feat = mfcc(sig, rate, winlen=0.020, appendEnergy=False)
                # np.cov treats rows as variables, hence the transpose of the
                # per-frame feature matrix.
                covariance = np.cov(mfcc_feat.T)
                mean_matrix = mfcc_feat.mean(0)
                # `feature` stays defined after the loop; a later cell reads it.
                feature = (mean_matrix, covariance, i)
                pickle.dump(feature, f)
            except Exception as e:
                # Best effort: report the clip that failed and keep going.
                print("Got an exception: ", e, 'in folder: ', folder, ' filename: ', file)

Got an exception:  File format b'\xcb\x15\x1e\x16' not understood. Only 'RIFF' and 'RIFX' supported. in folder:  jazz  filename:  jazz.00054.wav


In [7]:
# Every (mean, covariance, label) record read from disk. Later cells use this
# module-level list as the full reference set.
dataset = []


def loadDataset(filename, split, trset, teset):
    """Load pickled feature records and split them randomly into train/test.

    Records are read from ``filename`` until end of file and appended to the
    module-level ``dataset`` list. Every record then in ``dataset`` is assigned
    to ``trset`` with probability ``split`` and to ``teset`` otherwise.

    Args:
        filename: Path of a file holding consecutive pickled records.
        split: Probability in [0, 1] that a record goes to the training set.
        trset: List that receives the training records (mutated in place).
        teset: List that receives the test records (mutated in place).

    Returns:
        None. Results are delivered through ``dataset``, ``trset`` and ``teset``.

    Note:
        ``pickle.load`` can execute arbitrary code; only load files that this
        notebook produced. The split uses the global ``random`` state, so it
        differs between runs unless ``random.seed`` is called beforehand.
    """
    # Read the file that was asked for; this used to ignore `filename`.
    with open(filename, 'rb') as f:
        while True:
            try:
                dataset.append(pickle.load(f))
            except EOFError:
                # End of the pickle stream; the with-block closes the file.
                break
    for record in dataset:
        if random.random() < split:
            trset.append(record)
        else:
            teset.append(record)


trainingSet = []
testSet = []
# Must name the file written by the feature-extraction cell.
loadDataset('mydataset.dat', 0.68, trainingSet, testSet)

In [8]:
# Classify each held-out record by majority vote among its 5 nearest training
# records, then report the fraction classified correctly.
predictions = [
    nearestclass(getNeighbors(trainingSet, sample, 5))
    for sample in testSet
]
length = len(testSet)

accuracy1 = getAccuracy(testSet, predictions)
print(accuracy1)

0.7147239263803681


In [10]:
from collections import defaultdict  # unused by this cell now; left in place in case other cells rely on the name

directory = "C:\\Users\\omega\\Desktop\\MusicData\\genres_original"

# Map numeric label -> genre folder name, numbering entries from 1 in
# os.listdir order (the same numbering the feature-extraction cell uses).
# A plain dict makes an unknown label raise KeyError instead of silently
# yielding 0, which is what defaultdict(int) did.
results = dict(enumerate(os.listdir(directory), start=1))

# Smoke test: `feature` is the variable left behind by the feature-extraction
# loop (the last clip it processed), so that cell must have run in this kernel.
# NOTE(review): that record is presumably also in `dataset`, so this checks the
# plumbing rather than measuring accuracy.
pred = nearestclass(getNeighbors(dataset, feature, 5))
print(results[pred])

rock


In [20]:
# Pick the MP3 to classify; the commented-out lines are other sample tracks.
# input_file = "C:\\Users\\omega\\Downloads\\Classical.mp3"
# input_file = "C:\\Users\\omega\\Downloads\\Espresso.mp3"
# input_file = "C:\\Users\\omega\\Downloads\\Pyramids.mp3"
input_file = "C:\\Users\\omega\\Downloads\\EvilJordan.mp3"
# input_file = "C:\\Users\\omega\\Downloads\\Straight Outta Compton.mp3"
# input_file = "C:\\Users\\omega\\Downloads\\WhiskeyGlasses.mp3"
output_file = "results.wav"

# Convert to WAV so the classifier can read it with scipy.io.wavfile.
sound = AudioSegment.from_mp3(input_file)
# export() hands back the file object it wrote to, still open; close it so the
# handle is released before the WAV file is read again.
sound.export(output_file, format="wav").close()

<_io.BufferedRandom name='results.wav'>

In [21]:
def classify_audio(file_path, k=5):
    """Predict the genre of a WAV file with the k-nearest-neighbour model.

    The file is reduced to the same (mean MFCC vector, MFCC covariance) pair
    used for the stored records, compared against every record in the
    module-level ``dataset``, and the winning label is translated through the
    module-level ``results`` mapping.

    Args:
        file_path: Path of a WAV file readable by ``scipy.io.wavfile.read``.
        k: Number of nearest neighbours that vote on the label.

    Returns:
        The genre name from ``results``, or ``None`` when anything fails (the
        error is printed rather than raised).
    """
    try:
        rate, sig = wav.read(file_path)

        # wav.read yields a (samples, channels) array for multi-channel audio,
        # while mfcc expects a single channel; average the channels to mono.
        if sig.ndim > 1:
            sig = sig.mean(axis=1)

        winlen = 0.020
        # FFT size: the smallest power of two that covers one analysis window,
        # never below 512. A 44.1 kHz file (882-sample window) gets 1024, the
        # value that used to be hard-coded; a 22.05 kHz file gets 512.
        # NOTE(review): the dataset-building cell leaves nfft at the library
        # default - confirm it resolves to the same size for your clips.
        frame_len = int(math.ceil(winlen * rate))
        nfft = max(512, 1 << (frame_len - 1).bit_length())

        mfcc_feat = mfcc(sig, rate, winlen=winlen, nfft=nfft, appendEnergy=False)
        covariance = np.cov(mfcc_feat.T)
        mean_matrix = mfcc_feat.mean(0)
        # No label element: the neighbour search only reads mean and covariance.
        new_feature = (mean_matrix, covariance)

        neighbors = getNeighbors(dataset, new_feature, k)
        pred_label = nearestclass(neighbors)

        return results[pred_label]
    except Exception as e:
        # Best effort for interactive use: report the problem and signal failure.
        print("Error: ", e)
        return None

# new_song = "C:\\Users\\omega\\Desktop\\MusicData\\genres_original\\country\\country.00001.wav"
new_song = output_file
predicted_genre = classify_audio(new_song)
print("predicted Genre: ", predicted_genre)

predicted Genre:  pop
