In [53]:
import os
from python_speech_features import mfcc
import scipy.io.wavfile as wav
import numpy as np
import pickle

# Build features.dat: one pickled (mean, covariance, label) record per audio
# file found under Dataset/<genre_folder>/.
dataset_dir = "Dataset/"
# Only the first max_genres entries returned by os.listdir are processed.
max_genres = 10

# The context manager closes features.dat even when reading a file or
# extracting its features raises part-way through the dataset.
with open("features.dat","wb") as f:
  # A folder's label is its 1-based position in os.listdir order.
  for i, genre_folder in enumerate(os.listdir(dataset_dir)[:max_genres], start=1):
    genre_path = os.path.join(dataset_dir, genre_folder)
    for audio_file in os.listdir(genre_path):
      (sampl_rate,sig) = wav.read(os.path.join(genre_path, audio_file))
      mfcc_features = mfcc(sig,sampl_rate,winlen=0.020,appendEnergy=False)
      # np.cov treats each row as a variable, so the feature matrix is
      # transposed first (assumes mfcc returns one row per frame -- TODO confirm).
      covariance_matrix = np.cov(mfcc_features.T)
      mean = mfcc_features.mean(0)
      feature = (mean,covariance_matrix,i)
      pickle.dump(feature,f)

Splitting the dataset into training and testing sets

In [54]:
import random

# Fixed seed so the train/test split (and any accuracy computed from it) is
# identical on every Restart & Run All.
split_seed = 42
train_ratio = 0.7

dataset = []
# NOTE: pickle.load can execute arbitrary code; only read a features.dat that
# was produced by this notebook itself.
with open("features.dat","rb") as f:
  # The file holds pickled records written one after another, so keep
  # loading until the stream is exhausted. The with-block closes the file.
  while True:
    try:
      dataset.append(pickle.load(f))
    except EOFError:
      break

total_samples = len(dataset)
training_samples = int(train_ratio*total_samples)
# Shuffle in place with a dedicated generator so the global random state is
# left untouched.
random.Random(split_seed).shuffle(dataset)
train_set = dataset[:training_samples]
test_set = dataset[training_samples:]

Defining a function to calculate distance

In [55]:
def distance(x1,x2,k):
  """Return a directed divergence from sample x1 to sample x2.

  Each sample is a sequence whose element 0 is a mean vector ``m`` and whose
  element 1 is a square covariance matrix ``C``. The value computed is

      tr(C2^-1 C1) + (m2 - m1)^T C2^-1 (m2 - m1) + ln|C2| - ln|C1| - k

  which has the form of twice the KL divergence between two Gaussians when
  ``k`` equals the feature dimension. The result is not symmetric in x1/x2.

  Args:
    x1: first sample; only x1[0] (mean) and x1[1] (covariance) are read.
    x2: second sample; its covariance is the one that gets inverted.
    k: constant subtracted from the result. It is the same for every pair,
      so it shifts all distances equally and does not change their ordering.

  Returns:
    The divergence as a floating-point scalar.

  Raises:
    numpy.linalg.LinAlgError: if x2's covariance matrix is singular.
  """
  m1, cm1 = x1[0], x1[1]
  m2, cm2 = x2[0], x2[1]

  # Invert once and reuse for both the trace and the quadratic term.
  inv_cm2 = np.linalg.inv(cm2)
  mean_diff = m2 - m1

  # slogdet yields log|det| directly, so very large or very small
  # determinants do not overflow/underflow to inf/0 before the log is taken.
  # The sign is discarded; covariance matrices are expected to have a
  # non-negative determinant.
  _, logdet_cm1 = np.linalg.slogdet(cm1)
  _, logdet_cm2 = np.linalg.slogdet(cm2)

  total = np.trace(np.dot(inv_cm2, cm1))
  total += np.dot(np.dot(mean_diff.transpose(), inv_cm2), mean_diff)
  total += logdet_cm2 - logdet_cm1
  total -= k
  return total

Getting k nearest neighbours

In [56]:
import operator

def k_nearest_neighbours(train_set,x,k):
  """Return the labels of the k training samples closest to x.

  Closeness is the sum of ``distance`` evaluated in both directions
  (sample -> x and x -> sample), which makes the score symmetric. ``k`` is
  also forwarded to ``distance`` as its constant offset.

  Args:
    train_set: sequence of samples; element 2 of each sample is its label.
    x: the query sample, in the same layout as the training samples.
    k: number of neighbours requested.

  Returns:
    A list of labels ordered from nearest to farthest. It holds fewer than
    k labels when train_set has fewer than k samples, and is empty when k
    is not positive.
  """
  scored = [
    (sample[2], distance(sample,x,k) + distance(x,sample,k))
    for sample in train_set
  ]
  # list.sort is stable, so equally distant samples keep train_set order.
  scored.sort(key=operator.itemgetter(1))
  # Slicing (rather than indexing 0..k-1) avoids an IndexError when k
  # exceeds the number of training samples; max() keeps k <= 0 returning [].
  return [label for label, _ in scored[:max(k,0)]]

Performing class vote

In [57]:
def find_class(neighbours):
  """Return the most frequent label in ``neighbours``.

  When several labels share the highest count, the one that appears first
  in ``neighbours`` wins. An empty ``neighbours`` raises IndexError.
  """
  tally = {}
  for label in neighbours:
    tally[label] = tally.get(label, 0) + 1
  # sorted() is stable even with reverse=True, so tied labels stay in
  # first-seen order and the earliest one ends up at the front.
  ranked = sorted(tally.items(), key=lambda pair: pair[1], reverse=True)
  winner = ranked[0]
  return winner[0]

Making predictions

In [58]:
# Classify every held-out sample by majority vote among its 5 nearest
# training samples; predictions[j] corresponds to test_set[j].
predictions = [
  find_class(k_nearest_neighbours(train_set,sample,5))
  for sample in test_set
]

Getting accuracy

In [59]:
# The true label is the last element of each test sample; count the
# positions where it matches the prediction at the same index.
correct = sum(
  1
  for idx in range(len(test_set))
  if test_set[idx][-1] == predictions[idx]
)
# Fraction of test samples classified correctly.
accuracy = correct / len(test_set)
print(f"The accuracy is: {accuracy}")

The accuracy is: 0.6766666666666666


Testing the model with audio inputs

In [71]:
from collections import defaultdict

# Map each 1-based folder position to its genre name so a predicted label
# can be printed as text. This relies on os.listdir returning the folders in
# the same order as when the labels were assigned. Unknown labels still fall
# back to the defaultdict value 0.
res = defaultdict(int)
path="Dataset"
res.update(enumerate(os.listdir(path), start=1))

# Build the paths with os.path.join instead of backslash literals: sequences
# such as "\h" or "\d" are invalid string escapes (SyntaxWarning on Python
# 3.12+), and backslash separators only resolve on Windows.
test_inputs = [
  os.path.join(path, "hiphop", "hiphop.00045.wav"),
  os.path.join(path, "country", "country.00096.wav"),
  os.path.join(path, "jazz", "jazz.00058.wav"),
  os.path.join(path, "classical", "classical.00014.wav"),
  os.path.join(path, "disco", "disco.00066.wav"),
]

# NOTE(review): these files live in the same Dataset folder the features were
# extracted from and the neighbour pool is the full `dataset`, so each query
# presumably matches its own stored record -- confirm before treating this as
# an independent test.
for audio_input in test_inputs:
  (sr,sig) = wav.read(audio_input)
  mfcc_feat = mfcc(sig,sr,winlen=0.020,appendEnergy=False)
  cov_mat = np.cov(mfcc_feat.T)
  mean_mat = mfcc_feat.mean(0)
  # The label slot is a placeholder; the neighbour search never reads the
  # query sample's label.
  feat = (mean_mat,cov_mat,0)
  pred=find_class(k_nearest_neighbours(dataset,feat,5))
  print(f"Genre of this audio sample is {res[pred]}")

Genre of this audio sample is hiphop
Genre of this audio sample is country
Genre of this audio sample is jazz
Genre of this audio sample is classical
Genre of this audio sample is disco
