In [1]:
#Importing all the packages we will need
#Note: Keras and TensorFlow were installed separately
#through the Anaconda command line and then imported here
import librosa
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
import csv

#Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

#Keras
import keras

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
# Column names for the feature CSV that the extraction cell writes:
# the file name, nine summary features, twenty MFCC columns, then the label.
header = [
    'Filename',
    'Chromagram',
    'RootMeanSquare',
    'LowEnergyBrightness',
    'SpectralCentroid',
    'Flatness',
    'Bandwidth',
    'InHarmonicity',
    'Rolloff',
    'ZeroCrossingRate',
]
for i in range(1, 21):
    header.append(f'mfcc{i}')
header.append('label')

In [None]:
# Extract one row of summary features per audio file and write them to
# 'data_test_two.csv'.
# Each feature matrix returned by librosa is collapsed to a single mean over
# all of its values; the MFCC matrix is the exception and contributes one mean
# per row, which fills the mfcc1..mfcc20 columns declared in `header`
# (librosa.feature.mfcc is called with its default number of coefficients).
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()

# The CSV is opened once for the whole run (header first, then one row per
# song) instead of being re-opened in append mode for every file.
with open('data_test_two.csv', 'w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(header)
    for g in genres:
        # sorted() makes the row order independent of the file system's
        # directory listing order.
        for filename in sorted(os.listdir(f'./genres/{g}')):
            songname = f'./genres/{g}/{filename}'
            y, sr = librosa.load(songname, mono=True, duration=30)
            # All features below are computed on the harmonic component only.
            y = librosa.effects.harmonic(y)
            chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)  # chromagram
            rms = librosa.feature.rms(y=y)  # root-mean square
            spec_cont = librosa.feature.spectral_contrast(y=y, sr=sr)  # written to the LowEnergyBrightness column
            spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)  # spectral centroid
            spec_flat = librosa.feature.spectral_flatness(y=y)  # flatness
            spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)  # bandwidth
            tone = librosa.feature.tonnetz(y=y, sr=sr)  # written to the InHarmonicity column
            rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)  # rolloff
            zcr = librosa.feature.zero_crossing_rate(y)  # zero-crossing rate
            mfcc = librosa.feature.mfcc(y=y, sr=sr)

            # Order must match the columns in `header`.
            summary_features = (
                chroma_stft, rms, spec_cont, spec_cent, spec_flat,
                spec_bw, tone, rolloff, zcr,
            )
            # Build the row as a list so a file name containing whitespace
            # stays in a single CSV field (the old join-then-split approach
            # would have broken it into several columns). Values keep the
            # same f-string formatting as before.
            row = [filename]
            row.extend(f'{np.mean(feature)}' for feature in summary_features)
            row.extend(f'{np.mean(e)}' for e in mfcc)
            row.append(g)
            writer.writerow(row)

  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):
  if not j.flags.writeable or j.dtype not in (np.int32, np.int64):


In [None]:
#Reading the dataset written by the feature-extraction cell
data = pd.read_csv('data_test_two.csv')
# Preview only the first rows; rendering the whole frame floods the notebook.
data.head()

In [23]:
# Dropping unnecessary columns: the file name is not a feature.
# The CSV header spells this column 'Filename', so a hard-coded 'filename'
# raises KeyError. Matching case-insensitively handles either spelling and
# keeps the cell safe to re-run after the column has already been removed.
filename_columns = [col for col in data.columns if str(col).lower() == 'filename']
data = data.drop(columns=filename_columns)
data.head()

Unnamed: 0,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,mfcc4,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,0.34999,0.130226,1777.622262,0.34999,3793.610394,0.083018,-114.124756,122.296272,-19.881247,43.043388,...,8.950809,-3.716766,5.700787,-5.02247,0.535928,-1.405333,-0.77095,-1.867216,0.726691,blues
1,0.341012,0.095935,1524.012083,0.341012,3534.295295,0.055981,-207.849747,124.415886,8.530385,36.257298,...,5.440571,-2.250589,4.166593,-5.910428,0.796328,-0.540235,0.076023,-0.027564,0.238245,blues
2,0.36363,0.175585,1548.210862,0.36363,3032.714995,0.076282,-91.328018,141.263138,-29.895861,32.443363,...,5.959805,-8.979352,-1.115788,-9.090822,2.247295,-7.440529,-2.182649,-2.988307,-2.738291,blues
3,0.404791,0.141238,1066.077607,0.404791,2177.695165,0.033288,-199.578796,150.304657,5.445837,27.064386,...,6.103318,-2.462841,-1.126237,-2.793778,0.68404,-3.204532,0.503686,-0.461397,-3.583985,blues
4,0.308612,0.091554,1832.85003,0.308612,3577.12837,0.101515,-160.4254,126.420204,-35.821861,22.358387,...,-2.798175,-6.910803,-7.622658,-9.079248,-4.623607,-5.325811,-1.073966,-4.23572,-11.893796,blues


In [25]:
# Encode the genre names (last column of `data`) as integer class ids.
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
encoder.fit(genre_list)
y = encoder.transform(genre_list)
print(y)
# `encoder` now holds the mapping between genres and integers;
# every integer in `y` stands for one specific genre.

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 

In [27]:
# Standardize the feature columns (every column except the trailing label).
# NOTE(review): the scaler is fitted on all rows before the train/test split
# in a later cell, so the held-out rows contribute to the scaling statistics.
feature_matrix = data.iloc[:, :-1].to_numpy(dtype=float)
scaler = StandardScaler()
X = scaler.fit_transform(feature_matrix)

In [37]:
# Hold out 20% of the rows for testing.
# random_state fixes the shuffle so the same rows land in each set on every
# run; stratify=y keeps the genre proportions equal in both sets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [42]:
# Number of feature rows in the training split
len(X_train)

800

In [41]:
# Number of labels in the training split (should equal len(X_train))
len(y_train)

800

In [40]:
# Number of feature rows in the test split
len(X_test)

200

In [44]:
# Number of labels in the test split (should equal len(X_test))
len(y_test)

200

In [51]:
# Inspect one row of the scaled training features
X_train[10]

array([-0.1526722 ,  0.88520053,  0.03613114, -0.1526722 ,  0.38840883,
       -0.63549088,  0.49136129, -0.35698062,  0.7236401 ,  0.44013744,
       -0.5240214 ,  0.4511319 ,  0.54904569,  0.54726858, -0.34994908,
       -0.05625836,  0.35863136,  0.27861726,  0.13777474,  1.33501346,
        0.02560469,  1.34641347, -0.03641568,  0.7037113 ,  0.22471961,
        1.25141414])

In [53]:
# Build the classifier: a Keras Sequential stack of fully connected layers.

from keras import models
from keras import layers

# Layer 1 (256 units) also declares the input width: one entry per feature column.
# Layers 2 and 3 are hidden layers with 128 and 64 units.
# Layer 4 is the output layer: 10 softmax units, one per genre.
model = models.Sequential([
    layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)),
    layers.Dense(128, activation='relu'),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

In [55]:
# Adam optimizer with sparse categorical cross-entropy loss: the targets in
# `y` are integer genre ids from the LabelEncoder cell. Accuracy is the
# reported metric.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [57]:
# Train the model on the training features and labels for 20 epochs with
# batches of 128 rows; the value returned by fit is kept in `history`.
history = model.fit(X_train, y_train, epochs=20, batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [58]:
# Score the trained model on the held-out test split.
test_loss, test_acc = model.evaluate(X_test, y_test)
print('test_acc: ',test_acc)
# test_acc measures how often the model predicts the right genre from the
# extracted features. The run recorded below reached about 73.5% (the exact
# figure varies from run to run), which leaves room to improve the model.

test_acc:  0.7350000143051147


In [59]:
# predict returns one row per test sample, holding one softmax score per genre.
predictions = model.predict(X_test)
# The index of the highest score in the first row is the predicted genre id.
predictions[0].argmax()

9