# Music Genre Classification
### Dataset: GITZAN genre collection 


## Importing all libraries

In [12]:
!pipenv install tensorflow

[39m[1mInstalling [32m[1mtensorflow[39m[22m…[39m[22m
[K[39m[1mAdding[39m[22m [32m[1mtensorflow[39m[22m [39m[1mto Pipfile's[39m[22m [31m[1m[packages][39m[22m[39m[1m…[39m[22m
[K[?25h✔ Installation Succeeded[0m 
[31m[1mPipfile.lock (6d4002) out of date, updating to (37cf4c)…[39m[22m
[39m[22mLocking[39m[22m [31m[22m[dev-packages][39m[22m [39m[22mdependencies…[39m[22m
[39m[22mLocking[39m[22m [31m[22m[packages][39m[22m [39m[22mdependencies…[39m[22m
[K[?25h[32m[22m✔ Success![39m[22m[0m 
[39m[1mUpdated Pipfile.lock (6d4002)![39m[22m
[39m[1mInstalling dependencies from Pipfile.lock (6d4002)…[39m[22m
  🐍   [32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m[32m[1m▉[39m[22m

In [13]:
import librosa as lb
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import os
from PIL import Image
import pathlib
import csv

# Preprocesing tools
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Keras
import keras

import warnings
warnings.filterwarnings('ignore')

Using TensorFlow backend.


## Extracting spectrogram for every audio

In [18]:
cmap = plt.get_cmap('inferno')
plt.figure(figsize= (10,10))
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok = True)
    for filename in os.listdir(f'./MIR/genres/{g}'):
        songname = f'./MIR/genres/{g}/{filename}'
        y, sr = lb.load(songname, mono = True, duration = 5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap = 128, cmap = cmap, sides = 'default', mode = 'default', scale = 'dB');
        plt.axis('off')
        plt.savefig(f'img_data/{g}/{filename[:3].replace(".", "")}.png')
        plt.clf()

<Figure size 720x720 with 0 Axes>

## Extracting features from these spectrograms

In [20]:
header = 'filename shroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1,21):
    header+= f'mfcc{i}'
header += 'label'
header = header.split()

## Write data into csv
Here we are writing all the features as a part of the csv.

In [23]:
file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
for g in genres:
    for filename in os.listdir(f'./MIR/genres/{g}'):
        songname = f'./MIR/genres/{g}/{filename}'
        y, sr = lb.load(songname, mono = True, duration = 30)
        chroma_stft = lb.feature.chroma_stft(y=y, sr=sr)
        spec_cent = lb.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = lb.feature.spectral_bandwidth(y=y, sr=sr)
        zcr = lb.feature.zero_crossing_rate(y)
        rolloff = lb.feature.spectral_rolloff(y=y, sr=sr)
        mfcc = lb.feature.mfcc(y=y, sr=sr)
    
        to_append = f'{filename}{np.mean(chroma_stft)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

## Analysing the csv using Pandas

In [24]:
data = pd.read_csv('data.csv')
data.head()

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,filename,shroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_ratemfcc1mfcc2mfcc3mfcc4mfcc5mfcc6mfcc7mfcc8mfcc9mfcc10mfcc11mfcc12mfcc13mfcc14mfcc15mfcc16mfcc17mfcc18mfcc19mfcc20label
blues.00043.au0.39902548209621946,2155.654923,2372.403604,5012.019693,0.087165,-109.165355,100.6215,-8.614721,47.358475,-6.55596,39.613615,-8.518134,29.564672,-16.575962,13.165634,-24.209793,6.585774,-8.642621,4.912259,-15.442804,1.53875,-6.732474,1.417774,-3.96175,3.28746,blues
blues.00012.au0.2693200216144126,1361.045467,1567.804596,2739.625101,0.069124,-207.20808,132.799175,-15.438986,60.986727,0.725807,12.437828,1.183836,-1.540779,-17.888154,8.358496,-2.452068,-0.613248,0.384877,2.605128,-5.188924,-9.527455,-9.244394,-2.848274,-1.418707,-5.932607,blues
blues.00026.au0.2784844616742252,1198.607665,1573.308974,2478.37668,0.051988,-284.819504,108.785628,9.131956,51.25903,18.111256,7.621317,8.781747,2.372426,0.011787,2.250417,4.200278,-3.303735,1.601561,2.660517,3.323455,3.25892,-4.551106,0.493845,5.937066,3.231544,blues
blues.00077.au0.4088756181758112,2206.771246,2191.473506,4657.388504,0.111526,-29.01099,104.532914,-30.974207,38.156392,-7.991167,30.888571,-20.597831,25.829106,-12.953564,16.941485,-12.265393,10.786454,-10.558812,6.877709,-10.294858,6.967845,-10.2561,0.705014,-6.000722,1.348955,blues
blues.00084.au0.39625831632076247,2061.150735,2085.159448,4221.149475,0.113397,-38.965941,112.039843,-31.817035,38.240835,-6.320413,23.331537,-18.778456,23.931796,-10.936108,13.233851,-14.381296,13.327049,-10.921602,9.795615,-5.031277,7.200982,-6.754969,2.663612,-4.38043,0.414055,blues


In [26]:
data.shape

(1000, 7)

In [27]:
data.drop(['filename'], axis = 1)

Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12,Unnamed: 13,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,shroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_ratemfcc1mfcc2mfcc3mfcc4mfcc5mfcc6mfcc7mfcc8mfcc9mfcc10mfcc11mfcc12mfcc13mfcc14mfcc15mfcc16mfcc17mfcc18mfcc19mfcc20label
blues.00043.au0.39902548209621946,2155.654923,2372.403604,5012.019693,0.087165,-109.165355,100.621500,-8.614721,47.358475,-6.555960,39.613615,-8.518134,29.564672,-16.575962,13.165634,-24.209793,6.585774,-8.642621,4.912259,1.538750,-6.732474,1.417774,-3.961750,3.287460,blues
blues.00012.au0.2693200216144126,1361.045467,1567.804596,2739.625101,0.069124,-207.208080,132.799175,-15.438986,60.986727,0.725807,12.437828,1.183836,-1.540779,-17.888154,8.358496,-2.452068,-0.613248,0.384877,2.605128,-9.527455,-9.244394,-2.848274,-1.418707,-5.932607,blues
blues.00026.au0.2784844616742252,1198.607665,1573.308974,2478.376680,0.051988,-284.819504,108.785628,9.131956,51.259030,18.111256,7.621317,8.781747,2.372426,0.011787,2.250417,4.200278,-3.303735,1.601561,2.660517,3.258920,-4.551106,0.493845,5.937066,3.231544,blues
blues.00077.au0.4088756181758112,2206.771246,2191.473506,4657.388504,0.111526,-29.010990,104.532914,-30.974207,38.156392,-7.991167,30.888571,-20.597831,25.829106,-12.953564,16.941485,-12.265393,10.786454,-10.558812,6.877709,6.967845,-10.256100,0.705014,-6.000722,1.348955,blues
blues.00084.au0.39625831632076247,2061.150735,2085.159448,4221.149475,0.113397,-38.965941,112.039843,-31.817035,38.240835,-6.320413,23.331537,-18.778456,23.931796,-10.936108,13.233851,-14.381296,13.327049,-10.921602,9.795615,7.200982,-6.754969,2.663612,-4.380430,0.414055,blues
blues.00094.au0.3766867027528341,1239.337228,1659.466470,2517.618110,0.050908,-206.278431,126.627468,10.585205,43.223168,12.501488,19.425002,0.357382,12.738808,-4.572330,4.050817,-4.413953,3.659409,-1.448481,-1.695150,1.171114,-6.221975,0.600703,-1.605095,-0.528774,blues
blues.00067.au0.3379221685307677,2258.538419,2176.031189,4755.429578,0.112765,-95.424423,101.368652,-20.682497,48.655476,-30.258639,20.802527,-18.303758,14.134917,-12.787580,13.433701,-14.072367,7.723721,-19.224149,2.856372,6.911756,-11.303164,8.940704,-2.959639,-0.975428,blues
blues.00036.au0.23323036568766967,1247.244815,1908.052722,2620.592487,0.036904,-200.220732,116.345181,18.060785,25.288819,0.315825,24.176778,-22.223080,11.183146,-26.841913,-7.643656,-24.872036,-12.015285,-17.009242,-8.115376,-12.617977,-14.187781,-8.204476,-15.024769,-10.322150,blues
blues.00002.au0.363602838496103,1552.481958,1747.165985,3040.514948,0.076301,-90.754394,140.459907,-29.109965,31.689014,-13.987036,25.754761,-13.649586,11.629271,-11.780589,9.706442,-13.123111,5.789265,-8.905224,-1.083720,2.455805,-7.726901,-1.815724,-3.433434,-2.226821,blues
blues.00053.au0.41250076438084543,2171.221742,1954.383785,4237.132712,0.132802,-76.717959,105.753530,-43.181755,60.945850,-8.758631,21.227079,-12.864846,18.603844,-11.384939,14.541766,-6.047970,12.726959,-9.366809,1.350859,6.997609,-4.053008,-0.271427,-3.696092,-0.504041,blues


## Encoding the labels

In [28]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

## Scaling the feature column

In [30]:
scaler = StandardScaler()
X= scaler.fit_transform(np.array(data.iloc[:,:-1], dtype= float))

In [31]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [32]:
len(y_train)

800

In [33]:
len(y_test)

200

In [34]:
X_train[10]

array([ 1.28500583,  0.13680951,  1.1667351 ,  0.80767323,  1.92866955,
       -0.40594764])

# Classification with Keras

## Building the network

In [37]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(256, activation = 'relu', input_shape = (X_train.shape[1],)))
model.add(layers.Dense(128, activation = 'relu'))
model.add(layers.Dense(64, activation = 'relu'))
model.add(layers.Dense(10, activation = 'softmax'))

In [38]:
model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [39]:
history = model.fit(X_train, y_train, epochs = 20, batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [40]:
test_loss, test_acc = model.evaluate(X_test, y_test)



In [41]:
print('test_acc:', test_acc)

test_acc: 0.38


## Validating the approach

In [50]:
x_val = X_train[:200]
partial_x_train = X_train[200:]

y_val = y_train[:200]
partial_y_train = y_train[200:]

model = models.Sequential()
model.add(layers.Dense(512, activation ='relu', input_shape = (X_train.shape[1],)))
model.add(layers.Dense(256, activation = 'relu'))
model.add(layers.Dense(128, activation = 'relu'))
model.add(layers.Dense(64, activation = 'relu'))
model.add(layers.Dense(10, activation = 'softmax'))

model.compile(optimizer = 'adam', loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])


In [51]:
model.fit(partial_x_train, partial_y_train, epochs = 30, batch_size=512, validation_data =(x_val, y_val))

Train on 600 samples, validate on 200 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x12ed0dd30>

In [53]:
results = model.evaluate(X_test, y_test)



In [54]:
results

[1.925166392326355, 0.325]

In [55]:
predictions = model.predict(X_test)

In [57]:
predictions[0].shape

(10,)

In [58]:
np.sum(predictions[0])

1.0000001

In [59]:
np.argmax(predictions[0])

8