<a href="https://colab.research.google.com/github/AryanMethil/Music-Genre-Classification/blob/main/Music_Genre_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import librosa, librosa.display
import numpy as np
import matplotlib.pyplot as plt
import os
from math import *
import json
from sklearn.model_selection import train_test_split
import tensorflow as tf


In [None]:
#MFCC extraction settings shared by the feature-extraction loop

n_mfcc=13                                                                       #number of mfcc coefficients kept per frame
n_fft=2048                                                                      #number of samples per fft window
hop_length=512                                                                  #frame 2 will begin from 512th sample and so on
num_segments=10                                                                 #number of segments we want to divide the track into
sample_rate=22050                                                               #sampling rate (samples per second)
duration=30                                                                     #each track in every genre is 30 seconds long
samples_per_track=sample_rate*duration
samples_per_segment=samples_per_track//num_segments

#librosa centers (pads) the signal by default, so a segment of N samples yields
#1 + N // hop_length frames. ceil(N / hop_length) gives the same number only when
#N is not an exact multiple of hop_length; in the exact-multiple case it is one
#short and every segment would be silently rejected by the length check.
expected_num_mfcc_vectors_per_segment=1+samples_per_segment//hop_length

In [None]:
#Container for the extracted dataset; every key starts with its own empty list

data=dict(
    mfcc=[],          #input: one MFCC matrix (as nested lists) per stored segment
    labels=[],        #output: one integer label per stored segment
    mapping=[],       #genre name corresponding to each integer label
)

In [None]:
dataset_path='/content/drive/My Drive/genres'

#Walk the dataset: every sub-directory that contains files is treated as one genre.
#For each audio file the signal is cut into num_segments pieces and the MFCCs of
#each piece are stored in data['mfcc'] with the genre index in data['labels'].

for directory,genres_directories,filenames in os.walk(dataset_path):

  #sort in place so the traversal order (and therefore the label numbering) is the
  #same on every run; os.walk honours in-place edits of the sub-directory list
  genres_directories.sort()

  #skip the root level (compared by value, not by object identity) and any
  #directory without files, so a label is only created for folders holding data
  if directory==dataset_path or not filenames:
    continue

  #genres/blues => 'blues', the genre name we want to save in the mapping list
  semantic_label=os.path.basename(directory)
  data['mapping'].append(semantic_label)

  #the label is the position of this genre in the mapping list, so the two can
  #never drift apart regardless of how the directory tree is laid out
  label=len(data['mapping'])-1
  print(f'\nProcessing {semantic_label}')

  for f in sorted(filenames):

    #The filepath of an audio in one particular genre
    filepath=os.path.join(directory,f)
    signal,sr=librosa.load(filepath,sr=sample_rate)

    for s in range(num_segments):
      start_segment=samples_per_segment*s
      end_segment=start_segment+samples_per_segment

      #audio is passed by keyword: recent librosa releases no longer accept it positionally
      mfcc=librosa.feature.mfcc(y=signal[start_segment:end_segment],
                                sr=sr,
                                n_fft=n_fft,
                                n_mfcc=n_mfcc,
                                hop_length=hop_length)
      mfcc=mfcc.T               #transpose so that each row is one frame

      #store mfcc for segment only if it has the expected number of frames
      #(tracks shorter than `duration` give a truncated last segment)
      if len(mfcc)==expected_num_mfcc_vectors_per_segment:
        data['mfcc'].append(mfcc.tolist())
        data['labels'].append(label)
        print(f'{filepath}, segment:{s+1}')


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
/content/drive/My Drive/genres/metal/metal.00000.wav, segment:9
/content/drive/My Drive/genres/metal/metal.00000.wav, segment:10
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:1
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:2
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:3
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:4
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:5
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:6
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:7
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:8
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:9
/content/drive/My Drive/genres/metal/metal.00003.wav, segment:10
/content/drive/My Drive/genres/metal/metal.00006.wav, segment:1
/content/drive/My Drive/genres/metal/metal.00006.wav, segment:2
/content/drive/My Drive/genres/metal/

In [None]:
#Persist the extracted features, labels and genre mapping as indented JSON
with open('dataset.json',mode='w') as dataset_file:
  json.dump(data,dataset_file,indent=4)

In [None]:
#Read the saved dataset back from disk
with open('dataset.json','r') as dataset_file:
  data=json.load(dataset_file)

#Convert the nested lists into numpy arrays: MFCCs are the inputs, labels the targets
inputs=np.array(data['mfcc'])
targets=np.array(data['labels'])

In [None]:
#Fixed seed so the train/validation/test partition is identical on every run
split_seed=42

#Hold out 20% of the data for testing, then 20% of the remainder for validation.
#Stratifying on the labels keeps the genre proportions the same in every subset.
X_train,X_test,y_train,y_test=train_test_split(inputs,targets,
                                               test_size=0.2,
                                               random_state=split_seed,
                                               stratify=targets)
X_train,X_validation,y_train,y_validation=train_test_split(X_train,y_train,
                                                           test_size=0.2,
                                                           random_state=split_seed,
                                                           stratify=y_train)

#NOTE(review): the split is done per segment, so segments cut from the same track
#can end up in different subsets; a per-track (grouped) split would need the track
#identity, which is not stored in the dataset -- TODO confirm whether this matters.

In [None]:
#Shape of one training sample: axis 1 and axis 2 of X_train (axis 0 indexes the samples)
frames_per_sample=X_train.shape[1]
values_per_frame=X_train.shape[2]
input_shape=(frames_per_sample,values_per_frame)

In [None]:
#Build the classifier layer by layer: two stacked LSTMs followed by a dense head
music_model=tf.keras.Sequential()

#first LSTM returns the whole sequence so that the second LSTM can consume it
music_model.add(tf.keras.layers.LSTM(128,input_shape=input_shape,return_sequences=True))
music_model.add(tf.keras.layers.LSTM(64))

#fully connected head with dropout, ending in a 10-way softmax
music_model.add(tf.keras.layers.Dense(512,activation='relu'))
music_model.add(tf.keras.layers.Dropout(0.2))
music_model.add(tf.keras.layers.Dense(10,activation='softmax'))

In [None]:
#Adam with a small learning rate; the sparse loss is used because labels are plain integers
adam_optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001)

music_model.compile(
    optimizer=adam_optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
#Train for 50 epochs in batches of 32, reporting loss/accuracy on the validation set each epoch
music_model.fit(
    x=X_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_validation,y_validation),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<tensorflow.python.keras.callbacks.History at 0x7f1a0231c668>

In [None]:
#Measure loss and accuracy on the held-out test set
test_metrics=music_model.evaluate(x=X_test,y=y_test,verbose=2)
test_loss,test_acc=test_metrics
print(f'Test loss : {test_loss}\nTest Accuracy : {test_acc}')

63/63 - 1s - loss: 1.0399 - accuracy: 0.6995
Test loss : 1.0398794412612915
Test Accuracy : 0.6995000243186951


In [None]:
#Write the trained model to an HDF5 file in the current working directory
model_path='my_music_model.h5'
music_model.save(model_path)