<a href="https://colab.research.google.com/github/NegiArvind/Music-Genre_Classification_using-tflearn/blob/master/MusicGenreClassifier.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Mounting the drive on google colab

In [1]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# project folder referenced by the config cell (driveLocation) is reachable.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/drive


#### config.py

In [0]:

# Data Path
# Every path below is built by prefixing driveLocation.
# For google colab
driveLocation='drive/My Drive/MusicGenreClassification/'
# For PC
# driveLocation=''
# Raw audio, listed as one sub-folder per genre by createSpectrograms()
rawDatasetPath=driveLocation+'data/RawDataset'
# Generated spectrogram PNGs, one sub-folder per genre
# NOTE(review): the folder name is spelled 'Spectograms'; it is a runtime path, so
# renaming it would orphan any spectrograms already generated on disk.
spectrogramsPath=driveLocation+'data/Spectograms'
# Square slices cut out of the spectrograms, one sub-folder per genre
slicesPath=driveLocation+'data/Slices'
# Pickled train/validation/test arrays
datasetPath=driveLocation+'data/Dataset'

#Spectrogram resolution
# Passed to sox as the value of its -X option when a spectrogram is rendered
pixelPerSecond=50

#Slice Parameters
# Side length (in pixels) of the square slices; also the model's input width/height
sliceSize=128

# Dataset parameter
# Fractions of each genre's slices that go to the validation and test sets;
# whatever remains is used for training.
validationRatio=0.3
testRatio=0.1

#Model parameters
# batchSize and nbEpoch are passed to model.fit() in the training cell
batchSize=128
# NOTE(review): learningRate is not referenced anywhere else in the visible notebook
# (createModel() does not pass it to regression()) - confirm whether it should be.
learningRate=0.001
nbEpoch=25

#### create_spectrogram_from_audio.py

In [0]:
import os
# from config import rawDatasetPath
# from config import pixelPerSecond
# from config import spectrogramsPath
# from config import sliceSize
from subprocess import Popen,PIPE,STDOUT
from PIL import Image
# from config import slicesPath

# currentPath=os.getcwd()
# print(currentPath)
def createSpectrograms():
    """Render a spectrogram for every track under rawDatasetPath, then slice them.

    rawDatasetPath is listed as one folder per genre; each entry inside a
    genre folder is handed to createSpectrogramFromAudio(). Once every genre
    has been processed, createSlices() is called to cut the spectrograms
    into slices.
    """
    for genreName in os.listdir(rawDatasetPath):
        genreDir = os.path.join(rawDatasetPath, genreName)
        print("Start Generating spectrogram for --{}-- genre".format(genreName))

        trackNames = os.listdir(genreDir)
        trackCount = len(trackNames)
        for position, trackName in enumerate(trackNames, start=1):
            trackPath = os.path.join(genreDir, trackName)
            createSpectrogramFromAudio(trackPath, trackName, genreName)
            print("Spectrogram Created {} / {}".format(position, trackCount))

    # Slicing only starts after all spectrograms exist.
    print("Start creating slices..")
    createSlices()

def createSpectrogramFromAudio(musicPath,musicName,genre):
    """Render the spectrogram of one audio file with sox.

    The PNG is written to <spectrogramsPath>/<genre>/<track name without
    extension>.png; missing output folders are created.

    Args:
        musicPath: path of the audio file to convert.
        musicName: file name of the track (with extension); its stem names the PNG.
        genre: genre folder name the spectrogram is filed under.

    Raises:
        OSError: if the sox executable cannot be launched.
    """
    spectrogramGenrePath = os.path.join(spectrogramsPath, genre)
    # Creates spectrogramsPath as well when it is missing.
    os.makedirs(spectrogramGenrePath, exist_ok=True)

    # splitext copes with extensions of any length (".flac", ".ogg", ...),
    # unlike slicing off the last four characters.
    trackStem = os.path.splitext(musicName)[0]
    outputPath = os.path.join(spectrogramGenrePath, trackStem) + ".png"

    # Argument list + no shell: paths containing spaces or quotes are passed
    # to sox verbatim instead of being re-parsed by /bin/sh.
    # -X: pixels per second, -Y: target height, -m: monochrome, -r: raw
    # (no axes or legend), -o: output file.
    command = [
        'sox', musicPath, '-n', 'spectrogram',
        '-Y', '200', '-X', str(pixelPerSecond),
        '-m', '-r', '-o', outputPath,
    ]
    # No cwd argument: the child inherits the current working directory.
    popen = Popen(command, stdin=PIPE, stdout=PIPE, stderr=STDOUT, close_fds=True)

    # stderr is merged into stdout, so everything sox prints arrives in `output`.
    output, _ = popen.communicate()
    print(output)
    if popen.returncode != 0:
        print("sox exited with status {} for {}".format(popen.returncode, musicPath))

def createSlices():
    """Cut every spectrogram under spectrogramsPath into slices under slicesPath.

    The genre folder layout of spectrogramsPath is mirrored in slicesPath.
    Each spectrogram is passed to createSliceForSpectrogram() together with
    the output prefix <slicesPath>/<genre>/<file name minus its last 4 chars>.
    """
    if not os.path.exists(slicesPath):
        os.makedirs(slicesPath)

    for genreName in os.listdir(spectrogramsPath):
        sourceDir = os.path.join(spectrogramsPath, genreName)
        targetDir = os.path.join(slicesPath, genreName)
        if not os.path.exists(targetDir):
            os.makedirs(targetDir)

        for fileName in os.listdir(sourceDir):
            # Drop the 4-character ".png" suffix to form the slice name prefix.
            slicePrefix = os.path.join(targetDir, fileName[:-4])
            createSliceForSpectrogram(fileName, slicePrefix, sourceDir)

def createSliceForSpectrogram(spectrogramName,slicePath,spectrogramGenrePath):
    """Split one spectrogram image into consecutive sliceSize-wide crops.

    Args:
        spectrogramName: file name of the spectrogram inside spectrogramGenrePath.
        slicePath: output path prefix; slice k is saved as "<slicePath>_<k>.png"
            with k starting at 1.
        spectrogramGenrePath: folder that contains the spectrogram.

    Only whole slices are produced; leftover columns on the right edge are dropped.
    """
    spectrogram = Image.open(os.path.join(spectrogramGenrePath, spectrogramName))
    print(spectrogram)
    imageWidth, _ = spectrogram.size

    # Number of complete sliceSize-wide windows that fit in the image.
    sliceCount = int(imageWidth / sliceSize)
    for sliceNumber in range(1, sliceCount + 1):
        print("Creating slice: ", sliceNumber, "/", sliceCount, " for ", spectrogramName)
        left = (sliceNumber - 1) * sliceSize
        # Crop box is (left, upper, right, lower): the top sliceSize rows of this window.
        box = (left, 0, left + sliceSize, sliceSize)
        spectrogram.crop(box).save(slicePath + "_{}.png".format(sliceNumber))

#createSpectrograms()

#### datasetTools.py

In [0]:
# from config import slicesPath
from random import shuffle
# from config import datasetPath
import pickle
# from config import sliceSize
# from config import validationRatio
# from config import testRatio
import os
from PIL import Image
import numpy as np


def getDataset(sliceSize,validationRatio,testRatio,mode):
    """Return the pickled dataset for `mode`, building it from slices if needed.

    The dataset counts as already built when <datasetPath>/train_X.pkl exists;
    otherwise createDatasetFromSlices() is run first.

    Args:
        sliceSize: side length of the square slices.
        validationRatio: fraction of slices reserved for validation.
        testRatio: fraction of slices reserved for testing.
        mode: forwarded to loadDataset() ("train" or anything else for test data).

    Returns:
        Whatever loadDataset(mode) returns.
    """
    alreadyBuilt = os.path.exists(datasetPath+"/train_X.pkl")
    if alreadyBuilt:
        print("Using existing data")
    else:
        createDatasetFromSlices(sliceSize,validationRatio,testRatio)
    return loadDataset(mode)


def loadDataset(mode):
    """Load pickled arrays from datasetPath.

    Args:
        mode: "train" loads the training and validation arrays; any other
            value loads the test arrays.

    Returns:
        (train_X, train_y, validation_X, validation_y) when mode is "train",
        otherwise (test_X, test_y).

    Note: pickle.load can execute arbitrary code from a crafted file, so only
    load dataset files that this project produced itself.
    """
    def _unpickle(pathTemplate):
        # pathTemplate carries a single "{}" placeholder for datasetPath.
        with open(pathTemplate.format(datasetPath),'rb') as handle:
            return pickle.load(handle)

    if mode!="train":
        print("Loading testing dataset...")
        test_X=_unpickle("{}/test_X.pkl")
        test_y=_unpickle("{}/test_y.pkl")
        print("Testing dataset loaded")
        return test_X,test_y

    print("Loading training and validation dataset...")
    templates=(
        "{}/train_X.pkl",
        "{}/train_y.pkl",
        "{}/validation_X.pkl",
        "{}/validation_y.pkl",
    )
    train_X,train_y,validation_X,validation_y=[_unpickle(t) for t in templates]
    print("Training and validation dataset loaded")
    return train_X,train_y,validation_X,validation_y


def createDatasetFromSlices(sliceSize,validationRatio,testRatio):
  """Build train/validation/test arrays from the slice images and save them.

  Every sub-folder of slicesPath is treated as one genre. The slices of each
  genre are shuffled and split per genre, so each split keeps the genre
  proportions: the first part goes to training, then int(n*validationRatio)
  slices to validation, then int(n*testRatio) slices to testing. Labels are
  one-hot vectors ordered like os.listdir(slicesPath).

  Args:
      sliceSize: side length the slices are resized to.
      validationRatio: fraction of each genre's slices used for validation.
      testRatio: fraction of each genre's slices used for testing.

  Raises:
      ValueError: if a ratio is negative or the two ratios sum to more than 1.

  The result is written through saveDataset(); nothing is returned.
  """
  if validationRatio<0 or testRatio<0 or validationRatio+testRatio>1:
    raise ValueError("validationRatio and testRatio must be >= 0 and sum to at most 1")

  print("Start Creating Dataset...")
  genres=os.listdir(slicesPath)

  # split name -> list of (imageData, oneHotLabel) pairs
  splits={"train":[],"validation":[],"test":[]}

  for genre in genres:

    print(genres)
    print(genre)
    genreDir=os.path.join(slicesPath,genre)
    images=os.listdir(genreDir)
    shuffle(images)
    nb_validation=int(len(images)*validationRatio)
    nb_test=int(len(images)*testRatio)
    nb_train=len(images)-nb_validation-nb_test
    print("images",len(images))
    print("validation",nb_validation)
    print("test",nb_test)

    # The one-hot label is the same for every slice of this genre.
    label=[1 if genre==g else 0 for g in genres]

    # Walk the shuffled list once, handing consecutive ranges to each split.
    start=0
    for splitName,count in (("train",nb_train),("validation",nb_validation),("test",nb_test)):
      for i in range(start,start+count):
        print(splitName,str(i))
        imgData=getImageData(genreDir+"/"+images[i],sliceSize)
        print(label)
        splits[splitName].append((imgData,list(label)))
      start+=count

  def _toArrays(samples):
    # Explicit target shapes keep this working for an empty split, where the
    # previous zip(*samples) unpacking raised ValueError.
    data=np.array([sample[0] for sample in samples]).reshape(-1,sliceSize,sliceSize,1)
    labels=np.array([sample[1] for sample in samples]).reshape(len(samples),len(genres))
    return data,labels

  train_X,train_y=_toArrays(splits["train"])
  validation_X,validation_y=_toArrays(splits["validation"])
  test_X,test_y=_toArrays(splits["test"])
  print("Dataset successfully created ")

  # save the dataset
  saveDataset(train_X,train_y,validation_X,validation_y,test_X,test_y)

def saveDataset(train_X,train_y,validation_X,validation_y,test_X,test_y):
    """Pickle the six dataset arrays into datasetPath.

    datasetPath is created when it does not exist. Each argument is written
    to the file named after it (train_X -> train_X.pkl, and so on).
    """
    if not os.path.exists(datasetPath):
        os.makedirs(datasetPath)

    print("Start Saving the dataset...")
    # (path template, object to pickle), written in this order.
    outputs=(
        ("{}/train_X.pkl",train_X),
        ("{}/train_y.pkl",train_y),
        ("{}/validation_X.pkl",validation_X),
        ("{}/validation_y.pkl",validation_y),
        ("{}/test_X.pkl",test_X),
        ("{}/test_y.pkl",test_y),
    )
    for pathTemplate,payload in outputs:
        with open(pathTemplate.format(datasetPath),'wb') as handle:
            pickle.dump(payload,handle)

    print("Dataset successfully Saved")


def getImageData(imagePath,imageSize):
    """Load an image as a normalised (imageSize, imageSize, 1) array.

    Args:
        imagePath: path of the image file to read.
        imageSize: side length the image is resized to.

    Returns:
        Array of shape (imageSize, imageSize, 1) with the 8-bit pixel values
        divided by 255.

    Raises:
        ValueError: from reshape if the image has more than one channel.
    """
    # The context manager closes the file handle; resize() returns an
    # independent in-memory image, so it stays usable afterwards.
    with Image.open(imagePath) as image:
        # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
        resized=image.resize((imageSize,imageSize),resample=Image.LANCZOS)
    pixels=np.asarray(resized,dtype=np.uint8).reshape(imageSize,imageSize,1)
    return pixels/255



#### Installing tflearn

In [5]:
!pip install tflearn

Collecting tflearn
[?25l  Downloading https://files.pythonhosted.org/packages/16/ec/e9ce1b52e71f6dff3bd944f020cef7140779e783ab27512ea7c7275ddee5/tflearn-0.3.2.tar.gz (98kB)
[K    10% |███▎                            | 10kB 13.2MB/s eta 0:00:01[K    20% |██████▋                         | 20kB 1.9MB/s eta 0:00:01[K    31% |██████████                      | 30kB 2.2MB/s eta 0:00:01[K    41% |█████████████▎                  | 40kB 2.1MB/s eta 0:00:01[K    51% |████████████████▋               | 51kB 2.2MB/s eta 0:00:01[K    62% |████████████████████            | 61kB 2.6MB/s eta 0:00:01[K    72% |███████████████████████▎        | 71kB 2.9MB/s eta 0:00:01[K    83% |██████████████████████████▋     | 81kB 2.7MB/s eta 0:00:01[K    93% |██████████████████████████████  | 92kB 3.0MB/s eta 0:00:01[K    100% |████████████████████████████████| 102kB 3.2MB/s 
Building wheels for collected packages: tflearn
  Running setup.py bdist_wheel for tflearn ... [?25l- \ done
[?25h  S

#### model.py

In [0]:
import tflearn
from tflearn.layers.conv import conv_2d,max_pool_2d
from tflearn.layers.core import input_data,dropout,fully_connected
from tflearn.layers.estimator import regression

def createModel(nbClasses,imageSize):
  """Build the tflearn CNN used to classify spectrogram slices.

  Layout: five conv(3x3, elu, Xavier init) + 2x2 max-pool stages with
  32, 64, 128, 256 and 512 filters, two 1024-unit elu dense layers each
  followed by dropout (keep_prob 0.5), and a softmax output layer trained
  with rmsprop on categorical cross-entropy.

  Args:
      nbClasses: number of output classes (units of the softmax layer).
      imageSize: width and height of the single-channel square input.

  Returns:
      A tflearn.DNN wrapping the network.
  """
  print("Creating model...")
  network=input_data(shape=[None,imageSize,imageSize,1],name='input')

  # Convolution + pooling stages, one per filter count.
  for filterCount in (32,64,128,256,512):
    network=conv_2d(incoming=network,nb_filter=filterCount,filter_size=3,activation='elu',weights_init='Xavier')
    network=max_pool_2d(incoming=network,kernel_size=[2,2])

  # Two identical hidden dense layers with dropout.
  for _ in range(2):
    network=fully_connected(incoming=network,n_units=1024,activation='elu')
    network=dropout(incoming=network,keep_prob=0.5)

  # Output layer and training op.
  network=fully_connected(incoming=network,n_units=nbClasses,activation='softmax')
  network=regression(incoming=network,metric='accuracy',optimizer='rmsprop',loss='categorical_crossentropy')

  classifier=tflearn.DNN(network)
  print("Model successfully created! ")
  return classifier




#### train_and_test_the_model.py

In [7]:
import argparse
# from model import createModel
# from config import validationRatio,testRatio,sliceSize,nbEpoch,batchSize
# from create_spectrogram_from_audio import createSpectrograms
# from config import slicesPath
# from datasetTools import getDataset
import os
import random
import string 
import numpy as np 


# parser=argparse.ArgumentParser()
# parser.add_argument("mode",help="Train or tests the CNN",nargs='+',choices=["train","test","slice"])
# args=parser.parse_args()

# sys.exit() is used by the "slice" branch below but sys was never imported.
import sys

print("--------------------------")
print("| ** Config ** ")
print("| Validation ratio: {}".format(validationRatio))
print("| Test ratio: {}".format(testRatio))
print("| Slice size: {}".format(sliceSize))
print("--------------------------")

# What this cell does: "slice" builds spectrograms and slices, "train" fits and
# saves the model, "test" evaluates the saved model.
# Named runMode rather than `type` so the builtin type() is not shadowed for
# the rest of the notebook.
runMode='test'

if runMode=="slice":
  print("inside slice")
  createSpectrograms() # it will create slices for every spectrograms
  sys.exit()

# One class per genre folder. `genres` is defined here, before the branches, so
# it exists as a global whichever mode runs.
genres=os.listdir(slicesPath)
nbClasses=len(genres)

#create model
model=createModel(nbClasses,sliceSize)

if runMode=="train":
  #create or load new dataset
  train_X,train_y,validation_X,validation_y=getDataset(sliceSize,validationRatio,testRatio,mode="train")

  #Define run id for graphs: batch size plus nbClasses random uppercase letters
  run_id = "MusicGenres - "+str(batchSize)+" "+''.join(random.SystemRandom().
                                                       choice(string.ascii_uppercase) for _ in range(nbClasses))

  #Train the model
  print(len(train_X))
  print(len(train_y))
  print(len(validation_X))
  print(len(validation_y))
  model.fit(train_X,train_y,n_epoch=nbEpoch,batch_size=batchSize,shuffle=True,
            validation_set=(validation_X,validation_y),snapshot_step=100,show_metric=True,run_id=run_id)

  print("Model Trained")

  #Save trained model
  print("Saving the weights")
  if not os.path.exists(driveLocation+'TrainedModel'):
    os.makedirs(driveLocation+'TrainedModel')
  model.save(driveLocation+'TrainedModel/genreClassifierModel.tflearn')
  print("Weight saved!")

if runMode=="test":

  #load the test Dataset
  test_X,test_y=getDataset(sliceSize,validationRatio,testRatio,mode="test")

  #Load the weights(model)
  print("Loading weights...")
  model.load(driveLocation+'TrainedModel/genreClassifierModel.tflearn')
  print('weights loaded!')

  # asarray guarantees the fancy indexing below works even if predict() returns a list
  predictions=np.asarray(model.predict(test_X))
  best_class_indices=np.argmax(predictions,axis=1)
  best_class_probabilities=predictions[np.arange(len(best_class_indices)),best_class_indices]

  # Predictions at or below this probability are reported as 'Unknown'.
  confidenceThreshold=0.52

  y=np.argmax(test_y,axis=1)
  for i,(predicted,probability,actual) in enumerate(zip(best_class_indices,best_class_probabilities,y)):
    genreType=genres[predicted] if probability>confidenceThreshold else 'Unknown'
    # columns: sample index, predicted genre, its probability, true genre
    print('%4d  %s: %.3f  %s' % (i,genreType,probability,genres[actual]))

  accuracy=np.mean(np.equal(best_class_indices,y))
  print("Accuracy : {}".format(accuracy))
  testAccuracy=model.evaluate(test_X,test_y)[0]
  print("Test Accuracy : {}".format(testAccuracy))




--------------------------
| ** Config ** 
| Validation ratio: 0.3
| Test ratio: 0.1
| Slice size: 128
--------------------------
Creating model...
Instructions for updating:
keep_dims is deprecated, use keepdims instead
Model successfully created! 
Using existing data
Loading testing dataset...
Testing dataset loaded
Loading weights...
INFO:tensorflow:Restoring parameters from /content/drive/My Drive/MusicGenreClassification/TrainedModel/genreClassifierModel.tflearn
weights loaded!
   0  funksoulrnb: 0.987  rock
   1  rock: 0.986  rock
   2  rock: 0.960  rock
   3  jazz: 0.933  rock
   4  jazz: 0.561  rock
   5  rock: 0.994  rock
   6  rock: 0.979  rock
   7  rock: 0.867  rock
   8  rock: 0.638  rock
   9  jazz: 0.973  rock
  10  rock: 1.000  rock
  11  rock: 0.819  rock
  12  pop: 1.000  rock
  13  rock: 0.720  rock
  14  rock: 0.991  rock
  15  rock: 0.959  rock
  16  blues: 0.934  rock
  17  rock: 0.964  rock
  18  rock: 0.999  rock
  19  electronic: 0.627  rock
  20  jazz: 0.945  

#### Installation of sox
To create a spectrogram from an audio file, we need to install SoX.

In [8]:
!apt-get install sox libsox-dev libsox-fmt-all

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  libao-common libao4 libasound2 libasound2-data libasyncns0 libflac8 libgsm1
  libid3tag0 libltdl7 libmad0 libmagic-mgc libmagic1 libmp3lame0 libogg0
  libopencore-amrnb0 libopencore-amrwb0 libpulse0 libsndfile1 libsox-fmt-alsa
  libsox-fmt-ao libsox-fmt-base libsox-fmt-mp3 libsox-fmt-oss libsox-fmt-pulse
  libsox3 libtwolame0 libvorbis0a libvorbisenc2 libvorbisfile3 libwavpack1
  libwrap0
Suggested packages:
  libaudio2 libsndio6.1 libasound2-plugins alsa-utils file pulseaudio
The following NEW packages will be installed:
  libao-common libao4 libasound2 libasound2-data libasyncns0 libflac8 libgsm1
  libid3tag0 libltdl7 libmad0 libmagic-mgc libmagic1 libmp3lame0 libogg0
  libopencore-amrnb0 libopencore-amrwb0 libpulse0 libsndfile1 libsox-dev
  libsox-fmt-all libsox-fmt-alsa libsox-fmt-ao libsox-fmt-base libsox-fmt-mp3
  libsox-fmt-oss li

#### predict_the_genre_of_music.py
In this script you provide the path of a music file and it tells you the genre of the music.
First, we generate the spectrogram of the music and then divide the spectrogram into slices of a fixed size (sliceSize).
We pass the spectrogram slices to the CNN, which returns, for each slice, the probability of it belonging to each genre.
We select the genre that receives the most votes.

In [10]:
import argparse
# from model import createModel
# from config import validationRatio,testRatio,sliceSize,nbEpoch,batchSize
# from create_spectrogram_from_audio import createSpectrograms
# from config import slicesPath
# from datasetTools import getDataset
import os
import random
import string 
import numpy as np 
# from config import sliceSize
from collections import Counter

# Working directory at the time this cell runs; used as the cwd of the sox subprocess.
currentPath=os.getcwd()

def createSpectrogramForOneMusic(musicPath,sliceSize):
  """Render the spectrogram of one track and return it cut into square slices.

  sox writes a temporary PNG to driveLocation; it is deleted again before
  this function returns.

  Args:
      musicPath: path of the audio file.
      sliceSize: width and height (in pixels) of each slice.

  Returns:
      List of PIL images, one per complete sliceSize-wide window of the
      spectrogram (empty when the spectrogram is narrower than sliceSize).

  Raises:
      OSError: if sox cannot be launched, or if it produced no spectrogram file.
  """
  # basename/splitext handle any extension length, unlike slicing off 4 characters.
  musicName=os.path.splitext(os.path.basename(musicPath))[0]
  spectrogramPath=driveLocation+musicName+".png"

  # Argument list + no shell: paths containing spaces or quotes reach sox
  # verbatim instead of being re-parsed by /bin/sh.
  # -X: pixels per second, -Y: target height, -m: monochrome, -r: raw
  # (no axes or legend), -o: output file.
  command=['sox',musicPath,'-n','spectrogram','-Y','200','-X',str(pixelPerSecond),
           '-m','-r','-o',spectrogramPath]
  popen=Popen(command,stdin=PIPE,stdout=PIPE,stderr=STDOUT,close_fds=True,cwd=currentPath)

  # stderr is merged into stdout, so everything sox prints arrives in `output`.
  output,_=popen.communicate()
  print(output)
  if popen.returncode!=0:
    print("sox exited with status {} for {}".format(popen.returncode,musicPath))

  slicedImages=[]
  try:
    # Crop while the file is still open; the crops are independent in-memory
    # images, so closing and deleting the file afterwards is safe.
    with Image.open(spectrogramPath) as image:
      width,_=image.size
      nbSamples=int(width/sliceSize)
      for i in range(nbSamples):
        print("Creating slice: ",(i+1),"/",nbSamples," for ",musicName+'.png')
        startPixel=i*sliceSize
        # crop box is (left, upper, right, lower)
        slicedImages.append(image.crop((startPixel,0,startPixel+sliceSize,sliceSize)))
  finally:
    # The temporary spectrogram is no longer needed once it has been sliced.
    if os.path.exists(spectrogramPath):
      os.remove(spectrogramPath)

  return slicedImages

def getImageDataFromSlice(musicPath,imageSize):
  """Return the normalised slice arrays for one track.

  Args:
      musicPath: path of the audio file.
      imageSize: side length used both for slicing and for the resize.

  Returns:
      List of arrays of shape (imageSize, imageSize, 1) holding the 8-bit
      pixel values divided by 255, one per spectrogram slice.
  """
  slicedImages=createSpectrogramForOneMusic(musicPath,imageSize)
  imageData=[]
  for sliceImage in slicedImages:
    # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
    resized=sliceImage.resize((imageSize,imageSize),resample=Image.LANCZOS)
    pixels=np.asarray(resized,dtype=np.uint8).reshape(imageSize,imageSize,1)
    imageData.append(pixels/255)
  return imageData

def predictGenre(musicPath,imageSize):
	"""Predict and print the genre of one track by voting over its slices.

	Each spectrogram slice is classified separately. If some genre wins more
	than one slice, that genre is printed with a score; otherwise the genre of
	the single most confident slice is printed.

	Relies on the module-level `model` (the network) and `genres` (class names
	in label order) created by the train/test cell.

	Args:
	    musicPath: path of the audio file.
	    imageSize: side length of the slices fed to the network.
	"""
	imagesData=getImageDataFromSlice(musicPath,imageSize)
	if not imagesData:
		# Track too short to yield even one slice: nothing to classify.
		print("No slices could be extracted from {}; cannot predict a genre".format(musicPath))
		return

	#Load the weights(model)
	print("Loading weights...")
	model.load(driveLocation+'TrainedModel/genreClassifierModel.tflearn')
	print('weights loaded!')

	# asarray guarantees the fancy indexing below works even if predict() returns a list
	predictions=np.asarray(model.predict(imagesData))
	best_class_indices=np.argmax(predictions,axis=1) # winning class per slice
	best_class_probabilities=predictions[np.arange(len(best_class_indices)),best_class_indices]

	# most voted class and how many slices voted for it
	most_common,num_most_common=Counter(best_class_indices.tolist()).most_common(1)[0]

	if num_most_common>1:
		votedTotal=sum(probability
		               for classIndex,probability in zip(best_class_indices,best_class_probabilities)
		               if classIndex==most_common)
		# Divided by the total number of slices (not only the winning ones), so
		# the score drops when fewer slices agree.
		meanProbability=votedTotal/len(best_class_indices)
		print('Higher vote ---  %s:  %.3f' % (genres[most_common],meanProbability*100))

	else:
		# No class won more than one slice: fall back to the most confident slice.
		# bestSlice is a slice index; map it to its class before indexing genres.
		bestSlice=np.argmax(best_class_probabilities)
		print('maximum prob --- %s:  %.3f ' %(genres[best_class_indices[bestSlice]],best_class_probabilities[bestSlice]*100))
    
# Example run: predict the genre of one track stored under <driveLocation>/musics.
if __name__=='__main__':
  predictGenre(driveLocation+'musics/22-Lonely.mp3',sliceSize)

b''
Creating slice:  1 / 3  for  22-Lonely.png
Creating slice:  2 / 3  for  22-Lonely.png
Creating slice:  3 / 3  for  22-Lonely.png
Loading weights...
INFO:tensorflow:Restoring parameters from /content/drive/My Drive/MusicGenreClassification/TrainedModel/genreClassifierModel.tflearn
weights loaded!
Higher vote ---  electronic:  66.463
