Use dataset uploaded to Google Drive

In [None]:
# Mount Google Drive at /content/gdrive so the dataset stored there can be read.
from google.colab import drive
drive.mount('/content/gdrive')

Check if dataset is correctly set up

In [None]:
import os

# Folder that is listed below to discover the genre names.
# NOTE(review): this is a relative path, so it only points at the mounted Drive
# while the working directory is /content — confirm.
pathToDirectory='gdrive/MyDrive/DLB/genres/'

# A genre's position in this list is later used as its class label.
# os.listdir() returns entries in arbitrary order, so sort them to keep the
# label indices stable between sessions. Entries containing '.' are skipped.
genres = sorted(a for a in os.listdir(pathToDirectory) if '.' not in a)
print(genres)

Function to generate spectrograms, which we will use as input to train our model. For each genre, the first 80 audio files become training images and the remaining ones (20) become test images.

In [4]:
import librosa
import matplotlib.pyplot as plt
import librosa.display

def _saveSpectrograms(genre, fileNames, outputDirectory):
  """Write one spectrogram PNG per audio file of `genre` into `outputDirectory`.

  Images are named 1.png, 2.png, ... in the order of `fileNames`.
  The output directory is created if it does not exist yet.
  """
  os.makedirs(outputDirectory, exist_ok=True)
  for fileCounter, fileName in enumerate(fileNames, start=1):
    # librosa.load also returns the sampling rate, which is not needed here.
    audioFile, _ = librosa.load(pathToDirectory+genre+'/'+fileName)
    audioFileFourier = librosa.stft(audioFile)
    # Magnitude of the STFT converted to decibels.
    audioFileDB = librosa.amplitude_to_db(abs(audioFileFourier))
    librosa.display.specshow(audioFileDB)
    plt.savefig(outputDirectory+'/'+str(fileCounter)+'.png')
    # Close the figure so figures do not pile up in memory across the loop.
    plt.close()


def generateSpectrogram(genre, trainCount=80):
  """Generate train and test spectrogram images for one genre.

  Args:
    genre: name of the genre sub-folder inside `pathToDirectory`.
    trainCount: number of audio files used for training images; all
      remaining files become test images. Defaults to 80.

  Images are written to 'spectrogram/train/<genre>/' and
  'spectrogram/test/<genre>/', numbered from 1 in both folders.
  """
  # os.listdir() order is arbitrary; sort so the train/test split is reproducible.
  imageNames = sorted(os.listdir(pathToDirectory+genre))
  print(genre)
  _saveSpectrograms(genre, imageNames[:trainCount], 'spectrogram/train/'+genre)
  # The test counter used to be incremented twice per file (names 2, 4, 6, ...);
  # both folders now share the same 1..N numbering.
  _saveSpectrograms(genre, imageNames[trainCount:], 'spectrogram/test/'+genre)

Generate spectrogram for every genre

In [None]:
# Produce the train/test spectrogram images for each genre in `genres`.
for genre in genres:
  generateSpectrogram(genre)

Imports

In [3]:
import matplotlib.pyplot as plt

import seaborn as sns

import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D , MaxPool2D , Flatten , Dropout 
from keras.optimizers import Adam

from sklearn.metrics import classification_report,confusion_matrix

import tensorflow as tf

import cv2
import os

import numpy as np

Function to get Data from dataset to an array

In [4]:
imageSize = 256
def getData(dataDirectory):
    """Load every image below `dataDirectory/<genre>/` together with its label.

    Args:
        dataDirectory: folder containing one sub-folder per entry of `genres`.

    Returns:
        An object array with one row per image: column 0 holds the image
        resized to (imageSize, imageSize) with the channel axis reversed,
        column 1 holds the index of the genre in `genres`.

    Raises:
        ValueError: if a file cannot be read as an image.
    """
    data = []
    for genreIndex, genre in enumerate(genres):
        path = os.path.join(dataDirectory, genre)
        images = [image for image in os.listdir(path) if '.ipynb_checkpoints' not in image]
        for image in images:
            imagePath = os.path.join(path, image)
            imageArray = cv2.imread(imagePath)
            # cv2.imread signals failure by returning None instead of raising.
            if imageArray is None:
                raise ValueError('Could not read image: ' + imagePath)
            # Reverse the channel axis (OpenCV loads images as BGR).
            imageArray = imageArray[...,::-1]
            resizedImageArray = cv2.resize(imageArray, (imageSize, imageSize)) # Reshape images to normalized size
            data.append([resizedImageArray, genreIndex])
    # Each row pairs an image array with an int, which is a ragged structure:
    # NumPy >= 1.24 refuses to build it unless dtype=object is given explicitly.
    return np.array(data, dtype=object)

Call the function for the training and validation sets

In [None]:
# Load the generated spectrogram images; the 'test' folder is used as the validation set.
trainingSet = getData('spectrogram/train')
validationSet = getData('spectrogram/test')

In [39]:
# Separate each (image, label) pair into feature and label sequences.
xTraining = [feature for feature, label in trainingSet]
yTraining = [label for feature, label in trainingSet]
xValidation = [feature for feature, label in validationSet]
yValidation = [label for feature, label in validationSet]

# Scale pixel values to [0, 1]. float32 is enough for training and halves the
# memory needed compared with the float64 result of dividing a uint8 array.
xTraining = np.array(xTraining, dtype=np.float32) / 255
xValidation = np.array(xValidation, dtype=np.float32) / 255

yTraining = np.array(yTraining)
yValidation = np.array(yValidation)

# The previous `x.reshape(-1, imageSize, imageSize, 1)` calls discarded their
# result, and assigning it would have split the three colour channels into
# separate samples. The stacked arrays already have the layout the network
# expects, so only verify it.
assert xTraining.shape[1:] == (imageSize, imageSize, 3), xTraining.shape
assert xValidation.shape[1:] == (imageSize, imageSize, 3), xValidation.shape
assert len(xTraining) == len(yTraining)
assert len(xValidation) == len(yValidation)

Based on the lectures, we decided on the following network structure: convolutional layers, pooling layers, dropout and dense layers

In [None]:
# Three convolution + max-pooling stages, then dropout and a dense classifier.
# The input size and the number of output classes follow `imageSize` and
# `genres` instead of the hard-coded 256 and 10, so the network stays
# consistent with the data that is actually loaded.
model = Sequential([
    Conv2D(32, 3, padding="same", activation="relu", input_shape=(imageSize, imageSize, 3)),
    MaxPool2D(),
    Conv2D(32, 3, padding="same", activation="relu"),
    MaxPool2D(),
    Conv2D(64, 3, padding="same", activation="relu"),
    MaxPool2D(),
    Dropout(0.4),
    Flatten(),
    Dense(128, activation="relu"),
    # Softmax output: one probability per genre.
    Dense(len(genres), activation="softmax"),
])

model.summary()

We chose Adam as the optimizer with a learning rate of 0.001; we might change that value in the future.

In [72]:
opt = Adam(learning_rate=0.001)
# The last layer of the model already applies softmax, so the loss receives
# probabilities, not logits. from_logits=True would apply softmax a second time.
model.compile(optimizer = opt , loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False) , metrics = ['accuracy'])

In [None]:
%load_ext tensorboard

Setting up tensorboard

In [74]:
%reload_ext tensorboard

import datetime
# Each run logs into its own timestamped sub-folder of logs/fit.
# (The earlier `logFolder = 'logs'` assignment was overwritten immediately and has been dropped.)
logFolder = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

from tensorflow.keras.callbacks import TensorBoard

# Callback list handed to model.fit so training metrics are written for TensorBoard.
callbacks = [TensorBoard(log_dir=logFolder,
                         histogram_freq=1,
                         write_graph=True,
                         write_images=True,
                         update_freq='epoch',
                         profile_batch=2,
                         embeddings_freq=1)]

First training run (50 epochs) to check that training works correctly

In [None]:
# Train for 50 epochs, evaluating on the validation set after every epoch
# and logging through the TensorBoard callback.
history = model.fit(
    xTraining,
    yTraining,
    epochs=50,
    validation_data=(xValidation, yValidation),
    callbacks=callbacks,
)


In [None]:
import pickle
# Persist the trained weights.
model.save_weights('50_epoch.cpkt')

# Persist the per-epoch metrics. The context manager guarantees the file is
# flushed and closed before any later cell reads it back.
with open('history_50_epoch.pkl','wb') as historyFile:
    pickle.dump(history.history, historyFile)

Plot training and validation accuracy

In [None]:
accuracy = history.history['accuracy']
validationAccuracy = history.history['val_accuracy']
loss = history.history['loss']
validationLoss = history.history['val_loss']

# Derive the x-axis from the recorded history instead of a hard-coded 50, so
# the plot stays valid if the number of epochs changes.
epochsRange = range(len(accuracy))

plt.figure(figsize=(25, 15))
# Left panel: accuracy curves.
plt.subplot(2, 2, 1)
plt.plot(epochsRange, accuracy, label='Training Accuracy')
plt.plot(epochsRange, validationAccuracy, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

# Right panel: loss curves.
plt.subplot(2, 2, 2)
plt.plot(epochsRange, loss, label='Training Loss')
plt.plot(epochsRange, validationLoss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()

Plot the accuracy and loss from the saved training history and save the figure to a file

In [None]:
from keras.metrics.metrics import Accuracy  # NOTE(review): not used in this cell
import pickle

# Reload the metrics saved after training so the figure can be rebuilt from disk.
# They are kept under their own name so that `history` (the object returned by
# model.fit) is not replaced by a plain dict.
# pickle.load can execute code embedded in the file: only load files you created yourself.
with open('history_50_epoch.pkl','rb') as historyFile:
    savedHistory = pickle.load(historyFile)
accuracy = savedHistory['accuracy']
# The validation metrics are stored under the same 'val_' keys that were read
# from history.history; 'validationAccuracy' / 'validationLoss' do not exist.
validationAccuracy = savedHistory['val_accuracy']
loss = savedHistory['loss']
validationLoss = savedHistory['val_loss']

# One x-axis point per recorded epoch.
epochsRange = range(len(accuracy))
fig, (ax1, ax2) = plt.subplots(1,2, figsize=(15,6))
plt.rc('xtick', labelsize=10)
plt.rc('ytick', labelsize=10)
ax1.plot(epochsRange, accuracy, label='Training Accuracy', c = 'green', linewidth=4)
ax1.plot(epochsRange, validationAccuracy, label='Validation Accuracy', c='red', linewidth=4)
ax1.legend()
ax1.set_title('Training and Validation Accuracy',fontsize=18)
ax1.set_ylabel('Accuracy',fontsize=18)
ax1.set_xlabel('Epoch',fontsize=18)

ax2.plot(epochsRange, loss, label='Training Loss',c = 'green', linewidth=4)
ax2.plot(epochsRange, validationLoss, label='Validation Loss', c='red', linewidth=4)
ax2.legend()
ax2.set_title('Training and Validation Loss',fontsize=18)
ax2.set_ylabel('Loss',fontsize=18)
ax2.set_xlabel('Epoch',fontsize=18)
fig.tight_layout(pad=3.0)
plt.savefig('plot.png',bbox_inches = 'tight')
# Clear the figure after saving it.
plt.clf()
     

Predict and generate classification report

In [None]:
# Index of the highest-scoring class for every validation image, flattened to 1-D.
predictions = np.argmax(model.predict(xValidation), axis=-1).ravel()
# Per-genre precision, recall and F1 on the validation set.
print(classification_report(yValidation, predictions, target_names=genres))

Create the confusion matrix

In [None]:
import seaborn as sn
import pandas as pd
import matplotlib.pyplot as plt

# Confusion matrix of true labels (rows) against predicted labels (columns),
# labelled with the genre names for the heatmap.
cm = confusion_matrix(yValidation, predictions)
df_cm = pd.DataFrame(cm, index=list(genres), columns=list(genres))

plt.figure(figsize=(10, 7))
sn.heatmap(df_cm, annot=True, cmap="RdPu")
plt.savefig('confusion_matrix.png', bbox_inches='tight')

In [None]:
%tensorboard --logdir=logs/fit