In [251]:
import numpy as np
import librosa
import itertools
import os
import h5py
%matplotlib inline
import matplotlib.pyplot as plt
from collections import OrderedDict
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix

import torch 
import torchvision
import torch.nn as nn
import torch.nn.functional as F
import torchvision.transforms as transforms

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Activation
from keras.layers import Conv2D
from keras.layers import MaxPooling2D
from keras.layers import Dropout
from keras.layers import Flatten
from keras.layers import BatchNormalization


In [124]:
def melspectro(songs, n_fft = 1024, hop_length=512):
    melspec = lambda x: librosa.feature.melspectrogram(x, n_fft = n_fft,
                                                      hop_length = hop_length)[:,:,np.newaxis]
    
    transformed = map(melspec, songs)
    return np.array(list(transformed))

def split(X, y, window = 0.1, overlap = 0.5):
    temp_X = []
    temp_y = []
    
    xshape = X.shape[0]
    chunk = int(xshape*window)
    offset = int(chunk*(1.-overlap))

    splits = [X[i:i+chunk] for i in range(0, xshape-chunk+offset, offset)]
    for s in splits:
        temp_X.append(s)
        temp_y.append(y)
    return np.array(temp_X), np.array(temp_y)

def read_data(src_dir, genres, song_samples, spec_format, debug = True):    
    arr_specs = []
    arr_genres = []

    for x,_ in genres.items():
        folder = src_dir + x
        
        for root, subdirs, files in os.walk(folder):
            for file in files:
                file_name = folder + "/" + file
                signal, sr = librosa.load(file_name)
                signal = signal[:song_samples]
                if debug:
                    print("Reading file: {}".format(file_name))
                signals, y = split(signal, genres[x])
                specs = spec_format(signals)
                arr_genres.extend(y)
                arr_specs.extend(specs)
                
                
    return np.array(arr_specs), np.array(arr_genres)




In [160]:
source = '/Users/jadhamdan/Desktop/Final Project/genres dataset/genres/'
song_samples = 660000
genres = {'metal': 0, 'disco': 1, 'classical': 2, 'hiphop': 3, 'jazz': 4, 
          'country': 5, 'pop': 6, 'blues': 7, 'reggae': 8, 'rock': 9}

X, y = read_data(source, genres, song_samples, melspectro, debug=False)

In [162]:
# One hot encoding of the labels
y = to_categorical(y)

In [164]:
X_stack = np.squeeze(np.stack((X,) * 3, -1))

In [167]:
print(y.shape)
#use X_stack or X here:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify = y)

(19000, 128, 129, 3)
(19000, 10)


In [169]:
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(13300, 128, 129, 1) (5700, 128, 129, 1) (13300, 10) (5700, 10)


In [254]:
input_shape = X_train[0].shape
model = Sequential()
model.add(Conv2D(10, kernel_size=(3,3), strides=(1,1),
                 activation='relu', input_shape = input_shape))
model.add(MaxPooling2D(pool_size = (2,2), strides=(2,2)))

model.add(Conv2D(20, kernel_size=(3,3), strides=(1,1),
                 activation='relu', input_shape = input_shape))
model.add(MaxPooling2D(pool_size = (2,2), strides=(2,2)))

model.add(Conv2D(40, kernel_size=(3,3), strides=(1,1),
                 activation='relu', input_shape = input_shape))
model.add(MaxPooling2D(pool_size = (2,2), strides=(2,2)))

model.add(Conv2D(80, kernel_size=(3,3), strides=(1,1),
                 activation='relu', input_shape = input_shape))
model.add(MaxPooling2D(pool_size = (2,2), strides=(2,2)))

model.add(Flatten())
model.add(Dense(10, activation='softmax'))
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_5 (Conv2D)            (None, 126, 127, 16)      160       
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 63, 63, 16)        0         
_________________________________________________________________
conv2d_6 (Conv2D)            (None, 61, 61, 32)        4640      
_________________________________________________________________
max_pooling2d_6 (MaxPooling2 (None, 30, 30, 32)        0         
_________________________________________________________________
conv2d_7 (Conv2D)            (None, 28, 28, 64)        18496     
_________________________________________________________________
max_pooling2d_7 (MaxPooling2 (None, 14, 14, 64)        0         
_________________________________________________________________
conv2d_8 (Conv2D)            (None, 12, 12, 128)       73856     
__________

In [257]:
model.compile(loss=keras.losses.categorical_crossentropy,
             optimizer=keras.optimizers.Adam(),
             metrics=['accuracy'])
hist = model.fit(X_train, y_train, batch_size=32, epochs=50, verbose=1, validation_data = (X_test, y_test))

Instructions for updating:
Use tf.cast instead.
Train on 13300 samples, validate on 5700 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50

KeyboardInterrupt: 

In [259]:
y_pred = np.argmax(model.predict(X_test), axis = 1)
y_true = np.argmax(y_test, axis = 1)
mat = confusion_matrix(y_true, y_pred)

sns.heatmap(mat, square=True, annot=True, cbar=False)
plt.xlabel('predicted value')
plt.ylabel('true value');

plt.show()

model.save('/Users/jadhamdan/Desktop/Final Project/genres dataset/cnn.h5')

NameError: name 'sns' is not defined