In [14]:
import pandas as pd
import numpy as np
import tensorflow as tf
import glob
import audioread as ar
import random
from urllib.request import urlretrieve
import tarfile
import math
import librosa
import time
from multiprocessing import Pool
import itertools

In [15]:
class MusicFile():
    def __init__(self, file_path):
        self.file_path = file_path
        self.file_name = file_path.split('/')[-1]
        self.audio, _ = librosa.core.load(file_path, duration=30)
        self.genre = file_path.split('/')[-2]

In [16]:
all_file_paths = glob.glob('genres/*/*.au')

def add_music_file(audio_file_path):
    return MusicFile(audio_file_path)

start_time = time.time()

p = Pool(20)
music_files = p.map(add_music_file, all_file_paths)

print("Took", time.time() - start_time)

Took 160.08117771148682


In [17]:
all_genres = sorted(set(map(lambda music_file: music_file.genre, music_files)))

In [18]:
def genre_to_int(genre):
    return all_genres.index(genre)

In [19]:
def genre_to_onehot(genre):
    array = np.zeros(len(all_genres))
    array[genre_to_int(genre)] = 1
    return array

In [20]:
music_files = sorted(music_files, key=lambda music_file: music_file.file_path)

x_train = []
y_train = []

x_test = []
y_test = []

for genre, genre_music_file_group in itertools.groupby(music_files, key=lambda music_file: music_file.genre):
    one_hot_genre = genre_to_onehot(genre)
    for i, music_file in enumerate(genre_music_file_group):
        if i < 50:
            x_train.append(music_file.audio[:100000])
            y_train.append(one_hot_genre)
        else:
            x_test.append(music_file.audio[:100000])
            y_test.append(one_hot_genre)

In [21]:
def normalize(data):
    for d in data:
        max_d = max(d)
        min_d = min(d)
        max_min = max_d-min_d
        for e in d:
            e += max_min
        max_d = max(d)
        min_d = min(d)
        for e in d:
            e = (e-min_d)/(max_d-min_d)
    return data

In [22]:
normalized_x_train = normalize(x_train)
normalized_x_test = normalize(x_test)

print(type(normalized_x_train), type(normalized_x_test))

<class 'list'> <class 'list'>


In [23]:
normalized_x_train = np.array(normalized_x_train)
normalized_x_test = np.array(normalized_x_test)

print(type(normalized_x_train), type(normalized_x_test))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


In [24]:
print(type(normalized_x_train), type(normalized_x_test))
print(normalized_x_train.shape, normalized_x_train[0].shape)
print(np.hstack(normalized_x_train).shape)

<class 'numpy.ndarray'> <class 'numpy.ndarray'>
(500, 100000) (100000,)
(50000000,)


In [25]:
normalized_x_train = np.hstack(normalized_x_train)
print(normalized_x_train.shape)

(50000000,)


In [27]:
normalized_x_train = normalized_x_train.reshape((500,100,1000,1))
print(normalized_x_train.shape)

(500, 100, 1000, 1)


In [28]:
normalized_x_test = np.hstack(normalized_x_test)
print(normalized_x_test.shape)

(50000000,)


In [29]:
normalized_x_test = normalized_x_test.reshape((500,100,1000,1))
print(normalized_x_test.shape)

(500, 100, 1000, 1)


In [30]:
model = tf.keras.models.Sequential()
# Must define the input shape in the first layer of the neural network
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=2, padding='same', activation='relu', input_shape=(100,1000,1))) 
model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
model.add(tf.keras.layers.Dropout(0.3))
model.add(tf.keras.layers.Conv2D(filters=32, kernel_size=2, padding='same', activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=2))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.Dropout(0.5))
model.add(tf.keras.layers.Dense(10, activation='softmax'))
# Take a look at the model summary
model.summary()

W0624 01:18:08.778198 140342130190080 deprecation.py:506] From /usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 100, 1000, 64)     320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 50, 500, 64)       0         
_________________________________________________________________
dropout (Dropout)            (None, 50, 500, 64)       0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 50, 500, 32)       8224      
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 25, 250, 32)       0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 25, 250, 32)       0         
_________________________________________________________________
flatten (Flatten)            (None, 200000)            0

In [31]:
model.compile(loss='categorical_crossentropy',
             optimizer='adam',
             metrics=['accuracy'])

In [32]:
print(normalized_x_train.shape, np.array(y_train).shape)

model.fit(normalized_x_train,
         np.array(y_train),
         batch_size=100,
         epochs=10,
         validation_split=0.1)

(500, 100, 1000, 1) (500, 10)
Train on 450 samples, validate on 50 samples
Epoch 1/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7fa374d90860>

In [33]:
# Evaluate the model on test set
score = model.evaluate(normalized_x_test, np.array(y_test), verbose=0)
# Print test accuracy
print('\n', 'Test accuracy:', score[1])


 Test accuracy: 0.2
