In [315]:
#!pip install tensorflow librosa

In [316]:
# Imports
import tensorflow as tf
from tensorflow.python.keras.api import keras
from tensorflow.python.keras.api.keras.preprocessing.image import ImageDataGenerator
import os
from pathlib import Path
from IPython.core.display import HTML, display

import librosa

import numpy as np

from typing import Union, Any

In [317]:
# CONFIG

SAMPLE_RATE = None

# Dataset:
DATASET_PATH = "./data"

GENRES_PATH = f"{DATASET_PATH}/genres_original"

GENRES = os.listdir(GENRES_PATH)

# Training
BATCH_SIZE = 20


In [318]:
GENRE_2_LABEL = {k: v for v, k in enumerate(GENRES)}
LABEL_2_GENRE = {v: k for k,v in GENRE_2_LABEL.items()}
print(f"Genre to Label Dict: {GENRE_2_LABEL}")
print(f"Label to Genre Dict: {LABEL_2_GENRE}")

Genre to Label Dict: {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}
Label to Genre Dict: {0: 'blues', 1: 'classical', 2: 'country', 3: 'disco', 4: 'hiphop', 5: 'jazz', 6: 'metal', 7: 'pop', 8: 'reggae', 9: 'rock'}


In [319]:
if not len(list(Path(f"{DATASET_PATH}/genres_original/disco").glob("*.wav"))) >= 100:
  print(f"""
    No subfolder 'genres_original/disco' found under {DATASET_PATH}.
    Please download the datasate from https://www.kaggle.com/andradaolteanu/gtzan-dataset-music-genre-classification and
    unzip it under {DATASET_PATH}.
  """)
  raise IOError


In [320]:
def preprocess_track(y: Union[np.ndarray, Any], sr=SAMPLE_RATE):
    S = librosa.feature.melspectrogram(y, sr=sr, n_mels=128)
    log_S = librosa.amplitude_to_db(S)
    return log_S

In [321]:
def load_data(genre_path=GENRES_PATH):
    track_input = []
    labels = []
    for genre in GENRES:
        genre_folder = f"{genre_path}/{genre}"
        label = GENRE_2_LABEL[genre]
        for track in os.listdir(genre_folder):

            if (track == "jazz.00054.wav"):
                # This track is broken 
                continue

            y, sr = librosa.load(f"{genre_folder}/{track}", sr=SAMPLE_RATE)
            
            track = preprocess_track(y, sr)
            
            # Esto es una chapuza: 
            track_input.append(track[:, :1290])
            labels.append(label)



    return np.array(track_input), np.array(labels)

In [322]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b)
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [323]:
tracks_, labels_ = load_data()

In [326]:
tracks, labels = tracks_, labels_
print(tracks[0])
print(tracks.shape)
shuffled_tracks , shuffled_labels = unison_shuffled_copies(tracks, labels)

tracks = np.expand_dims(shuffled_tracks, axis=3)
labels = shuffled_labels

print(tracks.shape)

train_datagen = ImageDataGenerator(rescale = 1./255, validation_split=0.2)

train_generator = train_datagen.flow(tracks, labels, batch_size=BATCH_SIZE, subset="training")
validation_generator = train_datagen.flow(tracks, labels, batch_size=BATCH_SIZE, subset="validation")



[[ 10.931824     5.324323    -2.5622883  ...   1.1041492   -3.8531473
    0.20791933]
 [  8.424408    10.457845    14.016918   ...  12.761856    10.764576
    4.328304  ]
 [ -8.4273205   -7.609818     9.51658    ...  11.335823    13.048315
   10.884599  ]
 ...
 [-11.547859   -11.547859   -11.547859   ... -11.547859   -11.547859
  -11.547859  ]
 [-11.547859   -11.547859   -11.547859   ... -11.547859   -11.547859
  -11.547859  ]
 [-11.547859   -11.547859   -11.547859   ... -11.547859   -11.547859
  -11.547859  ]]
(999, 128, 1290)
(999, 128, 1290, 1)


In [327]:
model = keras.models.Sequential([
    tf.keras.layers.Conv2D(64, (3,3), activation='relu', input_shape=(128, 1290, 1)),
    tf.keras.layers.MaxPooling2D(2, 2),                                   
    
    tf.keras.layers.Conv2D(64, (3,3), activation='relu'),
    tf.keras.layers.MaxPooling2D(2,2),

    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dense(26, activation='softmax'),  
])

model.compile(loss = 'sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

history = model.fit(train_generator,
                     validation_data = validation_generator, 
                     validation_steps = len(tracks) / BATCH_SIZE,
                     epochs=20   
                     )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20


KeyboardInterrupt: 