In [19]:
#!pip install tensorflow librosa

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JorgeRuizDev/SpotMyFM/blob/main/Ludwig/notebooks/gtzan/gtzan_demo_train.ipynb)

In [20]:
# Imports
import tensorflow as tf
from tensorflow.python.keras.api import keras
from tensorflow.python.keras.api.keras.preprocessing.image import ImageDataGenerator

import os
from pathlib import Path
from IPython.core.display import HTML, display

import librosa

import numpy as np

from typing import Union, Any

In [21]:
# Sanity check of the runtime: report the TensorFlow version in use and the
# GPU devices TensorFlow can see (an empty list means training runs on CPU).
print(tf.__version__)
print(tf.config.list_physical_devices("GPU"))


2.7.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [22]:
# CONFIG

# None is forwarded to librosa.load, which then keeps each file's native
# sampling rate instead of resampling.
SAMPLE_RATE = None

# Dataset:
DATASET_PATH = "./data"

GENRES_PATH = f"{DATASET_PATH}/genres_original"

# One sub-directory per genre. The names are sorted so the genre -> label
# mapping is identical on every machine (directory listing order is
# OS-dependent), and plain files are ignored. If the dataset folder is missing
# the list is left empty so the dataset check further down can report the
# problem with download instructions instead of failing here.
GENRES = (
    sorted(entry.name for entry in Path(GENRES_PATH).iterdir() if entry.is_dir())
    if Path(GENRES_PATH).is_dir()
    else []
)

# Training
BATCH_SIZE = 20


In [23]:
# Lookup tables between genre names and the integer class ids used as
# training targets: a genre's id is its position in GENRES.
GENRE_2_LABEL = dict(zip(GENRES, range(len(GENRES))))
LABEL_2_GENRE = {label: genre for genre, label in GENRE_2_LABEL.items()}

print(f"Genre to Label Dict: {GENRE_2_LABEL}")
print(f"Label to Genre Dict: {LABEL_2_GENRE}")

Genre to Label Dict: {'blues': 0, 'classical': 1, 'country': 2, 'disco': 3, 'hiphop': 4, 'jazz': 5, 'metal': 6, 'pop': 7, 'reggae': 8, 'rock': 9}
Label to Genre Dict: {0: 'blues', 1: 'classical', 2: 'country', 3: 'disco', 4: 'hiphop', 5: 'jazz', 6: 'metal', 7: 'pop', 8: 'reggae', 9: 'rock'}


In [24]:
# Fail fast when the dataset is missing or only partially extracted: the
# 'disco' genre folder is used as a probe and must hold at least 100 .wav files.
if len(list(Path(f"{DATASET_PATH}/genres_original/disco").glob("*.wav"))) < 100:
    # The explanation travels with the exception so it is visible in the
    # traceback instead of being printed separately before a bare error.
    raise IOError(
        f"Expected at least 100 .wav files in '{DATASET_PATH}/genres_original/disco'. "
        "Please download the dataset from "
        "https://www.kaggle.com/andradaolteanu/gtzan-dataset-music-genre-classification "
        f"and unzip it under {DATASET_PATH}."
    )


In [25]:
def preprocess_track(y: Union[np.ndarray, Any], sr=SAMPLE_RATE):
    """Convert a raw audio signal into a log-scaled (dB) mel spectrogram.

    Args:
        y: Audio time series, as returned by ``librosa.load``.
        sr: Sampling rate of ``y``. Callers should pass the rate returned by
            ``librosa.load``.
            NOTE(review): the default is SAMPLE_RATE, which is None in this
            notebook; librosa documents ``sr`` as a positive number, so the
            default is presumably not usable as-is -- confirm.

    Returns:
        The mel spectrogram in decibels, computed with 128 mel bands.
    """
    # `y` is passed by keyword: recent librosa releases made the arguments of
    # melspectrogram keyword-only.
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    # melspectrogram returns a *power* spectrogram by default (power=2.0), so
    # the matching conversion is power_to_db. amplitude_to_db would square the
    # values a second time and double every dB value.
    log_S = librosa.power_to_db(S)
    return log_S

In [26]:
def load_data(genre_path=GENRES_PATH, n_frames=1290):
    """Load every track of every genre as a fixed-width mel spectrogram.

    Args:
        genre_path: Directory holding one sub-folder per entry of GENRES.
        n_frames: Number of spectrogram time frames kept per track. Longer
            spectrograms are truncated and shorter ones are right-padded, so
            all samples share one shape and stack into a regular array.

    Returns:
        A ``(tracks, labels)`` tuple of numpy arrays: the stacked spectrograms
        (one per track, each ``n_frames`` wide) and the integer genre label of
        each track, in the same order.
    """
    track_input = []
    labels = []
    for genre in GENRES:
        genre_folder = f"{genre_path}/{genre}"
        label = GENRE_2_LABEL[genre]
        # Sorted so the sample order does not depend on the filesystem.
        for filename in sorted(os.listdir(genre_folder)):

            if filename == "jazz.00054.wav":
                # This track is broken
                continue

            if not filename.lower().endswith(".wav"):
                # Skip stray non-audio files (e.g. OS metadata files).
                continue

            y, sr = librosa.load(f"{genre_folder}/{filename}", sr=SAMPLE_RATE)

            spectrogram = preprocess_track(y, sr)

            # This is a hack: clips differ slightly in length, so every
            # spectrogram is forced to exactly `n_frames` columns. Short clips
            # are padded with their own minimum (quietest) value first, because
            # truncation alone would leave them narrower than the rest.
            missing = n_frames - spectrogram.shape[1]
            if missing > 0:
                fill_value = spectrogram.min() if spectrogram.size else 0.0
                spectrogram = np.pad(
                    spectrogram,
                    ((0, 0), (0, missing)),
                    mode="constant",
                    constant_values=fill_value,
                )

            track_input.append(spectrogram[:, :n_frames])
            labels.append(label)

    return np.array(track_input), np.array(labels)

In [27]:
def unison_shuffled_copies(a, b):
    """Return shuffled copies of ``a`` and ``b`` that share one random order.

    Both inputs must have the same length (checked with an assertion). A single
    permutation drawn from numpy's global random state is applied to both, so
    the element at position i of one result still pairs with position i of
    the other.
    """
    n_items = len(a)
    assert n_items == len(b)
    order = np.random.permutation(n_items)
    shuffled_a = a[order]
    shuffled_b = b[order]
    return shuffled_a, shuffled_b

In [28]:
# Decode and preprocess the whole dataset once. The result is kept under
# underscore-suffixed names so later cells can reshuffle and reshape the data
# without re-reading the audio files.
tracks_, labels_ = load_data()

In [29]:
# Shuffle samples and labels with one shared permutation so the pairs stay aligned.
shuffled_tracks, shuffled_labels = unison_shuffled_copies(tracks_, labels_)

# Append a trailing axis (axis 3) to the stacked spectrograms; presumably this
# is the single "channel" dimension the Conv2D input expects -- confirm against
# the model's input_shape.
tracks, labels = np.expand_dims(shuffled_tracks, axis=3), shuffled_labels

# One generator object provides both subsets; validation_split reserves 20% of
# the samples for validation.
# NOTE(review): rescale=1/255 is the usual 8-bit image convention, but the
# inputs here are dB spectrogram values -- confirm this scaling is intended.
train_datagen = ImageDataGenerator(validation_split=0.2, rescale=1.0 / 255)

train_generator = train_datagen.flow(
    x=tracks,
    y=labels,
    batch_size=BATCH_SIZE,
    subset="training",
)
validation_generator = train_datagen.flow(
    x=tracks,
    y=labels,
    batch_size=BATCH_SIZE,
    subset="validation",
)



In [30]:
# CNN genre classifier: three Conv2D + max-pooling stages, an average-pooling
# stage, then a stack of dense layers ending in a 10-way softmax.
# Everything is built from the public `tf.keras` namespace; the model container
# and activations previously came from the private `tensorflow.python.keras`
# import while the layers came from `tf.keras`, and mixing the two
# implementations is not supported.
# Only the first layer declares `input_shape`; Sequential ignores it on later layers.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv2D(32, (5, 5), activation=tf.keras.layers.LeakyReLU(alpha=0.01), input_shape=(128, 1290, 1)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (5, 5), activation=tf.keras.layers.LeakyReLU(alpha=0.01)),
    tf.keras.layers.MaxPooling2D(2, 2),
    tf.keras.layers.Conv2D(32, (5, 5), activation=tf.keras.layers.LeakyReLU(alpha=0.01)),
    tf.keras.layers.MaxPooling2D(2, 2),

    tf.keras.layers.AveragePooling2D(pool_size=(2, 2)),

    tf.keras.layers.Flatten(),

    tf.keras.layers.Dense(256, activation=tf.keras.layers.LeakyReLU(alpha=0.01)),
    tf.keras.layers.Dense(128, activation=tf.keras.layers.LeakyReLU(alpha=0.01)),
    tf.keras.layers.Dense(64, activation=tf.keras.layers.LeakyReLU(alpha=0.01)),
    tf.keras.layers.Dense(10, activation='softmax'),
])

# Labels are integer class ids (not one-hot), hence the sparse loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# validation_steps must be a whole number of *validation* batches.
# len(validation_generator) is exactly one pass over the validation subset;
# the previous len(tracks) / BATCH_SIZE was a float and was sized from the
# full dataset rather than the validation split.
# If the GPU runs out of memory in the first convolution, lower BATCH_SIZE.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    validation_steps=len(validation_generator),
    epochs=1,
)

ResourceExhaustedError: Exception encountered when calling layer "conv2d_2" (type Conv2D).

OOM when allocating tensor with shape[20,64,126,1288] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Conv2D]

Call arguments received:
  • inputs=tf.Tensor(shape=(20, 128, 1290, 1), dtype=float32)