In [39]:
#!pip install tensorflow librosa

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/JorgeRuizDev/SpotMyFM/blob/main/Ludwig/notebooks/gtzan/gtzan_demo_transfer_learning.ipynb)
[![Kaggle](https://kaggle.com/static/images/open-in-kaggle.svg)](https://kaggle.com/kernels/welcome?src=https://github.com/JorgeRuizDev/SpotMyFM/blob/main/Ludwig/notebooks/gtzan/gtzan_demo_transfer_learning.ipynb)

In [40]:
# Imports
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from keras import layers
import os
from pathlib import Path
from IPython.core.display import HTML, display

import librosa
import random
import numpy as np
from math import floor
from typing import Union, Any

In [41]:
def download():
  #!pip install kaggle
  import os
  os.environ['KAGGLE_USERNAME'] = "jorgeruizdev"
  os.environ['KAGGLE_KEY'] = ""
  !kaggle datasets download -d andradaolteanu/gtzan-dataset-music-genre-classification
  !unzip -qq /content/gtzan-dataset-music-genre-classification.zip
#download()

In [42]:
print(tf. __version__)
print(tf.config.list_physical_devices('GPU'))


2.6.2
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [43]:
# CONFIG

SAMPLE_RATE = 22050

TRACK_LENGTH = 30

TRACK_SAMPLES = 22050 * TRACK_LENGTH

SPLITS = 10

SAMPLES_PER_SPLIT = TRACK_SAMPLES // SPLITS

N_MELS = 64

print(f"Samples Per Split {SAMPLES_PER_SPLIT}")

INPUT_SHAPE = (SAMPLES_PER_SPLIT , N_MELS, 1)

# Dataset:
DATASET_PATH = "../input/gtzan-extended-wav/wav"

GENRES_PATH = f"{DATASET_PATH}/genres_original"

GENRES = os.listdir(GENRES_PATH)



# Training
BATCH_SIZE = 40


Samples Per Split 66150


In [44]:
GENRE_2_LABEL = {k: v for v, k in enumerate(GENRES)}
LABEL_2_GENRE = {v: k for k,v in GENRE_2_LABEL.items()}
print(f"Genre to Label Dict: {GENRE_2_LABEL}")
print(f"Label to Genre Dict: {LABEL_2_GENRE}")

Genre to Label Dict: {'disco': 0, 'metal': 1, 'reggae': 2, 'blues': 3, 'rock': 4, 'classical': 5, 'jazz': 6, 'hiphop': 7, 'country': 8, 'pop': 9}
Label to Genre Dict: {0: 'disco', 1: 'metal', 2: 'reggae', 3: 'blues', 4: 'rock', 5: 'classical', 6: 'jazz', 7: 'hiphop', 8: 'country', 9: 'pop'}


In [45]:
if not len(list(Path(f"{DATASET_PATH}/genres_original/disco").glob("*.wav"))) >= 100:
  print(f"""
    No subfolder 'genres_original/disco' found under {DATASET_PATH}.
    Please download the datasate from https://www.kaggle.com/andradaolteanu/gtzan-dataset-music-genre-classification and
    unzip it under {DATASET_PATH}.
  """)
  raise IOError


In [46]:
def split_track(y, samples_per_split = SAMPLES_PER_SPLIT):

    return [y[slice_samp:slice_samp + samples_per_split] for slice_samp in range(0, len(y), samples_per_split) ]



In [47]:
def normalize_mfccs(mfcc_feat: np.ndarray):
    return np.subtract(mfcc_feat,np.mean(mfcc_feat))


In [48]:
def preprocess_track(signal: Union[np.ndarray, Any], sr=SAMPLE_RATE):
    mfccs = librosa.feature.mfcc(y=signal, n_mfcc=N_MELS, sr=sr)

    #log_S = librosa.amplitude_to_db(mfccs)
    #delta = librosa.feature.delta(mfccs, order=1)

    return normalize_mfccs(mfccs)

In [49]:
def load_data(genre_path=GENRES_PATH):
    track_input = []
    labels = []
    for genre in GENRES:
        genre_folder = f"{genre_path}/{genre}"
        label = GENRE_2_LABEL[genre]
        for track in os.listdir(genre_folder):

            if (track == "jazz.00054.wav"):
                # This track is broken 
                continue

            y, sr = librosa.load(f"{genre_folder}/{track}", sr=SAMPLE_RATE)
            
            # Split into chunkso f SAMPLES_PER_SPLIT length 
            splits = split_track(y)

            track = []

            for split in splits:
                
                if (len(split) == SAMPLES_PER_SPLIT):
                    track.append(preprocess_track(split, sr))
            
            if len(track):  
                track_input.append(track)
                labels.append(label)

    

    return track_input, labels

In [50]:
def unison_shuffled_copies(a, b):
    assert len(a) == len(b), f"len(a) = {len(a)} != len(b) = {len(b)}"
    p = np.random.permutation(len(a))
    return a[p], b[p]

In [51]:
tracks_, labels_ = load_data()

KeyboardInterrupt: 

In [None]:
def train_test_val(ds, test=0.1, val=0.2):


    train_slice = floor(len(ds) * (1 - val + test))


    train = ds[:train_slice]
    rest = ds[train_slice:]

    rest_slice =  floor(len(rest) * (1 - (test / ( test + val))))

    test = rest[:rest_slice]
    val = rest[rest_slice:]

    return train, test, val


In [None]:
class TrackDataGen(keras.utils.Sequence):
    
    def __init__(self, data,
                 input_shape = (13, 259),
                 batch_size = 30):
        
        self.batch_size = batch_size
        self.shape = input_shape
        X = []
        Y = []

        for track_splits, labels in data:
            X.extend(track_splits)
            Y.extend(([labels]* len(track_splits)))

        X_np = np.array(X)
        X_np = np.expand_dims(X_np, axis=3)
        Y_np = np.array(Y)

        self.X, self.Y = unison_shuffled_copies(X_np, Y_np)
        
    
    
    def on_epoch_end(self):
        pass
    
    def __getitem__(self, idx):
        batch_x = self.X[idx * self.batch_size:(idx + 1) * self.batch_size]
        batch_y = self.Y[idx * self.batch_size:(idx + 1) * self.batch_size] 
        
        return batch_x, np.array(batch_y)
    
    def __len__(self):
        return len(self.X) // self.batch_size

In [None]:
class StopCallback(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if (logs.get("accuracy") or 0) > 0.97:
            self.model.stop_training = True
stop_callback = StopCallback()

In [None]:
checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    "./checkpoint",
    monitor='val_accuracy',
)

In [None]:
tracks, labels = tracks_, labels_


track_labels = list(zip(tracks, labels))

random.shuffle(track_labels)

train, test, val = train_test_val(track_labels)
print(len(train) + len(test) + len(val))


train_generator = TrackDataGen(train)
print(train_generator[0][0].shape)
validation_generator = TrackDataGen(val)




In [None]:
base_model = keras.applications.ResNet50(
    include_top=False, 
    #input_shape=INPUT_SHAPE,
)

# Freeze the model
base_model.trainable = False

model = keras.models.Sequential([
    keras.layers.Conv2D(32, (5,5), activation=keras.layers.LeakyReLU(alpha=0.01)),
    keras.layers.MaxPooling2D(2, 2),
    keras.layers.Conv2D(32, (5,5), activation=keras.layers.LeakyReLU(alpha=0.01)),
    keras.layers.MaxPooling2D(2, 2),
    keras.layers.Conv2D(32, (5,5), activation=keras.layers.LeakyReLU(alpha=0.01)),
    keras.layers.MaxPooling2D(2, 2),                                       

    keras.layers.AveragePooling2D(pool_size=(2, 2)),

    keras.layers.Flatten(),

    keras.layers.Dense(256, activation=keras.layers.LeakyReLU(alpha=0.01)),

    keras.layers.Dense(128, activation=keras.layers.LeakyReLU(alpha=0.01)),

    keras.layers.Dense(64, activation=keras.layers.LeakyReLU(alpha=0.01)),
    keras.layers.Dense(10, activation='softmax'),  
])

model.compile(loss = 'sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

model.compile(loss = 'sparse_categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])



In [None]:
history = model.fit(train_generator,
                     validation_data = validation_generator, 
                     #validation_steps = len(tracks) // BATCH_SIZE,
                     epochs=1000,
                     callbacks = [stop_callback, checkpoint_callback]
                     )

In [None]:

test_generator = TrackDataGen(test)
results = model.evaluate(test_generator)
print("test loss, test acc:", results)


        
    



In [None]:
def get_real_acc(dataset):
    hit = 0
    hit2 = 0
    for splits, label in test:

        res = model.predict(np.expand_dims(splits, axis=3))
        res_sum = res.sum(axis=0)
        pred_label = np.argmax(res_sum)
        if label == pred_label:
            hit += 1
        else:
            if np.argsort(res_sum)[-2] == label:
                hit2 +=1
    return hit/len(dataset), hit2 / len(dataset)

In [None]:
train_first, train_second = get_real_acc(train)
print(f"TRAIN - Best of all segments ACC: {train_first* 100 : .2f} %" )
print(f"TRAIN - Second Hit ACC: {train_second * 100 : .2f} %" )

test_first, test_second = get_real_acc(test)
print(f"TEST - Best of all segments ACC: {test_first* 100 : .2f} %" )
print(f"TEST - Second Hit ACC: {test_second * 100 : .2f} %" )

In [None]:
import matplotlib.pyplot as plt
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)
plt.figure()


plt.show()

In [None]:
model.save('/tmp/keras_model')

In [None]:
import requests
from IPython import display
url = input()
r = requests.get(url, allow_redirects=True)
open('test.mp3', 'wb').write(r.content)

Y, sr = librosa.load('test.mp3')

display.Audio("test.mp3")

In [None]:
splits = []

for split in split_track(Y):
    if len(split) == SAMPLES_PER_SPLIT:
        splits.append(preprocess_track(split))

print(np.array(splits).shape)
pred = model.predict(np.expand_dims(splits, axis=3))
#print(pred)
pred2 = pred.sum(axis=0)
print(pred2)
print(LABEL_2_GENRE)
print(LABEL_2_GENRE.get(np.argmax(pred2)))

