In [1]:
#@markdown --------------- 
#@markdown ## **⚠️❗ Ejecute esta celda para descargar Shoes dataset❗⚠️** 
#@markdown ### Esta celda creará la carpeta ```/content/genres_original```

!pip install -qq gdown
!gdown -qq "https://drive.google.com/u/1/uc?id=1o8RYJtI7FCdyPqbglgXf17bxajy8_CD1&export=download" -O /genres_original_short.zip
!mkdir /content/genres_original/
!unzip -qq /genres_original_short.zip -d /content/genres_original
!rm -r /genres_original_short.zip
print ("Done!")
#@markdown ---------------

Done!


In [2]:
import os
import librosa
import math
import json 
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
from tensorflow.keras import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras import optimizers
import tensorflow.keras as keras

from tqdm import tqdm
import time

In [3]:
dataset_path = "/content/genres_original"
json_path = r"data.json"
SAMPLE_RATE = 22050
DURATION = 30
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION

In [21]:
# Parámetros de configuración
n_mfcc = 13
n_fft = 2048
hop_length = 512
num_segments = 10

# Data storage dictionary
data = {
    "mapping": [],
    "mfcc": [],
    "labels": [],
}

# Cálculo de variables auxiliares
samples_ps = int(SAMPLES_PER_TRACK / num_segments)  # ps = per segment
expected_vects_ps = math.ceil(samples_ps / hop_length)

# Recorrido de géneros musicales
for i, (dirpath, dirnames, filenames) in enumerate(os.walk(dataset_path)):
    # Se asegura de no estar en la raíz
    if dirpath != dataset_path:
        # Guardar la etiqueta semántica
        semantic_label = os.path.basename(dirpath)
        data["mapping"].append(semantic_label)
        print(f"Processing: {semantic_label}")

        # Procesar archivos para el género específico
        for f in tqdm(filenames, ncols=80):
            if f == "jazz.00054.wav":
                # Como librosa solo lee archivos <1Mb
                continue
            else:
                # Cargar el archivo de audio
                file_path = os.path.join(dirpath, f)
                signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
                for s in range(num_segments):
                    start_sample = samples_ps * s
                    finish_sample = start_sample + samples_ps

                    # Obtener los coeficientes MFCC
                    mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                                sr=sr,
                                                n_mfcc=n_mfcc,
                                                hop_length=hop_length,
                                                n_fft=n_fft)

                    mfcc = mfcc.T

                    # Guardar los coeficientes MFCC si tienen la longitud esperada
                    if len(mfcc) == expected_vects_ps:
                        data["mfcc"].append(mfcc.tolist())
                        data["labels"].append(i - 1)

# Guardar los datos en formato JSON
with open(json_path, "w") as f:
    json.dump(data, f, indent=4)

Processing: rock


100%|█████████████████████████████████████████| 100/100 [00:15<00:00,  6.66it/s]


Processing: country


100%|█████████████████████████████████████████| 100/100 [00:13<00:00,  7.37it/s]


Processing: hiphop


100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  7.10it/s]


Processing: jazz


100%|█████████████████████████████████████████| 100/100 [00:13<00:00,  7.32it/s]


Processing: pop


100%|█████████████████████████████████████████| 100/100 [00:13<00:00,  7.37it/s]


Processing: metal


100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.95it/s]


Processing: blues


100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  6.94it/s]


Processing: disco


100%|█████████████████████████████████████████| 100/100 [00:13<00:00,  7.21it/s]


Processing: reggae


100%|█████████████████████████████████████████| 100/100 [00:14<00:00,  7.06it/s]


Processing: classical


100%|█████████████████████████████████████████| 100/100 [00:18<00:00,  5.50it/s]


In [22]:
with open("data.json","r") as f:
    data = json.load(f)
    # Convert list to numpy arrays
    X = np.array(data["mfcc"])
    y = np.array(data["labels"])
      
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)

In [23]:
X_train.shape

(7988, 130, 13)

In [24]:
model = Sequential()
model.add(Conv2D(64, (3, 3), activation = "relu", input_shape = (130, 13, 1)))
model.add(MaxPool2D((3, 3), strides=(2, 2), padding="same"))
model.add(BatchNormalization())

model.add(Conv2D(32, (3, 3), activation = "relu"))
model.add(MaxPool2D((3, 3), strides=(2, 2), padding="same"))
model.add(BatchNormalization())

model.add(Conv2D(32, (2, 2), activation = "relu"))
model.add(MaxPool2D((2, 2), strides=(2, 2), padding="same"))
model.add(BatchNormalization())

model.add(Conv2D(16, (1, 1), activation = "relu"))
model.add(MaxPool2D((1, 1), strides=(2, 2), padding="same"))
model.add(BatchNormalization())

model.add(Flatten())
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.3))
model.add(Dense(10, activation="softmax"))

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_4 (Conv2D)           (None, 128, 11, 64)       640       
                                                                 
 max_pooling2d_4 (MaxPooling  (None, 64, 6, 64)        0         
 2D)                                                             
                                                                 
 batch_normalization_4 (Batc  (None, 64, 6, 64)        256       
 hNormalization)                                                 
                                                                 
 conv2d_5 (Conv2D)           (None, 62, 4, 32)         18464     
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 31, 2, 32)        0         
 2D)                                                             
                                                      

In [25]:
adam = optimizers.Adam(lr=1e-4)
model.compile(optimizer=adam,
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])



In [26]:
hist = model.fit(X_train, y_train,\
                 epochs = 100 , batch_size = 200 , \
                    verbose = 1 , validation_split = 0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [27]:
test_error, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 0.7617617845535278
