In [None]:
#@markdown --------------- 
#@markdown ## **⚠️❗ Ejecute esta celda para descargar Shoes dataset❗⚠️** 
#@markdown ### Esta celda creará la carpeta ```/content/genres_original```

!pip install -qq gdown
!gdown -qq "https://drive.google.com/u/1/uc?id=189XUvtQu4E63VP_KmrMaVVVk243Dig1P&export=download" -O /genres_original_short.zip
!mkdir /content/genres_original/
!unzip -qq /genres_original_short.zip -d /content/genres_original
!rm -r /genres_original_short.zip
print ("Done!")
#@markdown ---------------

Done!


In [None]:
import os
import librosa
import math
import json 
import matplotlib.pyplot as plt
import numpy as np

from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.optim as optim

from tqdm import tqdm
import time

In [None]:
# Audio constants used when loading and slicing every track.
SAMPLE_RATE = 22050  # sampling rate handed to librosa.load
DURATION = 30        # track length; assumed to be in seconds given how it scales SAMPLE_RATE
SAMPLES_PER_TRACK = DURATION * SAMPLE_RATE

# Input folder holding the extracted dataset and the JSON file the features are written to.
dataset_path = "/content/genres_original"
json_path = "data.json"

In [None]:
# MFCC extraction settings (all forwarded to librosa.feature.mfcc below).
n_mfcc = 13        # coefficients kept per frame
n_fft = 2048       # FFT window size
hop_length = 512   # samples between successive frames
num_segments = 10  # equal-length chunks each track is cut into

# Data storage dictionary
data = {
    "mapping": [],  # genre name per integer label (list index == label)
    "mfcc": [],     # one (frames x n_mfcc) nested list per stored segment
    "labels": [],   # integer label of the matching entry in "mfcc"
}
samples_ps = int(SAMPLES_PER_TRACK / num_segments)  # ps = per segment
expected_vects_ps = math.ceil(samples_ps / hop_length)

# Walk the dataset: every directory below the root is treated as one genre.
for dirpath, dirnames, filenames in os.walk(dataset_path):
    # Sorting in place makes os.walk visit sub-folders in a stable order, so
    # each genre receives the same integer label on every run.
    dirnames.sort()

    # The root folder itself is not a genre. Compare by value: `is not` tests
    # object identity, which is not a reliable way to compare strings.
    if dirpath == dataset_path:
        continue

    # The folder name is the semantic label; its position in "mapping" is the
    # integer label stored alongside each segment.
    semantic_label = os.path.basename(dirpath)
    data["mapping"].append(semantic_label)
    label = len(data["mapping"]) - 1
    print(f"Processing: {semantic_label}")

    stored_segments = 0
    for f in sorted(filenames):
        if f == "jazz.00054.wav":
            # Deliberately excluded by the original author.
            # NOTE(review): presumably this file cannot be decoded in this
            # dataset copy -- confirm before removing the guard.
            continue

        # load audio file
        file_path = os.path.join(dirpath, f)
        signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

        for s in range(num_segments):
            start_sample = samples_ps * s
            finish_sample = start_sample + samples_ps

            mfcc = librosa.feature.mfcc(y=signal[start_sample:finish_sample],
                                        sr=sr,
                                        n_mfcc=n_mfcc,
                                        hop_length=hop_length,
                                        n_fft=n_fft)
            # Transpose so rows are frames and columns are coefficients.
            mfcc = mfcc.T

            # Keep only full-length segments so every sample has the same
            # shape; shorter ones (truncated tracks) are dropped.
            if len(mfcc) == expected_vects_ps:
                data["mfcc"].append(mfcc.tolist())
                data["labels"].append(label)
                stored_segments += 1

    # One summary line per genre instead of one line per segment keeps the
    # cell output readable.
    print(f"  stored {stored_segments} segments")

with open(json_path, "w") as fp:
    json.dump(data, fp, indent=4)

Processing: jazz
/content/genres_original/jazz/jazz.00099.wav, segment: 1
/content/genres_original/jazz/jazz.00099.wav, segment: 2
/content/genres_original/jazz/jazz.00099.wav, segment: 3
/content/genres_original/jazz/jazz.00099.wav, segment: 4
/content/genres_original/jazz/jazz.00099.wav, segment: 5
/content/genres_original/jazz/jazz.00099.wav, segment: 6
/content/genres_original/jazz/jazz.00099.wav, segment: 7
/content/genres_original/jazz/jazz.00099.wav, segment: 8
/content/genres_original/jazz/jazz.00099.wav, segment: 9
/content/genres_original/jazz/jazz.00099.wav, segment: 10
Processing: classical
/content/genres_original/classical/classical.00099.wav, segment: 1
/content/genres_original/classical/classical.00099.wav, segment: 2
/content/genres_original/classical/classical.00099.wav, segment: 3
/content/genres_original/classical/classical.00099.wav, segment: 4
/content/genres_original/classical/classical.00099.wav, segment: 5
/content/genres_original/classical/classical.00099.wav,

In [None]:
# Fixed seed so the train/validation/test partition is identical on every run.
SPLIT_SEED = 42

# Read the features written by the extraction cell; the file is only needed
# for the load itself, so it is closed before any further processing.
with open(json_path, "r") as f:
    data = json.load(f)

# Convert lists to numpy arrays
X = np.array(data["mfcc"])
y = np.array(data["labels"])

# 80/20 train/test split, then 80/20 train/validation split of the remainder.
# NOTE(review): the split is done per segment, so segments of the same track
# can land in different subsets.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED)
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=SPLIT_SEED)

# Append a trailing channel axis: (samples, frames, coefficients, 1).
X_train = X_train[..., np.newaxis]
X_val = X_val[..., np.newaxis]
X_test = X_test[..., np.newaxis]

In [None]:
# Shape check: (segments, frames per segment, MFCC coefficients, trailing channel axis).
X_train.shape

(64, 130, 13, 1)

In [None]:
# PyTorch CNN genre classifier.
# Expects channels-first input of shape (N, 1, 130, 13): one channel,
# 130 frames, 13 MFCC coefficients. Outputs raw logits of shape (N, 10).
# The spatial sizes in the comments below are for that input.
model = nn.Sequential(
    nn.Conv2d(1, 64, kernel_size=3, padding=1),         # -> 64 x 130 x 13
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),   # -> 64 x 65 x 7
    nn.BatchNorm2d(64),
    nn.Conv2d(64, 32, kernel_size=3, padding=1),        # -> 32 x 65 x 7
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=3, stride=2, padding=1),   # -> 32 x 33 x 4
    nn.BatchNorm2d(32),
    nn.Conv2d(32, 32, kernel_size=2, padding=0),        # -> 32 x 32 x 3
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=2, stride=2, padding=0),   # -> 32 x 16 x 1
    nn.BatchNorm2d(32),
    nn.Conv2d(32, 16, kernel_size=1, padding=0),        # -> 16 x 16 x 1
    nn.ReLU(),
    nn.MaxPool2d(kernel_size=1, stride=2, padding=0),   # -> 16 x 8 x 1
    nn.BatchNorm2d(16),
    nn.Flatten(),                                       # -> 128
    # in_features must equal the flattened size above (16 * 8 * 1); the
    # previous 16 * 7 * 7 did not match and made the forward pass fail.
    nn.Linear(16 * 8 * 1, 64),
    nn.ReLU(),
    nn.Dropout(0.3),
    # No Softmax here: nn.CrossEntropyLoss applies log-softmax itself, so the
    # network must emit logits. Apply torch.softmax to the output only when
    # probabilities are needed at inference time.
    nn.Linear(64, 10),
)

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU, so the
# cell also runs on CPU-only runtimes.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The feature arrays are built with np.array from Python floats (float64), so
# the weights are cast to double to match. The model is converted and moved
# before the optimizer is created, so the optimizer is built from the final
# parameters.
model = model.double().to(device)

# Classification loss; it takes the model output and integer class labels.
loss = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

CrossEntropyLoss()

In [None]:
epochs = 40
batch_size = 32
losses = []  # mean training loss of each epoch

# Follow whatever device and dtype the model currently uses, so the batches
# always match the weights.
_model_param = next(model.parameters())
model_device = _model_param.device
model_dtype = _model_param.dtype

# Enable training behaviour for Dropout and BatchNorm.
model.train()

start_time = time.time()
progress = tqdm(range(epochs), ncols=110)

for epoch in progress:
    batch_losses = 0.0
    num_batches = 0

    for batch_i in range(0, len(X_train), batch_size):
        batch_X = torch.from_numpy(X_train[batch_i:batch_i + batch_size])
        batch_y = torch.from_numpy(y_train[batch_i:batch_i + batch_size])

        # X_train is stored channels-last (N, frames, coeffs, 1) but Conv2d
        # expects channels-first (N, 1, frames, coeffs).
        batch_X = batch_X.permute(0, 3, 1, 2).to(device=model_device, dtype=model_dtype)
        # CrossEntropyLoss requires int64 class indices.
        batch_y = batch_y.to(device=model_device, dtype=torch.long)

        # Zero the gradients
        optimizer.zero_grad()

        # Perform forward pass
        predictions = model(batch_X)

        # Compute loss
        batch_loss = loss(predictions, batch_y)

        # Perform backward pass
        batch_loss.backward()

        # Optimize parameters
        optimizer.step()

        # Save stats
        batch_losses += batch_loss.item()
        num_batches += 1

    # Average over the batches actually run; len(X_train) / batch_size is
    # fractional whenever the last batch is smaller than batch_size.
    epoch_loss = batch_losses / max(num_batches, 1)
    elapsed_time = time.time() - start_time

    progress.set_description("Epoch [%d/%d] [Loss: %f] time: %3f" % (epoch + 1, epochs,
                                                                     epoch_loss,
                                                                     elapsed_time))

    losses.append(epoch_loss)


  0%|                                                                                  | 0/40 [00:00<?, ?it/s]


RuntimeError: ignored

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.layers import *
from tensorflow.keras import optimizers
import tensorflow.keras as keras

In [None]:
# Keras CNN genre classifier built in a single Sequential(...) call.
# Input is channels-last: (130 frames, 13 MFCC coefficients, 1 channel).
model = Sequential([
    # Block 1: 3x3 convolution, overlapping 3x3 pooling, batch normalisation.
    Conv2D(64, (3, 3), activation="relu", input_shape=(130, 13, 1)),
    MaxPool2D((3, 3), strides=(2, 2), padding="same"),
    BatchNormalization(),

    # Block 2: same layout with 32 filters.
    Conv2D(32, (3, 3), activation="relu"),
    MaxPool2D((3, 3), strides=(2, 2), padding="same"),
    BatchNormalization(),

    # Block 3: 2x2 convolution and 2x2 pooling.
    Conv2D(32, (2, 2), activation="relu"),
    MaxPool2D((2, 2), strides=(2, 2), padding="same"),
    BatchNormalization(),

    # Block 4: 1x1 convolution; the 1x1 pool with stride 2 only subsamples.
    Conv2D(16, (1, 1), activation="relu"),
    MaxPool2D((1, 1), strides=(2, 2), padding="same"),
    BatchNormalization(),

    # Classifier head: flatten, dense layer with dropout, 10-way softmax.
    Flatten(),
    Dense(64, activation="relu"),
    Dropout(0.3),
    Dense(10, activation="softmax"),
])

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 128, 11, 64)       640       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 64, 6, 64)        0         
 )                                                               
                                                                 
 batch_normalization (BatchN  (None, 64, 6, 64)        256       
 ormalization)                                                   
                                                                 
 conv2d_1 (Conv2D)           (None, 62, 4, 32)         18464     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 31, 2, 32)        0         
 2D)                                                             
                                                        

In [None]:

# `learning_rate` is the supported argument name; `lr` is a deprecated alias
# that newer Keras releases reject.
adam = optimizers.Adam(learning_rate=1e-4)

# Labels are integer class indices, hence the sparse categorical loss.
model.compile(optimizer=adam,
              loss="sparse_categorical_crossentropy",
              metrics=["accuracy"])

# Train for 40 epochs and evaluate on the validation split after each epoch.
hist = model.fit(X_train, y_train,
                 validation_data = (X_val, y_val),
                 epochs = 40,
                 batch_size = 32)



Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [None]:
# Score the trained model on the held-out test split. evaluate() returns the
# loss followed by each compiled metric (here: accuracy).
test_metrics = model.evaluate(X_test, y_test, verbose=1)
test_error, test_accuracy = test_metrics
print(f"Test accuracy: {test_accuracy}")

Test accuracy: 0.25
