In [1]:
import os
import pandas as pd
from torch import nn
from torch.utils.data import DataLoader
import torch
import torchaudio
from tqdm import tqdm
import matplotlib.pyplot as plt
from torchsummary import summary
import sounddevice as sd



In [4]:
#Correr solo si se va a plotear spectrogramas

import librosa

def plot_spectrogram(specgram, title=None, ylabel="freq_bin", ax=None):
    if ax is None:
        _, ax = plt.subplots(1, 1)
    if title is not None:
        ax.set_title(title)
    ax.set_ylabel(ylabel)
    ax.imshow(librosa.power_to_db(specgram), origin="lower", aspect="auto", interpolation="nearest")

In [5]:
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [6]:
#librerias locales
#from ..libs import dataLoaders as mi_dataLoader
#from ..libs import clasificador_padre as mi_clasificador
from importlib.machinery import SourceFileLoader
mi_dataLoader = SourceFileLoader('dataLoadersV2', '../libs/dataLoaders_16k.py').load_module()
mi_clasificador = SourceFileLoader('clasificador_padre', '../libs/clasificador_padreV2.py').load_module()

In [7]:
DataSetCanciones = mi_dataLoader.data_loader_16k
DataSetRir = mi_dataLoader.data_loader_rir

# Cargando los path

In [8]:
dataSetName = '16k_songs'
root = '.'

In [9]:

while not os.path.isdir(os.path.join(root,"data")):
    root = os.path.join(root,"..")

Path_train = os.path.join(root,'data',dataSetName+'_train.csv')
Path_val = os.path.join(root,'data',dataSetName+'_val.csv')
Path_test = os.path.join(root,'data',dataSetName+'_test.csv')

Path_rir = os.path.join(root,"data","RIR_16K")
Path_noise = os.path.join(root,"data","NOISE_16K")

#cargando los Df

In [10]:
#cargo el data set
trainDf = pd.read_csv(Path_train)
testDf = pd.read_csv(Path_test)
valDf = pd.read_csv(Path_val)



#trainDataSet = DataLoaderCanciones(trainDf, file_col="16k_file", target_col="artist", seed = 98)
#una muestra del data set
#sample, target = testDataSet[0:9]
#print(sample[0].shape, target)

sd.play(sample[0], 16000)
sd.wait(3)
sd.stop()

# Convolucionando canciones con RIR

#a = torch.stack(sample).to(device)
#b =torch.stack(rirData[:a.shape[0]]).to(device)

def normalizer(signal, threshold = 1e-12):
  """
  Recibe una senial en formato tensor y un valor de tresh_hold. 
  Normaliza la senial a no ser que el valor de pico de la  senial sea menor al valor de threshold
  """
  max_abs_val = signal.abs().max(dim=1, keepdim=True)[0]
  condition_mask = (max_abs_val > threshold) & (max_abs_val != 1.0)
  divisors = torch.where(condition_mask, max_abs_val, torch.ones_like(max_abs_val))
  normalized_batch = signal / divisors
  return normalized_batch

#res = torchaudio.functional.convolve(a,b,"full")[:,:10*TARGET_SR]
#res = normalizer(res).unsqueeze(1)
#res.shape
#torch.conv1d(torch.stack(sample).unsqueeze(0),torch.stack(rirData[0:9]).unsqueeze(0),padding="same")

sd.play(res[0,0,0:16000*10].cpu(), 16000)
sd.wait(3)
sd.stop()

# Procesando las canciones en un MFCC

In [11]:
#parametros para los calculos del mell
TARGET_SR = 16000  # Normalmente el audio se sule usar a 16k aunque encontre papers que trabajan a 22050 o a 22k (ver V2)
N_FFT = 512  #muestras de la fft
W_LEN = 400  # Numero de muestras para la ventan de la  fft (seg_de_ventan *sr) 
H_LEN = 160 # paso de la ventana entre una fft y la siguiente (paso * sr)
N_MELS = 40#26
N_MFCC = 32#13

MFCCCalculator = torchaudio.transforms.MFCC(sample_rate = TARGET_SR,
                                            n_mfcc = int(N_MFCC),
                                            dct_type = 2,
                                            norm = 'ortho',
                                            log_mels = False,
                                            melkwargs = {"n_fft": N_FFT, "hop_length": H_LEN, "n_mels": N_MELS, "center": False},
                                            ).to(device)

# Generando el DataSet

In [12]:
DataSetConst = mi_dataLoader.DataSet_song_plus_rir_v4
trainDataSet = DataSetConst(trainDf, "16k_file", "artist", Path_rir, os.listdir(Path_rir), rir_prob = 0.5, seed = 98)

In [13]:
len(trainDataSet)

8712

In [14]:
mi_dataLoader = SourceFileLoader('dataLoadersV2', '../libs/dataLoaders_16k.py').load_module()
mi_clasificador = SourceFileLoader('clasificador_padre', '../libs/clasificador_padreV2.py').load_module()
DataSetConst = mi_dataLoader.DataSet_song_plus_rir_v4
trainDataSet = DataSetConst(trainDf, "16k_file", "artist", Path_rir, os.listdir(Path_rir), rir_prob = 0.5, seed = 98)


# Generando el DataLoader

In [15]:
train_dataloader = DataLoader(trainDataSet, batch_size=2, shuffle=True,num_workers= 16, pin_memory=True)
#muestra_data, muestra_target = next(iter(test_dataloader))
#muestra_target

In [None]:
###Muestras de especrogramas
import multiprocessing  
import random
from multiprocessing import Pool

if __name__ == '__main__':
    train_features = next(iter(train_dataloader))
#print(f"Feature batch shape: {train_features.size()}")
#print(f"Labels batch shape: {train_labels.size()}")
#plot_spectrogram(train_features[0], train_labels[0])
#img = train_features[0].squeeze()
#label = train_labels[0]
#plt.imshow(img, cmap="gray")
#plt.show()
#print(f"Label: {label}")

In [48]:
train_features

[tensor([[ 0.3433,  0.3345,  0.2799,  ...,  0.0304,  0.2436, -0.1227],
         [ 0.2995,  0.3028,  0.3457,  ...,  0.4174,  0.4074,  0.5173],
         [-0.1052, -0.0877, -0.0834,  ...,  0.1168,  0.1176,  0.1191],
         ...,
         [-0.4376, -0.5025, -0.4923,  ..., -0.2704, -0.2596, -0.1519],
         [ 0.1013,  0.1667,  0.1714,  ..., -0.0651,  0.0104,  0.0191],
         [ 0.0705,  0.1400,  0.1454,  ...,  0.0511,  0.0540,  0.1028]]),
 tensor([[0., 1., 0.],
         [1., 0., 0.],
         [1., 0., 0.],
         [1., 0., 0.],
         [0., 0., 1.],
         [0., 1., 0.],
         [1., 0., 0.],
         [1., 0., 0.],
         [0., 1., 0.],
         [1., 0., 0.],
         [0., 0., 1.],
         [1., 0., 0.],
         [1., 0., 0.],
         [0., 1., 0.],
         [0., 1., 0.],
         [0., 1., 0.],
         [0., 0., 1.],
         [0., 1., 0.],
         [0., 1., 0.],
         [1., 0., 0.],
         [0., 0., 1.],
         [0., 1., 0.],
         [0., 1., 0.],
         [0., 0., 1.],
      

# instanciando un modelo

In [1]:
os.listdir('./save/state')
pState = "./save/state/"
pHist = "./save/history/"
overWrite = False
show_metrics = False
sLoad = 0 #save state a cargar
lr = 0.0001
constructor = mi_clasificador.onlyWoman_MFCC_16k_v6

weight_decay= 0.007

NameError: name 'os' is not defined

In [13]:
clasificador = constructor(list(trainDataSet.dictionary.keys()), MFCCCalculator)
clasificador.to(device)

onlyWoman_MFCC_16k_v6(
  (transformer): MFCC(
    (amplitude_to_DB): AmplitudeToDB()
    (MelSpectrogram): MelSpectrogram(
      (spectrogram): Spectrogram()
      (mel_scale): MelScale()
    )
  )
  (inPut): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (chanelUp): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)

res.shape

#mfcc = MFCCCalculator(res[0]).to(device)
pepe = clasificador(res)

In [None]:
def test_accur (prediccion, target_batch):
    return(torch.argmax(target_batch, axis=1) == torch.argmax(prediccion, axis=1)).sum().item() / len(target_batch)


if((len(os.listdir(pState)) == 0) or overWrite):
    history = {}
    history['loss'] = torch.empty(0)
    history['acur'] = torch.empty(0)

else:
    history =  torch.load(pHist+os.listdir(pHist)[sLoad])

    save_state = torch.load(pState+os.listdir(pState)[sLoad])
    clasificador.load_state_dict(save_state)

test_optimizer = torch.optim.Adam(clasificador.parameters(), lr=lr, weight_decay= weight_decay)
test_criterion = torch.nn.CrossEntropyLoss()
print(clasificador.modules)




<bound method Module.modules of onlyWoman_MFCC_16k_v6(
  (transformer): MFCC(
    (amplitude_to_DB): AmplitudeToDB()
    (MelSpectrogram): MelSpectrogram(
      (spectrogram): Spectrogram()
      (mel_scale): MelScale()
    )
  )
  (inPut): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (chanelUp): Sequential(
    (0): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): MaxPool2d(kernel_size=2, stride=2, padding

In [27]:
clasificador.inPut[1]

ReLU(inplace=True)

# Entrenando el modelo

In [None]:
%matplotlib qt5

if show_metrics:
    plt.ion()
    fig, axes = plt.subplots(nrows=2, ncols=1, figsize = (6,6))
    l1, = axes[0].plot([], [])
    l2, = axes[1].plot([], [])
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Acur')

    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('Loss')

for j in tqdm(range(2)):
    for i in tqdm(range(2)):
        loss_log, acur_log = clasificador.train_loop(1,train_dataloader, test_optimizer , test_criterion, test_accur, device)
        history['loss'] = torch.cat((history['loss'], loss_log), dim=0)
        history['acur'] = torch.cat((history['acur'], acur_log), dim=0)
        
        if show_metrics:
            # Grafica
            l1.set_data(range(len(history['acur'])), history['acur'])
            axes[0].relim()  # Recalculate data limits
            axes[0].autoscale_view()  # Rescale axes

            l2.set_data(range(len(history['loss'])), history['loss'])
            axes[1].relim()  # Recalculate data limits
            axes[1].autoscale_view()  # Rescale axes
            
            fig.canvas.draw()  # Redraw the figure
            fig.canvas.flush_events()  # Ensure events are processed
            plt.pause(0.5)

    save_state = clasificador.state_dict()
    n = len(os.listdir(pState))
    torch.save(save_state, f"{pState}/ss_{n}.pt")
    torch.save(history, f"{pHist}/hist_{n}.pt")

if show_metrics:
    plt.ioff()
    plt.show()

  0%|          | 0/2 [00:00<?, ?it/s]

# Validando

In [24]:
valDataSet = DataSetConst(valDf, "16k_file", "artist", Path_rir, os.listdir(Path_rir), rir_prob = 0, seed = 98)

100%|██████████| 2613/2613 [00:07<00:00, 331.18it/s]


Cargando archivos RIR:


100%|██████████| 325/325 [00:00<00:00, 706.01it/s]


In [25]:
def acuracy_fn (prediccion, target_batch):
    return(torch.argmax(target_batch, axis=1) == torch.argmax(prediccion, axis=1)).sum().item() / len(target_batch)


#Carga de las muestras y el DataLoader
#valDf = pd.read_csv(Path_val)
#valDataSet = mi_dataLoader.clasification_dataset(valDf, file_col="fileMfcc", target_col="artist", seed = 98)
val_dataloader = DataLoader(valDataSet, batch_size=100, shuffle=False)

#Separando las muestras y los targets
muestras = []
targets = []
for muestra_data, muestra_target, muestra_rir in tqdm(val_dataloader):
    muestras.append(muestra_data)
    targets.append(torch.tensor(muestra_target))

loss_fn = torch.nn.CrossEntropyLoss()
resultado_evalucion = clasificador.evaluate(val_dataloader, loss_fn, acuracy_fn, device)
print(resultado_evalucion)


  0%|          | 0/27 [00:00<?, ?it/s]

  targets.append(torch.tensor(muestra_target))
100%|██████████| 27/27 [00:02<00:00, 12.96it/s]
100%|██████████| 27/27 [01:37<00:00,  3.61s/it]

(0.6904457719237717, 0.8608547008547011)





A continuacion se levantan los distintos save state generados para generar una comparacion entre las distintas performance en el validation set

In [26]:
val_histo = []
max_acur = 0
mejor = None
for ss_name in os.listdir(f"{pState}/"):
    ss_load = torch.load(f"{pState}{ss_name}")
    clasificador.load_state_dict(ss_load)
    resultado_evalucion = clasificador.evaluate(val_dataloader, loss_fn, acuracy_fn, device)
    val_histo.append(resultado_evalucion)
    if resultado_evalucion[1] > max_acur:
        max_acur = resultado_evalucion[1]
        mejor = ss_load
val_histo

100%|██████████| 27/27 [01:36<00:00,  3.59s/it]
100%|██████████| 27/27 [01:37<00:00,  3.61s/it]
100%|██████████| 27/27 [01:37<00:00,  3.63s/it]
100%|██████████| 27/27 [01:41<00:00,  3.77s/it]
100%|██████████| 27/27 [01:33<00:00,  3.45s/it]
100%|██████████| 27/27 [01:34<00:00,  3.51s/it]
100%|██████████| 27/27 [01:33<00:00,  3.47s/it]
100%|██████████| 27/27 [01:34<00:00,  3.51s/it]
100%|██████████| 27/27 [01:34<00:00,  3.49s/it]
100%|██████████| 27/27 [01:32<00:00,  3.42s/it]


[(0.9417261459209301, 0.5927065527065527),
 (0.7927345501052009, 0.761082621082621),
 (0.7561724340474164, 0.8023361823361823),
 (0.7501598993937174, 0.8019373219373221),
 (0.8395425346162584, 0.7107122507122506),
 (0.7648259268866645, 0.7905982905982905),
 (0.8374022598619815, 0.7035327635327637),
 (0.7038821578025818, 0.8482621082621081),
 (0.703118171956804, 0.8449287749287749),
 (0.6904457719237717, 0.8608547008547011)]

# Test set

In [52]:
#Carga de las muestras y el DataLoader
testDf = pd.read_csv(Path_test)
testDataSet = mi_dataLoader.clasification_dataset(testDf, file_col="fileMfcc", target_col="artist", seed = 98)
test_dataloader = DataLoader(testDataSet, batch_size=100, shuffle=False)

clasificador.load_state_dict(mejor)
clasificador.evaluate(test_dataloader, loss_fn, acuracy_fn, device)

AttributeError: module 'dataLoadersV2' has no attribute 'clasification_dataset'

## Codigo usado mientras se generaba la arquitectura (osea, codigo desechable)

In [None]:
mi_clasificador = SourceFileLoader('clasificador_padre', '../libs/clasificador_padre.py').load_module()

In [24]:
muestra_data, muestra_target = next(iter(test_dataloader))
#muestra = torch.stack(muestra, 0)
muestra_data.shape

torch.Size([100, 13, 997])

In [59]:
mi_clasificador = SourceFileLoader('clasificador_padre', '../libs/clasificador_padre.py').load_module()
clasificador = mi_clasificador.onlyWoman_MFCC_01(list(testDataSet.dictionary.keys()))

In [60]:
res =clasificador(muestra_data)
res.shape

In: torch.Size([100, 13, 997])
squeeze: torch.Size([100, 1, 13, 997])
Shape post 1D:  torch.Size([100, 4, 15, 999])
post flatten: torch.Size([100, 136136])
Alive


torch.Size([100, 3])

In [None]:
out = nn.Sequential(   # bloque que acondiciona la salida
    #nn.Linear(*self.architecture[-1]), #Lineal que sale a las clases objetivo
    nn.Softmax( dim = 1))

NameError: name 'self' is not defined

In [61]:
test_criterion(res,muestra_target)

tensor(1.1800, grad_fn=<DivBackward1>)

In [62]:
test_accur (res, muestra_target)

0.3

In [64]:
#importlib.reload(mi_clasificador)
clasificador = mi_clasificador.Clasificador_01(testDataSet.dictionary.keys())
res =clasificador(muestra_data)
test_accur (res, muestra_target)


TypeError: _Clasificador_padre.__init__() missing 1 required positional argument: 'classes'

In [318]:
(torch.argmax(muestra_target, axis = 1) == torch.argmax(res, axis=1)).sum().item() / len(muestra_target)

0.0

In [319]:
torch.argmax(muestra_target, axis = 1) == torch.argmax(res, axis=1)

tensor([False])

In [42]:
muestra_target.shape, res.shape

(torch.Size([3, 3]), torch.Size([3, 3]))

In [43]:
torch.argmax(res, axis = 1)

tensor([0, 2, 2])

In [44]:
res

tensor([[9.9844e-01, 2.4358e-21, 1.5574e-03],
        [1.7411e-05, 4.4522e-07, 9.9998e-01],
        [0.0000e+00, 0.0000e+00, 1.0000e+00]], grad_fn=<SoftmaxBackward0>)

In [43]:
torch.argmax(muestra_target, axis=1)

tensor([0, 0, 0, 0, 0])

In [44]:
muestra_target

tensor([[1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.],
        [1., 0., 0.]])

In [21]:
muestra_target == res

tensor([[False,  True, False],
        [False,  True, False],
        [False, False, False],
        [False,  True, False],
        [False, False, False]])

In [45]:
res

tensor([[0.0000e+00, 4.1355e-15, 1.0000e+00],
        [0.0000e+00, 0.0000e+00, 1.0000e+00],
        [0.0000e+00, 1.0000e+00, 4.7325e-08],
        [0.0000e+00, 1.0000e+00, 4.9665e-37],
        [1.1335e-33, 3.0150e-14, 1.0000e+00]], grad_fn=<SoftmaxBackward0>)

In [None]:
flat = nn.Flatten(start_dim = 0)
lin = nn.Linear(17920,3)
soft = nn.Softmax(dim = 0)

print("Red: ",res.shape)
aux = flat(res)
print("Flat: ",aux.shape)
aux = lin(aux)
print("Line: ",aux.shape)
aux = soft(aux)
print("Soft:",aux.shape)

Red:  torch.Size([128, 5, 28])
Flat:  torch.Size([17920])
Line:  torch.Size([3])
Soft: torch.Size([3])


# Codigo testeo

In [14]:
import random
import numpy as np

In [15]:
prob_mariano = 0.3

In [16]:
mariano = torch.zeros(5)
pepe = torch.stack([0*torch.ones(5)
                ,1*torch.ones(5)
                ,2*torch.ones(5)
                ,3*torch.ones(5)]).to(device)


In [None]:
DataGen = mi_dataLoader.DataSet_song_plus_rir
train_dataloader = DataGen(trainDf, "16k_file", "artist", Path_rir, os.listdir(Path_rir), rir_prob = 0.5, seed = 98, device  = device)

0                 ..\data\16k_songs\11. borderline14.flac
1           ..\data\16k_songs\07. sweetener (live)10.flac
2                 ..\data\16k_songs\06. six thirty12.flac
3                   ..\data\16k_songs\10. Sometimes5.flac
4       ..\data\16k_songs\14. Knew Better _ Forever Bo...
                              ...                        
8794                   ..\data\16k_songs\02 Fifteen15.mp3
8795                     ..\data\16k_songs\03 Style18.mp3
8796            ..\data\16k_songs\08 Stay Beautiful18.mp3
8797                  ..\data\16k_songs\01 Fearless18.mp3
8798    ..\data\16k_songs\16 ME! (feat. Brendon Urie o...
Name: 16k_file, Length: 8799, dtype: object


100%|██████████| 8799/8799 [00:33<00:00, 260.35it/s]


Cargando archivos RIR:


100%|██████████| 325/325 [00:00<00:00, 567.16it/s]


In [None]:
def sampler(song, rir, rir_prob, device):
    if(type(rir) == list):
        rir = torch.stack(rir)
    if(type(song) == list):
        song = torch.stack(song)
    rir = rir.to(device)
    song = song.to(device)
    
    #genero una respuesta al impulso ideal (La identidad de la convolucion)
    impulse = torch.zeros(rir.shape[1], device=device)
    impulse[0] = 1
    # Tiro una "moneda" para cada cancion y genero una mascara comparando 
    # el resultado con la probabilidad de convolucion
    sample_mask = torch.rand(song.shape[0], device=device) < rir_prob
    #Para cada muestra elijo un rir aleatoreo
    rir_sampler = torch.randint(0,rir.shape[0],(song.shape[0],), device=device)
    
    #Genero las lista de seniales a convolucionar con las canciones
    # Sobre Sample_mask: Primero se transforma en una columna y despues lo expando hasta la cantidad de muestras temporales de rir
    # Sobre rir[rir_sampler]: genera un nuevo tensor donde cada fila corresponde al indice que marca rir_sampler
    # Sobre torch.were: Dependiendo del valor de sample_mask, selecciona un valor de rir[rir_sampler] o de impulse
    samples = torch.where(sample_mask.unsqueeze(-1).expand(sample_mask.shape[0], rir.shape[1]), rir[rir_sampler], impulse)
    return sample_mask, rir[rir_sampler], samples