In [1]:
import torch
import torch.nn as nn
import torchaudio
import torch.fft
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio
import seaborn as sns
import pandas as pd
import os
from torch.nn.utils.rnn import pad_sequence
import torchvision.transforms as transforms
import torchaudio.transforms as transformsaudio
import datetime
from torch.utils.tensorboard import SummaryWriter
import soundfile as sf
import time
import torch.nn.functional as F
import math
import random
import pytorch_lightning as pl
from pytorch_lightning.callbacks.early_stopping import EarlyStopping

writer = SummaryWriter()
%load_ext tensorboard

from prettytable import PrettyTable

def count_parameters(model):
    """Print a table of trainable parameter sizes and return their total.

    :param model: object exposing ``named_parameters()`` (e.g. a ``torch.nn.Module``)
    :return: number of elements summed over parameters with ``requires_grad`` set
    """
    summary = PrettyTable(["Modules", "Parameters"])

    # Frozen parameters are left out of both the table and the total.
    trainable = [
        (name, tensor.numel())
        for name, tensor in model.named_parameters()
        if tensor.requires_grad
    ]
    for name, count in trainable:
        summary.add_row([name, count])

    total_params = sum(count for _, count in trainable)
    print(summary)
    print(f"Total Trainable Params: {total_params}")
    return total_params
    


In [2]:
# Prefer the GPU, but fall back to the CPU so that cells which only build or
# inspect modules still run on a machine without CUDA (the module classes
# below already guard their `.to(device)` calls on torch.cuda.is_available()).
device = "cuda" if torch.cuda.is_available() else "cpu"
# Base directory for the CSV, audio and checkpoint paths built further down.
directoryBase = os.getcwd()

In [3]:

def pad_to_max_length(tensor1, tensor2):
    """Zero-pad two tensors on the right so that both reach the same length.

    The common length is the larger of the two ``size(1)`` values; the zeros
    are appended at the end of the last dimension.

    :param tensor1: first tensor (at least 2-D)
    :param tensor2: second tensor (at least 2-D)
    :return: tuple ``(padded_tensor1, padded_tensor2)``
    """
    longest = max(tensor1.size(1), tensor2.size(1))
    padded = [
        torch.nn.functional.pad(item, (0, longest - item.size(1)))
        for item in (tensor1, tensor2)
    ]
    return padded[0], padded[1]

In [4]:
inputSize = 32000
class AudioCleaningDataset(Dataset):
    """Pairs of (noisy, clean) fixed-length audio clips for denoising.

    Each item loads the ``.wav`` file named in the CSV column
    ``audio_file_name``, passes it through the fixed 48 kHz -> 16 kHz
    resampler, takes a random window of ``target_length`` samples
    (zero-padding clips that are shorter) and returns that clip with white
    Gaussian noise added, together with the untouched clip.

    Mixing in recorded noise files and impulse-response reverb is disabled:
    in the previous version that code sat after an unconditional ``return``
    (so it never ran) and a noise file was read from disk for every item only
    to be discarded. Both have been removed; the directory listings and
    helper transforms are still created so existing attribute users keep
    working.
    """

    def __init__(self, csv_file, audio_dir, noise_dir, reverb_dir, target_length=inputSize, maxRuido=0):
        """
        :param csv_file: CSV with an ``audio_file_name`` column (names without the ``.wav`` suffix)
        :param audio_dir: directory containing the clean ``.wav`` files
        :param noise_dir: directory of noise recordings (listed, currently not mixed in)
        :param reverb_dir: directory of impulse responses (listed, currently not applied)
        :param target_length: number of samples in every returned clip
        :param maxRuido: upper bound of the white-noise amplitude drawn per item
        """
        self.dataframe = pd.read_csv(csv_file)
        self.audio_dir = audio_dir
        self.noise_dir = noise_dir
        self.reverb_dir = reverb_dir
        self.target_length = target_length
        # NOTE(review): both resamplers assume a fixed source rate (48 kHz for
        # speech, 32 kHz for impulse responses); the rate reported by
        # torchaudio.load is not checked - confirm against the dataset.
        self.resampleo = transformsaudio.Resample(orig_freq=48000, new_freq=16000)  # Resampling
        self.resampleoIR = transformsaudio.Resample(orig_freq=32000, new_freq=16000)  # Resampling

        self.noise_files = os.listdir(noise_dir) # List all noise files
        self.reverb_files = os.listdir(reverb_dir)
        self.maxRuido = maxRuido
        self.conv= transformsaudio.Convolve(mode="same")


    def __len__(self):
        """Number of rows in the CSV, i.e. number of source recordings."""
        return len(self.dataframe)

    def _fit_to_target_length(self, waveform):
        """Return a (channels, target_length) copy of ``waveform``.

        Longer inputs are cropped at a uniformly random offset, shorter ones
        are zero-padded at the end. Works for any channel count.
        """
        surplus = waveform.size(1) - self.target_length
        if surplus > 0:
            # randint is inclusive on both ends, so the last valid start
            # (== surplus) is reachable and the window always fits.
            start = random.randint(0, surplus)
            waveform = waveform[:, start:start + self.target_length]
        elif surplus < 0:
            waveform = torch.nn.functional.pad(waveform, (0, -surplus))
        # clone() detaches the result from the (possibly much larger) source
        # buffer, so DataLoader workers only ship target_length samples.
        return waveform.clone()

    def __getitem__(self, idx):
        """
        :param idx: integer index (or scalar tensor) of the CSV row
        :return: ``(noisy, clean)`` tensors, both of shape (channels, target_length)
        """
        if torch.is_tensor(idx):
            idx = idx.tolist()

        audio_file_name = os.path.join(self.audio_dir, self.dataframe.iloc[idx]['audio_file_name']+".wav")
        waveform, _sample_rate = torchaudio.load(audio_file_name)

        waveformOriginal = self._fit_to_target_length(self.resampleo(waveform))

        # White noise with a per-item amplitude drawn from [0, maxRuido];
        # with the default maxRuido=0 the "noisy" clip equals the clean one.
        whitenoise = random.uniform(0.0, self.maxRuido)
        waveformSucia = waveformOriginal + torch.randn_like(waveformOriginal) * whitenoise

        return waveformSucia, waveformOriginal

In [5]:
"""
Neural network modules for WaveNet

References :
    https://arxiv.org/pdf/1609.03499.pdf
    https://github.com/ibab/tensorflow-wavenet
    https://qiita.com/MasaEguchi/items/cd5f7e9735a120f27e2a
    https://github.com/musyoku/wavenet/issues/4
"""

class DilatedCausalConv1d(torch.nn.Module):
    """Bias-free, kernel-size-2 dilated 1-D convolution used by the residual blocks."""

    def __init__(self, channels, dilation=1):
        """
        :param channels: number of input channels; the output has the same number
        :param dilation: spacing between the two kernel taps
        """
        super().__init__()

        # padding == dilation pads both ends, so the output comes out
        # `dilation` samples longer than the input; the caller trims it.
        conv_options = dict(kernel_size=2, stride=1, dilation=dilation,
                            padding=dilation, bias=False)
        conv = torch.nn.Conv1d(channels, channels, **conv_options)
        self.conv = conv.to(device) if torch.cuda.is_available() else conv

    def init_weights_for_test(self):
        """Set every Conv1d weight to 1 so that outputs are easy to verify by hand."""
        conv_layers = (m for m in self.modules() if isinstance(m, torch.nn.Conv1d))
        for layer in conv_layers:
            layer.weight.data.fill_(1)

    def forward(self, x):
        """Apply the convolution; the result is ``dilation`` samples longer than ``x``."""
        return self.conv(x)


class CausalConv1d(torch.nn.Module):
    """Bias-free, kernel-size-2 input convolution that preserves the sequence length."""

    def __init__(self, in_channels, out_channels):
        """
        :param in_channels: channels of the incoming signal
        :param out_channels: channels produced for the residual stack
        """
        super().__init__()

        # padding=1 yields one extra output sample, which forward() drops so
        # that input and output have the same length.
        conv = torch.nn.Conv1d(in_channels, out_channels,
                               kernel_size=2, stride=1, padding=1, bias=False)
        self.conv = conv.to(device) if torch.cuda.is_available() else conv

    def init_weights_for_test(self):
        """Set every Conv1d weight to 1 so that outputs are easy to verify by hand."""
        conv_layers = (m for m in self.modules() if isinstance(m, torch.nn.Conv1d))
        for layer in conv_layers:
            layer.weight.data.fill_(1)

    def forward(self, x):
        """Convolve and discard the trailing sample: each output sees only current and previous input."""
        return self.conv(x)[:, :, :-1]


class ResidualBlock(torch.nn.Module):
    """Gated dilated convolution with a residual output and a skip output."""

    def __init__(self, res_channels, skip_channels, dilation):
        """
        Residual block
        :param res_channels: number of residual channel for input, output
        :param skip_channels: number of skip channel for output
        :param dilation: dilation of the internal kernel-size-2 convolution
        """
        super(ResidualBlock, self).__init__()

        self.dilated = DilatedCausalConv1d(res_channels, dilation=dilation)
        self.conv_res = torch.nn.Conv1d(res_channels, res_channels, 1)
        self.conv_skip = torch.nn.Conv1d(res_channels, skip_channels, 1)

        self.gate_tanh = torch.nn.Tanh()
        self.gate_sigmoid = torch.nn.Sigmoid()

        if torch.cuda.is_available():
            self.conv_skip = self.conv_skip.to(device)
            self.conv_res = self.conv_res.to(device)

    def forward(self, x, skip_size):
        """
        :param x: input of shape (batch, res_channels, time)
        :param skip_size: number of trailing time steps kept in the skip output
        :return: tuple ``(residual_output, skip_output)``; the residual output
                 has the same shape as ``x``
        """
        output = self.dilated(x)

        # PixelCNN gate
        gated = self.gate_tanh(output) * self.gate_sigmoid(output)

        # Residual path. The dilated convolution returns `dilation` extra
        # samples; trim to the length of *this* input (previously the global
        # `inputSize`, which broke the addition below for any other length).
        output = self.conv_res(gated)[:, :, :x.size(2)]
        output = output + x

        # Skip path.
        # NOTE(review): this keeps the LAST `skip_size` samples while the
        # residual path keeps the FIRST ones, so the two are offset by
        # `dilation` samples. Left as is because saved checkpoints were
        # trained with this alignment - confirm it is intended.
        skip = self.conv_skip(gated)
        skip = skip[:, :, -skip_size:]

        return output, skip


class ResidualStack(torch.nn.Module):
    """Sequence of residual blocks with cycling power-of-two dilations."""

    def __init__(self, layer_size, stack_size, res_channels, skip_channels):
        """
        Stack residual blocks by layer and stack size
        :param layer_size: integer, 10 = layer[dilation=1, dilation=2, 4, 8, 16, 32, 64, 128, 256, 512]
        :param stack_size: integer, 5 = stack[layer1, layer2, layer3, layer4, layer5]
        :param res_channels: number of residual channel for input, output
        :param skip_channels: number of skip channel for output
        :return:
        """
        super(ResidualStack, self).__init__()

        self.layer_size = layer_size
        self.stack_size = stack_size

        # Stored as a ModuleList (see stack_res_block) so the blocks are
        # registered sub-modules: their weights show up in parameters() and
        # state_dict(), and follow .to()/.cuda(). A plain list hid them from
        # the optimizer, so the whole stack was never trained.
        self.res_blocks = self.stack_res_block(res_channels, skip_channels)

    @staticmethod
    def _residual_block(res_channels, skip_channels, dilation):
        """Build one block, wrapping it in DataParallel when several GPUs are visible."""
        block = ResidualBlock(res_channels, skip_channels, dilation)

        if torch.cuda.device_count() > 1:
            block = torch.nn.DataParallel(block)

        if torch.cuda.is_available():
            block.cuda()

        return block

    def build_dilations(self):
        """Return ``[1, 2, 4, ..., 2**(layer_size-1)]`` repeated ``stack_size`` times."""
        return [
            2 ** layer
            for _ in range(self.stack_size)
            for layer in range(self.layer_size)
        ]

    def stack_res_block(self, res_channels, skip_channels):
        """
        Prepare dilated convolution blocks by layer and stack size
        :return: ``torch.nn.ModuleList`` with one block per dilation
        """
        return torch.nn.ModuleList([
            self._residual_block(res_channels, skip_channels, dilation)
            for dilation in self.build_dilations()
        ])

    def forward(self, x, skip_size):
        """
        :param x: input of shape (batch, res_channels, time)
        :param skip_size: The last output size for loss and prediction
        :return: skip outputs of all blocks stacked along a new first axis
        """
        output = x
        skip_connections = []

        for res_block in self.res_blocks:
            # output is the next input
            output, skip = res_block(output, skip_size)
            skip_connections.append(skip)

        return torch.stack(skip_connections)


class DensNet(torch.nn.Module):
    """Output head: two length-preserving convolutions with a tanh in between."""

    def __init__(self, channels):
        """
        The last network of WaveNet
        :param channels: number of channels for input and output
        """
        super().__init__()

        def same_length_conv():
            # kernel_size=3 with padding=1 keeps the time axis unchanged
            return torch.nn.Conv1d(channels, channels, kernel_size=3, stride=1, padding=1)

        self.conv1 = same_length_conv()
        self.conv2 = same_length_conv()

        self.tan = torch.nn.Tanh()

        if torch.cuda.is_available():
            for conv in (self.conv1, self.conv2):
                conv.to(device)

    def forward(self, x):
        """Return ``conv2(tanh(conv1(x)))``; the shape of ``x`` is preserved."""
        hidden = self.tan(self.conv1(x))
        return self.conv2(hidden)





In [6]:
#loss_fn = nn.L1Loss()
#learning_rate = 0.0015
#optimizer = torch.optim.AdamW(params=wavenetModel.parameters(), lr=learning_rate)
#optimizerPost = torch.optim.AdamW(params=postnetModel.parameters(), lr=learning_rate)

# Two mel-spectrogram resolutions used by the spectral loss: a long window
# (n_fft=2048) and a short window (n_fft=512). Both live on the notebook-wide
# `device` instead of a hard-coded 'cuda' so that they follow the same
# placement as the rest of the model.
mel_transform1 = transformsaudio.MelSpectrogram(sample_rate = 16000,
                                               n_fft = 2048,
                                                n_mels = 120,
                                                hop_length = 512).to(device)

mel_transform2 = transformsaudio.MelSpectrogram(sample_rate = 16000,
                                               n_fft = 512,
                                                n_mels = 80,
                                                hop_length = 128).to(device)


In [7]:
def prepareSpectogram(melspect):
    """Min-max normalise a spectrogram and return its log10, floored at -6.

    The input is rescaled to [0, 1] using its own global minimum and maximum,
    offset by 1e-8 so the logarithm is defined, and values below -6 are
    clamped to -6.

    A constant input (for example the spectrogram of an all-zero clip) has
    ``max == min``; the division used to produce 0/0 = NaN, which then
    poisoned the loss. Such inputs now map to the floor value everywhere.

    :param melspect: non-empty spectrogram tensor
    :return: tensor of the same shape with values in [-6, ~0]
    """
    lowest = melspect.min()
    span = melspect.max() - lowest
    # Only the degenerate zero-span case is replaced; every other input is
    # scaled exactly as before.
    span = torch.where(span > 0, span, torch.ones_like(span))

    melspect = (melspect - lowest) / span + 0.00000001
    melspect = melspect.log10()
    min_db = -6
    # Clamp the values in the tensor to be no less than min_db
    return melspect.clamp(min=min_db)

In [8]:
"""    def melspectogramLoss(y_pred, true_y):

      mel1true_y = (mel_transform1(true_y.to('cuda')))
      mel1y_pred = (mel_transform1(y_pred.to('cuda')))
      mel2true_y = (mel_transform2(true_y.to('cuda')))
      mel2y_pred = (mel_transform2(y_pred.to('cuda')))

      mel1true_y = prepareSpectogram(mel1true_y)
      mel1y_pred = prepareSpectogram(mel1y_pred)
      mel2true_y = prepareSpectogram(mel2true_y)
      mel2y_pred = prepareSpectogram(mel2y_pred)

      dif1 = (mel1true_y - mel1y_pred).abs().mean()
      dif2 = (mel2true_y - mel2y_pred).abs().mean()
      # Define the minimum dB value


      return dif1*15 + dif2*10
"""

"    def melspectogramLoss(y_pred, true_y):\n\n      mel1true_y = (mel_transform1(true_y.to('cuda')))\n      mel1y_pred = (mel_transform1(y_pred.to('cuda')))\n      mel2true_y = (mel_transform2(true_y.to('cuda')))\n      mel2y_pred = (mel_transform2(y_pred.to('cuda')))\n\n      mel1true_y = prepareSpectogram(mel1true_y)\n      mel1y_pred = prepareSpectogram(mel1y_pred)\n      mel2true_y = prepareSpectogram(mel2true_y)\n      mel2y_pred = prepareSpectogram(mel2y_pred)\n\n      dif1 = (mel1true_y - mel1y_pred).abs().mean()\n      dif2 = (mel2true_y - mel2y_pred).abs().mean()\n      # Define the minimum dB value\n\n\n      return dif1*15 + dif2*10\n"

In [9]:

class MelspectogramLoss(nn.Module):
    """Weighted L1 distance between log-mel spectrograms at two resolutions.

    Uses the notebook-level ``mel_transform1`` / ``mel_transform2`` and
    ``prepareSpectogram``; the result is ``15 * d1 + 10 * d2`` where ``d1``
    and ``d2`` are the mean absolute differences at each resolution.
    """

    def __init__(self):
        super(MelspectogramLoss, self).__init__()

    @staticmethod
    def _log_mel_distance(mel_transform, y_pred, true_y):
        """Mean absolute difference of the prepared spectrograms of both signals."""
        mel_true = prepareSpectogram(mel_transform(true_y))
        mel_pred = prepareSpectogram(mel_transform(y_pred))
        return (mel_true - mel_pred).abs().mean()

    def forward(self, y_pred, true_y):
        """
        :param y_pred: predicted waveform batch
        :param true_y: reference waveform batch
        :return: scalar tensor with the weighted two-resolution distance
        """
        # Move each signal once, to the notebook-wide `device` (was a
        # hard-coded 'cuda', transferred separately for each transform).
        true_y = true_y.to(device)
        y_pred = y_pred.to(device)

        dif1 = self._log_mel_distance(mel_transform1, y_pred, true_y)
        dif2 = self._log_mel_distance(mel_transform2, y_pred, true_y)

        return dif1*15 + dif2*10

In [10]:
class CombinedLoss(nn.Module):
    """Training objective: mel-spectrogram loss plus waveform L1 loss, unweighted."""

    def __init__(self):
        super().__init__()
        self.mel_loss = MelspectogramLoss()
        self.l1_loss = nn.L1Loss()

    def forward(self, y_pred, true_y):
        """
        :param y_pred: predicted waveform batch
        :param true_y: reference waveform batch
        :return: sum of the spectral term and the sample-wise L1 term
        """
        spectral_term = self.mel_loss(y_pred, true_y)
        waveform_term = self.l1_loss(y_pred, true_y)
        return spectral_term + waveform_term

In [11]:
# Step size used as the default `lr` of WaveNet.configure_optimizers. The
# default is bound when the class cell runs, so changing this value afterwards
# has no effect unless that cell is re-executed.
learning_rate = 0.001

In [24]:
class WaveNet(pl.LightningModule):
    """WaveNet-style waveform-to-waveform network wrapped as a Lightning module.

    Pipeline: kernel-size-2 input convolution -> stack of dilated residual
    blocks -> sum of all skip outputs -> two-layer convolutional head.
    Training minimises ``CombinedLoss`` with plain SGD.
    """

    def __init__(self, layer_size, stack_size, in_channels, res_channels):
        """
        Stack residual blocks by layer and stack size
        :param layer_size: integer, 10 = layer[dilation=1, dilation=2, 4, 8, 16, 32, 64, 128, 256, 512]
        :param stack_size: integer, 5 = stack[layer1, layer2, layer3, layer4, layer5]
        :param in_channels: number of channels for input data. skip channel is same as input channel
        :param res_channels: number of residual channel for input, output
        :return:
        """
        super(WaveNet, self).__init__()

        self.receptive_fields = self.calc_receptive_fields(layer_size, stack_size)

        self.causal = CausalConv1d(in_channels, res_channels)

        self.res_stack = ResidualStack(layer_size, stack_size, res_channels, in_channels)

        self.densnet = DensNet(in_channels)

        self.loss_fun = CombinedLoss()


    @staticmethod
    def calc_receptive_fields(layer_size, stack_size):
        """Sum of all dilations: ``stack_size * (2**layer_size - 1)``."""
        layers = [2 ** i for i in range(0, layer_size)] * stack_size
        return int(np.sum(layers))

    def calc_output_size(self, x):
        """Number of time steps kept from every skip output.

        The network preserves the sequence length, so this is the length of
        ``x`` itself (it used to return the global ``inputSize``, which is the
        same value for the clips produced by the dataset).
        """
        return int(x.size(2))

    def check_input_size(self, x, output_size):
        """Raise ``ValueError`` when ``output_size`` is not positive.

        Not called by ``forward``. The exception class referenced before
        (``InputSizeError``) was never defined in this notebook.
        """
        if output_size < 1:
            raise ValueError(
                "input length {} is too short for a receptive field of {} (output size {})".format(
                    int(x.size(2)), self.receptive_fields, output_size))

    def forward(self, x):
        """
        :param x: Tensor[batch, channels, timestep] - fed to Conv1d without transposing
        :return: Tensor[batch, channels, timestep] with the same length as ``x``
        """
        output_size = self.calc_output_size(x)

        output = self.causal(x)

        skip_connections = self.res_stack(output, output_size)

        # Collapse the per-block axis: every block contributes equally.
        output = torch.sum(skip_connections, dim=0)

        return self.densnet(output)

    def configure_optimizers(self, lr=learning_rate):
        """Plain SGD over all registered parameters."""
        return torch.optim.SGD(self.parameters(), lr=lr)

    def _shared_step(self, batch, metric_name):
        """Forward a ``(noisy, clean)`` batch, log the loss under ``metric_name`` and return it."""
        X, y = batch
        X = X.to(self.device)
        y = y.to(self.device)

        y_pred = self(X)

        loss = self.loss_fun(y_pred, y)
        self.log(metric_name, loss, prog_bar=True)
        return loss

    def training_step(self, train_batch, batch_idx):
        loss = self._shared_step(train_batch, 'train_loss')
        # Opportunistic checkpoint on roughly 5% of the steps. Saves *this*
        # module (previously the notebook global `model`, which is only the
        # same object when that exact variable is the one being trained).
        # `modeloNombre` is the notebook-level checkpoint path.
        if random.random() < 0.05:
            torch.save(self, modeloNombre)
        return loss

    def validation_step(self, val_batch, batch_idx):
        """Lightning validation hook; runs only when a validation dataloader is supplied."""
        return self._shared_step(val_batch, 'val_loss')

    def val_step(self, val_batch, batch_idx):
        """Backward-compatible alias of ``validation_step`` (Lightning never calls this name)."""
        return self.validation_step(val_batch, batch_idx)

In [23]:
# Checkpoint path of the fully pickled model. WaveNet.training_step reads this
# global when it saves, so this cell must run before any training starts.
modeloNombre = directoryBase+'/modelos/wavenetReal/wavenetReplicaAudio.pth'


In [14]:
#waveform, label = next(iter(dataloader))

In [15]:
#Audio(data=waveform.cpu()[0], rate=16000)

In [16]:
#Audio(data=label.cpu()[0], rate=16000)

In [17]:
import sys
def size_of_tensor(a):
    """Approximate size of tensor ``a`` in bytes.

    Adds the element payload (``numel * element_size``) to what
    ``sys.getsizeof`` reports for the Python object.
    """
    payload_bytes = a.element_size() * torch.numel(a)
    return payload_bytes + sys.getsizeof(a)

In [18]:
#dict(wavenetModel.named_parameters()).keys()

In [19]:
# Resume from the saved checkpoint when one exists; otherwise start from a
# freshly initialised network. Previously a full network was always built and
# then immediately replaced by the loaded one, and a missing checkpoint file
# aborted the cell.
# NOTE(review): the checkpoint is a fully pickled module, so loading it
# executes pickle code (only load files you created) and recent PyTorch
# releases may require weights_only=False here - confirm for your version.
if os.path.exists(modeloNombre):
    model = torch.load(modeloNombre, map_location=torch.device(device))
else:
    print("No checkpoint at " + modeloNombre + ", starting from a new WaveNet")
    model = WaveNet(layer_size=18, stack_size=2, in_channels=1, res_channels=128)
model = model.to(device)

In [20]:
batch_size = 2
#torch.multiprocessing.set_start_method('spawn')

# Dataset of (noisy, clean) clips; all locations are relative to directoryBase.
dataset = AudioCleaningDataset(
    csv_file=directoryBase+'/CSV/audiosBastantes.csv',
    audio_dir=directoryBase+'/audiosDivididos/',
    noise_dir=directoryBase+'/audiosDivididos/ruidosPocos/',
    reverb_dir=directoryBase+"/audiosDivididos/reverbPocos",
)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=24,
)

# training: a single epoch on one GPU, stopping early when train_loss fails
# to improve by at least 0.4
early_stop_callback = EarlyStopping(
    monitor="train_loss",
    mode="min",
    min_delta=0.4,
    patience=1,
    verbose=True,
)

trainer = pl.Trainer(
    accelerator='gpu',
    devices=1,
    max_epochs=1,
    log_every_n_steps=2,
    callbacks=[early_stop_callback],
)
trainer.fit(model=model, train_dataloaders=dataloader)

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type          | Params
--------------------------------------------
0 | causal    | CausalConv1d  | 256   
1 | res_stack | ResidualStack | 0     
2 | densnet   | DensNet       | 8     
3 | loss_fun  | MSELoss       | 0     
--------------------------------------------
264       Trainable params
0         Non-trainable params
264       Total params
0.001     Total estimated model params size (MB)


Training: 0it [00:00, ?it/s]

Metric train_loss improved. New best score: 0.000
`Trainer.fit` stopped: `max_epochs=1` reached.


In [21]:
torch.save(model, modeloNombre)

In [21]:
# Reload the checkpoint into `wavenetModel` for the evaluation cells below.
# A fresh network is only built when no checkpoint exists; before, one was
# always constructed and then immediately discarded by the load.
# NOTE(review): the file is a fully pickled module (see torch.load's
# weights_only option on recent PyTorch releases) - confirm for your version.
if os.path.exists(modeloNombre):
    wavenetModel = torch.load(modeloNombre, map_location=torch.device(device))
else:
    print("No checkpoint at " + modeloNombre + ", starting from a new WaveNet")
    wavenetModel = WaveNet(layer_size=18, stack_size=2, in_channels=1, res_channels=128)
wavenetModel = wavenetModel.to(device)
#.load_state_dict(torch.load(modeloNombre))


In [22]:
size_of_tensor(waveform)/1024/1024

NameError: name 'waveform' is not defined

In [None]:
t = torch.cuda.get_device_properties(0).total_memory
r = torch.cuda.memory_reserved(0)
a = torch.cuda.memory_allocated(0)
f = r-a  # free inside reserved
a/1024/1024/1024, r/1024/1024/1024, t/1024/1024/1024

In [None]:
#with torch.no_grad():
#wavenetModel(waveform)

In [None]:
#modelPanoramico = WaveNetPanoramico(layer_size=18, stack_size=2, in_channels=1, res_channels=128)
#modelPanoramico.load_state_dict(torch.load(modeloNombre))


In [None]:
#modeloPostnetNombre = '/content/drive/MyDrive/tesisPabloAxel/wavenet/modelos/wavenetReal/PostNetSimple12capas.pth'

In [None]:
#postnetModel = PostNetSimple()
#postnetModel.load_state_dict(torch.load(modeloPostnetNombre))

In [None]:
#wavenetModel = modelCargado
#WaveNet( layer_size=15, stack_size=2, in_channels=1, res_channels=128)
losses = []

In [None]:
batch_size = 2

dataset = AudioCleaningDataset(directoryBase+'/CSV/audiosBastantes.csv', directoryBase+'/audiosDivididos/', directoryBase+'/audiosDivididos/ruidosPocos/', directoryBase+"/audiosDivididos/reverbPocos")
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

In [None]:
melspectogramLoss(waveform, label)

In [None]:
losses = []

In [None]:
%tensorboard --logdir=runs

In [None]:
# Hand-written training loop that predates the Lightning `trainer.fit` cell.
# NOTE(review): as far as this notebook shows, the loop cannot run on a fresh
# kernel:
#   * `optimizer` and `loss_fn` are only assigned in commented-out lines;
#   * `melspectogramLoss` exists only inside a string literal, and it is
#     unpacked here into two values;
#   * `true_y` is never moved to `device`, while `y_pred` is computed from
#     `X.to(device)`.
# Confirm whether this cell is still needed before reviving it.
NUM_EPOCHS = 10
for epoch in range(NUM_EPOCHS):
    avgLossOfBatch = []
    for i, (X, true_y) in enumerate(dataloader, 0):
        # zero gradients
        optimizer.zero_grad()
        # forward pass
        y_pred = wavenetModel(X.to(device))
        #y_pred_postnet = postnetModel(y_pred)
        # calc losses

        # waveform loss, scaled by 100 before being added to the spectral terms
        lossAudio = loss_fn(y_pred, true_y) * 100
        lossSpect1, lossSpect2 = melspectogramLoss(y_pred, true_y)


        loss = lossAudio + lossSpect1 + lossSpect2
        #lossSpectPost1, lossSpectPost2 = melspectogramLoss(y_pred_postnet, true_y)
        #lossPostnet = loss_fn(y_pred_postnet, true_y) * 10 + lossSpectPost1 + lossSpectPost2
        # backward pass
        loss.backward()
        #lossPostnet.backward()
        # update weights
        optimizer.step()
        #optimizerPost.step()

        avgLossOfBatch.append(loss.item())
        # global step for TensorBoard
        # NOTE(review): 2917 is presumably the number of dataset rows, not
        # batches per epoch - confirm; len(dataloader) would avoid the constant
        xs = 2917 * epoch + i
        writer.add_scalar("Loss audio", lossAudio, xs)
        writer.add_scalar("Loss spect1", lossSpect1, xs)
        writer.add_scalar("Loss spect2", lossSpect2, xs)
        writer.add_scalar("Loss spect all", lossSpect1 + lossSpect2, xs)

        # NOTE(review): logged against `i`, not `xs`, so every epoch overwrites
        # the same step range - confirm this is intended
        writer.add_scalar("Loss total", loss, i)

        # `i % 1` is always 0, so this branch (including the full-model save)
        # runs on every batch
        if((i%1)==0):
          print(str(round(100*(i)/(2917/batch_size),2)) + "% del epoch " + str(epoch))
          prom = np.array(avgLossOfBatch).mean()
          losses.append(prom)
          avgLossOfBatch = []
          #print(prom)
          print("Perdida de wavenet: ")
          print(loss)
          print()
          print(lossAudio)
          print(lossSpect1)
          print(lossSpect2)
          #torch.save(wavenetModel.state_dict(), modeloNombre)
          torch.save(wavenetModel, modeloNombre)
            #torch.save(postnetModel.state_dict(), modeloPostnetNombre)


In [None]:
torch.save(wavenetModel, modeloNombre)


In [None]:
13 % 1==0

In [None]:
writer.close()


In [None]:
#predicho = wavenetModel(X).detach().to("cuda")
X = X.to(device)
true_y = true_y.to(device)

In [None]:
wdet = wavenetModel(X).detach()
predicho = wdet.cpu()
X = X.cpu()
true_y = true_y.cpu()

In [None]:
opin = [1,2,4,5,6,7]

In [None]:
audioAnalizar = 1

In [None]:
lossSpect1, lossSpect2 = melspectogramLoss(predicho[audioAnalizar], true_y[audioAnalizar])
loss1 = loss_fn(predicho[audioAnalizar], true_y[audioAnalizar])
loss = loss1 + lossSpect1 + lossSpect2
print("error de prediccion " + str(loss.item()))


lossSpect1t, lossSpect2t = melspectogramLoss(X[audioAnalizar], true_y[audioAnalizar])
loss2 = loss_fn(X[audioAnalizar], true_y[audioAnalizar])
loss2t = loss2 + lossSpect1t + lossSpect2t

print("error sin hacer nada " + str(loss2t.item()))

In [None]:
Audio(data=X.cpu()[audioAnalizar], rate=16000)

In [None]:
Audio(data=predicho.cpu()[audioAnalizar], rate=16000)

In [None]:
Audio(data=predichoPost.cpu()[audioAnalizar], rate=16000)

In [None]:
Audio(data=true_y.cpu()[audioAnalizar], rate=16000)

In [None]:
audioAnalizar = 1

In [None]:
torch.isnan(true_y[audioAnalizar]).sum()

In [None]:
plt.plot(range(0,len(onda2)), (onda2.numpy()-onda.numpy()))

In [None]:
plt.plot(range(0,len(onda)), onda.numpy())

In [None]:
pos = 0
delta = 32000
onda = predicho[audioAnalizar][0][pos:pos+delta]
onda2 = true_y[audioAnalizar][0][pos:pos+delta]
#onda3 = X[audioAnalizar][0][pos:pos+delta]

sns.lineplot(x=range(0,len(onda2)), y=onda2.numpy(), label="Real")
#sns.lineplot(x=range(0,len(onda3)), y=(onda3), label="Sucia")
sns.lineplot(x=range(0,len(onda)), y=onda.numpy(), label="Predicha")


In [None]:
  mel1true_y = mel_transform1(true_y.to('cuda'))
  mel1y_pred = mel_transform1(predicho.to('cuda'))
  mel1sucio = mel_transform1(X.to('cuda'))


In [None]:
mel_spectrogram_db = transformsaudio.AmplitudeToDB()(mel1true_y).cpu()


# Display the mel spectrogram
plt.figure(figsize=(10, 4))
plt.imshow(mel_spectrogram_db[0][0].detach().numpy(), cmap='viridis', origin='lower', aspect='auto')
plt.colorbar(format="%+2.0f dB")
plt.title("Mel Spectrogram verdadero")
plt.xlabel("Time")
plt.ylabel("Mel Frequency Bin")
plt.show()

In [None]:
mel_spectrogram_db2 = transformsaudio.AmplitudeToDB()(mel1y_pred).cpu()


# Display the mel spectrogram
plt.figure(figsize=(10, 4))
plt.imshow(mel_spectrogram_db2[0][0].detach().numpy(), cmap='viridis', origin='lower', aspect='auto')
plt.colorbar(format="%+2.0f dB")
plt.title("Mel Spectrogram predicho")
plt.xlabel("Time")
plt.ylabel("Mel Frequency Bin")
plt.show()

In [None]:
mel_spectrogram_db3 = transformsaudio.AmplitudeToDB()(mel1sucio).cpu()


# Display the mel spectrogram
plt.figure(figsize=(10, 4))
plt.imshow(mel_spectrogram_db3[0][0].detach().numpy(), cmap='viridis', origin='lower', aspect='auto')
plt.colorbar(format="%+2.0f dB")
plt.title("Mel Spectrogram sucio")
plt.xlabel("Time")
plt.ylabel("Mel Frequency Bin")
plt.show()

In [None]:
audioAnalizar = 0

In [None]:
import soundfile as sf


In [None]:
for audioAnalizar in opin:
  sf.write('sucio'+str(audioAnalizar)+'.wav', np.ravel(X[audioAnalizar]), 16000, 'PCM_24')
  sf.write('original'+str(audioAnalizar)+'.wav', np.ravel(true_y[audioAnalizar]), 16000, 'PCM_24')
  sf.write('limpiado'+str(audioAnalizar)+'.wav', np.ravel(predicho[audioAnalizar][0]), 16000, 'PCM_24')

In [None]:
sf.write('sucio'+str(audioAnalizar)+'.wav', np.ravel(X[audioAnalizar]), 16000, 'PCM_24')

In [None]:
sf.write('limpiadoEco'+str(audioAnalizar)+'.wav', np.ravel(predicho[audioAnalizar][0]), 16000, 'PCM_24')

In [None]:
sf.write('originalSinTocarWavenetReplica'+str(audioAnalizar)+'.wav', np.ravel(true_y[audioAnalizar]), 16000, 'PCM_24')

In [None]:
#dataloader give 1 example
ini = time.time()
for waveform, label in dataloader:
    print(waveform.shape)
    print(label.shape)
    break
fin = time.time()

In [None]:
out = modelPanoramico(waveform)

In [None]:
nuevow = WaveNet( layer_size=18, stack_size=2, in_channels=1, res_channels=128)

In [None]:
#nuevow.load_state_dict(torch.load(modeloNombre), map_location=torch.device('cpu'))
nuevow = torch.load(modeloNombre, map_location=torch.device('cpu'))


In [None]:
predicho = nuevow(waveform).detach().cpu()
waveform = waveform.cpu().detach()
true_y = label.cpu()
X = waveform

In [None]:
lossSpect1, lossSpect2 = melspectogramLoss(predicho[audioAnalizar], true_y[audioAnalizar])
loss1 = loss_fn(predicho[audioAnalizar], true_y[audioAnalizar])
loss = loss1 + lossSpect1 + lossSpect2
print("error de prediccion " + str(loss.item()))

lossSpect1t, lossSpect2t = melspectogramLoss(waveform[audioAnalizar], true_y[audioAnalizar])
loss2 = loss_fn(waveform[audioAnalizar], true_y[audioAnalizar])
loss2t = loss2 + lossSpect1t + lossSpect2t

print("error sin hacer nada " + str(loss2t.item()))

In [None]:
class WaveNetPanoramico(pl.LightningModule):
    """WaveNet variant whose head sees every skip output as a separate channel.

    Instead of summing the skip connections, the output of the input
    convolution, the raw input and all skip outputs are concatenated along
    the channel axis and reduced to one channel by two convolutions.
    The channel arithmetic of ``convFinal1`` assumes ``in_channels == 1``.
    """

    def __init__(self, layer_size, stack_size, in_channels, res_channels):
        """
        Stack residual blocks by layer and stack size
        :param layer_size: integer, 10 = layer[dilation=1, dilation=2, 4, 8, 16, 32, 64, 128, 256, 512]
        :param stack_size: integer, 5 = stack[layer1, layer2, layer3, layer4, layer5]
        :param in_channels: number of channels for input data. skip channel is same as input channel
        :param res_channels: number of residual channel for input, output
        :return:
        """
        super(WaveNetPanoramico, self).__init__()

        self.receptive_fields = self.calc_receptive_fields(layer_size, stack_size)

        self.causal = CausalConv1d(in_channels, res_channels)

        self.res_stack = ResidualStack(layer_size, stack_size, res_channels, in_channels)

        self.densnet = DensNet(in_channels)

        self.loss_fun = nn.MSELoss()
        # one channel per residual block + the res_channels of the input
        # convolution + the single raw input channel
        inp = layer_size*stack_size+res_channels+1
        inp2 = math.floor(inp/2)
        self.convFinal1 = nn.Conv1d(in_channels=inp,
                                   out_channels=inp2,
                                   kernel_size=15, stride=1,
                                   dilation=1, padding=7, bias=True).to(device)
        self.convFinal2 = nn.Conv1d(in_channels=inp2,
                                   out_channels=1,
                                   kernel_size=15, stride=1,
                                   dilation=1, padding=7, bias=True).to(device)

        # Not used by forward(); kept so the module keeps the same set of parameters.
        self.convFinal3 = nn.Conv1d(in_channels=1,
                                   out_channels=1,
                                   kernel_size=101, stride=1,
                                   dilation=1, padding=50, bias=False).to(device)


        self.relu = nn.ReLU()

    @staticmethod
    def calc_receptive_fields(layer_size, stack_size):
        """Sum of all dilations: ``stack_size * (2**layer_size - 1)``."""
        layers = [2 ** i for i in range(0, layer_size)] * stack_size
        return int(np.sum(layers))

    def calc_output_size(self, x):
        """Number of time steps kept from every skip output: the length of ``x``.

        It used to return the global ``inputSize``, which is the same value
        for the clips produced by the dataset.
        """
        return int(x.size(2))

    def check_input_size(self, x, output_size):
        """Raise ``ValueError`` when ``output_size`` is not positive.

        Not called by ``forward``. The exception class referenced before
        (``InputSizeError``) was never defined in this notebook.
        """
        if output_size < 1:
            raise ValueError(
                "input length {} is too short for a receptive field of {} (output size {})".format(
                    int(x.size(2)), self.receptive_fields, output_size))

    def forward(self, x):
        """
        :param x: Tensor[batch, channels, timestep] - fed to Conv1d without transposing
        :return: Tensor[batch, 1, timestep]
        """
        output_size = self.calc_output_size(x)

        output = self.causal(x)

        skip_connections = self.res_stack(output, output_size)

        # Use the skip outputs as channels instead of summing them:
        # (n_blocks, batch, skip_channels, T) -> (batch, n_blocks * skip_channels, T).
        # The earlier bare .squeeze() also removed the batch axis whenever the
        # batch held a single clip, which broke the concatenation below.
        skip_features = skip_connections.permute(1, 0, 2, 3).flatten(1, 2)

        output = torch.cat((output, x, skip_features), dim=1)

        output = self.convFinal1(output)
        output = self.relu(output)
        output = self.convFinal2(output)
        #output = self.relu(output)
        #output = self.convFinal3(output)

        #output = self.densnet(output)

        return output

In [None]:
class PostNetSimple(pl.LightningModule):
  """Post-filter: a stack of wide (kernel 33) convolutions with tanh activations.

  The raw input is concatenated to the last hidden representation before the
  final convolution reduces everything to a single channel.
  """

  def __init__(self, layers=12):
    """
    :param layers: number of hidden 128 -> 128 convolutions
    """
    super(PostNetSimple, self).__init__()
    self.convInicial = nn.Conv1d(in_channels=1,
                            out_channels=128,
                            kernel_size=33, stride=1,
                            dilation=1, padding=16, bias=True).to("cuda")
    self.totalLayers = layers
    # ModuleList registers the hidden layers as sub-modules. In a plain list
    # their weights were invisible to parameters() and state_dict(), so they
    # were neither optimised nor saved.
    self.convs = nn.ModuleList([
        nn.Conv1d(in_channels=128,
                  out_channels=128,
                  kernel_size=33, stride=1,
                  dilation=1, padding=16, bias=True).to("cuda")
        for _ in range(layers)
    ])
    # 128 hidden channels + the 1 raw input channel concatenated in forward()
    self.convFinal = nn.Conv1d(in_channels=129,
                            out_channels=1,
                            kernel_size=33, stride=1,
                            dilation=1, padding=16, bias=True).to("cuda")
    self.tan = nn.Tanh()

  def forward(self, x):
    """
    :param x: Tensor[batch, 1, timestep]
    :return: Tensor[batch, 1, timestep]
    """
    xCopia = x.clone()
    x = self.convInicial(x)
    x = self.tan(x)
    for conv in self.convs:
      x = conv(x)
      x = self.tan(x)
    x = torch.cat((x, xCopia), dim=1)
    x = self.convFinal(x)
    return x