<a href="https://colab.research.google.com/github/Frads01/NoiseRemover/blob/main/NoiseRemover.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Noise Remover


Indicazione Path per i dati di input e target:

In [None]:
from pathlib import Path
# Folders holding the training pairs: network inputs and their targets.
TRAIN_INPUT_DIR = Path('Datasets') / 'Train_Input'
TRAIN_TARGET_DIR = Path('Datasets') / 'Train_Output'

# Test-set folders, currently disabled:
#TEST_NOISY_DIR = Path('Datasets/Test_Input')
#TEST_CLEAN_DIR = Path('Datasets/clean_testset_wav')

Import librerie:

In [None]:
import IPython.display as ipd
import matplotlib.pyplot as plt
import numpy as np
import torch
import torchaudio
from torch.utils.data import DataLoader, Dataset

# Determinism on CUDA GPUs: ask cuDNN for deterministic algorithms and turn
# off its benchmark mode (algorithm auto-tuning), as recommended by the
# PyTorch reproducibility notes.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False


Parametri per la trasformazione STFT:

In [None]:
# NOTE(review): the training dataset in this notebook is instantiated with
# n_fft=1024, hop_length=256 rather than these values - confirm which pair
# is intended.
n_fft = 2048 # window size (frequency resolution) - the paper uses 3072, which works very well for speech
hop_length = 512 # hop between one window and the next (time resolution) - the paper uses 768

Dichiarazioni del Dataset e del Dataloader:

In [None]:
class Noise2NoiseDataset(Dataset):
    """Paired-audio dataset for Noise2Noise training.

    Item ``i`` pairs the ``i``-th file of each (sorted) file collection, i.e.
    two noisy versions of the same content. Each recording is loaded, reduced
    to its first channel, truncated or zero-padded to ``max_len`` samples and
    converted to a complex STFT.

    Args:
        noisy_file_set_A: Iterable of audio file paths (first noisy version).
        noisy_file_set_B: Iterable of audio file paths (second noisy version).
            Must contain as many entries as ``noisy_file_set_A``.
        n_fft: FFT size passed to ``torch.stft``.
        hop_length: Hop between consecutive STFT frames, in samples.
        max_len: Number of samples every waveform is cut or padded to.

    Raises:
        ValueError: If the two file collections differ in length.
    """

    def __init__(self, noisy_file_set_A, noisy_file_set_B, n_fft=2048, hop_length=512, max_len=165000):
        super().__init__()
        # Both collections are sorted independently, so pairing relies on the
        # two folders using file names that sort in the same order.
        self.noisy_A = sorted(noisy_file_set_A)
        self.noisy_B = sorted(noisy_file_set_B)

        # Explicit exception instead of `assert`, which is removed under -O.
        if len(self.noisy_A) != len(self.noisy_B):
            raise ValueError("Le due liste devono avere la stessa lunghezza.")

        self.n_fft = n_fft
        self.hop_length = hop_length
        self.max_len = max_len

    def __len__(self):
        """Return the number of (A, B) file pairs."""
        return len(self.noisy_A)

    def load_sample(self, file):
        """Load ``file`` with torchaudio and return only the waveform tensor.

        The sample rate returned by ``torchaudio.load`` is discarded, so no
        resampling takes place here.
        """
        waveform, _ = torchaudio.load(file)
        return waveform

    def _prepare_sample(self, waveform):
        """Return a single-channel waveform of exactly ``self.max_len`` samples.

        ``waveform`` is indexed as (channels, samples). Only the first channel
        is kept. Longer signals lose their trailing samples; shorter ones are
        zero-padded on the right.
        """
        if waveform.shape[0] > 1:
            waveform = waveform[:1, :]  # keep only the first channel

        current_len = waveform.shape[1]
        if current_len >= self.max_len:
            return waveform[:, :self.max_len]

        pad_len = self.max_len - current_len
        return torch.nn.functional.pad(waveform, (0, pad_len), mode='constant', value=0.0)

    def _to_spectrogram(self, file):
        """Load ``file`` and return the complex STFT of its fixed-length waveform."""
        waveform = self._prepare_sample(self.load_sample(file))
        # NOTE(review): no `window` is passed, so torch.stft falls back to a
        # rectangular window (and warns about it). Passing e.g. a Hann window
        # would change the spectrograms, so the call is left as it was.
        return torch.stft(
            waveform,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            normalized=True,
            return_complex=True,
        )

    def __getitem__(self, index):
        """Return the complex STFTs ``(x1_stft, x2_stft)`` of pair ``index``."""
        x1_stft = self._to_spectrogram(self.noisy_A[index])
        x2_stft = self._to_spectrogram(self.noisy_B[index])
        return x1_stft, x2_stft


# --- Training data setup -----------------------------------------------------
# This code must live at module level: inside the class body it would run while
# the class is still being defined, before the name `Noise2NoiseDataset` exists.

# Recursively collect the training recordings from the folders declared at the
# top of the notebook.
files_noise_input = sorted(TRAIN_INPUT_DIR.rglob("*.wav"))
files_noise_target = sorted(TRAIN_TARGET_DIR.rglob("*.wav"))
#test_noisy_files = sorted(list(TEST_NOISY_DIR.rglob('*.wav')))
#test_clean_files = sorted(list(TEST_CLEAN_DIR.rglob('*.wav')))

print("No. of Training files:",len(files_noise_input))
#print("No. of Testing files:",len(test_noisy_files))

# NOTE(review): these STFT settings differ from the notebook-level
# n_fft / hop_length values (2048 / 512) - confirm which pair is intended.
noise2noise_dataset = Noise2NoiseDataset(files_noise_input, files_noise_target, n_fft=1024, hop_length=256)
train_loader = DataLoader(noise2noise_dataset, batch_size=2, shuffle=True)

# For testing purpose
#test_loader_single_unshuffled = DataLoader(test_dataset, batch_size=1, shuffle=False)