
# Audio degradation

This notebook downloads the `VoiceBank-DEMAND` data and then applies degradation.

## TODO:
1. Download the `VoiceBank-DEMAND` data.
2. Extract audio.
3. Load audio.
4. Create the dataset.



# IMPORTS

In [None]:
import os
import urllib.request
import zipfile
from tqdm import tqdm
import torch
from torch.utils.data import Dataset
import librosa
import numpy as np
import torchaudio.transforms as T

# Download dataset

In [None]:
# Archives to download, keyed by the logical name used for the local
# zip file and for the folder each archive is extracted into.
files = dict(
    clean_trainset="https://datashare.ed.ac.uk/bitstream/handle/10283/2791/clean_trainset_28spk_wav.zip?sequence=2&isAllowed=y",
    noisy_trainset="https://datashare.ed.ac.uk/bitstream/handle/10283/2791/noisy_trainset_28spk_wav.zip?sequence=6&isAllowed=y",
    clean_testset="https://datashare.ed.ac.uk/bitstream/handle/10283/2791/clean_testset_wav.zip?sequence=1&isAllowed=y",
    noisy_testset="https://datashare.ed.ac.uk/bitstream/handle/10283/2791/noisy_testset_wav.zip?sequence=5&isAllowed=y",
)

# Root directory holding both the downloaded archives and the extracted data;
# created up front so the download step can write into it.
base_path = "VoiceBank_DEMAND"
os.makedirs(base_path, exist_ok=True)

# tqdm progress bar that can be plugged into urlretrieve as its `reporthook`
class DownloadProgressBar(tqdm):
    """tqdm bar exposing `update_to`, a callback with urlretrieve's reporthook signature."""

    def update_to(self, b=1, bsize=1, tsize=None):
        """Move the bar to the position described by a reporthook call.

        b: number of blocks transferred so far
        bsize: size of each block, in bytes
        tsize: total size in bytes; None or a negative value means "unknown"
        """
        # urlretrieve passes -1 when the server does not announce a size;
        # storing that as the total would give the bar a negative length.
        if tsize is not None and tsize >= 0:
            self.total = tsize
        # update() expects an increment, so subtract what is already shown
        # from the absolute number of bytes transferred.
        self.update(b * bsize - self.n)

# Download and unpack every archive.
# Both steps work on a temporary ".part" path and rename it only on success,
# so an interrupted run never leaves a truncated zip or half-extracted folder
# that the "already present" checks below would mistake for a finished one.
import shutil  # only needed here, to clear leftovers of an interrupted extraction

for name, url in files.items():
    zip_path = os.path.join(base_path, f"{name}.zip")
    extract_folder = os.path.join(base_path, name)

    # Download with a progress bar
    if not os.path.exists(zip_path):
        print(f"\nScaricamento di {name}...")
        partial_zip = zip_path + ".part"
        try:
            with DownloadProgressBar(unit='B', unit_scale=True, miniters=1, desc=name) as t:
                urllib.request.urlretrieve(url, filename=partial_zip, reporthook=t.update_to)
            os.replace(partial_zip, zip_path)
        finally:
            # After a successful rename the partial file is gone; otherwise
            # drop the incomplete download before the error propagates.
            if os.path.exists(partial_zip):
                os.remove(partial_zip)
        print(f"Scaricato {zip_path}")
    else:
        print(f"{zip_path} già presente, skip scaricamento.")

    # Extraction
    if not os.path.exists(extract_folder):
        print(f"Estrazione di {name} in {extract_folder}...")
        partial_folder = extract_folder + ".part"
        # Remove whatever a previously interrupted extraction left behind.
        shutil.rmtree(partial_folder, ignore_errors=True)
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(partial_folder)
        os.replace(partial_folder, extract_folder)
        print(f"Estratto in {extract_folder}")
    else:
        print(f"{extract_folder} già esistente, skip estrazione.")

print("\nTutti i file scaricati ed estratti correttamente.")


VoiceBank_DEMAND\clean_trainset.zip già presente, skip scaricamento.
VoiceBank_DEMAND\clean_trainset già esistente, skip estrazione.
VoiceBank_DEMAND\noisy_trainset.zip già presente, skip scaricamento.
VoiceBank_DEMAND\noisy_trainset già esistente, skip estrazione.
VoiceBank_DEMAND\clean_testset.zip già presente, skip scaricamento.
VoiceBank_DEMAND\clean_testset già esistente, skip estrazione.
VoiceBank_DEMAND\noisy_testset.zip già presente, skip scaricamento.
VoiceBank_DEMAND\noisy_testset già esistente, skip estrazione.

Tutti i file scaricati ed estratti correttamente.


# Dataset build

Create the (original, noisy) pairs.

In [None]:
def build_clean_noisy_pairs(clean_dir, noisy_dir):
    """Pair every entry of `clean_dir` with the same-named entry of `noisy_dir`.

    clean_dir: directory whose entries drive the pairing
    noisy_dir: directory searched for an entry with the same name

    Returns a list of (clean_path, noisy_path) tuples ordered by file name.
    Entries of `clean_dir` with no counterpart in `noisy_dir` are reported on
    stdout and left out; entries present only in `noisy_dir` are ignored.
    """
    clean_files = sorted(os.listdir(clean_dir))
    # A set keeps each membership test O(1); the order of the noisy listing
    # never matters because the clean listing drives the iteration.
    noisy_names = set(os.listdir(noisy_dir))

    pairs = []
    for fname in clean_files:
        if fname not in noisy_names:
            print(f"❗ File mancante nella cartella noisy: {fname}")
            continue
        pairs.append((os.path.join(clean_dir, fname), os.path.join(noisy_dir, fname)))

    return pairs

# Paths (base_path is repeated here so this cell can run on its own)
base_path = "VoiceBank_DEMAND"
train_clean_dir = os.path.join(base_path, "clean_trainset")
train_noisy_dir = os.path.join(base_path, "noisy_trainset")

test_clean_dir = os.path.join(base_path, "clean_testset")
test_noisy_dir = os.path.join(base_path, "noisy_testset")

# Build the (clean, noisy) pairs for both splits
train_pairs = build_clean_noisy_pairs(train_clean_dir, train_noisy_dir)
test_pairs = build_clean_noisy_pairs(test_clean_dir, test_noisy_dir)

print(f"✅ Coppie train: {len(train_pairs)}")
print(f"✅ Coppie test: {len(test_pairs)}")

# Example
print("\nEsempio coppia:")
if train_pairs:
    print(train_pairs[0])
else:
    # Indexing an empty list would raise IndexError; report the empty
    # result instead so the counts printed above remain the last word.
    print("Nessuna coppia trovata.")


In [None]:
class AudioPairDataset(Dataset):
    """Dataset of (clean, noisy) audio pairs, served as waveforms or log-Mel spectrograms."""

    def __init__(self, pairs, sample_rate=16000, duration=None, use_logmel=True, n_mels=64,
                 n_fft=1024, hop_length=256):
        """
        pairs: list of (clean_path, noisy_path) tuples
        sample_rate: target sample rate handed to the audio loader
        duration: segment length in seconds for fixed-size items, or None to
            load each file in full
        use_logmel: if True, items are converted to log-Mel spectrograms
        n_mels: number of Mel bands of the spectrogram
        n_fft: FFT size of the spectrogram
        hop_length: hop between successive spectrogram frames, in samples
        """
        super().__init__()
        self.pairs = pairs
        self.sample_rate = sample_rate
        self.duration = duration
        self.use_logmel = use_logmel
        self.n_mels = n_mels
        self.n_fft = n_fft
        self.hop_length = hop_length

        # The transforms are only built when needed, so a waveform-only
        # dataset never touches the spectrogram machinery.
        if self.use_logmel:
            self.mel_transform = T.MelSpectrogram(
                sample_rate=self.sample_rate,
                n_mels=self.n_mels,
                n_fft=self.n_fft,
                hop_length=self.hop_length,
            )
            self.db_transform = T.AmplitudeToDB()

    def __len__(self):
        """Return the number of (clean, noisy) pairs."""
        return len(self.pairs)

    @staticmethod
    def _fit_length(y, expected_len):
        """Zero-pad at the end, or truncate, `y` to exactly `expected_len` samples."""
        if len(y) < expected_len:
            return np.pad(y, (0, expected_len - len(y)))
        return y[:expected_len]

    def load_audio(self, path):
        """Load `path` at the target sample rate and return it as a float32 tensor.

        When a duration is configured the result always has
        int(sample_rate * duration) samples.
        """
        y, _ = librosa.load(path, sr=self.sample_rate, duration=self.duration)
        # `is not None` rather than truthiness, so the fixed-length contract
        # is applied for every configured duration.
        if self.duration is not None:
            # The loader only caps how much is read; shorter files still
            # need padding to reach the fixed segment length.
            y = self._fit_length(y, int(self.sample_rate * self.duration))
        return torch.tensor(y, dtype=torch.float32)

    def to_logmel(self, y):
        """Convert waveform tensor `y` to a Mel spectrogram expressed in dB."""
        mel = self.mel_transform(y)
        return self.db_transform(mel)

    def __getitem__(self, idx):
        """Return the (clean, noisy) item at `idx`, as waveforms or log-Mel spectrograms."""
        clean_path, noisy_path = self.pairs[idx]
        clean = self.load_audio(clean_path)
        noisy = self.load_audio(noisy_path)

        if self.use_logmel:
            clean = self.to_logmel(clean)
            noisy = self.to_logmel(noisy)

        return clean, noisy
