# Download dataset

In [None]:
import os
import zipfile
import requests
import pandas as pd
import torchaudio
import torch
from torch.utils.data import Dataset, DataLoader
from torchaudio.transforms import MelSpectrogram, AmplitudeToDB
from tqdm import tqdm

# Step 1: Scarica e decomprimi il dataset con barra di avanzamento
def download_and_unzip(url, extract_to='.'):
    """Download a zip archive with a progress bar and extract it.

    The archive is streamed to a temporary file named ``ODAQ_dataset.zip`` in
    the current working directory, extracted into ``extract_to`` and then
    deleted. The temporary file is removed even when the download or the
    extraction fails, so no partial archive is left behind.

    Args:
        url: HTTP(S) address of the zip archive.
        extract_to: Directory the archive is extracted into.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    local_zip = 'ODAQ_dataset.zip'
    block_size = 1024  # 1 Kibibyte

    try:
        # (connect, read) timeouts: the read timeout applies per socket read,
        # not to the whole transfer, so large downloads are still allowed.
        with requests.get(url, stream=True, timeout=(10, 60)) as response:
            # Fail early instead of saving an HTML error page as a ".zip".
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))

            with open(local_zip, 'wb') as f, tqdm(
                desc="Scaricamento ODAQ",
                # Unknown length -> let tqdm show a plain counter, not 0/0.
                total=total_size or None,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in response.iter_content(block_size):
                    f.write(data)
                    bar.update(len(data))

        with zipfile.ZipFile(local_zip, 'r') as zip_ref:
            zip_ref.extractall(extract_to)
    finally:
        if os.path.exists(local_zip):
            os.remove(local_zip)

# Step 2: Dataset PyTorch
class ODAQDataset(Dataset):
    """Map-style dataset yielding ``(log-mel spectrogram, score)`` pairs.

    Rows of the annotations CSV are read positionally: column 0 is joined onto
    ``audio_dir`` to locate the audio file and column 1 is returned as a
    float32 tensor.
    NOTE(review): the CSV schema is not visible from this class -- confirm
    that column 0 really holds a relative audio path and column 1 the score.

    Args:
        annotations_file: Path of the CSV file describing the samples.
        audio_dir: Directory the audio paths are relative to.
        target_sample_rate: Rate every waveform is resampled to before the
            mel transform is applied.
        n_mels: Number of mel bands of the spectrogram.
    """

    def __init__(self, annotations_file, audio_dir, target_sample_rate=44100, n_mels=64):
        self.annotations = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir
        self.target_sample_rate = target_sample_rate
        self.mel_spectrogram = MelSpectrogram(
            sample_rate=self.target_sample_rate,
            n_mels=n_mels,
            n_fft=1024,
            hop_length=512
        )
        self.amplitude_to_db = AmplitudeToDB()
        # One Resample transform per source rate, created lazily and reused so
        # it is not rebuilt for every single item.
        self._resamplers = {}

    def __len__(self):
        """Return the number of rows in the annotations table."""
        return len(self.annotations)

    def _resample(self, waveform, sample_rate):
        """Return ``waveform`` at ``target_sample_rate``, reusing cached resamplers."""
        if sample_rate == self.target_sample_rate:
            return waveform
        resampler = self._resamplers.get(sample_rate)
        if resampler is None:
            resampler = torchaudio.transforms.Resample(
                orig_freq=sample_rate, new_freq=self.target_sample_rate
            )
            self._resamplers[sample_rate] = resampler
        return resampler(waveform)

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(mel_spec_db, score)``."""
        audio_path = os.path.join(self.audio_dir, self.annotations.iloc[idx, 0])
        waveform, sample_rate = torchaudio.load(audio_path)
        waveform = self._resample(waveform, sample_rate)
        mel_spec = self.mel_spectrogram(waveform)
        mel_spec_db = self.amplitude_to_db(mel_spec)
        score = torch.tensor(self.annotations.iloc[idx, 1], dtype=torch.float32)
        return mel_spec_db, score
# Step 3: Esegui tutto
def prepare_dataset(
    annotations_file='ODAQ_dataset/ODAQ/ODAQ_listening_test/ODAQ_results.csv',
    audio_dir='ODAQ_dataset/ODAQ/ODAQ_listening_test',
    download=False,
    dataset_url='https://zenodo.org/records/10405774/files/ODAQ.zip',
    extract_to='ODAQ_dataset',
):
    """Build the ODAQ dataset, optionally downloading the archive first.

    Called without arguments this behaves as before: no download is attempted
    and the dataset is built from the default paths.

    NOTE(review): other cells of this notebook read
    'ODAQ_dataset/ODAQ_results.csv' -- confirm which directory layout is the
    current one and pass ``annotations_file`` / ``audio_dir`` accordingly.

    Args:
        annotations_file: CSV file handed to ``ODAQDataset``.
        audio_dir: Audio root directory handed to ``ODAQDataset``.
        download: When True, fetch and extract the archive, but only if
            ``annotations_file`` does not exist yet (so re-runs stay cheap).
        dataset_url: Location of the zip archive.
        extract_to: Directory the archive is extracted into.

    Returns:
        The constructed ``ODAQDataset``.
    """
    if download and not os.path.exists(annotations_file):
        download_and_unzip(dataset_url, extract_to)
    return ODAQDataset(annotations_file, audio_dir)

# Esempio di utilizzo
if __name__ == "__main__":
    # Smoke test: build the dataset and show the shapes of one shuffled batch.
    dataset = prepare_dataset()
    dataloader = DataLoader(dataset, batch_size=16, shuffle=True)
    first_batch = next(iter(dataloader), None)
    if first_batch is not None:
        mel_specs, scores = first_batch
        print(mel_specs.shape, scores.shape)


Scaricamento ODAQ: 100%|██████████| 0.98G/0.98G [02:30<00:00, 6.98MiB/s]    


FileNotFoundError: [Errno 2] No such file or directory: 'ODAQ_dataset/ODAQ_results.csv'

# Dataset build

In [14]:
import pandas as pd
import os

# Read the listening-test results (a CSV file).
csv_path = "ODAQ_dataset/ODAQ_results.csv"
df = pd.read_csv(csv_path)

# Average the 'score' column for every (item, process) pair and expose the
# result as a long-format table with columns: item, process, mean_score.
mean_scores = (
    df.groupby(['item', 'process'])['score']
    .mean()
    .rename('mean_score')
    .reset_index()
)

# Persist the aggregated table for the dataset cells that follow.
mean_scores.to_csv("ODAQ_dataset/mean_scores_long.csv", index=False)


In [22]:
import os
import torch
import torchaudio
from torch.utils.data import Dataset
import pandas as pd

class SpectrogramDataset(Dataset):
    """Dataset pairing the spectrogram of a processed item with its mean score.

    Each CSV row must provide the columns ``item``, ``process`` and
    ``mean_score``; the audio file is looked up at
    ``{root_dir}/{item}/{process}.wav``.

    Args:
        csv_file: Path (or file-like object) of the CSV table.
        root_dir: Directory containing one sub-directory per item.
        transform: Callable applied to the loaded waveform. When ``None``, a
            default ``torchaudio.transforms.MelSpectrogram()`` is used.
    """

    def __init__(self, csv_file, root_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.root_dir = root_dir
        # Compare against None explicitly: a caller-supplied transform may be
        # falsy (e.g. an empty nn.Sequential has len() == 0) and must not be
        # silently replaced by the default.
        if transform is None:
            transform = torchaudio.transforms.MelSpectrogram()
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``{'spectrogram': Tensor, 'score': Tensor}`` for row ``idx``."""
        row = self.data.iloc[idx]
        item = row['item']
        process = row['process']
        score = torch.tensor(row['mean_score'], dtype=torch.float32)

        # Build the path of the .wav file
        wav_path = f"{self.root_dir}/{item}/{process}.wav"

        # Load the audio. The returned sample rate is not used below.
        # NOTE(review): confirm the transform is configured for the sample
        # rate of the files, since no resampling happens here.
        waveform, sr = torchaudio.load(wav_path)

        # Compute the spectrogram
        spectrogram = self.transform(waveform)

        # squeeze(0) drops the leading axis only when it has size 1; a
        # multi-channel result keeps its leading axis (no mono downmix).
        spectrogram = spectrogram.squeeze(0)

        return {
            'spectrogram': spectrogram,
            'score': score
        }


In [23]:
from torch.utils.data import random_split, DataLoader

csv_path = "ODAQ_dataset/mean_scores_long.csv"

# Full dataset built from the aggregated scores
full_dataset = SpectrogramDataset(csv_path, root_dir="ODAQ_dataset")

# 70 % / 15 % / remainder split; the test set absorbs rounding leftovers so
# the three lengths always add up to the dataset size.
total_len = len(full_dataset)
train_len, val_len = int(0.7 * total_len), int(0.15 * total_len)
test_len = total_len - train_len - val_len

# Seeded generator so the random split is reproducible
split_generator = torch.Generator().manual_seed(42)
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_len, val_len, test_len],
    generator=split_generator,
)

# One DataLoader per subset; only the training data is shuffled
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader, test_loader = (
    DataLoader(subset, batch_size=32, shuffle=False)
    for subset in (val_dataset, test_dataset)
)




In [24]:
# Fetch a single batch from the training loader and report its tensor shapes
print("Train loader example:")
batch = next(iter(train_loader), None)
if batch is not None:
    spectrogram, score = batch['spectrogram'], batch['score']
    print(f"Spectrogram shape: {spectrogram.shape}, Score shape: {score.shape}")

Train loader example:


RuntimeError: Couldn't find appropriate backend to handle uri ODAQ_dataset/PE_AmateurOnPurpose/LP35.wav and format None.