# Step 3: Dataset Setup
## Different options
- Option 1 (usually recommended): download the dataset with a script that places the data in the download folder.
- Option 2: upload the dataset to your personal/institutional Google Drive and load it from there ([Read More](https://saturncloud.io/blog/google-colab-how-to-read-data-from-my-google-drive/)).
- Option 3: place the download script directly here in Colab.

You are free to do as you please in this phase.


In [None]:
# Import all required libraries
import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from pathlib import Path

# Print interpreter, library and working-directory information so that
# environment-related problems are visible at the top of the notebook.
print("=" * 80)
print("🔧 PROJECT ENVIRONMENT INFO")
print("=" * 80)
print(f"Python Version: {sys.version.split()[0]}")
print(f"PyTorch Version: {torch.__version__}")
print(f"NumPy Version: {np.__version__}")
print(f"Current Working Directory: {os.getcwd()}")
print()

# Select the compute device: CUDA when PyTorch can see a GPU, CPU otherwise.
# `device` is the notebook-wide handle for the chosen device.
cuda_available = torch.cuda.is_available()
device = torch.device('cuda' if cuda_available else 'cpu')
if cuda_available:
    print("✅ CUDA is AVAILABLE")
    print(f"   GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"   CUDA Version: {torch.version.cuda}")
    print(f"   Number of GPUs: {torch.cuda.device_count()}")
else:
    print("❌ CUDA is NOT available - Using CPU")

print(f"   Default Device: {device}")
print("=" * 80)

In [None]:
from utils.download_dataset_local import dowload_ravdess_local

# Download RAVDESS; the helper's return value is used as the dataset location.
# A falsy return value is treated as a failed download and is only reported
# here, without raising.
dataset_path = dowload_ravdess_local()
if dataset_path:
    print(f"✅ Downloaded RAVDESS dataset locally in {dataset_path}...")
else:
    print("❌ RAVDESS dataset download failed.")

# Name under which the RAVDESS location is exposed to the rest of the notebook.
ravdess_path = dataset_path

In [None]:
from torch.utils.data import DataLoader
from dataset.custom_ravdess_dataset import CustomRAVDESSDataset
from utils.get_dataset_statistics import print_dataset_stats

print("=" * 80)
print("🔄 CREAZIONE DATASET E DATALOADER - RAVDESS")
print("=" * 80)

# Fail early if the download step left no usable dataset directory.
if not ravdess_path or not Path(ravdess_path).exists():
    raise ValueError(f"❌ Dataset RAVDESS non trovato in: {ravdess_path}")

print(f"✅ Usando dataset da: {ravdess_path}\n")

# One dataset object per split, all reading from the same root directory.
train_dataset = CustomRAVDESSDataset(dataset_root=ravdess_path, split='train')
val_dataset = CustomRAVDESSDataset(dataset_root=ravdess_path, split='validation')
test_dataset = CustomRAVDESSDataset(dataset_root=ravdess_path, split='test')

# Only the training loader shuffles; validation and test keep dataset order.
batch_size = 32
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


# Dataloader summary: number of batches, batch size and dataset size.
# NOTE(review): with DataLoader's default drop_last=False the last batch may be
# smaller, so batches × batch_size is an upper bound on the total, not an equality.
print("\n" + "=" * 80)
print("📦 DATALOADER SUMMARY")
print("=" * 80)
print(f"Train Dataloader:      {len(train_dataloader)} batch × {batch_size} samples = {len(train_dataset)} totali")
print(f"Validation Dataloader: {len(val_dataloader)} batch × {batch_size} samples = {len(val_dataset)} totali")
print(f"Test Dataloader:       {len(test_dataloader)} batch × {batch_size} samples = {len(test_dataset)} totali")
print("=" * 80)

In [None]:
# Reload the download helper module (if it was already imported) so that the
# fixed version on disk is used without restarting the kernel.
import importlib
import sys

if 'utils.download_dataset_local' in sys.modules:
    importlib.reload(sys.modules['utils.download_dataset_local'])

from utils.download_dataset_local import dowload_iemocap_local

# Download IEMOCAP; a falsy return value is treated as a failed download and
# is only reported here, without raising.
iemocap_dataset_path = dowload_iemocap_local()
if iemocap_dataset_path:
    print(f"✅ Downloaded IEMOCAP dataset locally in {iemocap_dataset_path}...")
else:
    print("❌ IEMOCAP dataset download failed.")

# Name under which the IEMOCAP location is exposed to the rest of the notebook.
iemocap_path = iemocap_dataset_path

In [None]:
# DEBUG: inspect the on-disk layout of the IEMOCAP download.
# Prints the root path, its contents, the Session* folders and, for the first
# session, the wav and label sub-folders that are checked below.
print("=" * 80)
print("🔍 DEBUG - VERIFICA PERCORSI IEMOCAP")
print("=" * 80)

iemocap_debug_path = iemocap_path
print(f"1️⃣  Percorso passato: {iemocap_debug_path}\n")

# The download cell may leave the path as None; Path(None) raises TypeError,
# so a missing path is reported as "does not exist" instead of crashing.
debug_root = Path(iemocap_debug_path) if iemocap_debug_path else None
root_exists = debug_root is not None and debug_root.exists()
print(f"2️⃣  Percorso esiste: {root_exists}\n")

# List what is inside the root folder.
if root_exists:
    print(f"3️⃣  Contenuto di {iemocap_debug_path}:")
    for item in debug_root.iterdir():
        print(f"   - {item.name} {'(DIR)' if item.is_dir() else ''}")
    print()

# Look for the Session folders; sorted so that the preview and the session
# inspected below do not depend on filesystem enumeration order.
print("4️⃣  Ricerca cartelle Session:")
session_folders = sorted(debug_root.glob("Session*")) if root_exists else []
print(f"   Trovate: {len(session_folders)} cartelle Session")
for s in session_folders[:3]:
    print(f"   - {s.name}")
print()

# If any Session folder exists, inspect the structure of the first one.
if session_folders:
    session1 = session_folders[0]
    print(f"5️⃣  Dentro {session1.name}:")
    for item in session1.iterdir():
        print(f"   - {item.name}")
    print()

    # Check the wav folder (only the first 5 entries are shown).
    wav_path = session1 / "sentences" / "wav"
    print(f"6️⃣  Percorso wav: {wav_path}")
    print(f"   Esiste: {wav_path.exists()}")
    if wav_path.exists():
        wav_items = list(wav_path.iterdir())
        print(f"   Contiene {len(wav_items)} elementi:")
        for item in wav_items[:5]:
            print(f"      - {item.name} {'(DIR)' if item.is_dir() else ''}")
    print()

    # Check the label folder (only the first 3 .txt files are shown).
    label_path = session1 / "dialog" / "EmoEvaluation"
    print(f"7️⃣  Percorso label: {label_path}")
    print(f"   Esiste: {label_path.exists()}")
    if label_path.exists():
        label_items = list(label_path.glob("*.txt"))
        print(f"   Trovati {len(label_items)} file .txt")
        for item in label_items[:3]:
            print(f"      - {item.name}")

print("=" * 80)

In [None]:
# Imported here as well so this cell does not depend on the RAVDESS cell
# having been run first.
from torch.utils.data import DataLoader
from dataset.custom_iemocap_dataset import CustomIEMOCAPDataset
from utils.get_dataset_statistics import print_iemocap_stats

print("=" * 80)
print("🔄 CREAZIONE DATASET E DATALOADER - IEMOCAP")
print("=" * 80)

# Fail early if the download step left no usable dataset directory.
if not iemocap_path or not Path(iemocap_path).exists():
    raise ValueError(f"❌ Dataset IEMOCAP non trovato in: {iemocap_path}")

print(f"✅ Usando dataset da: {iemocap_path}\n")

# One dataset object per split, all reading from the same root directory.
train_iemocap_dataset = CustomIEMOCAPDataset(dataset_root=iemocap_path, split='train')
val_iemocap_dataset = CustomIEMOCAPDataset(dataset_root=iemocap_path, split='validation')
test_iemocap_dataset = CustomIEMOCAPDataset(dataset_root=iemocap_path, split='test')

# Only the training loader shuffles; validation and test keep dataset order.
batch_size = 32
train_iemocap_dataloader = DataLoader(train_iemocap_dataset, batch_size=batch_size, shuffle=True)
val_iemocap_dataloader = DataLoader(val_iemocap_dataset, batch_size=batch_size, shuffle=False)
test_iemocap_dataloader = DataLoader(test_iemocap_dataset, batch_size=batch_size, shuffle=False)


# Dataloader summary: number of batches, batch size and dataset size.
# NOTE(review): with DataLoader's default drop_last=False the last batch may be
# smaller, so batches × batch_size is an upper bound on the total, not an equality.
print("\n" + "=" * 80)
print("📦 DATALOADER SUMMARY - IEMOCAP")
print("=" * 80)
print(f"Train Dataloader:      {len(train_iemocap_dataloader)} batch × {batch_size} samples = {len(train_iemocap_dataset)} totali")
print(f"Validation Dataloader: {len(val_iemocap_dataloader)} batch × {batch_size} samples = {len(val_iemocap_dataset)} totali")
print(f"Test Dataloader:       {len(test_iemocap_dataloader)} batch × {batch_size} samples = {len(test_iemocap_dataset)} totali")
print("=" * 80)

Weights & Biases: genera i grafici e compara gli esperimenti

In [None]:
import wandb
import os

# SECURITY: never hard-code the W&B API key in the notebook. wandb.login()
# picks the key up from the WANDB_API_KEY environment variable (set it outside
# the notebook, e.g. via your shell or the Colab secrets manager) and otherwise
# prompts for it interactively. Any key that was ever committed in this
# notebook must be considered leaked and should be revoked in the W&B settings.
if not os.environ.get('WANDB_API_KEY'):
    print("WANDB_API_KEY is not set - wandb.login() will ask for the key.")
wandb.login()

In [None]:
# Run train.py in a shell (IPython `!` escape), selecting the CRNN_BiLSTM
# model through the --model option.
!python train.py --model CRNN_BiLSTM

# Disabled alternative: the same command with the CRNN_BiGRU model.
#!python train.py --model CRNN_BiGRU

# Step 5: Evaluate your model



In [None]:
# Run eval.py in a shell (IPython `!` escape) for the CRNN_BiLSTM model,
# passing checkpoints/best_model.pth through the --checkpoint option.
!python eval.py --model CRNN_BiLSTM --checkpoint checkpoints/best_model.pth

# Disabled alternative: the same command with the CRNN_BiGRU model.
#!python eval.py --model CRNN_BiGRU --checkpoint checkpoints/best_model.pth