In [None]:
# Setup: Add parent directory to path to import src modules
import sys
sys.path.append('..')

# Core imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from sklearn.model_selection import train_test_split

# Import our modules
from src.vae import MLPVAE, ConvVAE, MultiModalVAE, vae_loss, multimodal_vae_loss
from src.dataset import align_multimodal_data, NumpyDataset, AudioDataset, MultiModalDatasetLazy
from src.clustering import run_all_clusterers, reduce_dimensions_2d
from src.evaluation import compute_all_metrics, compare_methods, print_metrics_summary
from src.visualization import plot_latent_space_2d, plot_training_curves, plot_metrics_comparison

# Configuration: seed the random number generators and choose the compute device
SEED = 42
torch.manual_seed(SEED)
np.random.seed(SEED)

# Use the GPU only when PyTorch reports that CUDA is available.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

print(f"Device: {DEVICE}", f"PyTorch version: {torch.__version__}", sep="\n")

## 1. Data Loading and Alignment

Load and align multi-modal data (audio, lyrics, genre) across track IDs.

In [None]:
# Align the audio, lyrics and genre inputs and unpack everything the helper returns.
# NOTE(review): the meaning of `keep_missing=True` is defined in src.dataset; the
# demo cell below checks `tid in audio_map`, so tracks without audio are expected.
(
    kept_ids, X_lyrics, X_genre, y_genre,
    audio_map, F, T, lyrics_dim, genre_dim,
) = align_multimodal_data(
    audio_dir="../Audio_Features",
    lyrics_csv="../Lyrics_Processed/lyrics_cleaned.csv",
    genre_csv="../genre_processed.csv",
    sbert_npy="../Lyrics_Processed/lyrics_sbert_embeddings.npy",
    audio_key="mfcc",
    keep_missing=True,
)

alignment_rule = "=" * 60
print(f"\n{alignment_rule}")
print("Data Alignment Summary")
print(alignment_rule)
print(f"Total tracks: {len(kept_ids)}")
print(f"Audio shape: (1, {F}, {T})")
print(f"Lyrics dim: {lyrics_dim}")
print(f"Genre classes: {genre_dim}")
# Entries equal to -1 are excluded from the labelled-track count.
print(f"Tracks with genre labels: {(y_genre != -1).sum()}")

## 2. Easy Task: Basic MLP-VAE

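A basic MLP-VAE trained on mean audio features, followed by K-Means clustering. This section loads the pre-computed results produced by the main notebook rather than retraining the model.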

In [None]:
# Load pre-computed features from Easy Task results
import os

EASY_LATENTS_PATH = "../Results/EasyTask/Z_vae.npy"
EASY_METRICS_PATH = "../Results/EasyTask/easy_task_metrics.csv"
EASY_MAX_PLOT_POINTS = 2000  # cap on how many points are embedded in 2D and plotted

# Both files are read below, so both must exist before entering the branch.
# Checking only the latent array would let a missing metrics CSV raise
# FileNotFoundError instead of printing the "not found" message.
if os.path.exists(EASY_LATENTS_PATH) and os.path.exists(EASY_METRICS_PATH):
    print("Loading Easy Task results...")
    Z_easy = np.load(EASY_LATENTS_PATH)
    results_easy = pd.read_csv(EASY_METRICS_PATH)

    print("\nEasy Task Results:")
    print(results_easy)

    # Visualize latent space: cluster on all rows, then embed/plot only the first
    # EASY_MAX_PLOT_POINTS rows (features and labels are sliced identically).
    from src.clustering import run_kmeans
    labels, _ = run_kmeans(Z_easy, n_clusters=6, random_state=SEED)

    Z_2d = reduce_dimensions_2d(Z_easy[:EASY_MAX_PLOT_POINTS], method="umap", random_state=SEED)
    plot_latent_space_2d(Z_2d, labels[:EASY_MAX_PLOT_POINTS], title="Easy Task: MLP-VAE Latent Space (UMAP)")
else:
    print("Easy Task results not found. Run the main notebook first.")

## 3. Medium Task: ConvVAE with Hyperparameter Tuning

ConvVAE for 2D audio features + systematic hyperparameter search.

In [None]:
# Load Medium Task results
MEDIUM_BEST_PATH = "../Results/MediumTask_WithARI/BEST_clustering_results.csv"
MEDIUM_HP_PATH = "../Results/MediumTask_WithARI/hyperparameter_comparison.csv"

# Both CSVs are read below, so require both; checking only the first would let
# a missing hyperparameter table raise FileNotFoundError inside the branch.
if all(os.path.exists(path) for path in (MEDIUM_BEST_PATH, MEDIUM_HP_PATH)):
    print("Loading Medium Task results...")
    results_medium = pd.read_csv(MEDIUM_BEST_PATH)
    hp_comparison = pd.read_csv(MEDIUM_HP_PATH)

    print("\n" + "="*80)
    print("Medium Task: Hyperparameter Comparison")
    print("="*80)
    print(hp_comparison)

    print("\n" + "="*80)
    print("Medium Task: Best Configuration Results")
    print("="*80)
    print(results_medium)

    # Visualize metrics comparison
    plot_metrics_comparison(results_medium, figsize=(16, 10))
else:
    print("Medium Task results not found. Run the main notebook first.")

## 4. Hard Task: CVAE/Beta-VAE with Complete Metrics

Conditional VAE with genre conditioning + all evaluation metrics.

In [None]:
# Load Hard Task results
HARD_BEST_PATH = "../Results/HardTask_CVAE/BEST_hard_results.csv"
HARD_COMPARISON_PATH = "../Results/HardTask_CVAE/cvae_comparison.csv"

# Both CSVs are read below, so require both; checking only the first would let
# a missing comparison table raise FileNotFoundError inside the branch.
if all(os.path.exists(path) for path in (HARD_BEST_PATH, HARD_COMPARISON_PATH)):
    print("Loading Hard Task results...")
    results_hard = pd.read_csv(HARD_BEST_PATH)
    cvae_comparison = pd.read_csv(HARD_COMPARISON_PATH)

    print("\n" + "="*80)
    print("Hard Task: CVAE Configuration Comparison")
    print("="*80)
    print(cvae_comparison)

    print("\n" + "="*80)
    print("Hard Task: Best CVAE Results (All Metrics)")
    print("="*80)
    print(results_hard)

    # Plot metrics comparison
    plot_metrics_comparison(results_hard, figsize=(16, 12))
else:
    print("Hard Task results not found. Run the main notebook first.")

## 5. Training Example: ConvVAE from Scratch

Demonstrate training a ConvVAE model (small example).

In [None]:
# Example: train a small ConvVAE (demonstration only).
# DEMO_MODE=True -> small random subset and a handful of epochs;
# DEMO_MODE=False -> every aligned track with the full training schedule.
DEMO_MODE = True

if not DEMO_MODE:
    sample_size, epochs, batch_size = len(kept_ids), 30, 32
else:
    print("Demo mode: Using small subset and few epochs")
    sample_size, epochs, batch_size = 100, 5, 16

# Load audio features for a random subset of the aligned tracks
from src.dataset import load_audio_2d

# NOTE: the draw uses NumPy's global RNG, so re-running this cell without
# restarting the kernel selects a different subset.
n_demo_tracks = min(sample_size, len(kept_ids))
indices = np.random.choice(len(kept_ids), n_demo_tracks, replace=False)

X_audio_demo = []
for idx in indices:
    tid = kept_ids[idx]
    if tid in audio_map:
        audio = load_audio_2d(audio_map[tid], key="mfcc", max_time=T)
        # Prepend a channel axis so each sample has a leading dimension of 1.
        X_audio_demo.append(audio[np.newaxis, :])
    else:
        # No audio entry for this track: substitute an all-zero (1, F, T) feature map.
        X_audio_demo.append(np.zeros((1, F, T), dtype=np.float32))

# Force float32 explicitly. Without it the stacked dtype follows whatever
# load_audio_2d returns; one float64 array would promote the whole stack, and
# torch.from_numpy would then yield a double tensor that a default (float32)
# PyTorch model rejects.
X_audio_demo = np.asarray(X_audio_demo, dtype=np.float32)
print(f"Loaded {len(X_audio_demo)} audio samples: shape {X_audio_demo.shape}")

# Build the ConvVAE on the selected device together with its Adam optimizer.
model = ConvVAE(in_channels=1, latent_dim=32, F=F, T=T)
model = model.to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Hold out 20% of the demo samples for validation (seeded split).
all_rows = np.arange(len(X_audio_demo))
train_idx, val_idx = train_test_split(all_rows, test_size=0.2, random_state=SEED)

# Only the training loader shuffles.
train_set = AudioDataset(X_audio_demo[train_idx])
val_set = AudioDataset(X_audio_demo[val_idx])
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_set, batch_size=batch_size, shuffle=False)

print(f"\nTraining ConvVAE: {epochs} epochs, batch_size={batch_size}")
print(f"Train samples: {len(train_idx)}, Val samples: {len(val_idx)}")

# Training loop
def _run_epoch(loader, training):
    """Make one pass over `loader` and return the mean of the per-batch losses.

    With `training=True` the model is switched to train mode and the optimizer
    takes one step per batch. With `training=False` the model is switched to
    eval mode and gradient tracking is disabled for the whole pass.
    """
    model.train(training)  # train(False) is the same switch as eval()
    batch_losses = []
    with torch.set_grad_enabled(training):
        for batch in loader:
            batch = batch.to(DEVICE)
            if training:
                optimizer.zero_grad()

            x_hat, mu, logvar, _ = model(batch)
            # Only the total loss is used; the two component terms are ignored.
            loss, _recon, _kl = vae_loss(batch, x_hat, mu, logvar, beta=1.0)

            if training:
                loss.backward()
                optimizer.step()
            batch_losses.append(loss.item())
    return np.mean(batch_losses)


history = []
for epoch in range(1, epochs + 1):
    # Train first, then validate, so each validation loss reflects the updated weights.
    train_loss = _run_epoch(train_loader, training=True)
    val_loss = _run_epoch(val_loader, training=False)
    history.append({'epoch': epoch, 'train_loss': train_loss, 'val_loss': val_loss})

    print(f"Epoch {epoch}/{epochs} - Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}")

# Plot the per-epoch training and validation loss recorded in `history`.
history_df = pd.DataFrame(history)

fig, ax = plt.subplots(figsize=(10, 5))
for loss_column, legend_name in (('train_loss', 'Train'), ('val_loss', 'Validation')):
    ax.plot(history_df['epoch'], history_df[loss_column], label=legend_name)
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.set_title('ConvVAE Training (Demo)')
ax.legend()
ax.grid(alpha=0.3)
plt.show()

print("\nTraining complete!")

## 6. Clustering with Learned Features

Extract latent features and perform clustering.

In [None]:
# Extract latent features (the encoder mean `mu`) for every demo sample.
# The array is encoded in fixed-size chunks rather than one forward pass, so the
# cell still fits in (GPU) memory when DEMO_MODE is False and X_audio_demo holds
# every track. In eval mode each sample is encoded independently of its batch.
LATENT_EXTRACT_BATCH = 256

model.eval()
latent_chunks = []
with torch.no_grad():
    for start in range(0, len(X_audio_demo), LATENT_EXTRACT_BATCH):
        chunk = X_audio_demo[start:start + LATENT_EXTRACT_BATCH]
        # .float() keeps the input dtype aligned with the model's float32 weights.
        X_tensor = torch.from_numpy(chunk).float().to(DEVICE)
        _, mu, _, _ = model(X_tensor)
        latent_chunks.append(mu.cpu().numpy())

# Stitch the per-chunk means back together in the original sample order.
Z_demo = np.concatenate(latent_chunks, axis=0)

print(f"Extracted latent features: {Z_demo.shape}")

# Genre labels for the sampled tracks, in the same order as the rows of Z_demo.
# NOTE(review): y_genre may contain -1 for tracks without a genre label (the
# alignment summary counts `y_genre != -1`); confirm compute_all_metrics handles them.
y_demo = y_genre[indices]

# Run every clusterer on the latent features and score each against the genre labels.
clusterer_kwargs = dict(
    X=Z_demo,
    y_true=y_demo,
    n_clusters=6,
    prefix="Demo_",
    compute_metrics_fn=compute_all_metrics,
    random_state=SEED,
)
results = run_all_clusterers(**clusterer_kwargs)

# Tabulate the methods side by side.
comparison = compare_methods(results)
comparison_rule = "=" * 80
print(f"\n{comparison_rule}")
print("Clustering Results Comparison")
print(comparison_rule)
print(comparison)

# Report the first entry as the best run.
# NOTE(review): presumes run_all_clusterers returns results ordered best-first — TODO confirm.
best_run = results[0]
best_method, best_labels, best_metrics = best_run
print(f"\nBest method: {best_method}")
print_metrics_summary(best_metrics, best_method)

# Project the latent features to 2D with UMAP and colour points by the best run's labels.
Z_2d_demo = reduce_dimensions_2d(Z_demo, method="umap", random_state=SEED)
plot_latent_space_2d(Z_2d_demo, best_labels, title=f"Demo: {best_method}")

## 7. Results Summary

Load and display all task results for comparison.

In [None]:
# Print a consolidated summary of whichever task results exist on disk.
summary_rule = "=" * 80
print("\n" + summary_rule)
print("PROJECT RESULTS SUMMARY")
print(summary_rule)

# Tasks whose result file is absent; this decides the closing message so the
# cell no longer claims success when nothing was loaded.
missing_tasks = []

# Easy Task
if os.path.exists("../Results/EasyTask/easy_task_metrics.csv"):
    easy_metrics = pd.read_csv("../Results/EasyTask/easy_task_metrics.csv")
    print("\n📊 EASY TASK (MLP-VAE):")
    print(easy_metrics.to_string(index=False))
else:
    missing_tasks.append("Easy")

# Medium Task
if os.path.exists("../Results/MediumTask_WithARI/hyperparameter_comparison.csv"):
    medium_hp = pd.read_csv("../Results/MediumTask_WithARI/hyperparameter_comparison.csv")
    print("\n📊 MEDIUM TASK (ConvVAE) - Hyperparameter Comparison:")
    print(medium_hp.to_string(index=False))

    # NOTE(review): treats the first row as the best configuration — assumes the
    # CSV was written sorted best-first; confirm against the main notebook.
    if not medium_hp.empty:
        best_config = medium_hp.iloc[0]
        print(f"\n🏆 Best Configuration: {best_config['Configuration']}")
        print(f"   Silhouette: {best_config['Best Silhouette']:.4f}")
        print(f"   Latent Dim: {best_config['Latent Dim']}, Beta: {best_config['Beta']}")
else:
    missing_tasks.append("Medium")

# Hard Task
if os.path.exists("../Results/HardTask_CVAE/cvae_comparison.csv"):
    hard_cvae = pd.read_csv("../Results/HardTask_CVAE/cvae_comparison.csv")
    print("\n📊 HARD TASK (CVAE/Beta-VAE) - Configuration Comparison:")
    print(hard_cvae.to_string(index=False))

    # NOTE(review): same best-first ordering assumption as the Medium Task table.
    if not hard_cvae.empty:
        best_cvae = hard_cvae.iloc[0]
        print(f"\n🏆 Best CVAE Configuration: {best_cvae['Configuration']}")
        print(f"   ARI: {best_cvae['Best ARI']:.4f}")
        print(f"   Beta: {best_cvae['Beta']}, Latent Dim: {best_cvae['Latent Dim']}")
else:
    missing_tasks.append("Hard")

print("\n" + summary_rule)
if missing_tasks:
    print(f"Results not found for: {', '.join(missing_tasks)} Task. Run the main notebook first.")
else:
    print("✅ All tasks completed successfully!")
    print("📁 Results saved in: Results/EasyTask, Results/MediumTask_WithARI, Results/HardTask_CVAE")
print(summary_rule)

## Conclusion

This notebook demonstrates:
1. ✅ **Easy Task**: Basic MLP-VAE with K-Means clustering
2. ✅ **Medium Task**: ConvVAE with systematic hyperparameter tuning and ARI computation
3. ✅ **Hard Task**: CVAE/Beta-VAE with complete metrics (Silhouette, ARI, NMI, Purity)
4. ✅ **All metrics**: 6 evaluation metrics implemented and compared
5. ✅ **Visualizations**: Latent space, training curves, cluster distributions

### Key Achievements:
- Multi-modal VAE for audio + lyrics + genre
- Systematic hyperparameter search
- Comprehensive baseline comparisons
- Production-quality modular code

### Next Steps:
1. Write NeurIPS-style paper using results from `Results/` directories
2. Upload repository to GitHub
3. Optional: Further hyperparameter tuning or architecture experiments