Cell 1: Setup

In [None]:
# Setup - Run this first
!pip install torch matplotlib seaborn pandas numpy scikit-learn tqdm -q

import os, warnings, random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

# Silence all Python warnings for the rest of the session.
warnings.filterwarnings('ignore')
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
# `device` is read by the training and evaluation cells.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Set seeds for reproducibility: Python's `random`, NumPy's global RNG and
# PyTorch (plus the CUDA generator when a GPU is present) all start from 42.
random.seed(42)
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)

# Create the directory the data cells read from and write to (no error if it
# already exists).
os.makedirs('data/raw', exist_ok=True)
print("Setup complete!")


Cell 2: Download or Upload Data

In [None]:
import zipfile
import urllib.request
import os
from pathlib import Path

def check_data_files():
    """Report which of the required OULAD CSVs are present under data/raw/.

    Prints one line listing the files that were found and one line listing the
    files that are missing; each line is only printed when its list is
    non-empty.

    Returns:
        bool: True when no required file is missing, False otherwise.
    """
    required_files = ['studentInfo.csv', 'studentVle.csv', 'studentAssessment.csv']

    # Partition the required names by presence on disk, keeping the order of
    # `required_files` in both lists.
    existing = [name for name in required_files if os.path.exists(f'data/raw/{name}')]
    missing = [name for name in required_files if name not in existing]

    if existing:
        print(f"✅ Found existing files: {existing}")
    if missing:
        print(f"❌ Missing files: {missing}")

    return not missing

def download_oulad_dataset():
    """Attempt to automatically download the OULAD dataset.

    Fetches the Kaggle archive with curl (falling back to urllib when curl is
    missing, times out or reports an error), extracts the three required CSVs
    into data/raw/ and removes the archive again.

    Returns:
        bool: True when all three required CSVs were extracted; False on any
        failure. Failures are printed, not raised.
    """
    import subprocess

    print("🔄 Attempting to download OULAD dataset...")

    # Download URL (Kaggle public dataset)
    url = "https://www.kaggle.com/api/v1/datasets/download/anlgrbz/student-demographics-online-education-dataoulad"
    zip_path = "oulad_dataset.zip"
    required_files = ['studentInfo.csv', 'studentVle.csv', 'studentAssessment.csv']

    try:
        # Extraction writes into data/raw/, so make sure it exists even when
        # this cell is run without the setup cell.
        os.makedirs('data/raw', exist_ok=True)

        print("📥 Downloading dataset (this may take a few minutes)...")

        # Try curl first; urllib is the fallback.
        try:
            # -f makes curl exit non-zero on HTTP errors instead of saving the
            # server's error page as if it were the archive.
            result = subprocess.run(
                ['curl', '-f', '-L', '-o', zip_path, url],
                capture_output=True, text=True, timeout=300,  # 5 minute timeout
            )

            if result.returncode != 0:
                raise Exception(f"Curl failed: {result.stderr}")

        except Exception as e:
            # Covers a missing curl binary, a timeout and a non-zero exit code.
            print(f"⚠️ Curl method failed: {e}")
            print("📥 Trying alternative download method...")

            # Fallback to urllib
            urllib.request.urlretrieve(url, zip_path)

        # Check if download was successful
        if not os.path.exists(zip_path) or os.path.getsize(zip_path) < 1000:  # Less than 1KB indicates failure
            raise Exception("Downloaded file is too small or doesn't exist")

        file_size_mb = os.path.getsize(zip_path) / (1024 * 1024)
        print(f"✅ Download complete: {file_size_mb:.1f} MB")

        # Extract the zip file
        print("📂 Extracting files...")

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            # List all files in the zip
            all_files = zip_ref.namelist()
            print(f"Found {len(all_files)} files in archive")

            extracted_files = []

            # Extract required files, matching on the base name so that any
            # directory prefix inside the archive is ignored.
            for file_path in all_files:
                filename = os.path.basename(file_path)

                if filename in required_files:
                    target_path = f'data/raw/{filename}'
                    # Copy in 1 MiB chunks instead of loading the whole member
                    # into memory at once.
                    with zip_ref.open(file_path) as source, open(target_path, 'wb') as target:
                        for chunk in iter(lambda: source.read(1024 * 1024), b''):
                            target.write(chunk)

                    extracted_files.append(filename)
                    file_size = os.path.getsize(target_path) / (1024 * 1024)
                    print(f"  ✅ Extracted {filename}: {file_size:.1f} MB")

            # Check if all required files were found
            missing_files = [f for f in required_files if f not in extracted_files]
            if missing_files:
                raise Exception(f"Required files not found in archive: {missing_files}")

        # Clean up zip file
        os.remove(zip_path)
        print("🗑️ Cleaned up temporary files")

        print("🎉 Automatic download completed successfully!")
        return True

    except Exception as e:
        print(f"❌ Automatic download failed: {str(e)}")

        # Clean up failed download (best effort: a failure to delete the
        # partial archive must not mask the original error).
        if os.path.exists(zip_path):
            try:
                os.remove(zip_path)
            except OSError:
                pass

        return False

def manual_upload():
    """Fallback to manual file upload through the Google Colab upload widget.

    Every uploaded file whose name ends in '.csv' is moved from the working
    directory into data/raw/. The upload only counts as successful when all
    three required file names are among the moved files; three arbitrary CSVs
    are not enough.

    Returns:
        bool: True when studentInfo.csv, studentVle.csv and
        studentAssessment.csv were all uploaded and moved. False otherwise,
        including when google.colab cannot be imported or any error occurs.
    """
    print("\n📤 Please upload the OULAD CSV files manually:")
    print("   Required files: studentInfo.csv, studentVle.csv, studentAssessment.csv")
    print("   Download from: https://www.kaggle.com/datasets/anlgrbz/student-demographics-online-education-dataoulad")

    required_files = ['studentInfo.csv', 'studentVle.csv', 'studentAssessment.csv']

    try:
        from google.colab import files
        print("\n⬇️ Click 'Choose Files' below and select all 3 CSV files:")
        uploaded = files.upload()

        # The move below needs the target directory; create it in case the
        # setup cell was skipped.
        os.makedirs('data/raw', exist_ok=True)

        moved_files = []
        for filename in uploaded.keys():
            if filename.endswith('.csv'):
                # Move to data/raw/
                target_path = f'data/raw/{filename}'
                os.rename(filename, target_path)

                file_size = os.path.getsize(target_path) / (1024 * 1024)
                print(f"✅ Uploaded {filename}: {file_size:.1f} MB")
                moved_files.append(filename)

        # Judge success by the required names, not by how many files arrived.
        missing_files = [name for name in required_files if name not in moved_files]
        if not missing_files:
            print("🎉 Manual upload completed successfully!")
            return True

        print(f"⚠️ Required files still missing after upload: {missing_files}")
        return False

    except ImportError:
        print("❌ Manual upload not available (not running in Colab)")
        print("Please manually place CSV files in data/raw/ directory")
        return False
    except Exception as e:
        print(f"❌ Manual upload failed: {e}")
        return False

# Main execution: make sure the three OULAD CSVs are in data/raw/, trying the
# cheapest option first (already present -> automatic download -> manual upload).
print("🔍 Checking for existing data files...")

if check_data_files():
    print("✅ All required data files already exist! Skipping download.")
else:
    print("\n🚀 Starting data acquisition process...")

    # Try automatic download first
    download_success = download_oulad_dataset()

    if not download_success:
        print("\n🔄 Automatic download failed. Trying manual upload...")
        upload_success = manual_upload()

        if not upload_success:
            # Neither route worked: print instructions for placing the files by hand.
            print("\n❌ Both automatic and manual methods failed.")
            print("📋 Manual steps:")
            print("1. Go to: https://www.kaggle.com/datasets/anlgrbz/student-demographics-online-education-dataoulad")
            print("2. Download the dataset")
            print("3. Extract and upload the 3 CSV files using the file browser on the left")
            print("4. Place them in the data/raw/ folder")

    # Final check: re-scan data/raw/ regardless of which route was taken above.
    print("\n🔍 Final verification...")
    if check_data_files():
        print("🎉 All data files are ready!")

        # Show file info: per-file size and the combined size, in MB.
        required_files = ['studentInfo.csv', 'studentVle.csv', 'studentAssessment.csv']
        print("\n📊 Dataset info:")
        total_size = 0
        for filename in required_files:
            filepath = f'data/raw/{filename}'
            if os.path.exists(filepath):
                size_mb = os.path.getsize(filepath) / (1024 * 1024)
                total_size += size_mb
                print(f"   📄 {filename}: {size_mb:.1f} MB")

        print(f"   💾 Total size: {total_size:.1f} MB")
        print("\n✅ Ready to proceed to next cell!")
    else:
        print("❌ Data files still missing. Please upload manually.")


Cell 3: Load and Process Data

In [None]:
def load_and_process_oulad(data_dir='data/raw', max_weeks=30):
    """Build a dense per-student, per-week feature table from the OULAD CSVs.

    Reads studentInfo.csv, studentVle.csv and studentAssessment.csv from
    ``data_dir``, converts day offsets to week numbers (``day // 7``),
    aggregates clicks and assessment scores per (student, week) and
    left-merges them onto a complete grid of every student in studentInfo
    crossed with weeks ``0 .. max_weeks - 1``. Missing values are filled
    with 0.

    Args:
        data_dir: Directory that contains the three CSV files.
        max_weeks: Number of weeks in the grid. Activity in a week outside
            ``[0, max_weeks)`` has no grid row and is therefore dropped.

    Returns:
        pandas.DataFrame sorted by (id_student, week) with the columns
        id_student, week, clicks, submit_cnt, avg_score, has_submit,
        avg_score_sofar and clicks_diff1.
    """
    # Load CSV files
    print("Loading OULAD data...")
    student_info = pd.read_csv(f'{data_dir}/studentInfo.csv')
    student_vle = pd.read_csv(f'{data_dir}/studentVle.csv')
    student_assessment = pd.read_csv(f'{data_dir}/studentAssessment.csv')

    print(f"Loaded: Info={student_info.shape}, VLE={student_vle.shape}, Assessment={student_assessment.shape}")
    print(f"VLE columns: {list(student_vle.columns)}")
    print(f"Assessment columns: {list(student_assessment.columns)}")

    # Handle date columns flexibly: prefer 'date', else fall back to the second
    # column. NOTE(review): the positional fallback is a guess about the file
    # layout - confirm it is still wanted.
    vle_date_col = 'date' if 'date' in student_vle.columns else student_vle.columns[1]
    student_vle['week'] = student_vle[vle_date_col] // 7

    # Handle assessment dates
    if 'date_submitted' in student_assessment.columns:
        student_assessment['week'] = student_assessment['date_submitted'] // 7
    elif 'date' in student_assessment.columns:
        student_assessment['week'] = student_assessment['date'] // 7
    else:
        student_assessment['week'] = 0
        print("Warning: No date column found in assessments")

    # Handle click columns
    click_col = 'sum_click' if 'sum_click' in student_vle.columns else 'clicks'

    # Aggregate clicks per week
    clicks_df = student_vle.groupby(['id_student', 'week'])[click_col].sum().reset_index()
    clicks_df.columns = ['id_student', 'week', 'clicks']

    # Handle assessment scores
    if 'score' in student_assessment.columns:
        raw_score = student_assessment['score']
        if not pd.api.types.is_numeric_dtype(raw_score):
            # A text column may hold numbers stored as strings (possibly mixed
            # with placeholders) or outcome labels. Parse numbers first, then
            # fall back to the label map; anything still unknown becomes 0.
            score_map = {'Pass': 70, 'Fail': 30, 'Distinction': 85, 'Withdrawn': 0}
            numeric_score = pd.to_numeric(raw_score, errors='coerce')
            label_score = raw_score.map(score_map)
            student_assessment['score'] = numeric_score.fillna(label_score).fillna(0)

        submit_df = student_assessment.groupby(['id_student', 'week'])['score'].agg(['count', 'mean']).reset_index()
        submit_df.columns = ['id_student', 'week', 'submit_cnt', 'avg_score']
    else:
        submit_df = pd.DataFrame(columns=['id_student', 'week', 'submit_cnt', 'avg_score'])

    # Create complete time grid for all students (student-major, week-minor),
    # built in one vectorized step instead of one Python dict per row.
    all_students = student_info['id_student'].unique()
    df = pd.DataFrame({
        'id_student': np.repeat(all_students, max_weeks),
        'week': np.tile(np.arange(max_weeks, dtype=np.int64), len(all_students)),
    })

    # Merge features
    df = df.merge(clicks_df, on=['id_student', 'week'], how='left')
    if not submit_df.empty:
        df = df.merge(submit_df, on=['id_student', 'week'], how='left')
    else:
        df['submit_cnt'] = 0
        df['avg_score'] = 0

    # Fill missing values
    df = df.fillna(0)
    df = df.sort_values(['id_student', 'week']).reset_index(drop=True)

    # Create derived features
    df['has_submit'] = (df['submit_cnt'] > 0).astype(int)
    # Running mean of the weekly avg_score within each student. Dropping the
    # group level lets the assignment align on the row index instead of
    # relying on row order.
    # NOTE(review): weeks without a submission contribute a 0 to this mean
    # because avg_score was zero-filled above - confirm this is intended.
    df['avg_score_sofar'] = (
        df.groupby('id_student')['avg_score']
        .expanding()
        .mean()
        .reset_index(level=0, drop=True)
    )
    df['clicks_diff1'] = df.groupby('id_student')['clicks'].diff().fillna(0)

    print(f"Created weekly features: {df.shape}")
    print(f"Average clicks per week: {df['clicks'].mean():.1f}")

    return df

# Build the weekly feature table once; the sequence-creation cell reads it as `weekly_df`.
weekly_df = load_and_process_oulad()


Cell 4: Create Sequences

In [None]:
def create_sequences(df, input_weeks=4, output_weeks=2):
    """Create sliding-window input/output sequences for every student.

    For each student the rows are ordered by week and every window of
    ``input_weeks + output_weeks`` consecutive rows yields one sample: the
    first ``input_weeks`` rows of the four feature columns are the input, the
    'clicks' values of the following ``output_weeks`` rows are the target.
    Students with fewer rows than one full window are skipped.

    Args:
        df: Frame with the columns id_student, week, clicks, has_submit,
            avg_score_sofar and clicks_diff1.
        input_weeks: Number of rows in each input window.
        output_weeks: Number of rows in each target window.

    Returns:
        tuple ``(X, y, student_ids)``: float32 arrays of shape
        (n, input_weeks, 4) and (n, output_weeks, 1), plus an array holding
        the student id of each sample.
    """
    feature_cols = ['clicks', 'has_submit', 'avg_score_sofar', 'clicks_diff1']
    window = input_weeks + output_weeks

    X_list = []
    y_list = []
    student_list = []

    print("Creating sequences...")
    for student_id, group in tqdm(df.groupby('id_student')):
        group = group.sort_values('week').reset_index(drop=True)

        n_rows = len(group)
        if n_rows < window:
            continue

        # Convert to NumPy once per student; slicing arrays in the inner loop
        # avoids two DataFrame.iloc calls per window.
        features = group[feature_cols].values
        targets = group[['clicks']].values

        for start in range(n_rows - window + 1):
            split = start + input_weeks
            X_list.append(features[start:split])
            y_list.append(targets[split:start + window])
            student_list.append(student_id)

    X = np.array(X_list, dtype=np.float32)
    y = np.array(y_list, dtype=np.float32)
    student_ids = np.array(student_list)

    print(f"Created {len(X)} sequences")
    print(f"Input shape: {X.shape}")
    print(f"Output shape: {y.shape}")

    return X, y, student_ids

def split_and_normalize(X, y, student_ids):
    """Split the sequences into train/val/test by student and standardize them.

    Students, not individual sequences, are partitioned: 20% of the students
    go to the test set and 10% of the remainder to the validation set. The
    standardization statistics come from the training portion only: a mean
    and std per feature for X, and a single mean and std for y.

    Args:
        X: Input windows, indexed by sample on the first axis.
        y: Target windows, aligned with X on the first axis.
        student_ids: Student id of each sample, aligned with X.

    Returns:
        dict with the standardized arrays under 'X_train', 'y_train', 'X_val',
        'y_val', 'X_test', 'y_test', and 'norm_stats' holding the target's
        training 'y_mean' and 'y_std'.
    """
    print("Splitting data by student ID...")

    # Partition at the student level so that no student contributes windows
    # to more than one subset.
    students = np.unique(student_ids)
    fit_students, test_students = train_test_split(students, test_size=0.2, random_state=42)
    train_students, val_students = train_test_split(fit_students, test_size=0.1, random_state=42)

    def _rows_of(group):
        # Select the samples whose student belongs to `group`.
        selector = np.isin(student_ids, group)
        return X[selector], y[selector]

    X_train, y_train = _rows_of(train_students)
    X_val, y_val = _rows_of(val_students)
    X_test, y_test = _rows_of(test_students)

    print(f"Train: {len(X_train)}, Val: {len(X_val)}, Test: {len(X_test)}")

    # Statistics are taken from the training subset only; the small constant
    # keeps the division defined when a std is zero.
    X_mean = X_train.mean(axis=(0, 1), keepdims=True)
    X_std = X_train.std(axis=(0, 1), keepdims=True) + 1e-8
    y_mean = y_train.mean()
    y_std = y_train.std() + 1e-8

    def _scale_inputs(arr):
        return (arr - X_mean) / X_std

    def _scale_targets(arr):
        return (arr - y_mean) / y_std

    result = {
        'X_train': _scale_inputs(X_train), 'y_train': _scale_targets(y_train),
        'X_val': _scale_inputs(X_val), 'y_val': _scale_targets(y_val),
        'X_test': _scale_inputs(X_test), 'y_test': _scale_targets(y_test),
        'norm_stats': {'y_mean': y_mean, 'y_std': y_std},
    }

    print("Data normalized successfully")

    return result

# Create sequences from the weekly table, then split them by student and
# standardize; `data` is the dict the training and evaluation cells read.
X, y, student_ids = create_sequences(weekly_df)
data = split_and_normalize(X, y, student_ids)


Cell 5: Define Models

In [None]:
class LearningBehaviorDataset(Dataset):
    """Dataset of (input window, target window) pairs held as float tensors."""

    def __init__(self, X, y):
        # Convert both array-likes to 32-bit float tensors once, up front.
        self.X, self.y = torch.FloatTensor(X), torch.FloatTensor(y)

    def __len__(self):
        # Number of samples = size of the first axis of the inputs.
        return self.X.shape[0]

    def __getitem__(self, idx):
        features = self.X[idx]
        target = self.y[idx]
        return features, target

class Seq2SeqLSTM(nn.Module):
    """LSTM encoder-decoder that generates a sequence one step at a time.

    The encoder's final (hidden, cell) state initializes the decoder. The
    decoder starts from an all-zero input and is then fed its own projected
    output at every following step.
    """

    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.output_size = output_size

        self.encoder = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.decoder = nn.LSTM(output_size, hidden_size, batch_first=True)
        self.output_proj = nn.Linear(hidden_size, output_size)

    def forward(self, x, tgt_len):
        """Encode ``x`` and decode ``tgt_len`` steps.

        Returns a tensor of shape (batch, tgt_len, output_size).
        """
        # Encode: only the final recurrent state is kept.
        _, state = self.encoder(x)

        # Decode: first input is zeros, later inputs are the previous output.
        step_input = torch.zeros(x.size(0), 1, self.output_size, device=x.device)
        steps = []
        for _ in range(tgt_len):
            step_hidden, state = self.decoder(step_input, state)
            step_input = self.output_proj(step_hidden)
            steps.append(step_input)

        return torch.cat(steps, dim=1)

class Seq2SeqVAE(nn.Module):
    """Sequence VAE: an LSTM encoder produces a Gaussian latent, an LSTM decoder unrolls it.

    The decoder starts from the default (None) recurrent state, so the latent
    vector, concatenated to the decoder input at every step, is the only
    information passed from encoder to decoder.
    """

    def __init__(self, input_size, hidden_size, latent_dim, output_size):
        super().__init__()
        self.hidden_size = hidden_size
        self.latent_dim = latent_dim
        self.output_size = output_size

        self.encoder = nn.LSTM(input_size, hidden_size, batch_first=True)
        self.mu_proj = nn.Linear(hidden_size, latent_dim)
        self.logvar_proj = nn.Linear(hidden_size, latent_dim)

        self.decoder = nn.LSTM(output_size + latent_dim, hidden_size, batch_first=True)
        self.output_proj = nn.Linear(hidden_size, output_size)

    def encode(self, x):
        """Return (mu, logvar) computed from the encoder's last-layer final hidden state."""
        _, (final_hidden, _) = self.encoder(x)
        summary = final_hidden[-1]
        return self.mu_proj(summary), self.logvar_proj(summary)

    def reparameterize(self, mu, logvar):
        """Draw z = mu + eps * std with eps ~ N(0, I) and std = exp(logvar / 2)."""
        std = torch.exp(0.5 * logvar)
        noise = torch.randn_like(std)
        return mu + noise * std

    def decode(self, z, tgt_len):
        """Unroll the decoder for ``tgt_len`` steps conditioned on ``z``.

        Returns a tensor of shape (batch, tgt_len, output_size).
        """
        latent = z.unsqueeze(1)  # add a length-1 time axis for concatenation
        step_input = torch.zeros(z.size(0), 1, self.output_size, device=z.device)

        state = None  # let the LSTM use its default initial state
        steps = []
        for _ in range(tgt_len):
            conditioned = torch.cat([step_input, latent], dim=-1)
            step_hidden, state = self.decoder(conditioned, state)
            step_input = self.output_proj(step_hidden)
            steps.append(step_input)

        return torch.cat(steps, dim=1)

    def forward(self, x, tgt_len):
        """Encode, sample one latent and decode; returns (recon, mu, logvar)."""
        mu, logvar = self.encode(x)
        recon = self.decode(self.reparameterize(mu, logvar), tgt_len)
        return recon, mu, logvar

# Confirmation for the notebook log that this cell's class definitions ran.
print("Models defined successfully!")


Cell 6: Train Models

In [None]:
# Create data loaders: wrap each split in a Dataset; only the training loader
# shuffles. NOTE(review): val_loader is built here but is not used by the
# training loops in this cell.
train_dataset = LearningBehaviorDataset(data['X_train'], data['y_train'])
val_dataset = LearningBehaviorDataset(data['X_val'], data['y_val'])
test_dataset = LearningBehaviorDataset(data['X_test'], data['y_test'])

train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=128, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)

# The counts printed here are numbers of batches, not numbers of samples.
print(f"Data loaders ready: Train={len(train_loader)}, Val={len(val_loader)}, Test={len(test_loader)}")

# Train LSTM: 20 epochs of Adam (lr=1e-3) minimizing MSE on the normalized targets.
print("\nTraining LSTM...")
lstm_model = Seq2SeqLSTM(input_size=4, hidden_size=64, output_size=1).to(device)
optimizer = optim.Adam(lstm_model.parameters(), lr=1e-3)

for epoch in range(20):
    lstm_model.train()
    total_loss = 0

    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        outputs = lstm_model(X_batch, 2)  # 2 weeks output
        loss = F.mse_loss(outputs, y_batch)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean batch loss every fifth epoch.
    if (epoch + 1) % 5 == 0:
        print(f"LSTM Epoch {epoch+1}/20: Loss = {total_loss/len(train_loader):.6f}")

print("LSTM training completed!")

# Train VAE: same schedule as the LSTM; `optimizer` is rebound to a fresh Adam
# instance over the VAE's parameters.
print("\nTraining VAE...")
vae_model = Seq2SeqVAE(input_size=4, hidden_size=64, latent_dim=16, output_size=1).to(device)
optimizer = optim.Adam(vae_model.parameters(), lr=1e-3)

for epoch in range(20):
    vae_model.train()
    total_loss = 0

    for X_batch, y_batch in train_loader:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()
        recon, mu, logvar = vae_model(X_batch, 2)

        # VAE loss = MSE + KL divergence
        # The KL term is summed over batch and latent dimensions and divided by
        # the number of target elements (batch * steps * outputs).
        # NOTE(review): confirm this normalization is the intended KL weight.
        mse_loss = F.mse_loss(recon, y_batch)
        kl_loss = -0.5 * torch.sum(1 + logvar - mu.pow(2) - logvar.exp()) / y_batch.numel()
        loss = mse_loss + kl_loss

        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    # Report the mean batch loss every fifth epoch.
    if (epoch + 1) % 5 == 0:
        print(f"VAE Epoch {epoch+1}/20: Loss = {total_loss/len(train_loader):.6f}")

print("VAE training completed!")


Cell 7: Evaluate and Compare

In [None]:
"""Cell 7: Evaluate and Compare with Detailed Analysis"""

def evaluate_models_detailed(n_samples=20, horizon=2):
    """Evaluate the trained LSTM and VAE on the test set and print a report.

    Reads the notebook globals ``lstm_model``, ``vae_model``, ``test_loader``,
    ``device`` and ``data``. For every test batch it collects the LSTM
    prediction, one VAE forward pass and ``n_samples`` additional VAE draws
    from the encoded latent distribution. It then prints the five sequences
    where the best VAE draw beats the LSTM by the largest margin, a histogram
    of that margin, and a summary with the MSEs rescaled by ``y_std ** 2``.

    Args:
        n_samples: Number of latent draws per test sequence used for the
            best-of-N, diversity and coverage statistics.
        horizon: Number of future steps requested from both models.

    Returns:
        dict with 'lstm_mse', 'vae_mse', 'vae_best_of_n_mse', 'vae_diversity'
        and 'vae_coverage' (all in normalized target units), plus the
        DataFrames 'top5_df' and 'bucket_df'.
    """
    lstm_model.eval()
    vae_model.eval()

    lstm_predictions = []
    vae_predictions = []
    vae_sample_batches = []
    targets = []

    print("Evaluating models...")

    with torch.no_grad():
        for X_batch, y_batch in test_loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            lstm_pred = lstm_model(X_batch, horizon)
            lstm_predictions.append(lstm_pred.cpu().numpy())

            # A single forward pass; it samples one latent internally.
            vae_pred, _, _ = vae_model(X_batch, horizon)
            vae_predictions.append(vae_pred.cpu().numpy())

            # Independent draws from the same latent distribution.
            mu, logvar = vae_model.encode(X_batch)
            batch_samples = []
            for _ in range(n_samples):
                z = vae_model.reparameterize(mu, logvar)
                batch_samples.append(vae_model.decode(z, horizon).cpu().numpy())
            vae_sample_batches.append(np.array(batch_samples))

            targets.append(y_batch.cpu().numpy())

    lstm_preds = np.concatenate(lstm_predictions)
    vae_preds = np.concatenate(vae_predictions)
    # Axis 0 indexes the draw, axis 1 the test sequence.
    vae_samples = np.concatenate(vae_sample_batches, axis=1)
    all_targets = np.concatenate(targets)

    # Calculate per-sample MSE
    lstm_mse_per_sample = np.mean((lstm_preds - all_targets) ** 2, axis=(1, 2))
    vae_mse_per_sample = np.mean((vae_preds - all_targets) ** 2, axis=(1, 2))

    # VAE Best-of-N per sample: MSE of every draw against its target, then the
    # minimum over draws (and which draw achieved it).
    reduce_axes = tuple(range(2, vae_samples.ndim))
    draw_mse = np.mean((vae_samples - all_targets[np.newaxis]) ** 2, axis=reduce_axes)
    best_draw_idx = draw_mse.argmin(axis=0)
    vae_best_mse_per_sample = draw_mse.min(axis=0)

    # Overall metrics
    lstm_mse = lstm_mse_per_sample.mean()
    vae_mse = vae_mse_per_sample.mean()
    best_of_n_mse = vae_best_mse_per_sample.mean()

    # Spread of the draws, averaged over sequences and steps.
    diversity = np.std(vae_samples, axis=0).mean()

    # Fraction of target values inside the central 95% band of the draws.
    lower = np.percentile(vae_samples, 2.5, axis=0)
    upper = np.percentile(vae_samples, 97.5, axis=0)
    coverage = np.mean((all_targets >= lower) & (all_targets <= upper))

    # ===== Top-5 cases where the best VAE draw beats the LSTM the most =====
    improvement = lstm_mse_per_sample - vae_best_mse_per_sample
    top5_indices = np.argsort(-improvement)[:5]

    print("\n" + "="*80)
    print("=== Top-5 Regressed (VAE best >> LSTM) ===")
    print("="*80)

    top5_df = pd.DataFrame({
        'idx': top5_indices,
        'LSTM_MSE': lstm_mse_per_sample[top5_indices],
        'VAE_best_MSE': vae_best_mse_per_sample[top5_indices],
        'Δ(LSTM-VAE)': improvement[top5_indices],
        'y_true': [all_targets[i].flatten().tolist() for i in top5_indices],
        'y_LSTM': [lstm_preds[i].flatten().tolist() for i in top5_indices],
        # The draw that produced VAE_best_MSE, not the mean of all draws.
        'y_VAE_best': [vae_samples[best_draw_idx[i], i].flatten().tolist() for i in top5_indices],
        'Diversity_std': [np.std(vae_samples[:, i]) for i in top5_indices]
    })

    print(top5_df.to_string(index=True))

    # ===== Win-rate by Improvement Bucket =====
    print("\n" + "="*80)
    print("=== Win-rate by Improvement bucket (Δ = LSTM MSE - VAE best MSE) ===")
    print("="*80)

    # NOTE(review): `improvement` is in normalized target units while the
    # bucket edges reach 1000 - confirm whether the edges were meant for the
    # original-scale MSE.
    buckets = [
        ('VAE差>1000', lambda x: x < -1000),
        ('VAE差200~1000', lambda x: (x >= -1000) & (x < -200)),
        ('VAE差50~200', lambda x: (x >= -200) & (x < -50)),
        ('VAE差10~50', lambda x: (x >= -50) & (x < -10)),
        ('VAE略差<10', lambda x: (x >= -10) & (x < 0)),
        ('平手±10', lambda x: (x >= 0) & (x < 10)),
        ('VAE略勝10~50', lambda x: (x >= 10) & (x < 50)),
        ('VAE勝50~200', lambda x: (x >= 50) & (x < 200)),
        ('VAE大勝200~1000', lambda x: (x >= 200) & (x < 1000)),
        ('VAE>>大勝>1000', lambda x: x >= 1000)
    ]

    bucket_stats = []
    for bucket_name, condition in buckets:
        mask = condition(improvement)
        count = np.sum(mask)
        ratio = count / len(improvement) if len(improvement) > 0 else 0
        bucket_stats.append({
            'Improvement bucket': bucket_name,
            'count': count,
            'ratio': f"{ratio:.4f}"
        })

    bucket_df = pd.DataFrame(bucket_stats)
    print(bucket_df.to_string(index=True))

    # ===== Final summary =====
    print("\n" + "="*80)
    print("=========== 評估結果 (原始尺度) ===========")
    print("="*80)

    # Denormalize MSE to original scale: targets were divided by y_std, so
    # squared errors scale by y_std ** 2.
    y_std = data['norm_stats']['y_std']
    lstm_mse_original = lstm_mse * (y_std ** 2)
    vae_mse_original = vae_mse * (y_std ** 2)
    best_of_n_original = best_of_n_mse * (y_std ** 2)

    # Calculate tau (median MSE for LSTM)
    tau = np.median(lstm_mse_per_sample) * (y_std ** 2)

    print(f"LSTM    MSE (整體)        : {lstm_mse_original:.4f}")
    print(f"VAE     MSE (整體)        : {vae_mse_original:.4f}")
    # Report the best-of-N figure on the line that is labelled best-of-N.
    print(f"VAE     Best-of-N MSE     : {best_of_n_original:.4f}     (N={n_samples})")
    # NOTE(review): diversity is printed in normalized units although the
    # header says original scale - confirm which is wanted.
    print(f"VAE     Diversity (std)   : {diversity:.4f}")
    # NOTE(review): the message attaches tau to the coverage figure, but
    # coverage above comes from the 2.5-97.5 percentile band of the draws and
    # does not use tau - confirm the intended definition.
    print(f"VAE     Coverage (比例)    : {coverage:.4f}     (門檻 tau = LSTM 每列 MSE 中位數 = {tau:.4f})")
    print("="*80)

    return {
        'lstm_mse': lstm_mse,
        'vae_mse': vae_mse,
        'vae_best_of_n_mse': best_of_n_mse,
        'vae_diversity': diversity,
        'vae_coverage': coverage,
        'top5_df': top5_df,
        'bucket_df': bucket_df
    }

# Run the evaluation once; the visualization cell reads the metrics from `results`.
results = evaluate_models_detailed()

Cell 8: Create Visualizations

In [None]:
# Create comparison plots: a 2x2 grid with one bar chart per metric.
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# Grid position -> (title, y-axis label, bar labels, bar heights, bar colours)
panel_specs = {
    # MSE Comparison
    (0, 0): ('MSE Comparison', 'MSE',
             ['LSTM', 'VAE'],
             [results['lstm_mse'], results['vae_mse']],
             ['blue', 'red']),
    # VAE Best-of-N vs Mean
    (0, 1): ('VAE: Mean vs Best-of-N', 'MSE',
             ['VAE Mean', 'VAE Best-of-N'],
             [results['vae_mse'], results['vae_best_of_n_mse']],
             ['red', 'green']),
    # VAE Diversity
    (1, 0): ('VAE Diversity', 'Standard Deviation',
             ['VAE'], [results['vae_diversity']], 'orange'),
    # VAE Coverage
    (1, 1): ('VAE Coverage (95% CI)', 'Proportion',
             ['VAE'], [results['vae_coverage']], 'purple'),
}

for (row, col), (title, ylabel, labels, heights, colors) in panel_specs.items():
    panel = axes[row, col]
    panel.bar(labels, heights, color=colors)
    panel.set_title(title)
    panel.set_ylabel(ylabel)

plt.tight_layout()
plt.show()

# Summary table: the same metrics as text, one column per model.
vae_formats = [
    ('vae_mse', '.6f'),
    ('vae_best_of_n_mse', '.6f'),
    ('vae_diversity', '.6f'),
    ('vae_coverage', '.3f'),
]
summary_data = {
    'Metric': ['MSE', 'Best-of-N MSE', 'Diversity', 'Coverage'],
    'LSTM': [format(results['lstm_mse'], '.6f'), 'N/A', '0 (deterministic)', 'N/A'],
    'VAE': [format(results[key], spec) for key, spec in vae_formats],
}

summary_df = pd.DataFrame(summary_data)
print("\nMODEL COMPARISON TABLE:")
print(summary_df.to_string(index=False))

print("\n🎉 Experiment completed successfully!")
print("📊 Use the results above for your assignment report.")
