In [None]:
# 🔧 Environment Setup
import os
import sys
from pathlib import Path

# Detect the runtime: a successful `import google.colab` is taken as the
# signal that the notebook runs on Colab. All Colab-specific steps stay in
# the same `try`, so an ImportError raised by any of them selects the local
# configuration instead.
try:
    import google.colab
    IN_COLAB = True
    print("🔍 Running in Google Colab")

    # Mount Drive so the project copy stored there becomes reachable.
    from google.colab import drive
    drive.mount('/content/drive')

    # Project copy on Drive, and the local working copy used for the run.
    DRIVE_PROJECT_PATH, LOCAL_PROJECT_PATH = (
        "/content/drive/MyDrive/abr_project",
        "/content/abr_project",
    )

except ImportError:
    # Outside Colab the project is the current directory and no Drive exists.
    IN_COLAB, DRIVE_PROJECT_PATH = False, None
    print("🔍 Running locally")
    LOCAL_PROJECT_PATH = os.getcwd()

print("✅ Environment detected: " + ("Colab" if IN_COLAB else "Local"))


In [None]:
# 📁 Project Setup - Copy from Drive to Local
import shutil

if IN_COLAB:
    # train.py is used as the marker that the project was uploaded to Drive.
    if not os.path.exists(f"{DRIVE_PROJECT_PATH}/train.py"):
        print("❌ Project not found in Google Drive!")
        print(f"💡 Please upload your project to: {DRIVE_PROJECT_PATH}")
        print("📋 Required files: train.py, evaluate.py, src/, configs/, data/")
        raise FileNotFoundError("Project files not found in Google Drive")

    print("📁 Project found in Google Drive, copying to local...")
    # copytree with dirs_exist_ok=True merges into an existing target, so
    # re-running this cell refreshes the local copy. `cp -r` would instead
    # nest a second copy inside it, and a failed copy raises here rather than
    # being followed by a success message.
    shutil.copytree(DRIVE_PROJECT_PATH, LOCAL_PROJECT_PATH, dirs_exist_ok=True)
    print(f"✅ Project copied to: {LOCAL_PROJECT_PATH}")

    # train.py / evaluate.py are later launched by relative path, so the
    # working directory must be the project root.
    os.chdir(LOCAL_PROJECT_PATH)

# Put the project root at the front of sys.path in both environments.
# Removing any earlier entry first keeps re-runs from stacking duplicates.
if LOCAL_PROJECT_PATH in sys.path:
    sys.path.remove(LOCAL_PROJECT_PATH)
sys.path.insert(0, LOCAL_PROJECT_PATH)

print(f"📂 Working directory: {os.getcwd()}")
print("✅ Project setup complete!")


In [None]:
# 📦 Install Dependencies
if IN_COLAB:
    print("📦 Installing dependencies for Colab...")
    !pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
    !pip install PyYAML scipy scikit-learn matplotlib seaborn tqdm tensorboard openpyxl
else:
    print("📦 Installing from requirements.txt...")
    if os.path.exists("requirements.txt"):
        !pip install -r requirements.txt

print("✅ Dependencies installed!")


In [None]:
# 🚀 Import Libraries and Setup
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yaml
import json
import subprocess
import time
import re
from datetime import datetime
from IPython.display import clear_output
import warnings
warnings.filterwarnings('ignore')

# Compute device: CUDA when PyTorch reports a usable GPU, CPU otherwise.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print("🚀 Using device: {}".format(device))

# `device` is a CUDA device exactly when a GPU was detected above.
if device.type == 'cuda':
    print("🎮 GPU: {}".format(torch.cuda.get_device_name(0)))
    print("📊 GPU Memory: {:.1f} GB".format(
        torch.cuda.get_device_properties(0).total_memory / 1e9
    ))
    torch.cuda.empty_cache()

print("✅ Libraries imported successfully!")


In [None]:
# 📊 Training Monitor Class
class TrainingMonitor:
    """Collect loss metrics from training log lines and plot them.

    Attributes:
        metrics: dict of lists keyed 'train_loss', 'val_loss' and 'epochs'.
            The loss lists grow by one entry per parsed value; 'epochs' is
            initialised but not populated by this class.
        current_epoch: the last epoch number seen in a log line (0 initially).
        best_val_loss: the smallest validation loss parsed so far (inf initially).
        start_time: time.time() at construction, used for the elapsed display.
    """

    # Decimal number with optional sign and exponent, e.g. 0.25, 3, 1e-3.
    _NUMBER = r'([-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?)'
    _EPOCH_RE = re.compile(r'Epoch (\d+)')
    # Each loss pattern is anchored to its own label. A generic "Loss: "
    # pattern would pick up the first loss on a line that reports both.
    _TRAIN_LOSS_RE = re.compile(r'Train(?:ing)? Loss:\s*' + _NUMBER)
    _VAL_LOSS_RE = re.compile(r'Val(?:idation)? Loss:\s*' + _NUMBER)

    def __init__(self):
        self.metrics = {'train_loss': [], 'val_loss': [], 'epochs': []}
        self.current_epoch = 0
        self.best_val_loss = float('inf')
        self.start_time = time.time()

    def parse_log_line(self, line):
        """Update the epoch and loss metrics from one line of training output.

        Args:
            line: one log line. Non-string input and lines without recognised
                metrics are ignored.
        """
        # Parsing is best-effort: unusable input must never interrupt training.
        if not isinstance(line, str):
            return

        if 'Epoch' in line and '/' in line:
            epoch_match = self._EPOCH_RE.search(line)
            if epoch_match:
                self.current_epoch = int(epoch_match.group(1))

        train_match = self._TRAIN_LOSS_RE.search(line)
        if train_match:
            self.metrics['train_loss'].append(float(train_match.group(1)))

        val_match = self._VAL_LOSS_RE.search(line)
        if val_match:
            val_loss = float(val_match.group(1))
            self.metrics['val_loss'].append(val_loss)
            if val_loss < self.best_val_loss:
                self.best_val_loss = val_loss

    def plot_progress(self):
        """Show the loss curves next to a summary panel.

        Does nothing until at least one training loss has been recorded.
        """
        train_losses = self.metrics['train_loss']
        val_losses = self.metrics['val_loss']
        if not train_losses:
            return

        fig, (loss_ax, stats_ax) = plt.subplots(1, 2, figsize=(12, 4))

        loss_ax.plot(range(1, len(train_losses) + 1), train_losses,
                     'b-', label='Train Loss', linewidth=2)
        if val_losses:
            # The validation series gets its own x-axis: the two lists can
            # differ in length, and plotting one against the other's x-values
            # makes matplotlib raise.
            loss_ax.plot(range(1, len(val_losses) + 1), val_losses,
                         'r-', label='Val Loss', linewidth=2)
        loss_ax.set_title(f'Training Progress - Epoch {self.current_epoch}')
        loss_ax.set_xlabel('Epoch')
        loss_ax.set_ylabel('Loss')
        loss_ax.legend()
        loss_ax.grid(True, alpha=0.3)

        elapsed = time.time() - self.start_time
        hours, remainder = divmod(elapsed, 3600)
        minutes, seconds = divmod(remainder, 60)

        stats_text = (
            "Training Statistics:\n"
            "        \n"
            f"Current Epoch: {self.current_epoch}\n"
            f"Best Val Loss: {self.best_val_loss:.4f}\n"
            f"Training Time: {int(hours):02d}:{int(minutes):02d}:{int(seconds):02d}\n"
            "        "
        )
        stats_ax.text(0.1, 0.5, stats_text, fontsize=12, verticalalignment='center',
                      bbox=dict(boxstyle="round,pad=0.3", facecolor="lightblue", alpha=0.7))
        stats_ax.axis('off')

        fig.tight_layout()
        plt.show()
        # This method is called repeatedly during training; release the figure.
        plt.close(fig)

print("✅ Training monitor ready!")


In [None]:
# 🎯 Training Function using existing train.py
def run_training(model_type='original', epochs=50, batch_size=16, experiment_name=None):
    """Run train.py as a subprocess while streaming and monitoring its output.

    A config file is written to ``outputs_<experiment_name>/config.yaml`` and
    passed to train.py together with the output directory, device, model type,
    epoch count and batch size. Log lines containing selected keywords are
    echoed, and a progress plot is refreshed every 50 non-empty lines once a
    training loss has been parsed.

    Args:
        model_type: value written to the config and passed as ``--model``.
        epochs: number of epochs, passed as ``--epochs``.
        batch_size: batch size, passed as ``--batch-size``.
        experiment_name: suffix of the output directory; defaults to
            ``<model_type>_cvae_<timestamp>``.

    Returns:
        The output directory name on success, or None when the process exits
        with a non-zero code or an exception occurs.
    """
    import shutil
    from collections import deque

    if experiment_name is None:
        experiment_name = f"{model_type}_cvae_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    output_dir = f"outputs_{experiment_name}"

    # Create a simple config for the training
    config = {
        'data': {'sequence_length': 200, 'train_split': 0.7, 'val_split': 0.15, 'test_split': 0.15},
        'model': {'type': model_type, 'static_dim': 4, 'latent_dim': 128, 'hidden_dim': 256},
        'training': {'epochs': epochs, 'batch_size': batch_size, 'output_dir': output_dir}
    }

    # Save config
    os.makedirs(output_dir, exist_ok=True)
    config_path = f"{output_dir}/config.yaml"
    with open(config_path, 'w') as f:
        yaml.dump(config, f)

    # "-u" makes the child interpreter write stdout unbuffered. With a pipe
    # it would otherwise block-buffer, and the live output and plots would lag.
    cmd = [
        sys.executable, "-u", "train.py",
        "--config", config_path,
        "--output-dir", output_dir,
        "--device", str(device),
        "--model", model_type,
        "--epochs", str(epochs),
        "--batch-size", str(batch_size)
    ]

    print(f"🚀 Starting {model_type} CVAE training...")
    print(f"📋 Command: {' '.join(cmd)}")

    monitor = TrainingMonitor()
    echoed_keywords = ('Epoch', 'Loss:', 'Starting', 'Best', 'Saved', 'completed', 'ERROR')
    # Tail of the raw output, shown on failure: tracebacks rarely contain any
    # of the echoed keywords and would otherwise be lost.
    recent_lines = deque(maxlen=20)
    process = None

    try:
        process = subprocess.Popen(
            cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
            universal_newlines=True, bufsize=1
        )

        line_count = 0
        for raw_line in process.stdout:
            line = raw_line.strip()
            if not line:
                continue
            line_count += 1
            recent_lines.append(line)

            # Print important lines
            if any(keyword in line for keyword in echoed_keywords):
                print(line)

            # Parse metrics
            monitor.parse_log_line(line)

            # Update plot every 50 lines
            if line_count % 50 == 0 and monitor.metrics['train_loss']:
                clear_output(wait=True)
                print(f"🔄 Training in progress... (Line {line_count})")
                try:
                    monitor.plot_progress()
                except Exception as plot_error:
                    # The plot is auxiliary; a rendering problem must not
                    # abort an otherwise healthy training run.
                    print(f"⚠️ Could not update progress plot: {plot_error}")

        return_code = process.wait()

        if return_code != 0:
            print(f"❌ Training failed with return code: {return_code}")
            if recent_lines:
                print("📋 Last output lines:")
                for tail_line in recent_lines:
                    print(f"  {tail_line}")
            return None

        print("✅ Training completed successfully!")

        # Copy results to Drive if in Colab
        if IN_COLAB and DRIVE_PROJECT_PATH:
            drive_results_dir = f"{DRIVE_PROJECT_PATH}/results/{experiment_name}"
            try:
                os.makedirs(f"{DRIVE_PROJECT_PATH}/results", exist_ok=True)
                # dirs_exist_ok merges into an existing backup; `cp -r` would
                # nest the directory when the experiment name is reused.
                shutil.copytree(output_dir, drive_results_dir, dirs_exist_ok=True)
                print(f"💾 Results backed up to: {drive_results_dir}")
            except OSError as backup_error:
                # The training itself succeeded, so still return its output.
                print(f"⚠️ Could not back up results to Drive: {backup_error}")

        return output_dir

    except Exception as e:
        print(f"❌ Training error: {e}")
        return None

    finally:
        # Runs on interrupts too: do not leave train.py running after this
        # function has been abandoned.
        if process is not None:
            if process.poll() is None:
                process.terminate()
                try:
                    process.wait(timeout=10)
                except subprocess.TimeoutExpired:
                    process.kill()
                    process.wait()
            if process.stdout is not None:
                process.stdout.close()

print("✅ Training function ready!")


In [None]:
# 🔬 Evaluation Function using existing evaluate.py
def _find_latest_checkpoint():
    """Return a checkpoint path from the newest outputs_* directory, or None.

    Prefers ``best_checkpoint.pth``; otherwise picks the most recently
    modified ``.pth`` file. Prints the reason when nothing is found.
    """
    # Only directories qualify: a stray file named outputs_* cannot be listed.
    output_dirs = [
        d for d in os.listdir('.')
        if d.startswith('outputs_') and os.path.isdir(d)
    ]
    if not output_dirs:
        print("❌ No training outputs found. Please run training first.")
        return None

    latest_output_dir = max(output_dirs, key=os.path.getmtime)

    best_checkpoint = os.path.join(latest_output_dir, "best_checkpoint.pth")
    if os.path.exists(best_checkpoint):
        return best_checkpoint

    checkpoints = [
        os.path.join(latest_output_dir, f)
        for f in os.listdir(latest_output_dir)
        if f.endswith('.pth')
    ]
    if not checkpoints:
        print(f"❌ No model checkpoints found in {latest_output_dir}")
        return None

    # os.listdir returns entries in arbitrary order, so choose by modification
    # time instead of by position in the list.
    return max(checkpoints, key=os.path.getmtime)


def run_evaluation(model_path=None, output_dir=None, timeout=1800):
    """Run evaluate.py as a subprocess and print its captured output.

    Args:
        model_path: checkpoint to evaluate. When None, a checkpoint is taken
            from the most recently modified outputs_* directory in the
            working directory.
        output_dir: directory passed as ``--output-dir``; defaults to
            ``evaluation_<timestamp>``.
        timeout: maximum run time in seconds (default 1800, i.e. 30 minutes);
            None disables the limit.

    Returns:
        The output directory on success, or None when no checkpoint is
        available, the process fails, times out, or an exception occurs.
    """
    import shutil

    if model_path is None:
        model_path = _find_latest_checkpoint()
        if model_path is None:
            return None
    elif not os.path.exists(model_path):
        # Fail fast with a clear message instead of launching a doomed run.
        print(f"❌ Model checkpoint not found: {model_path}")
        return None

    if output_dir is None:
        output_dir = f"evaluation_{datetime.now().strftime('%Y%m%d_%H%M%S')}"

    # Prepare evaluation command
    cmd = [
        sys.executable, "evaluate.py",
        "--model", model_path,
        "--output-dir", output_dir,
        "--comprehensive"
    ]

    print(f"🔬 Starting evaluation...")
    print(f"📋 Model: {model_path}")
    print(f"📋 Command: {' '.join(cmd)}")

    try:
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=timeout)

        if result.stdout:
            print("📋 Evaluation Output:")
            print(result.stdout)

        if result.stderr:
            print("⚠️ Evaluation Warnings:")
            print(result.stderr)

        if result.returncode != 0:
            print(f"❌ Evaluation failed with return code: {result.returncode}")
            return None

        print("✅ Evaluation completed successfully!")

        # Copy results to Drive if in Colab
        if IN_COLAB and DRIVE_PROJECT_PATH:
            drive_eval_dir = f"{DRIVE_PROJECT_PATH}/evaluations/{output_dir}"
            try:
                os.makedirs(f"{DRIVE_PROJECT_PATH}/evaluations", exist_ok=True)
                # dirs_exist_ok merges into an existing backup; `cp -r` would
                # nest the directory when the same output name is reused.
                shutil.copytree(output_dir, drive_eval_dir, dirs_exist_ok=True)
                print(f"💾 Evaluation results backed up to: {drive_eval_dir}")
            except OSError as backup_error:
                # The evaluation itself succeeded, so still return its output.
                print(f"⚠️ Could not back up evaluation to Drive: {backup_error}")

        return output_dir

    except subprocess.TimeoutExpired:
        print(f"❌ Evaluation timed out after {timeout / 60:.0f} minutes")
        return None
    except Exception as e:
        print(f"❌ Evaluation error: {e}")
        return None

print("✅ Evaluation function ready!")


In [None]:
# 🚀 Quick Start Functions
def quick_train_original(epochs=50):
    """Train the original CVAE under the fixed experiment name 'original_quick'.

    Args:
        epochs: number of training epochs.

    Returns:
        Whatever run_training returns: the output directory, or None on failure.
    """
    training_kwargs = {
        'model_type': 'original',
        'epochs': epochs,
        'experiment_name': 'original_quick',
    }
    return run_training(**training_kwargs)

def quick_train_advanced(epochs=100):
    """Train the advanced CVAE under the fixed experiment name 'advanced_quick'.

    Args:
        epochs: number of training epochs.

    Returns:
        Whatever run_training returns: the output directory, or None on failure.
    """
    training_kwargs = {
        'model_type': 'advanced',
        'epochs': epochs,
        'experiment_name': 'advanced_quick',
    }
    return run_training(**training_kwargs)

def quick_evaluate():
    """Evaluate the most recent model by calling run_evaluation with defaults.

    Returns:
        Whatever run_evaluation returns: the evaluation output directory,
        or None on failure.
    """
    evaluation_dir = run_evaluation()
    return evaluation_dir

def compare_models():
    """Train the original and the advanced model in turn, evaluating each.

    A model is evaluated only when its training returned an output directory;
    the checkpoint evaluated is ``<output_dir>/best_checkpoint.pth``.

    Returns:
        A tuple ``(original_output, advanced_output)`` holding the two
        training results (each an output directory or None).
    """
    print("🔬 Starting model comparison...")

    # (display label, model type, epochs, experiment name, evaluation directory)
    comparison_runs = (
        ("Original", 'original', 30, 'comparison_original', "eval_original"),
        ("Advanced", 'advanced', 50, 'comparison_advanced', "eval_advanced"),
    )

    training_outputs = []
    for label, model_type, n_epochs, experiment, eval_dir in comparison_runs:
        print(f"\n🚀 Training {label} CVAE ({n_epochs} epochs)...")
        trained_dir = run_training(model_type, epochs=n_epochs, experiment_name=experiment)

        if trained_dir:
            print(f"\n🔬 Evaluating {label} CVAE...")
            run_evaluation(f"{trained_dir}/best_checkpoint.pth", eval_dir)

        training_outputs.append(trained_dir)

    print("\n📊 Model comparison complete!")
    if IN_COLAB:
        print(f"📁 Check your Google Drive at: {DRIVE_PROJECT_PATH}/results/")

    original_output, advanced_output = training_outputs
    return original_output, advanced_output

def list_results():
    """Print the training and evaluation results found locally and on Drive.

    Local entries are read from the current working directory. The Drive
    sections are printed only when running in Colab with a Drive project
    path. A section without entries is omitted.
    """
    def _print_section(title, names):
        # Print a titled, sorted bullet list; skip the section when empty.
        if names:
            print(title)
            for name in sorted(names):
                print(f"  - {name}")

    print("📁 Local Results:")
    local_entries = os.listdir('.')

    _print_section(
        "🚀 Training Results:",
        [d for d in local_entries if d.startswith('outputs_')],
    )
    # run_evaluation() defaults to evaluation_<timestamp>, while
    # compare_models() writes to eval_original / eval_advanced; match both so
    # comparison results are listed too.
    _print_section(
        "🔬 Evaluation Results:",
        [d for d in local_entries if d.startswith(('evaluation_', 'eval_'))],
    )

    # Drive results (if in Colab)
    if IN_COLAB and DRIVE_PROJECT_PATH:
        print(f"\n📁 Google Drive Results: {DRIVE_PROJECT_PATH}/")
        drive_sections = (
            ("results", "🚀 Drive Training Results:"),
            ("evaluations", "🔬 Drive Evaluation Results:"),
        )
        for subdir, title in drive_sections:
            drive_dir = f"{DRIVE_PROJECT_PATH}/{subdir}"
            if os.path.exists(drive_dir):
                _print_section(title, os.listdir(drive_dir))

# Announce the helpers defined in this cell and show a minimal usage example.
print("\n".join((
    "✅ Quick start functions ready!",
    "\n🚀 Available functions:",
    "  - quick_train_original(epochs=50)",
    "  - quick_train_advanced(epochs=100)",
    "  - quick_evaluate()",
    "  - compare_models()",
    "  - list_results()",
    "\n💡 Example usage:",
    "  quick_train_original()  # Train original model",
    "  quick_evaluate()        # Evaluate latest model",
)))


In [None]:
# 🚀 START TRAINING - Run this cell to begin!

# Choose one of these options: leave exactly one call uncommented.
# The quick_train_* helpers return the training output directory, or None when
# training fails; compare_models() returns a pair of such results.

# Option 1: Train original CVAE model (recommended for first run)
quick_train_original(epochs=30)

# Option 2: Train advanced CVAE model (more complex, takes longer)
# quick_train_advanced(epochs=50)

# Option 3: Compare both models (takes longest but most comprehensive)
# compare_models()


In [None]:
# 🔬 EVALUATE MODEL - Run this after training completes

# Evaluate the most recent trained model. With no arguments, the evaluation
# looks in the most recently modified outputs_* directory of the working
# directory and prefers its best_checkpoint.pth.
quick_evaluate()
