# AlgoSpace MARL Training Master Notebook for Google Colab Pro

This notebook implements the complete Multi-Agent Reinforcement Learning (MARL) training pipeline optimized for Google Colab Pro's 24-hour GPU sessions.

## Key Features:
- Automatic GPU setup and verification
- Google Drive integration for data and checkpoints
- Session management with automatic recovery
- Memory optimization for long training runs
- Weights & Biases integration for monitoring
- Production model export

## Requirements:
- Google Colab Pro subscription
- Google Drive with sufficient storage (>50GB)
- W&B account (optional but recommended)

## 1. Environment Setup and Dependencies

In [None]:
# Detect whether the notebook is executing inside Google Colab
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    print("Warning: Not running in Google Colab. Some features may not work.")
else:
    IN_COLAB = True

# Report the accelerator and pick the device used by the rest of the notebook
import torch

if not torch.cuda.is_available():
    print("❌ No GPU available. Training will be slow.")
    device = torch.device('cpu')
else:
    gpu_name = torch.cuda.get_device_name(0)
    # total_memory is reported in bytes; show it in (decimal) gigabytes
    gpu_memory = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"✅ GPU Available: {gpu_name}")
    print(f"💾 GPU Memory: {gpu_memory:.2f} GB")
    device = torch.device('cuda')

In [None]:
# Install required packages
# All installs run quietly (-q). No versions are pinned, so a later re-run may
# resolve to different releases.
# NOTE(review): torch is already imported by the GPU check cell; if this install
# changes the torch version a runtime restart is presumably needed — confirm.

# PyTorch stack, fetched from the cu118 wheel index
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
# Numerics and plotting
!pip install -q numpy pandas matplotlib seaborn
# HDF5 / YAML handling and progress bars
!pip install -q h5py pyyaml tqdm
# Experiment tracking
!pip install -q wandb tensorboard mlflow
# Hyper-parameter search and classical ML utilities
!pip install -q optuna scikit-learn
# GPU / system resource inspection
!pip install -q gputil psutil

print("✅ Dependencies installed")

In [None]:
# Mount Google Drive (or fall back to a local directory) and create the
# project folder layout used by the rest of the notebook.
import os

if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

    # Set up project paths
    DRIVE_BASE = "/content/drive/MyDrive/AlgoSpace"
else:
    # Local stand-in so the notebook can still run outside Colab
    DRIVE_BASE = "./drive_simulation"

# The folders are created from Python rather than with `!mkdir -p {DRIVE_BASE}/{a,b,...}`:
# IPython tries to evaluate every `{...}` group in a `!` command as a Python
# expression, and the brace list is not one, which can leave the whole command
# unexpanded. Doing it here also gives the non-Colab branch the same layout,
# which later cells rely on when writing to `results/`.
for subdir in ("data", "checkpoints", "models", "results", "logs"):
    os.makedirs(os.path.join(DRIVE_BASE, subdir), exist_ok=True)

if IN_COLAB:
    print(f"✅ Google Drive mounted at {DRIVE_BASE}")
else:
    print(f"ℹ️ Using local project directory at {DRIVE_BASE}")

In [None]:
# Clone AlgoSpace repository
import os
import sys

REPO_PATH = "/content/AlgoSpace"
if not os.path.exists(REPO_PATH):
    !git clone https://github.com/QuantNova/AlgoSpace.git {REPO_PATH}
    print("✅ Repository cloned")
else:
    # Pull latest changes
    !cd {REPO_PATH} && git pull
    print("✅ Repository updated")

# Add to Python path
sys.path.insert(0, REPO_PATH)
sys.path.insert(0, os.path.join(REPO_PATH, 'src'))
sys.path.insert(0, os.path.join(REPO_PATH, 'notebooks'))

## 2. Load Colab Utilities

In [None]:
# Bring in the Colab helper modules shipped with the repository
from notebooks.utils.colab_setup import ColabSetup, SessionMonitor, setup_colab_training
from notebooks.utils.drive_manager import DriveManager, DataStreamer
from notebooks.utils.checkpoint_manager import CheckpointManager, CheckpointScheduler

# Build the helper objects shared by the remaining cells
setup = ColabSetup("AlgoSpace")
drive_manager = DriveManager(DRIVE_BASE)
checkpoint_manager = CheckpointManager(drive_manager)
session_monitor = SessionMonitor(max_runtime_hours=23.5)  # 30 min buffer

print("✅ Utilities loaded")
print("\n📊 System Information:")
system_info = setup.get_system_info()
for section, payload in system_info.items():
    # Scalar entries go on a single line; dict entries get an indented listing
    if not isinstance(payload, dict):
        print(f"{section}: {payload}")
        continue
    print(f"\n{section}:")
    for field, field_value in payload.items():
        print(f"  {field}: {field_value}")

In [None]:
# Activate keep-alive to prevent session timeout
# Only attempted inside Colab; outside Colab the call is skipped entirely.
if IN_COLAB:
    setup.keep_alive()
    print("✅ Keep-alive activated")

## 3. Load Training Configuration

In [None]:
# Read the YAML training configuration shipped with the repository
import yaml

config_path = os.path.join(REPO_PATH, 'config/training_config.yaml')
with open(config_path, 'r') as config_file:
    config = yaml.safe_load(config_file)

# Colab-specific overrides applied on top of the file's training section
config['training'].update({
    'checkpoint_frequency': 100,  # More frequent checkpoints
    'validation_frequency': 50,
    'batch_size': 256,  # Adjust based on GPU
    'gradient_accumulation_steps': 4,
    'mixed_precision': True,
})

# Session management
config['colab'] = dict(
    auto_save_to_drive=True,
    resume_from_checkpoint=True,
    memory_optimization=True,
    keep_alive_interval=300,  # 5 minutes
    checkpoint_on_interrupt=True,
)

training_cfg = config['training']
print("✅ Configuration loaded")
print(f"\n📋 Training Configuration:")
print(f"- Total Episodes: {training_cfg['num_episodes']}")
print(f"- Batch Size: {training_cfg['batch_size']}")
print(f"- Learning Rate: {training_cfg['learning_rate']}")
print(f"- Checkpoint Frequency: {training_cfg['checkpoint_frequency']} episodes")

## 4. Set Up Experiment Tracking (Optional)

In [None]:
# Optional experiment tracking with Weights & Biases
USE_WANDB = True  # Set to False if you don't want to use W&B

if not USE_WANDB:
    run = None
    print("ℹ️ W&B disabled")
else:
    import wandb

    # Login to W&B (you'll need to enter your API key)
    wandb.login()

    # The run is named after the session start time; the id comes from the
    # checkpoint manager's resume info and is None when it holds no 'wandb_id'.
    run_name = f"marl_training_{session_monitor.start_time.strftime('%Y%m%d_%H%M%S')}"
    previous_run_id = checkpoint_manager.get_resume_info().get('wandb_id', None)
    run = wandb.init(
        project="algospace-marl-training",
        config=config,
        name=run_name,
        resume="allow",
        id=previous_run_id,
    )

    # Attach the system information gathered earlier to the run config
    wandb.config.update(system_info)

    print(f"✅ W&B initialized: {run.url}")

## 5. Data Loading and Preparation

In [None]:
# Inventory of the datasets the drive manager reports under 'data'
available_data = drive_manager.list_available('data')
dataset_names = available_data.get('data', [])

print("📂 Available datasets:")
for dataset_name in dataset_names:
    dataset_info = drive_manager.get_info(dataset_name, 'data')
    # 'size' is divided by 1e6 for display as MB; a missing size shows as 0
    dataset_mb = dataset_info.get('size', 0) / 1e6
    print(f"- {dataset_name}: {dataset_mb:.2f} MB")

# Nothing found: tell the user where the data is expected
if not dataset_names:
    print("\n⚠️ No training data found. Please upload data to:")
    print(f"   {DRIVE_BASE}/data/")
    print("\nExpected format: HDF5 files with market data")

In [None]:
# Resolve a local HDF5 file and wrap it in a streaming loader
from training.data_prep import MarketDataPipeline, DataLoader

data_pipeline = MarketDataPipeline(config['data'])

DATASET_NAME = "market_data_2023"  # Change to your dataset name
dataset_on_drive = DATASET_NAME in available_data.get('data', [])
if not dataset_on_drive:
    print("❌ Dataset not found. Using sample data...")
    # Fall back to generated sample data so the pipeline can still be exercised
    local_data_path = "/tmp/sample_data.h5"
    data_pipeline.create_sample_data(local_data_path)
else:
    # Copy the dataset from Drive to local storage
    local_data_path = drive_manager.download_data(DATASET_NAME, decompress=True)
    print(f"✅ Data loaded from Drive: {local_data_path}")

# Streaming loader over the local file
streamer_options = dict(
    batch_size=config['training']['batch_size'],
    cache_size=1000,
)
data_streamer = DataStreamer(local_data_path, **streamer_options)

print(f"✅ Data loader initialized")
total_samples = len(data_streamer.h5_file['features'])
print(f"   Total samples: {total_samples}")
print(f"   Batch size: {data_streamer.batch_size}")
print(f"   Number of batches: {len(data_streamer)}")

## 6. Model Initialization

In [None]:
# Import model components
from agents.regime_detector import RegimeDetector
from agents.structure_analyzer import MarketStructureAnalyzer
from agents.tactical_trader import TacticalTrader
from agents.risk_manager import RiskManager
from coordination.multi_agent_coordinator import MultiAgentCoordinator

# Import training components
from training.marl_trainer import MAPPOTrainer
from training.environment import MultiAgentTradingEnv
from training.rewards import MultiAgentRewardSystem
from training.experience import ExperienceBuffer

print("✅ Model components imported")

In [None]:
# Decide whether training resumes from the latest checkpoint.
# Sets `resume_info` (the checkpoint manager's description of the latest
# checkpoint) and `resume` (bool) for the following cells.
resume_info = checkpoint_manager.get_resume_info()

if resume_info['available']:
    print("📂 Checkpoint found!")
    print(f"   Episode: {resume_info['episode']}")
    print(f"   Hours since save: {resume_info.get('hours_since_save', 0):.2f}")
    print(f"   Metrics: {resume_info.get('metrics', {})}")

    # Ask user if they want to resume
    if IN_COLAB:
        # Normalise the reply so "Y", " y " and "yes" all count as consent;
        # a raw comparison against 'y' would silently start a fresh run for those.
        answer = input("Resume from checkpoint? (y/n): ").strip().lower()
        resume = answer in ('y', 'yes')
    else:
        resume = True  # Auto-resume in non-interactive mode
else:
    print("ℹ️ No checkpoint found. Starting fresh training.")
    resume = False

In [None]:
# Initialize or load models.
# Produces `agents` (name -> module on `device`), `coordinator` and
# `start_episode`; on resume also `checkpoint` and `state`.

# Constructor for every agent, keyed by the name used both in config['agents']
# and in the checkpoint's 'models' mapping.
AGENT_CLASSES = {
    'regime_detector': RegimeDetector,
    'structure_analyzer': MarketStructureAnalyzer,
    'tactical_trader': TacticalTrader,
    'risk_manager': RiskManager,
}

if resume and resume_info['available']:
    # Load from checkpoint
    print("\n📂 Loading checkpoint...")
    checkpoint = checkpoint_manager.load_latest()

    # Restore state
    state = checkpoint['state']
    # NOTE(review): regular checkpoints are written after an episode finishes,
    # so resuming at state['episode'] repeats that episode — confirm whether
    # this should be state['episode'] + 1.
    start_episode = state['episode']

    # Initialize agents with saved state
    agents = {}
    for agent_name, agent_state in state['models'].items():
        agent_cls = AGENT_CLASSES.get(agent_name)
        if agent_cls is None:
            # An unrecognised key must not fall through and reuse the agent
            # built for the previous entry.
            print(f"⚠️ Skipping unknown agent in checkpoint: {agent_name}")
            continue

        agent = agent_cls(config['agents'][agent_name])
        agent.load_state_dict(agent_state)
        agent.to(device)
        agents[agent_name] = agent

    # Agents absent from the checkpoint start from fresh weights so the
    # trainer always receives the full set.
    for agent_name, agent_cls in AGENT_CLASSES.items():
        if agent_name not in agents:
            print(f"⚠️ {agent_name} missing from checkpoint; initializing fresh weights")
            agents[agent_name] = agent_cls(config['agents'][agent_name]).to(device)

    init_message = "✅ Models loaded from checkpoint"

else:
    # Initialize fresh models
    print("\n🔨 Initializing new models...")
    start_episode = 0

    agents = {
        agent_name: agent_cls(config['agents'][agent_name]).to(device)
        for agent_name, agent_cls in AGENT_CLASSES.items()
    }

    init_message = "✅ Models initialized"

# Initialize coordinator (identical for both paths)
coordinator = MultiAgentCoordinator(config['coordinator'])
coordinator.agents = agents

print(init_message)

# Count parameters
param_counts = {
    name: sum(p.numel() for p in agent.parameters())
    for name, agent in agents.items()
}
total_params = sum(param_counts.values())
print(f"\n📊 Total parameters: {total_params:,}")
for name, params in param_counts.items():
    print(f"   {name}: {params:,}")

## 7. Training Setup

In [None]:
# Initialize training environment, reward system, replay buffer and trainer
env = MultiAgentTradingEnv(config['environment'])
reward_system = MultiAgentRewardSystem(config['rewards'])
experience_buffer = ExperienceBuffer(
    capacity=config['training']['buffer_size'],
    prioritized=config['training'].get('prioritized_replay', True)
)

# Initialize MAPPO trainer
trainer = MAPPOTrainer(
    agents=agents,
    coordinator=coordinator,
    env=env,
    config=config['training'],
    device=device
)

# Restore optimizer states when resuming
if resume and resume_info['available']:
    # save_checkpoint() reads optimizer state from `trainer.optimizers`, so
    # look there first; the `<name>_optimizer` attribute is kept as a fallback.
    trainer_optimizers = getattr(trainer, 'optimizers', None) or {}
    for name, optimizer_state in (state.get('optimizers') or {}).items():
        optimizer = trainer_optimizers.get(name)
        if optimizer is None:
            optimizer = getattr(trainer, f'{name}_optimizer', None)
        if optimizer is None:
            # Make a lost optimizer state visible instead of skipping silently
            print(f"⚠️ No optimizer named '{name}' on the trainer; its saved state was not restored")
            continue
        optimizer.load_state_dict(optimizer_state)

print("✅ Training environment initialized")

In [None]:
# Memory optimization settings

# Bound up front so `scaler` exists even when memory optimization is disabled
# or no GPU is present.
scaler = None

if config['colab']['memory_optimization']:
    # Enable gradient checkpointing on the agents that support it
    for agent in agents.values():
        if hasattr(agent, 'enable_gradient_checkpointing'):
            agent.enable_gradient_checkpointing()

    from torch.cuda.amp import GradScaler

    if torch.cuda.is_available():
        # Cap this process at 90% of GPU memory. The call is CUDA-only, so it
        # is skipped on the CPU fallback the notebook supports.
        torch.cuda.set_per_process_memory_fraction(0.9)

        # Gradient scaler for mixed precision training
        if config['training']['mixed_precision']:
            scaler = GradScaler()

        print("✅ Memory optimization enabled")
        print(f"   Current GPU memory: {setup.check_gpu_memory()}")
    else:
        print("✅ Memory optimization enabled")
        print("   No GPU detected: GPU memory cap and mixed precision skipped")

## 8. Main Training Loop

In [None]:
# Training helper functions
import time
from datetime import datetime, timedelta
from IPython.display import clear_output
import matplotlib.pyplot as plt

def save_checkpoint(episode, metrics, is_best=False):
    """Persist the current training state through the checkpoint manager.

    Args:
        episode: Episode number recorded in the checkpoint.
        metrics: Metrics stored inside the state and also handed to the manager.
        is_best: Forwarded unchanged to ``checkpoint_manager.save``.
    """
    model_states = {}
    for agent_name, agent_module in agents.items():
        model_states[agent_name] = agent_module.state_dict()

    optimizer_states = {}
    for optimizer_name, optimizer in trainer.optimizers.items():
        optimizer_states[optimizer_name] = optimizer.state_dict()

    # The W&B run id is stored with the state; None when tracking is disabled
    wandb_run_id = None
    if run:
        wandb_run_id = run.id

    state = dict(
        episode=episode,
        models=model_states,
        optimizers=optimizer_states,
        metrics=metrics,
        config=config,
        wandb_id=wandb_run_id,
    )

    checkpoint_manager.save(state, metrics, is_best=is_best)
    print(f"💾 Checkpoint saved (episode {episode})")

def plot_training_progress(history):
    """Plot reward, Sharpe ratio, win rate and max drawdown against episode.

    Args:
        history: Mapping holding equally long sequences under the keys
            'episode', 'reward', 'sharpe_ratio', 'win_rate' and
            'max_drawdown'. Win rate and drawdown are treated as fractions,
            as elsewhere in this notebook (compared against 0.52, multiplied
            by 100 when printed), and are converted to percent here so the
            curves agree with their '(%)' axis labels.

    Returns:
        The matplotlib figure holding the 2x2 grid. The caller is responsible
        for showing, saving and closing it.
    """
    # (history key, panel title, y-axis label, factor applied before plotting)
    panels = [
        ('reward', 'Episode Reward', 'Reward', 1),
        ('sharpe_ratio', 'Sharpe Ratio', 'Sharpe', 1),
        ('win_rate', 'Win Rate', 'Win Rate (%)', 100),
        ('max_drawdown', 'Maximum Drawdown', 'Drawdown (%)', 100),
    ]

    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    episodes = history['episode']

    # axes.flat walks the grid row by row, matching the order of `panels`
    for ax, (key, title, ylabel, scale) in zip(axes.flat, panels):
        series = history[key]
        if scale != 1:
            series = [value * scale for value in series]
        ax.plot(episodes, series)
        ax.set_title(title)
        ax.set_xlabel('Episode')
        ax.set_ylabel(ylabel)

    plt.tight_layout()
    return fig

def should_stop_training(metrics, patience=50):
    """Decide whether the training loop should end after the current episode.

    Args:
        metrics: Mapping of episode metrics. 'sharpe_ratio' and 'win_rate'
            are read, each defaulting to 0 when absent.
        patience: Accepted for interface compatibility; this function does
            not consult it.

    Returns:
        Tuple ``(stop, reason)``. ``reason`` is an empty string when ``stop``
        is False.
    """
    # Leave enough time before the session limit to save state
    session_almost_over = session_monitor.is_ending_soon(buffer_minutes=20)
    if session_almost_over:
        return True, "Session ending soon"

    # Both performance targets must be exceeded at once
    sharpe = metrics.get('sharpe_ratio', 0)
    win_rate = metrics.get('win_rate', 0)
    if sharpe > 1.2 and win_rate > 0.52:
        return True, "Target performance reached"

    return False, ""

print("✅ Training functions defined")

In [None]:
# Main training loop.
# Runs episodes from `start_episode` up to config['training']['num_episodes'],
# recording metrics in `history`, checkpointing, plotting and validating along
# the way. Stops early when should_stop_training() says so.
num_episodes = config['training']['num_episodes']
validation_frequency = config['training']['validation_frequency']

print("🚀 Starting training...")
print(f"   Starting from episode: {start_episode}")
print(f"   Total episodes: {num_episodes}")
print(f"   Session time remaining: {session_monitor.get_remaining_time()['remaining_hours']:.1f} hours\n")

# Training history: one entry per episode completed in this session
history = {
    'episode': [],
    'reward': [],
    'sharpe_ratio': [],
    'win_rate': [],
    'max_drawdown': []
}

# Best metrics tracking
# NOTE(review): best_sharpe restarts at -inf on every run, so after a resume
# the first episode is always treated as a new best — confirm whether the
# previous best should be restored from the checkpoint manager instead.
best_sharpe = -float('inf')
episodes_since_best = 0  # tracked for inspection; not used as a stop criterion here

# One scheduler for the whole run: building it inside the loop would discard
# whatever update_schedule() recorded on the previous episode.
checkpoint_scheduler = CheckpointScheduler(checkpoint_manager)

# Pre-bound so the exception handlers and later cells can rely on these names
# even if no episode completes.
episode = start_episode
episode_metrics = None

# Training loop
try:
    for episode in range(start_episode, num_episodes):
        episode_start = time.time()

        # Run training episode
        episode_metrics = trainer.train_episode(episode)

        # Update history
        current_sharpe = episode_metrics.get('sharpe_ratio', 0)
        history['episode'].append(episode)
        history['reward'].append(episode_metrics['total_reward'])
        history['sharpe_ratio'].append(current_sharpe)
        history['win_rate'].append(episode_metrics.get('win_rate', 0))
        history['max_drawdown'].append(episode_metrics.get('max_drawdown', 0))

        # Check if best model
        is_best = current_sharpe > best_sharpe
        if is_best:
            best_sharpe = current_sharpe
            episodes_since_best = 0
        else:
            episodes_since_best += 1

        # Log to W&B
        if run:
            wandb.log(episode_metrics, step=episode)

        # Checkpoint saving
        should_checkpoint, reason = checkpoint_scheduler.should_checkpoint(episode, episode_metrics)

        saved_this_episode = False
        if should_checkpoint or is_best:
            save_checkpoint(episode, episode_metrics, is_best=is_best)
            checkpoint_scheduler.update_schedule(episode, episode_metrics, saved=True)
            saved_this_episode = True

        # Memory optimization and progress display, every 10th episode
        if episode % 10 == 0:
            setup.optimize_memory()

            clear_output(wait=True)

            # Query GPU memory once so both figures come from the same reading
            gpu_memory_now = setup.check_gpu_memory()

            # Display metrics
            print(f"📊 Episode {episode}/{num_episodes}")
            print(f"   Reward: {episode_metrics['total_reward']:.4f}")
            print(f"   Sharpe Ratio: {current_sharpe:.4f} (Best: {best_sharpe:.4f})")
            print(f"   Win Rate: {episode_metrics.get('win_rate', 0)*100:.1f}%")
            print(f"   Max Drawdown: {episode_metrics.get('max_drawdown', 0)*100:.1f}%")
            print(f"   Episode Time: {time.time() - episode_start:.1f}s")
            print(f"   Session Time Remaining: {session_monitor.get_remaining_time()['remaining_hours']:.1f}h")
            print(f"   GPU Memory: {gpu_memory_now['allocated']:.1f}GB / {gpu_memory_now['free']:.1f}GB free")

            # Plot progress
            if len(history['episode']) > 20:
                fig = plot_training_progress(history)
                plt.show()

                # Save plot to drive
                plot_path = f"{DRIVE_BASE}/results/training_progress_ep{episode}.png"
                fig.savefig(plot_path, dpi=150, bbox_inches='tight')
                # Close this specific figure so figures do not accumulate
                plt.close(fig)

        # Check stopping criteria
        should_stop, stop_reason = should_stop_training(episode_metrics)
        if should_stop:
            print(f"\n🛑 Stopping training: {stop_reason}")
            # A best episode has already been saved above. Anything else is
            # saved as a regular checkpoint so the real best model is not
            # overwritten by the final one.
            if not saved_this_episode:
                save_checkpoint(episode, episode_metrics, is_best=False)
            break

        # Validation
        if episode % validation_frequency == 0 and episode > 0:
            print("\n🔍 Running validation...")
            val_metrics = trainer.validate()
            print(f"   Validation Sharpe: {val_metrics.get('sharpe_ratio', 0):.4f}")
            print(f"   Validation Win Rate: {val_metrics.get('win_rate', 0)*100:.1f}%")

            if run:
                wandb.log({f"val/{k}": v for k, v in val_metrics.items()}, step=episode)

except KeyboardInterrupt:
    print("\n⚠️ Training interrupted by user")
    if episode_metrics is not None:
        save_checkpoint(episode, episode_metrics, is_best=False)
    else:
        print("ℹ️ No episode completed in this session; nothing to checkpoint")
except Exception as e:
    print(f"\n❌ Training error: {e}")
    if episode_metrics is not None:
        # Best effort: a failing emergency save must not hide the original error
        try:
            save_checkpoint(episode, episode_metrics, is_best=False)
        except Exception as save_error:
            print(f"⚠️ Emergency checkpoint failed: {save_error}")
    raise

print("\n✅ Training completed!")

## 9. Model Evaluation

In [None]:
# Swap the best checkpoint's weights into the live agents
print("📂 Loading best model for evaluation...")
best_checkpoint = checkpoint_manager.load(load_best=True)

# Restore each agent from its saved weights and switch it to eval mode
for name in agents:
    agent = agents[name]
    saved_weights = best_checkpoint['state']['models'][name]
    agent.load_state_dict(saved_weights)
    agent.eval()

best_episode = best_checkpoint['state']['episode']
print("✅ Best model loaded")
print(f"   Episode: {best_episode}")
print(f"   Metrics: {best_checkpoint['metrics']}")

In [None]:
# Evaluate and backtest the restored agents, then persist the results
from training.monitoring import ModelEvaluator, BacktestEngine

evaluator = ModelEvaluator(config['evaluation'])
backtest_engine = BacktestEngine(config['backtest'])

print("\n🔍 Running comprehensive evaluation...")

# Both calls receive the same agents and coordinator
model_bundle = dict(agents=agents, coordinator=coordinator)

# Evaluation pass over the data streamer
test_results = evaluator.evaluate_models(
    test_data=data_streamer,
    device=device,
    **model_bundle,
)

# Backtest over the same streamer
backtest_results = backtest_engine.run_backtest(
    historical_data=data_streamer,
    **model_bundle,
)

# Display results
print("\n📊 Evaluation Results:")
print(f"   Test Sharpe Ratio: {test_results['sharpe_ratio']:.4f}")
print(f"   Test Win Rate: {test_results['win_rate']*100:.1f}%")
print(f"   Test Max Drawdown: {test_results['max_drawdown']*100:.1f}%")
print(f"   Average Trade Return: {test_results['avg_return']*100:.2f}%")

print("\n📈 Backtest Results:")
print(f"   Total Return: {backtest_results['total_return']*100:.2f}%")
print(f"   Annualized Return: {backtest_results['annualized_return']*100:.2f}%")
print(f"   Sharpe Ratio: {backtest_results['sharpe_ratio']:.4f}")
print(f"   Calmar Ratio: {backtest_results['calmar_ratio']:.4f}")

# Bundle everything worth keeping and hand it to the drive manager
evaluation_payload = {
    'test_results': test_results,
    'backtest_results': backtest_results,
    'training_history': history,
    'best_episode': best_checkpoint['state']['episode'],
    'config': config
}
progress_plots = {'training_progress': plot_training_progress(history)}
drive_manager.save_results(
    results=evaluation_payload,
    name="marl_evaluation",
    plots=progress_plots
)

## 10. Model Export for Production

In [None]:
# Export models for production.
# Each agent is converted to TorchScript where possible and the scripted module
# is written next to the regular model export. Without this write the
# conversion result would be computed and then thrown away.
import os

print("📦 Exporting models for production...")

scripted_dir = os.path.join(DRIVE_BASE, "models")
os.makedirs(scripted_dir, exist_ok=True)

# Optimize models for inference
production_models = {}
for name, agent in agents.items():
    agent.eval()

    # Convert to TorchScript; agents that cannot be scripted are kept as-is
    try:
        scripted_model = torch.jit.script(agent)
        scripted_path = os.path.join(scripted_dir, f"{name}_scripted.pt")
        torch.jit.save(scripted_model, scripted_path)
        production_models[f"{name}_scripted"] = scripted_model
        print(f"   ✅ {name}: TorchScript conversion successful")
        print(f"      saved to {scripted_path}")
    except Exception as e:
        print(f"   ⚠️ {name}: TorchScript conversion failed - {e}")
        production_models[name] = agent

# Save production models (eager modules, through the drive manager)
model_path = drive_manager.save_model(
    models=agents,
    name="marl_production",
    configs=config,
    metrics=best_checkpoint['metrics'],
    production=True
)

print(f"\n✅ Models exported to: {model_path}")

# Create deployment package
package_path = drive_manager.create_training_package("marl_deployment_package")
print(f"✅ Deployment package created: {package_path}")

## 11. Training Summary

In [None]:
# Create training summary (printed and written as Markdown to the results folder)

# One timestamp for both the report body and the file name
end_time = datetime.now()

# Count what actually completed in this session. `history` holds one entry per
# finished episode, which avoids the off-by-one of `episode - start_episode`
# and does not depend on the loop variable being bound.
completed_episodes = history['episode']
episodes_trained = len(completed_episodes)
final_episode = completed_episodes[-1] if completed_episodes else 'N/A'

# Tolerate a missing or empty GPU section (e.g. on the CPU fallback)
gpu_info = system_info.get('gpu') or {}

# Report the allocator's recorded peak rather than the current allocation.
# NOTE(review): this is the peak since process start or the last reset of the
# peak statistics — confirm that setup.optimize_memory() does not reset them.
if torch.cuda.is_available():
    peak_gpu_usage = f"{torch.cuda.max_memory_allocated() / 1e9:.1f}GB"
else:
    peak_gpu_usage = "N/A"

best_metrics = best_checkpoint['metrics']

summary = f"""
# AlgoSpace MARL Training Summary

## Training Details
- Start Time: {session_monitor.start_time.strftime('%Y-%m-%d %H:%M:%S')}
- End Time: {end_time.strftime('%Y-%m-%d %H:%M:%S')}
- Total Runtime: {session_monitor.get_runtime_hours():.2f} hours
- Episodes Trained: {episodes_trained}
- Final Episode: {final_episode}

## Best Model Performance
- Episode: {best_checkpoint['state']['episode']}
- Sharpe Ratio: {best_metrics.get('sharpe_ratio', 0):.4f}
- Win Rate: {best_metrics.get('win_rate', 0)*100:.1f}%
- Max Drawdown: {best_metrics.get('max_drawdown', 0)*100:.1f}%

## Test Performance
- Test Sharpe: {test_results['sharpe_ratio']:.4f}
- Test Win Rate: {test_results['win_rate']*100:.1f}%
- Test Drawdown: {test_results['max_drawdown']*100:.1f}%

## Backtest Performance
- Total Return: {backtest_results['total_return']*100:.2f}%
- Annualized Return: {backtest_results['annualized_return']*100:.2f}%
- Sharpe Ratio: {backtest_results['sharpe_ratio']:.4f}

## System Information
- GPU: {gpu_info.get('name', 'N/A')}
- GPU Memory: {gpu_info.get('memory_total', 'N/A')}
- Peak GPU Usage: {peak_gpu_usage}

## Files Saved
- Best Model: {model_path}
- Deployment Package: {package_path}
- Evaluation Results: {DRIVE_BASE}/results/
"""

print(summary)

# Save summary; explicit UTF-8 so the result does not depend on the platform default
summary_path = f"{DRIVE_BASE}/results/training_summary_{end_time.strftime('%Y%m%d_%H%M%S')}.md"
with open(summary_path, 'w', encoding='utf-8') as f:
    f.write(summary)

print(f"\n✅ Summary saved to: {summary_path}")

In [None]:
# Wrap up: end the tracking run, release the data file, free memory
if run:
    # Finish W&B run
    wandb.finish()

data_streamer.close()
setup.optimize_memory()

# Closing message and suggested follow-ups, printed one line at a time
closing_lines = (
    "\n🎉 Training pipeline completed successfully!",
    "\n📋 Next Steps:",
    "1. Download the deployment package from Google Drive",
    "2. Review the evaluation results and training plots",
    "3. Test the production models in your deployment environment",
    "4. Consider running ensemble training with different seeds",
    "\nThank you for using AlgoSpace MARL Training!",
)
for closing_line in closing_lines:
    print(closing_line)