# Training Orchestrator - Master Pipeline for AlgoSpace-8

This master notebook orchestrates the complete training pipeline for the AlgoSpace-8 MARL trading system. It manages:

1. **Phase 1**: Data preparation and validation
2. **Phase 2**: Frozen expert training (RDE, M-RMS)
3. **Phase 3**: Embedder training (Market, Risk, Tactical)
4. **Phase 4**: Main MARL Core training with all components

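Designed to complete the full training run in under 24 hours on Google Colab Pro: the orchestrator enforces a total runtime budget (`max_total_hours`, 23 hours by default) plus a timeout for each phase.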

## 1. Environment Setup & Configuration

In [None]:
# Core imports and setup
import sys
import os
from pathlib import Path
import time
from datetime import datetime, timedelta
import json
import yaml
import subprocess
from typing import Dict, List, Optional, Any, Tuple
import logging

# Detect the runtime: the google.colab package can only be imported inside Colab.
try:
    import google.colab
except ImportError:
    IN_COLAB = False
    print("💻 Running Training Orchestrator locally")
else:
    IN_COLAB = True
    print("🚀 Running Training Orchestrator in Google Colab")

# Resolve the project root (mounting Drive first when in Colab).
if IN_COLAB:
    from google.colab import drive
    drive.mount('/content/drive')

    PROJECT_PATH = Path('/content/drive/MyDrive/AlgoSpace-8')
else:
    # Local runs use the parent of the current working directory.
    PROJECT_PATH = Path.cwd().parent

# Both environments share the same layout below the project root, and both
# put the project root first on sys.path so project packages can be imported.
NOTEBOOK_PATH = PROJECT_PATH / 'notebooks'
sys.path.insert(0, str(PROJECT_PATH))

In [None]:
# Install dependencies
# Runs only in Colab. The `!pip` lines are IPython shell escapes, so this cell
# must be executed by a notebook kernel, not as a plain Python script.
if IN_COLAB:
    # PyTorch packages are pulled from the download.pytorch.org cu118 wheel index.
    !pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118
    # Numerics, data storage, YAML config parsing and experiment tracking.
    !pip install -q numpy pandas h5py pyyaml tensorboard wandb
    # Progress bars, plotting and system/GPU monitoring.
    !pip install -q tqdm matplotlib seaborn psutil gputil
    # nbformat/nbclient are imported below to execute the per-phase notebooks.
    !pip install -q nbformat nbclient

In [None]:
# Import orchestration utilities
import torch
import numpy as np
import pandas as pd
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
import seaborn as sns

# Notebook execution
import nbformat
from nbclient import NotebookClient

# Custom utilities
from notebooks.utils.colab_setup import ColabSetup, SessionMonitor, setup_colab_training
from notebooks.utils.drive_manager import DriveManager
from notebooks.utils.checkpoint_manager import CheckpointManager

# Configure logging
# force=True replaces any handlers already attached to the root logger.
# Without it basicConfig() silently does nothing when the root logger is
# already configured (as it is in Colab, and on every re-run of this cell),
# and the orchestrator's INFO messages are dropped.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    force=True,
)
logger = logging.getLogger('TrainingOrchestrator')

In [None]:
# Initialize environment
if not IN_COLAB:
    # Local run: only the compute device is selected here. The Colab helpers
    # (colab_setup, drive_manager, checkpoint_manager, session_monitor) are
    # NOT created in this branch.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
else:
    # Colab run: set up the session, then the Drive-backed managers.
    colab_setup = setup_colab_training(
        project_name="AlgoSpace-8",
        mount_drive=True,
        setup_wandb=True,
        keep_alive=True
    )

    drive_manager = DriveManager(str(PROJECT_PATH))
    checkpoint_manager = CheckpointManager(drive_manager)
    # Monitor is given a 23.5-hour maximum session runtime.
    session_monitor = SessionMonitor(max_runtime_hours=23.5)

    device = colab_setup.device

print(f"🎮 Using device: {device}")
print(f"📁 Project path: {PROJECT_PATH}")
print(f"📓 Notebook path: {NOTEBOOK_PATH}")

## 2. Orchestration Configuration

In [None]:
# Load unified configuration
config_path = NOTEBOOK_PATH / 'config' / 'unified_config.yaml'


def _default_unified_config() -> Dict[str, Any]:
    """Return a fresh copy of the built-in orchestration configuration."""
    return {
        'training': {
            'phases': {
                'data_prep': {'enabled': True, 'timeout_hours': 1.0},
                'frozen_experts': {'enabled': True, 'timeout_hours': 6.0},
                'embedders': {'enabled': True, 'timeout_hours': 8.0},
                'main_core': {'enabled': True, 'timeout_hours': 8.0}
            },
            'max_total_hours': 23.0,
            'checkpoint_interval_minutes': 30,
            'early_stopping': {
                'patience': 50,
                'min_delta': 0.001
            }
        },
        'notebooks': {
            'data_prep': 'Data_Preparation_Colab.ipynb',
            'regime_agent': 'agents/Regime_Agent_Training.ipynb',
            'tactical_agent': 'agents/Tactical_Agent_Training.ipynb',
            'structure_agent': 'agents/Structure_Agent_Training.ipynb',
            'mrms_agent': 'agents/MRMS_Training_Colab.ipynb',
            'main_core': 'MARL_Training_Master_Colab.ipynb'
        },
        'hardware': {
            'gpu_memory_fraction': 0.9,
            'cpu_threads': 4
        }
    }


def _fill_missing(loaded: Dict[str, Any], defaults: Dict[str, Any]) -> Dict[str, Any]:
    """Return ``loaded`` with keys it lacks taken from ``defaults``, recursively.

    Values present in ``loaded`` always win; nested dicts are merged key by key.
    """
    merged = dict(defaults)
    for key, value in loaded.items():
        default_value = defaults.get(key)
        if isinstance(value, dict) and isinstance(default_value, dict):
            merged[key] = _fill_missing(value, default_value)
        else:
            merged[key] = value
    return merged


_loaded_config = None
if config_path.exists():
    with open(config_path, 'r', encoding='utf-8') as f:
        _loaded_config = yaml.safe_load(f)

if isinstance(_loaded_config, dict):
    # Later cells index into 'training', 'notebooks', ... unconditionally, so
    # a partial file is completed from the defaults instead of failing there.
    unified_config = _fill_missing(_loaded_config, _default_unified_config())
    print("✅ Loaded unified configuration")
else:
    if config_path.exists():
        # safe_load() returns None for an empty file and may return a
        # non-mapping for malformed content; neither is usable as a config.
        print(f"⚠️ {config_path.name} is empty or not a mapping; ignoring it")
    # Default configuration
    unified_config = _default_unified_config()
    print("⚠️ Using default configuration")

In [None]:
class TrainingPhase:
    """One notebook-backed stage of the training pipeline.

    Holds the static description of the stage (name, notebook, config,
    timeout) together with its mutable run record (timestamps, status,
    metrics, error).
    """

    def __init__(self, name: str, notebook: str, config: Dict[str, Any],
                 timeout_hours: float = 6.0):
        # Static description supplied by the caller.
        self.name, self.notebook = name, notebook
        self.config, self.timeout_hours = config, timeout_hours

        # Run record: nothing has happened yet, so the phase is 'pending'
        # with no timestamps, no metrics and no error.
        self.start_time = self.end_time = None
        self.status = 'pending'
        self.metrics = {}
        self.error = None

    def __repr__(self):
        name, status = self.name, self.status
        return f"TrainingPhase(name={name}, status={status})"


class OrchestrationState:
    """Tracks overall orchestration state and persists it as JSON.

    Attributes:
        phases: Pipeline phases, in execution order.
        current_phase_idx: Index of the phase to run next.
        start_time: When the pipeline was first started (kept across resumes).
        checkpoints: Free-form per-phase checkpoint records.
        total_runtime_hours: Runtime carried over from earlier sessions
            (0 for a fresh run).
        is_resuming: True once a saved state has been loaded.
    """

    def __init__(self):
        self.phases: List["TrainingPhase"] = []
        self.current_phase_idx = 0
        self.start_time = datetime.now()
        self.checkpoints = {}
        self.total_runtime_hours = 0
        self.is_resuming = False
        # Start of the *current* session; elapsed time is measured from here
        # and added to total_runtime_hours.
        self._session_start = self.start_time

    def add_phase(self, phase: "TrainingPhase"):
        """Append a phase to the end of the pipeline."""
        self.phases.append(phase)

    def get_current_phase(self) -> Optional["TrainingPhase"]:
        """Return the phase at ``current_phase_idx``, or None past the end."""
        if self.current_phase_idx < len(self.phases):
            return self.phases[self.current_phase_idx]
        return None

    def advance_phase(self):
        """Move on to the next phase."""
        self.current_phase_idx += 1

    def get_elapsed_hours(self) -> float:
        """Return hours of pipeline runtime, excluding time between sessions.

        For a fresh run this is simply the time since ``start_time``. After a
        resume it is the runtime recorded in the state file plus the time
        spent in the current session.
        """
        session_hours = (datetime.now() - self._session_start).total_seconds() / 3600
        return self.total_runtime_hours + session_hours

    @staticmethod
    def _parse_time(value: Optional[str]) -> Optional[datetime]:
        """Convert a stored ISO timestamp back to a datetime (None stays None)."""
        return datetime.fromisoformat(value) if value else None

    def save_state(self, path: str):
        """Save orchestration state for recovery.

        Raises:
            TypeError: If metrics, errors or checkpoints are not JSON
                serializable. The existing file is left untouched in that case.
        """
        state_dict = {
            'current_phase_idx': self.current_phase_idx,
            'start_time': self.start_time.isoformat(),
            'total_runtime_hours': self.get_elapsed_hours(),
            'phases': [
                {
                    'name': p.name,
                    'status': p.status,
                    'metrics': p.metrics,
                    'error': getattr(p, 'error', None),
                    'start_time': p.start_time.isoformat() if p.start_time else None,
                    'end_time': p.end_time.isoformat() if p.end_time else None
                }
                for p in self.phases
            ],
            'checkpoints': self.checkpoints
        }

        # Serialize before opening the file so that a serialization error
        # cannot leave a truncated state file behind.
        payload = json.dumps(state_dict, indent=2)
        with open(path, 'w') as f:
            f.write(payload)

    def load_state(self, path: str):
        """Load orchestration state for resuming.

        Saved phases are matched to ``self.phases`` by position, so the phases
        must already be registered in the same order as when the state was
        saved.
        """
        with open(path, 'r') as f:
            state_dict = json.load(f)

        self.current_phase_idx = state_dict['current_phase_idx']
        self.start_time = datetime.fromisoformat(state_dict['start_time'])
        self.checkpoints = state_dict['checkpoints']
        self.is_resuming = True

        carried_hours = state_dict.get('total_runtime_hours')
        if carried_hours is None:
            # State file written without a runtime record: fall back to
            # wall-clock time since the original start.
            self.total_runtime_hours = 0
            self._session_start = self.start_time
        else:
            # Count only the time actually spent running; the gap between the
            # previous session and this one must not consume the time budget.
            self.total_runtime_hours = float(carried_hours)
            self._session_start = datetime.now()

        # Update phase records
        for phase, phase_data in zip(self.phases, state_dict['phases']):
            phase.status = phase_data['status']
            phase.metrics = phase_data['metrics']
            phase.start_time = self._parse_time(phase_data.get('start_time'))
            phase.end_time = self._parse_time(phase_data.get('end_time'))
            phase.error = phase_data.get('error')

## 3. Notebook Execution Engine

In [None]:
class NotebookExecutor:
    """Executes Jupyter notebooks with monitoring and error handling.

    The notebook is read from ``notebook_path``, optionally given an injected
    parameter cell, run with a ``python3`` kernel whose working directory is
    the notebook's own folder, and written next to the source as
    ``<name>.executed.ipynb``. The executed copy is written even when a cell
    fails, so partial outputs and the traceback remain available.

    Args:
        notebook_path: Path of the ``.ipynb`` file to run.
        timeout: Seconds passed to ``NotebookClient(timeout=...)``. nbclient
            applies this limit to each cell, not to the notebook as a whole.
    """

    def __init__(self, notebook_path: Path, timeout: int = 3600):
        self.notebook_path = notebook_path
        self.timeout = timeout
        self.client = None
        self.nb = None

    def execute(self, parameters: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
        """Execute notebook and return results.

        Args:
            parameters: Optional name/value pairs injected as assignments in a
                new code cell before execution.

        Returns:
            The dict built by ``_extract_results``.

        Raises:
            Exception: Any error from reading, executing or saving the
                notebook is logged and re-raised unchanged.
        """
        logger.info(f"Executing notebook: {self.notebook_path.name}")

        try:
            # Load notebook (.ipynb files are UTF-8 encoded JSON)
            with open(self.notebook_path, 'r', encoding='utf-8') as f:
                self.nb = nbformat.read(f, as_version=4)

            # Inject parameters if provided
            if parameters:
                self._inject_parameters(parameters)

            # Create client and execute
            self.client = NotebookClient(
                self.nb,
                timeout=self.timeout,
                kernel_name='python3',
                resources={'metadata': {'path': str(self.notebook_path.parent)}}
            )

            try:
                self.client.execute()
            except Exception:
                # Keep whatever was produced before the failure; a problem
                # while saving must not hide the original error.
                self._save_executed(best_effort=True)
                raise

            self._save_executed()
            results = self._extract_results()

            logger.info(f"✅ Successfully executed: {self.notebook_path.name}")
            return results

        except Exception as e:
            logger.error(f"❌ Failed to execute {self.notebook_path.name}: {str(e)}")
            raise

    def _save_executed(self, best_effort: bool = False) -> None:
        """Write the in-memory notebook to ``<name>.executed.ipynb``.

        With ``best_effort=True`` a failure to write is logged as a warning
        instead of being raised.
        """
        executed_path = self.notebook_path.with_suffix('.executed.ipynb')
        try:
            with open(executed_path, 'w', encoding='utf-8') as f:
                nbformat.write(self.nb, f)
        except Exception as save_error:
            if not best_effort:
                raise
            logger.warning(
                f"⚠️ Could not save partial notebook {executed_path.name}: {save_error}"
            )

    def _inject_parameters(self, parameters: Dict[str, Any]):
        """Inject parameters into notebook.

        Each value is written with ``repr()``, so it must have a repr that is
        valid Python source in the target kernel.
        """
        param_cell = nbformat.v4.new_code_cell(
            source=f"# Injected parameters\n" +
                   "\n".join([f"{k} = {repr(v)}" for k, v in parameters.items()])
        )

        # Insert after first cell
        self.nb.cells.insert(1, param_cell)

    def _extract_results(self) -> Dict[str, Any]:
        """Extract results from executed notebook.

        Returns:
            A dict with ``errors`` (one entry per error output), ``outputs``
            (the text of every ``execute_result`` whose plain-text form
            contains "metrics", case-insensitively) and ``metrics`` (always an
            empty dict; nothing in this method fills it).
        """
        results = {
            'outputs': [],
            'metrics': {},
            'errors': []
        }

        for cell in self.nb.cells:
            if cell.cell_type == 'code' and hasattr(cell, 'outputs'):
                for output in cell.outputs:
                    if output.output_type == 'error':
                        results['errors'].append({
                            'ename': output.ename,
                            'evalue': output.evalue,
                            'traceback': output.traceback
                        })
                    elif output.output_type == 'execute_result':
                        # Look for metrics in output
                        if 'data' in output and 'text/plain' in output.data:
                            text = output.data['text/plain']
                            if 'metrics' in text.lower():
                                results['outputs'].append(text)

        return results


def execute_training_phase(phase: TrainingPhase, state: OrchestrationState) -> bool:
    """Execute a single training phase.

    Runs the phase's notebook through ``NotebookExecutor`` with a timeout of
    ``min(phase.timeout_hours, remaining budget)`` and records the outcome on
    ``phase`` (status, timestamps, metrics, error). On success a
    ``"<name>_completed"`` entry is added to ``state.checkpoints``.

    Reads the module-level ``unified_config``, ``NOTEBOOK_PATH`` and
    ``drive_manager``. ``drive_manager`` is only created in Colab, so outside
    Colab the phase ends as 'failed' with the resulting NameError as its error.

    Args:
        phase: Phase to run; updated in place.
        state: Orchestration state providing elapsed time and checkpoints.

    Returns:
        True if the phase completed. False if it failed or was skipped;
        ``phase.status`` tells the two apart.
    """
    logger.info(f"\n{'='*60}")
    logger.info(f"Starting Phase: {phase.name}")
    logger.info(f"{'='*60}")

    phase.start_time = datetime.now()
    # Clear the record of any earlier attempt so that a successful retry is
    # not reported together with the previous failure.
    phase.end_time = None
    phase.error = None
    phase.status = 'running'

    try:
        # Check remaining time
        elapsed = state.get_elapsed_hours()
        remaining = unified_config['training']['max_total_hours'] - elapsed

        if remaining < 0.5:
            logger.warning(f"⚠️ Less than 30 minutes remaining, skipping {phase.name}")
            phase.status = 'skipped'
            return False

        # Prepare notebook path
        notebook_path = NOTEBOOK_PATH / phase.notebook

        if not notebook_path.exists():
            logger.error(f"❌ Notebook not found: {notebook_path}")
            phase.status = 'failed'
            phase.error = 'Notebook not found'
            return False

        # Never hand a phase more time than the overall budget still allows.
        budget_hours = min(phase.timeout_hours, remaining)
        executor = NotebookExecutor(
            notebook_path,
            timeout=int(budget_hours * 3600)
        )

        # Phase-specific parameters injected into the notebook
        parameters = {
            'max_runtime_hours': budget_hours,
            'checkpoint_path': str(drive_manager.checkpoint_path),
            'is_orchestrated': True,
            'phase_name': phase.name
        }

        # Execute
        results = executor.execute(parameters)

        # Update phase
        phase.end_time = datetime.now()
        phase.metrics = results.get('metrics', {})

        if results['errors']:
            phase.status = 'failed'
            phase.error = results['errors'][0]
            logger.error(f"❌ Phase {phase.name} failed with errors")
            return False

        phase.status = 'completed'
        logger.info(f"✅ Phase {phase.name} completed successfully")

        # Save checkpoint
        checkpoint_name = f"{phase.name}_completed"
        state.checkpoints[checkpoint_name] = {
            'timestamp': phase.end_time.isoformat(),
            'metrics': phase.metrics
        }

        return True

    except Exception as e:
        # logger.exception keeps the traceback, which the message alone loses.
        logger.exception(f"❌ Exception in phase {phase.name}: {str(e)}")
        phase.status = 'failed'
        phase.error = str(e)
        return False

    finally:
        # Every exit path, including skip and notebook-not-found, gets an end time.
        if phase.end_time is None:
            phase.end_time = datetime.now()

## 4. Training Pipeline Definition

In [None]:
# Initialize orchestration state
orch_state = OrchestrationState()

# Define training phases
# One row per phase: (display name, key in unified_config['notebooks'],
# key in unified_config['training']['phases'], fixed timeout in hours).
# A timeout of None means "use timeout_hours from the phase's own config".
_PHASE_TABLE = [
    ("Data Preparation", 'data_prep', 'data_prep', None),
    ("Regime Detection Expert", 'regime_agent', 'frozen_experts', 2.0),
    ("Tactical Agent", 'tactical_agent', 'frozen_experts', 2.0),
    ("Structure Agent", 'structure_agent', 'frozen_experts', 2.0),
    ("M-RMS Ensemble", 'mrms_agent', 'frozen_experts', 3.0),
    ("Main MARL Core", 'main_core', 'main_core', None),
]

training_phases = []
for _name, _notebook_key, _config_key, _fixed_timeout in _PHASE_TABLE:
    _phase_config = unified_config['training']['phases'][_config_key]
    training_phases.append(
        TrainingPhase(
            name=_name,
            notebook=unified_config['notebooks'][_notebook_key],
            config=_phase_config,
            timeout_hours=(
                _phase_config['timeout_hours']
                if _fixed_timeout is None else _fixed_timeout
            ),
        )
    )

# Add phases to orchestration state
for phase in training_phases:
    orch_state.add_phase(phase)

print(f"📋 Configured {len(training_phases)} training phases:")
for i, phase in enumerate(training_phases):
    print(f"   {i+1}. {phase.name} ({phase.timeout_hours:.1f}h max)")

In [None]:
# Check for existing orchestration state (resume capability)
# drive_manager is only created in Colab, so outside Colab there is no state
# file and nothing to resume from.
state_file = (
    drive_manager.checkpoint_path / "orchestration_state.json" if IN_COLAB else None
)

resumed = False
if state_file is not None and state_file.exists():
    print("\n📂 Found existing orchestration state")

    try:
        orch_state.load_state(str(state_file))
        resumed = True
    except (OSError, ValueError, KeyError) as exc:
        # json.JSONDecodeError is a ValueError: this covers a truncated or
        # hand-edited file. Rebuild a clean state instead of crashing the cell.
        print(f"⚠️ Could not read {state_file.name} ({exc}); ignoring it")
        orch_state = OrchestrationState()
        for phase in training_phases:
            orch_state.add_phase(phase)

if resumed:
    # The pipeline skips phases already marked 'completed', so the phase that
    # will actually run next is the first non-completed one at or after the
    # saved index.
    next_idx = orch_state.current_phase_idx
    while (next_idx < len(orch_state.phases)
           and orch_state.phases[next_idx].status == 'completed'):
        next_idx += 1

    if next_idx < len(orch_state.phases):
        print(f"✅ Resuming from phase {next_idx + 1}")
    else:
        print("✅ All phases already completed")
    print(f"   Elapsed time: {orch_state.get_elapsed_hours():.1f} hours")

    # Show completed phases
    print("\n📊 Completed phases:")
    for phase in orch_state.phases:
        if phase.status == 'completed':
            print(f"   ✅ {phase.name} - {phase.status}")
else:
    print("\n🆕 Starting fresh orchestration")

## 5. Main Orchestration Loop

In [None]:
def _phase_result(phase: TrainingPhase) -> Dict[str, Any]:
    """Summarize a phase's run record as a plain dict for reports and plots."""
    has_both_times = bool(phase.start_time and phase.end_time)
    return {
        'name': phase.name,
        'status': phase.status,
        'start_time': phase.start_time.isoformat() if phase.start_time else None,
        'end_time': phase.end_time.isoformat() if phase.end_time else None,
        'duration_hours': (
            (phase.end_time - phase.start_time).total_seconds() / 3600
            if has_both_times else 0
        ),
        'metrics': phase.metrics,
        'error': phase.error
    }


def run_training_pipeline(orch_state: OrchestrationState,
                         dry_run: bool = False) -> Dict[str, Any]:
    """Execute the complete training pipeline.

    Walks the phases from ``orch_state.current_phase_idx`` onwards. Phases
    already marked 'completed' are passed over, phases whose config has
    ``enabled: False`` are marked 'skipped', and the run stops at the first
    failed phase or when the Colab session is about to end. In Colab the
    orchestration state is saved after every executed phase and before an
    early stop.

    Args:
        orch_state: State holding the phases and the current position.
        dry_run: If True, walk the phases without executing any notebook and
            without saving state; each phase is marked 'dry_run'.

    Returns:
        A dict with 'start_time', 'end_time', 'total_runtime' (hours),
        'phases' (one summary per phase handled in this call), 'success'
        (False only if a phase failed) and 'interrupted' (True if the run
        stopped early because the session was ending).
    """

    logger.info("\n" + "="*60)
    logger.info("🚀 Starting AlgoSpace-8 Training Pipeline")
    logger.info(f"   Total phases: {len(orch_state.phases)}")
    logger.info(f"   Max runtime: {unified_config['training']['max_total_hours']:.1f} hours")
    logger.info(f"   Device: {device}")
    logger.info("="*60 + "\n")

    pipeline_results = {
        'start_time': datetime.now().isoformat(),
        'phases': [],
        'success': True,
        'total_runtime': 0,
        'interrupted': False
    }

    def persist_state():
        # The state file lives on Drive, which only exists in Colab.
        if IN_COLAB:
            orch_state.save_state(str(state_file))
            logger.info("💾 Saved orchestration state")

    # Main execution loop
    while orch_state.current_phase_idx < len(orch_state.phases):
        # Check session time
        if IN_COLAB and session_monitor.is_ending_soon(buffer_minutes=30):
            logger.warning("⚠️ Session ending soon! Saving state and stopping...")
            pipeline_results['interrupted'] = True
            if not dry_run:
                persist_state()
            break

        # Get current phase
        phase = orch_state.get_current_phase()

        if phase.status == 'completed':
            logger.info(f"⏭️ Skipping completed phase: {phase.name}")
            orch_state.advance_phase()
            continue

        # Honour the per-phase 'enabled' switch from the configuration.
        phase_config = phase.config if isinstance(phase.config, dict) else {}
        if not phase_config.get('enabled', True):
            logger.info(f"⏭️ Skipping disabled phase: {phase.name}")
            phase.status = 'skipped'
            pipeline_results['phases'].append(_phase_result(phase))
            orch_state.advance_phase()
            continue

        # Display phase info
        elapsed = orch_state.get_elapsed_hours()
        remaining = unified_config['training']['max_total_hours'] - elapsed

        print(f"\n📊 Phase {orch_state.current_phase_idx + 1}/{len(orch_state.phases)}")
        print(f"   Name: {phase.name}")
        print(f"   Notebook: {phase.notebook}")
        print(f"   Timeout: {phase.timeout_hours:.1f}h")
        print(f"   Elapsed: {elapsed:.1f}h / Remaining: {remaining:.1f}h")

        if dry_run:
            logger.info(f"🔄 [DRY RUN] Would execute: {phase.name}")
            phase.status = 'dry_run'
            # Recorded so the report and the plot have something to show.
            pipeline_results['phases'].append(_phase_result(phase))
            orch_state.advance_phase()
            time.sleep(1)
            continue

        # Execute phase
        success = execute_training_phase(phase, orch_state)

        # Record results
        pipeline_results['phases'].append(_phase_result(phase))

        failed = not success and phase.status == 'failed'

        # Advance before saving so the stored index points at the next phase
        # to run; a failed phase stays current so that a resume retries it.
        if not failed:
            orch_state.advance_phase()
        persist_state()

        # Check if should continue
        if failed:
            logger.error(f"❌ Pipeline stopped due to failure in {phase.name}")
            pipeline_results['success'] = False
            break

        # Memory cleanup
        if IN_COLAB:
            colab_setup.optimize_memory()

    # Final summary
    pipeline_results['end_time'] = datetime.now().isoformat()
    pipeline_results['total_runtime'] = orch_state.get_elapsed_hours()

    return pipeline_results

In [None]:
# Execute pipeline
# DRY_RUN = True walks through the phases without executing any notebook;
# DRY_RUN = False runs the real pipeline after an interactive confirmation.
DRY_RUN = False

print(f"\n🎯 Pipeline mode: {'DRY RUN' if DRY_RUN else 'FULL EXECUTION'}")

# A dry run needs no confirmation; a real run only starts when the answer is
# "yes" (compared case-insensitively).
confirmed = True
if not DRY_RUN:
    response = input("\n⚠️ This will run the complete training pipeline. Continue? (yes/no): ")
    confirmed = response.lower() == 'yes'

if confirmed:
    pipeline_results = run_training_pipeline(orch_state, dry_run=DRY_RUN)
else:
    print("❌ Pipeline execution cancelled")

## 6. Results Summary & Visualization

In [None]:
def visualize_pipeline_results(results: Dict[str, Any]):
    """Create visualizations of pipeline execution.

    Draws two stacked charts: a horizontal bar per phase showing its duration
    (coloured by status) and a cumulative timeline with the configured maximum
    runtime marked. In Colab the figure is also written to
    ``<results_path>/plots/pipeline_execution.png``.

    Args:
        results: Pipeline result dict; only ``results['phases']`` is read, and
            each entry must provide 'name', 'duration_hours' and 'status'.
    """

    # Create figure
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 10))

    # Timeline visualization
    phases_data = results['phases']
    phase_names = [p['name'] for p in phases_data]
    durations = [p['duration_hours'] for p in phases_data]
    statuses = [p['status'] for p in phases_data]

    # Color map for statuses (unknown statuses fall back to gray)
    color_map = {
        'completed': 'green',
        'failed': 'red',
        'skipped': 'orange',
        'dry_run': 'lightblue',
        'pending': 'gray'
    }
    colors = [color_map.get(s, 'gray') for s in statuses]

    # Duration bar chart
    y_pos = np.arange(len(phase_names))
    ax1.barh(y_pos, durations, color=colors)
    ax1.set_yticks(y_pos)
    ax1.set_yticklabels(phase_names)
    ax1.set_xlabel('Duration (hours)')
    ax1.set_title('Training Phase Durations')
    ax1.grid(axis='x', alpha=0.3)

    # Add duration labels (only for phases that actually took time) and the
    # status text at the start of every bar
    for i, (duration, status) in enumerate(zip(durations, statuses)):
        if duration > 0:
            ax1.text(duration + 0.1, i, f"{duration:.1f}h",
                    va='center', fontsize=9)
        ax1.text(0.1, i, status, va='center', fontsize=8,
                style='italic', alpha=0.7)

    # Cumulative timeline: each point is the start offset of a phase
    cumulative_times = []
    current_time = 0
    for phase in phases_data:
        cumulative_times.append(current_time)
        current_time += phase['duration_hours']

    ax2.plot(cumulative_times, range(len(phases_data)), 'bo-', linewidth=2)
    ax2.set_xlabel('Cumulative Time (hours)')
    ax2.set_ylabel('Phase Index')
    ax2.set_title('Training Progress Timeline')
    ax2.grid(True, alpha=0.3)
    ax2.axvline(x=unified_config['training']['max_total_hours'],
               color='red', linestyle='--', label='Max Runtime')
    ax2.legend()

    plt.tight_layout()

    # Save figure before showing it, creating the plots folder if needed;
    # savefig() does not create missing directories.
    fig_path = None
    if IN_COLAB:
        fig_path = drive_manager.results_path / "plots" / "pipeline_execution.png"
        fig_path.parent.mkdir(parents=True, exist_ok=True)
        fig.savefig(fig_path, dpi=300, bbox_inches='tight')

    plt.show()

    if fig_path is not None:
        print(f"\n📊 Saved pipeline visualization to: {fig_path}")

# Visualize results
if 'pipeline_results' in locals():
    visualize_pipeline_results(pipeline_results)

In [None]:
# Generate summary report
def generate_summary_report(results: Dict[str, Any]) -> str:
    """Render the pipeline results as a markdown document.

    The report has an overview, a table with one row per phase, the metrics
    of every phase that has any, and an errors section when at least one
    phase carries an error.
    """

    # Header, overview and the table heading
    header = f"""# AlgoSpace-8 Training Pipeline Report

Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}

## Overview
- **Status**: {'✅ SUCCESS' if results['success'] else '❌ FAILED'}
- **Total Runtime**: {results['total_runtime']:.2f} hours
- **Phases Completed**: {sum(1 for p in results['phases'] if p['status'] == 'completed')}/{len(results['phases'])}

## Phase Details

| Phase | Status | Duration | Start Time | End Time |
|-------|--------|----------|------------|----------|
"""

    status_icons = {
        'completed': '✅',
        'failed': '❌',
        'skipped': '⏭️',
        'dry_run': '🔄',
        'pending': '⏸️'
    }

    # One table row per phase; timestamps are cut to minute precision
    table_rows = []
    for entry in results['phases']:
        icon = status_icons.get(entry['status'], '❓')
        started = entry['start_time'][:16] if entry['start_time'] else 'N/A'
        finished = entry['end_time'][:16] if entry['end_time'] else 'N/A'
        table_rows.append(
            f"| {entry['name']} | {icon} {entry['status']} | "
            f"{entry['duration_hours']:.1f}h | {started} | {finished} |\n"
        )

    # Metrics: the heading is always present, phases without metrics are omitted
    metric_parts = ["\n## Key Metrics\n\n"]
    for entry in results['phases']:
        if not entry['metrics']:
            continue
        metric_parts.append(f"### {entry['name']}\n")
        metric_parts.extend(
            f"- {key}: {value}\n" for key, value in entry['metrics'].items()
        )
        metric_parts.append("\n")

    # Errors: the whole section only appears when something went wrong
    error_parts = []
    failing = [entry for entry in results['phases'] if entry['error']]
    if failing:
        error_parts.append("\n## Errors\n\n")
        for entry in failing:
            error_parts.append(f"### {entry['name']}\n")
            error_parts.append(f"```\n{entry['error']}\n```\n\n")

    return header + "".join(table_rows) + "".join(metric_parts) + "".join(error_parts)

# Generate and save report
if 'pipeline_results' in locals():
    report = generate_summary_report(pipeline_results)
    print(report)

    if IN_COLAB:
        report_path = drive_manager.results_path / "pipeline_report.md"
        with open(report_path, 'w') as f:
            f.write(report)
        print(f"\n📄 Saved report to: {report_path}")

## 7. Post-Pipeline Validation

In [None]:
def validate_training_outputs():
    """Check that each expected output location exists and holds matching files.

    Every entry is a (label, directory, patterns) triple. A pattern starting
    with '.' is matched as a file extension; any other pattern is matched as
    a substring of the file name. Only the directory itself is searched, not
    its sub-folders.

    Returns:
        A list of (label, status, info) tuples, one per expected output.
    """

    print("\n🔍 Validating training outputs...")

    production_dir = drive_manager.model_path / 'production'
    expected_outputs = [
        ('Data files', drive_manager.data_path / 'processed', ['.h5', '.parquet']),
        ('Regime model', production_dir, ['regime_expert']),
        ('Tactical model', production_dir, ['tactical_agent']),
        ('Structure model', production_dir, ['structure_agent']),
        ('MRMS ensemble', production_dir, ['mrms_ensemble']),
        ('Main Core', production_dir, ['main_marl_core']),
        ('Checkpoints', drive_manager.checkpoint_path, ['.pt']),
        ('Results', drive_manager.results_path, ['.json', '.png'])
    ]

    validation_results = []

    for label, directory, patterns in expected_outputs:
        if not directory.exists():
            validation_results.append((label, '❌ Path not found', str(directory)))
            continue

        # Collect the hits of every pattern (extension or name fragment)
        matches = [
            hit
            for pattern in patterns
            for hit in directory.glob(
                f'*{pattern}' if pattern.startswith('.') else f'*{pattern}*'
            )
        ]

        if matches:
            outcome = (label, '✅ Found', f"{len(matches)} files")
        else:
            outcome = (label, '⚠️ No files', str(directory))
        validation_results.append(outcome)

    # Display results
    separator = "-" * 60
    print("\n📋 Validation Results:")
    print(separator)
    for label, status, info in validation_results:
        print(f"{label:20} {status:15} {info}")

    # Overall status: valid only if every entry found at least one file
    all_valid = all('✅' in status for _, status, _ in validation_results)
    print(separator)
    print(f"Overall: {'✅ All outputs valid' if all_valid else '⚠️ Some outputs missing'}")

    return validation_results

# Run validation
if IN_COLAB and 'pipeline_results' in locals():
    validation_results = validate_training_outputs()

## 8. Final Steps & Cleanup

In [None]:
# Create final deployment package
# 'success' only means that no phase failed. It is also True after a dry run
# or when the run stopped early (session ending, time budget used up), so the
# package is only built once every enabled phase has actually completed.
if IN_COLAB and 'pipeline_results' in locals() and pipeline_results['success']:
    unfinished_phases = [
        p.name for p in orch_state.phases
        if p.status != 'completed'
        and (p.config.get('enabled', True) if isinstance(p.config, dict) else True)
    ]

    if unfinished_phases:
        print("\n⏭️ Skipping deployment package; phases not completed: "
              + ", ".join(unfinished_phases))
    else:
        print("\n📦 Creating final deployment package...")

        try:
            deployment_package = drive_manager.create_training_package(
                f"algospace8_deployment_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
            )
            print(f"✅ Deployment package created: {deployment_package}")
        except Exception as e:
            # Packaging is best-effort: report the problem and let the
            # remaining cells run.
            print(f"❌ Failed to create deployment package: {e}")

In [None]:
# Final summary
banner = "=" * 60
print("\n" + banner)
print("🎉 AlgoSpace-8 Training Pipeline Complete!")
print(banner)

# Headline numbers, available only if the pipeline cell produced results
if 'pipeline_results' in locals():
    phase_entries = pipeline_results['phases']
    completed_count = sum(1 for p in phase_entries if p['status'] == 'completed')
    outcome = 'SUCCESS' if pipeline_results['success'] else 'FAILED'

    print(f"\n📊 Summary:")
    print(f"   Status: {outcome}")
    print(f"   Runtime: {pipeline_results['total_runtime']:.2f} hours")
    print(f"   Phases: {completed_count}/{len(phase_entries)} completed")

# Output location and remaining session time are only known in Colab
if IN_COLAB:
    print(f"\n💾 All outputs saved to: {drive_manager.base_path}")
    print(f"\n⏱️ Session time remaining: {session_monitor.get_remaining_time()['remaining_hours']:.1f} hours")

for line in (
    "\n🚀 Next steps:",
    "   1. Review the pipeline report and validation results",
    "   2. Run the Integration Test notebook to verify all components",
    "   3. Use the Production Export notebook to prepare for deployment",
    "   4. Deploy to production environment",
    "\n✨ Happy trading!",
):
    print(line)

## Summary

This Training Orchestrator notebook successfully manages the complete AlgoSpace-8 training pipeline:

1. **Automated Execution**: Runs all training phases in sequence
2. **Time Management**: Optimizes for <24 hour Colab runtime
3. **Error Handling**: Graceful failure recovery and state persistence
4. **Progress Monitoring**: Real-time status updates and visualizations
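5. **Resume Capability**: Resumes at phase granularity after an interruption — in Colab the orchestration state is saved after each phase, so completed phases are skipped and an interrupted phase is started again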
6. **Validation**: Ensures all outputs are created correctly
7. **Deployment Ready**: Creates final package for production

The pipeline is now ready for production use!