# PPO Trading Agent Training Notebook

This notebook trains a PPO (Proximal Policy Optimization) agent for cryptocurrency trading.

**Usage with Colab VS Code Extension:**
1. Open this notebook in VS Code or Cursor with the Colab extension installed
2. Select Kernel → Colab → New Colab Server
3. Run cells sequentially

**The agent uses your trained prediction models (LSTM/GRU/BiLSTM/DLSTM) as feature extractors.**


## Cell 1: Setup & Mount Drive


In [None]:
# DEBUG: Check what's in your Google Drive
# Run this cell first to see where your files are located

import os

# Mount Drive first. Only expected failures are caught, and the actual reason
# is printed, so a genuine mount error is not hidden behind a generic message.
try:
    from google.colab import drive
    drive.mount('/content/drive')
except ImportError:
    print("Not on Colab (google.colab is not importable) - skipping Drive mount")
except Exception as mount_error:
    print(f"Drive mount failed: {mount_error}")

# List what's in the drive root. Guarded because the mount point does not
# exist when the mount above was skipped or failed; an unguarded listdir would
# raise FileNotFoundError in exactly that situation.
if os.path.isdir('/content/drive'):
    print("Contents of /content/drive:")
    for item in sorted(os.listdir('/content/drive')):
        print(f"  - {item}")
else:
    print("/content/drive does not exist - nothing to list")

# Check MyDrive
if os.path.exists('/content/drive/MyDrive'):
    print("\nContents of /content/drive/MyDrive:")
    # Sorted so the "first 20 items" are the same on every run
    for item in sorted(os.listdir('/content/drive/MyDrive'))[:20]:  # First 20 items
        print(f"  - {item}")

    # Check if Bot 2026 exists
    if os.path.exists('/content/drive/MyDrive/Bot 2026'):
        print("\n✓ Found: /content/drive/MyDrive/Bot 2026")
    else:
        print("\n✗ NOT found: /content/drive/MyDrive/Bot 2026")


ENVIRONMENT CHECK

1. Running on Colab: True
   ✓ Colab environment detected

2. /content exists: True

3. Mounting Google Drive...
   ✗ Error: mount failed


In [None]:
# Setup environment (handles Colab detection, Drive mounting, path setup)
import os
import sys
from pathlib import Path


def find_project_folder(candidates, search_root, folder_name='Bot 2026', max_depth=5):
    """Locate the project folder and return its path, or None if not found.

    The known locations in ``candidates`` are tried first, in order. If none
    of them exists, ``search_root`` is walked top-down looking for a directory
    named ``folder_name``.

    Directories whose path contains more than ``max_depth`` separators are
    still inspected but not descended into. Pruning (rather than aborting the
    walk) matters: os.walk is depth-first, so stopping at the first deep
    directory would leave every sibling branch unsearched.
    """
    for candidate in candidates:
        if os.path.exists(candidate):
            print(f"Found project at: {candidate}")
            return candidate

    # Try to find it dynamically
    print(f"Searching for {folder_name} folder...")
    for root, dirs, _files in os.walk(search_root):
        if folder_name in dirs:
            found = os.path.join(root, folder_name)
            print(f"Found project at: {found}")
            return found
        # Limit search depth: clearing `dirs` in place tells os.walk not to
        # recurse below this directory while the rest of the tree is still visited.
        if root.count(os.sep) > max_depth:
            dirs[:] = []
    return None


# Check if running on Colab
IS_COLAB = 'COLAB_GPU' in os.environ or os.path.exists('/content')

if IS_COLAB:
    print("Running on Google Colab")

    # Mount Google Drive
    try:
        from google.colab import drive
        drive.mount('/content/drive')
    except ImportError:
        # /content exists but the google.colab package does not; say so
        # instead of failing later with an unexplained "folder not found".
        print("google.colab is not importable - skipping Drive mount")

    # Find the Bot 2026 folder (supports multiple Drive locations)
    possible_paths = [
        '/content/drive/MyDrive/Bot 2026',
        '/content/drive/Othercomputers/Mijn laptop/Bot 2026',
        '/content/drive/Othercomputers/My Laptop/Bot 2026',
    ]

    PROJECT_PATH = find_project_folder(possible_paths, '/content/drive')

    if PROJECT_PATH is None:
        raise FileNotFoundError("Could not find 'Bot 2026' folder in Google Drive!")

    PPO_PATH = f'{PROJECT_PATH}/PPO approach'

    # Add to Python path so the project modules imported below resolve
    sys.path.insert(0, PROJECT_PATH)
    sys.path.insert(0, PPO_PATH)

    # Change working directory
    os.chdir(PPO_PATH)
else:
    print("Running locally")
    # Locally the notebook is expected to be started from the PPO folder,
    # with the project root one level above it.
    PROJECT_PATH = os.path.dirname(os.getcwd())
    PPO_PATH = os.getcwd()

print(f"Project path: {PROJECT_PATH}")
print(f"PPO path: {PPO_PATH}")

# Now import our modules
from colab_utils import setup_environment, get_project_path, get_ppo_path
env_info = setup_environment()


Running on Google Colab


KeyboardInterrupt: 

Running on Google Colab


KeyboardInterrupt: 

## Cell 2: Install Dependencies


In [None]:
# Install required packages (only needed on Colab)
if IS_COLAB:
    import subprocess
    import sys

    # Invoke pip through the kernel's own interpreter so the packages land in
    # the environment this notebook actually imports from. check=True raises
    # CalledProcessError on failure, so the success message below is only
    # printed when the install really succeeded.
    subprocess.run(
        [
            sys.executable, "-m", "pip", "install", "-q",
            "stable-baselines3", "gymnasium", "tensorboard", "shimmy",
        ],
        check=True,
    )
    print("Dependencies installed!")
else:
    print("Running locally - ensure dependencies are installed in your venv")


## Cell 3: Verify GPU


In [None]:
import torch

# Report whether a CUDA device is visible and, if so, its name and memory.
has_cuda = torch.cuda.is_available()

print("GPU Check:")
print(f"  CUDA available: {has_cuda}")

if not has_cuda:
    print("  WARNING: No GPU detected. Training will be slower.")
else:
    # Device 0 is the one reported; total memory is converted from bytes to GB.
    device_name = torch.cuda.get_device_name(0)
    print(f"  GPU Device: {device_name}")
    total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"  GPU Memory: {total_gb:.1f} GB")


## Cell 4: Configuration


In [None]:
# Training configuration: every value a reader might want to tune lives here.
CONFIG = {
    # --- Dataset ---
    'dataset': 'ETH-EUR_1H_20240101-20251231',  # Change to your dataset

    # --- Prediction model ---
    # One of 'lstm', 'gru', 'bilstm', 'dlstm', or 'ensemble'
    'prediction_model': 'ensemble',

    # --- Training ---
    'total_timesteps': 500_000,  # Increase for better results (1M+ recommended)
    'checkpoint_freq': 50_000,

    # --- Environment ---
    'transaction_cost': 0.0025,  # 0.25% Bitvavo fee
    'initial_capital': 10_000,
    'max_episode_steps': 1_000,
}

# Echo the active settings so they are recorded next to the training output.
print("Configuration:")
for setting in CONFIG:
    print(f"  {setting}: {CONFIG[setting]}")


## Cell 5: Load Prediction Models


In [None]:
from prediction_wrapper import load_ensemble, load_prediction_model, get_available_models

# Show available models
print("Available models:")
for available_name in get_available_models():
    print(f"  - {available_name}")

# Load prediction models
print(f"\nLoading prediction models for: {CONFIG['dataset']}")

# 'ensemble' goes through load_ensemble; any other value is passed to
# load_prediction_model as the model name.
use_ensemble = CONFIG['prediction_model'] == 'ensemble'
prediction_models = (
    load_ensemble(CONFIG['dataset'])
    if use_ensemble
    else load_prediction_model(CONFIG['prediction_model'], CONFIG['dataset'])
)

# A failed load is reported but not fatal: training continues without the
# prediction features.
if not prediction_models.loaded:
    print("\n⚠ Warning: Could not load prediction models.")
    print("  Training will proceed without prediction features.")
else:
    print("\n✓ Prediction models loaded successfully!")


## Cell 6: Create Trading Environment


In [None]:
from trading_env import TradingEnv
from colab_utils import get_datasets_path
from pathlib import Path


def make_trading_env(train_mode):
    """Build a TradingEnv on the selected dataset.

    ``train_mode=True`` gives the training environment; ``train_mode=False``
    gives the one that uses the test data. All other settings are shared, so
    the two environments cannot drift apart.
    """
    return TradingEnv(
        dataset_path=dataset_path,
        # Prediction features are optional: pass None when loading failed
        prediction_models=prediction_models if prediction_models.loaded else None,
        transaction_cost=CONFIG['transaction_cost'],
        initial_capital=CONFIG['initial_capital'],
        train_mode=train_mode,
        train_split=0.8,
        max_episode_steps=CONFIG['max_episode_steps'],
    )


# Find dataset. Sorted so the same file is chosen on every run when the
# pattern matches more than one file.
datasets_path = get_datasets_path()
dataset_files = sorted(datasets_path.glob(f"*{CONFIG['dataset']}*"))

if not dataset_files:
    print(f"ERROR: Dataset not found: {CONFIG['dataset']}")
    print("Available datasets:")
    for f in sorted(datasets_path.glob("*.csv")):
        print(f"  - {f.name}")
    # Stop here: without a dataset no environment is created, and the
    # following cells would only fail later with a NameError on train_env.
    raise FileNotFoundError(f"Dataset not found: {CONFIG['dataset']}")

if len(dataset_files) > 1:
    print(f"Note: {len(dataset_files)} files match '{CONFIG['dataset']}'; using the first one.")

dataset_path = dataset_files[0]
print(f"Using dataset: {dataset_path.name}")

# Create training environment
train_env = make_trading_env(train_mode=True)

# Create evaluation environment
eval_env = make_trading_env(train_mode=False)  # Use test data

print(f"\n✓ Environments created!")
print(f"  Training steps available: {train_env.data_end_idx - train_env.data_start_idx}")
print(f"  Evaluation steps available: {eval_env.data_end_idx - eval_env.data_start_idx}")
print(f"  Action space: {train_env.action_space}")
print(f"  Observation space: {train_env.observation_space}")


## Cell 7: Train PPO Agent


In [None]:
from ppo_trading_agent import train_with_checkpoints
from colab_utils import get_checkpoints_path

# Checkpoints go into a folder named after the prediction model and dataset.
checkpoint_root = get_checkpoints_path()
run_name = f"{CONFIG['prediction_model']}_{CONFIG['dataset']}"
checkpoint_path = checkpoint_root / run_name
print(f"Checkpoints will be saved to: {checkpoint_path}")

# Announce the run; checkpointing is what lets it recover from a Colab timeout.
print(f"\nStarting training for {CONFIG['total_timesteps']:,} timesteps...")
print("Progress will be saved automatically.")
print("-" * 50)

# Collect the arguments in one place so the run settings are easy to scan.
training_kwargs = dict(
    env=train_env,
    total_timesteps=CONFIG['total_timesteps'],
    checkpoint_freq=CONFIG['checkpoint_freq'],
    checkpoint_path=checkpoint_path,
    eval_env=eval_env,
    eval_freq=10000,
    n_eval_episodes=3,
    resume=True,  # Resume from checkpoint if exists
    device='auto',
)
model = train_with_checkpoints(**training_kwargs)


## Cell 8: Evaluate Trained Agent


In [None]:
from evaluate_ppo import evaluate_agent
import numpy as np

print("Evaluating trained agent...")
print("-" * 50)

# Run the trained model on the evaluation environment (test data) for
# 10 episodes with deterministic actions.
results = evaluate_agent(model, eval_env, n_episodes=10, deterministic=True)

print("\nEvaluation Results:")
reward_summary = f"{results['mean_reward']:.2f} ± {results['std_reward']:.2f}"
print(f"  Mean Reward: {reward_summary}")
# The return percentage is optional in the results; a missing value shows as 0.
mean_return_pct = results.get('mean_return_pct', 0)
print(f"  Mean Return: {mean_return_pct:.2f}%")
print(f"  Episodes: {results['n_episodes']}")


## Cell 9: Save Final Model


In [None]:
from colab_utils import get_ppo_models_path

# Build the destination: ppo_<prediction model>_<dataset>.zip in the models folder.
models_path = get_ppo_models_path()
model_name = "ppo_{}_{}.zip".format(CONFIG['prediction_model'], CONFIG['dataset'])
final_model_path = models_path / model_name

# Save final model
model.save(str(final_model_path))
print(f"\n✓ Model saved to: {final_model_path}")

# Verify save: confirm the file is on disk and report its size in MB.
if not final_model_path.exists():
    print("  ERROR: Save failed!")
else:
    size_mb = final_model_path.stat().st_size / 1e6
    print(f"  Size: {size_mb:.2f} MB")


## Cell 10: Cleanup


In [None]:
# Release both environments, training first and evaluation second.
for open_env in (train_env, eval_env):
    open_env.close()

print("Training complete!")
# Point the reader at the saved model and the follow-up scripts.
print("\nNext steps:")
print(f"  1. Model saved at: {final_model_path}")
print("  2. Run evaluate_ppo.py for detailed analysis")
print("  3. Use visualize_results.py for plots")
