# TG4 - RL Agent Training
## PPO for Crypto Accumulation Strategy

In [None]:
import sys
sys.path.insert(0, '../src')

from data_fetcher import DataFetcher
from models.rl_agent import TradingEnv, train_rl_agent, load_rl_agent
from portfolio import Portfolio
from backtester import Backtester
from strategies.ripple_momentum_lstm import generate_ripple_signals
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## 1. Fetch RLUSD-Inclusive Data

In [None]:
fetcher = DataFetcher()

# Primary pairs, pulled as 1h candles from Kraken; results are keyed by symbol.
symbols = ['XRP/USDT', 'BTC/USDT']
data = {}

for sym in symbols:
    print(f"Fetching {sym} from Kraken...")
    df = fetcher.fetch_ohlcv('kraken', sym, '1h', 2000)
    if df.empty:
        # Nothing came back for this pair: leave it out of `data`.
        continue
    data[sym] = df
    print(f"  Got {len(df)} candles")

# Merge the RLUSD pairs into the same dict (update() overwrites on key clash).
print("\nFetching RLUSD pairs...")
rlusd_data = fetcher.fetch_rlusd_pairs()
data.update(rlusd_data)

## 2. Train PPO Agent

In [None]:
# TensorBoard is not launched from here; start it in a separate terminal with
#   tensorboard --logdir=./tensorboard/
print("Training PPO agent on GPU (ROCm)...")
print("Monitor progress: tensorboard --logdir=./tensorboard/")

# Restrict training to a subset of pairs so the demo run is faster.
# Iterating data.items() keeps the pairs in the order they appear in `data`.
wanted_pairs = ['XRP/USDT', 'BTC/USDT']
training_data = {
    pair: frame for pair, frame in data.items() if pair in wanted_pairs
}

if not training_data:
    print("No data available for training")
else:
    model = train_rl_agent(training_data, timesteps=50000)  # Quick training

## 3. Evaluate RL Agent vs LSTM Strategy

In [None]:
# Load the previously trained model; fall back to `model = None` so that the
# later cells (which check `model` before use) are skipped instead of crashing.
try:
    model = load_rl_agent()
except Exception as exc:
    # `except Exception` (not a bare `except:`) lets KeyboardInterrupt and
    # SystemExit propagate, so interrupting the kernel during a slow load works.
    print("No trained model found - run training cell first")
    # Show the underlying error: a missing file is not the only possible cause.
    print(f"  Reason: {type(exc).__name__}: {exc}")
    model = None
else:
    print("RL model loaded successfully")

In [None]:
# LSTM baseline: backtest the LSTM signal strategy on XRP/USDT so its metrics
# can be compared with the RL agent's results.
if 'XRP/USDT' in data:
    print("=== LSTM Strategy Backtest ===")

    # Signals are generated before the backtester is constructed.
    signals = generate_ripple_signals(data, 'XRP/USDT')
    bt = Backtester(data)
    pf_lstm = bt.run_with_lstm_signals('XRP/USDT', signals)

    # Each metric is computed immediately before it is printed.
    lstm_return = pf_lstm.total_return()
    print(f"LSTM Total Return: {lstm_return:.2%}")
    lstm_sharpe = pf_lstm.sharpe_ratio()
    print(f"LSTM Sharpe Ratio: {lstm_sharpe:.2f}")
    lstm_drawdown = pf_lstm.max_drawdown()
    print(f"LSTM Max Drawdown: {lstm_drawdown:.2%}")

In [None]:
# RL Agent evaluation: roll out one deterministic episode on a fresh
# environment and report steps, cumulative reward and final portfolio value.
# `model is not None` is an explicit check for the "no model loaded" case
# rather than relying on the model object's truthiness.
if model is not None and training_data:
    print("\n=== RL Agent Evaluation ===")
    env = TradingEnv(training_data)
    obs, _ = env.reset()

    total_reward = 0
    done = False
    step = 0

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        # step() returns five values; the 3rd and 4th are the two separate
        # end-of-episode flags. Stop on either one: checking only the first
        # would keep stepping an episode that ended by truncation.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        total_reward += reward
        step += 1

    # Value the final holdings at the environment's current prices.
    prices = env._current_prices()
    final_value = env.portfolio.get_total_usd(prices)

    print(f"RL Total Steps: {step}")
    print(f"RL Total Reward: {total_reward:.4f}")
    print(f"RL Final Portfolio Value: ${final_value:.2f}")
    # 1000 is the assumed starting balance in USD.
    # TODO confirm it matches TradingEnv's initial portfolio.
    print(f"RL Return: {(final_value - 1000) / 1000:.2%}")

## 4. Visualize RL Agent Performance (Evaluation Rollout)

In [None]:
# Plot portfolio value over one deterministic evaluation episode.
# `model is not None` is an explicit check for the "no model loaded" case
# rather than relying on the model object's truthiness.
if model is not None and training_data:
    env = TradingEnv(training_data)
    obs, _ = env.reset()

    portfolio_values = []
    done = False

    while not done:
        action, _ = model.predict(obs, deterministic=True)
        # step() returns five values; the 3rd and 4th are the two separate
        # end-of-episode flags. Stop on either one: checking only the first
        # would keep stepping an episode that ended by truncation.
        obs, reward, terminated, truncated, info = env.step(action)
        done = terminated or truncated
        # Record the portfolio's USD value after every step.
        prices = env._current_prices()
        portfolio_values.append(env.portfolio.get_total_usd(prices))

    plt.figure(figsize=(14, 6))
    plt.plot(portfolio_values)
    plt.title('RL Agent Portfolio Value Over Time')
    plt.xlabel('Step')
    plt.ylabel('Portfolio Value (USD)')
    # Dashed reference line at the 1000 USD starting value.
    plt.axhline(y=1000, color='r', linestyle='--', label='Starting Value')
    plt.legend()
    plt.tight_layout()
    plt.show()