# RL Trading Agent Training

This notebook demonstrates training and evaluating the reinforcement learning trading agent.

In [None]:
import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from src.data import DataLoader, TechnicalIndicators
from src.trading_agent import TradingAgent
from config import RL_CONFIG, ENV_CONFIG

# Default size (width, height in inches) for figures that do not pass their own figsize.
plt.rcParams.update({'figure.figsize': (15, 8)})

## 1. Prepare Data

In [None]:
# Load data
ticker = 'AAPL'
loader = DataLoader(ticker, period='2y')
data = loader.fetch_data()

# Add technical indicators
data_with_indicators = TechnicalIndicators.add_all_indicators(data)

# Chronological split: the first TRAIN_FRACTION of the rows is used for training
# and the remainder is held out for backtesting. No shuffling is done, so the
# test rows always come after the training rows.
# NOTE(review): assumes data_with_indicators is a pandas DataFrame (later cells
# index it with ['Close'].iloc) - confirm against TechnicalIndicators.
TRAIN_FRACTION = 0.8
train_size = int(len(data_with_indicators) * TRAIN_FRACTION)
train_data = data_with_indicators.iloc[:train_size]
test_data = data_with_indicators.iloc[train_size:]

# Fail fast here with a clear message instead of deep inside training or
# backtesting when the download returned too few rows to split.
if len(train_data) == 0 or len(test_data) == 0:
    raise ValueError(
        f"Not enough rows to split for {ticker}: got {len(data_with_indicators)} "
        f"rows ({len(train_data)} train / {len(test_data)} test)."
    )

print(f"Training samples: {len(train_data)}")
print(f"Testing samples: {len(test_data)}")

## 2. Initialize RL Agent

In [None]:
# Work on a copy of the shared RL settings so the imported RL_CONFIG is not
# modified, then lower the step budget so training finishes faster.
rl_config = RL_CONFIG.copy()
rl_config.update({'total_timesteps': 50000})

agent = TradingAgent(rl_config, ENV_CONFIG)
algorithm_name = rl_config['algorithm']
print(f"Trading agent initialized with {algorithm_name}")

## 3. Train Agent

In [None]:
# Fit the agent on the training split only; the test split is reserved for the
# backtest further down.
training_notice = "Training RL agent... This may take several minutes."
print(training_notice)
agent.train(train_data, verbose=1)

## 4. Backtest on Test Data

In [None]:
# Evaluate the trained agent on the held-out split and print a short summary.
metrics = agent.backtest(test_data)

report_lines = (
    "Backtest Results:",
    "=" * 50,
    f"Total Return: {metrics['total_return']:.2%}",
    f"Final Value: ${metrics['final_value']:.2f}",
    f"Number of Trades: {metrics['num_trades']}",
    f"Sharpe Ratio: {metrics['sharpe_ratio']:.2f}",
)
print("\n".join(report_lines))

## 5. Compare with Buy-and-Hold

In [None]:
# Buy-and-hold baseline: relative change of the close price from the first to
# the last row of the test split.
close_prices = test_data['Close']
initial_price = close_prices.iloc[0]
final_price = close_prices.iloc[-1]
buy_hold_return = (final_price - initial_price) / initial_price

# Scale the baseline to the same starting capital the environment is configured with.
initial_balance = ENV_CONFIG['initial_balance']
buy_hold_value = initial_balance * (1 + buy_hold_return)

# Positive means the agent's return exceeded the baseline's.
outperformance = metrics['total_return'] - buy_hold_return

comparison_lines = (
    "Strategy Comparison:",
    "=" * 50,
    f"RL Agent Return:     {metrics['total_return']:>8.2%}",
    f"Buy-and-Hold Return: {buy_hold_return:>8.2%}",
    f"\nRL Agent Value:      ${metrics['final_value']:>10.2f}",
    f"Buy-and-Hold Value:  ${buy_hold_value:>10.2f}",
    f"\nOutperformance:      {outperformance:>8.2%}",
)
print("\n".join(comparison_lines))

## 6. Visualize Trading Actions

In [None]:
# Replay one episode on the test split with the trained policy, recording the
# action taken plus the price and portfolio value reported by the environment
# after each step.
env = agent.create_environment(test_data)
obs, _ = env.reset()

actions, prices, values = [], [], []

done = False
while not done:
    # deterministic=True is passed so the policy is asked for a deterministic action.
    action, _ = agent.model.predict(obs, deterministic=True)
    step_action = int(action)
    obs, reward, terminated, truncated, info = env.step(step_action)

    actions.append(step_action)
    prices.append(info['current_price'])
    values.append(info['total_value'])

    # The episode ends on either flag returned by env.step.
    done = terminated or truncated

# Plot
# Three stacked panels: price with buy/sell markers, portfolio value, and the
# action histogram. Only the two time-series panels share an x-axis (the step
# index). The bar chart has a categorical x-axis ('Hold'/'Buy'/'Sell'), so it
# keeps its own axis; sharing it with the time series would squeeze the bars
# into the left edge and apply the category tick setup to the shared axis.
fig, axes = plt.subplots(3, 1, figsize=(15, 12))
axes[1].sharex(axes[0])
# Hide the duplicate tick labels on the top panel; the panel below carries them.
axes[0].tick_params(labelbottom=False)

# Price with actions
# Action encoding used below: 1 is plotted as Buy, 2 as Sell (0 is counted as Hold).
axes[0].plot(prices, label='Price', linewidth=2)
buy_points = [i for i, a in enumerate(actions) if a == 1]
sell_points = [i for i, a in enumerate(actions) if a == 2]
axes[0].scatter(buy_points, [prices[i] for i in buy_points],
                color='green', marker='^', s=100, label='Buy', zorder=5)
axes[0].scatter(sell_points, [prices[i] for i in sell_points],
                color='red', marker='v', s=100, label='Sell', zorder=5)
axes[0].set_ylabel('Price ($)')
axes[0].set_title('Trading Actions')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Portfolio value
axes[1].plot(values, label='Portfolio Value', linewidth=2, color='purple')
axes[1].axhline(y=initial_balance, color='gray', linestyle='--', alpha=0.5, label='Initial Balance')
axes[1].set_xlabel('Step')
axes[1].set_ylabel('Value ($)')
axes[1].set_title('Portfolio Value Over Time')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

# Actions distribution
action_counts = [actions.count(0), actions.count(1), actions.count(2)]
axes[2].bar(['Hold', 'Buy', 'Sell'], action_counts, color=['gray', 'green', 'red'])
axes[2].set_ylabel('Count')
axes[2].set_title('Action Distribution')
axes[2].grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

# Text summary of how often each action was taken, as a count and as a share
# of all recorded steps.
total_steps = len(actions)
hold_count, buy_count, sell_count = action_counts

print(f"\nAction Distribution:")
print(f"Hold: {hold_count} ({hold_count/total_steps:.1%})")
print(f"Buy:  {buy_count} ({buy_count/total_steps:.1%})")
print(f"Sell: {sell_count} ({sell_count/total_steps:.1%})")

## 7. Test Different Configurations

In [None]:
# Test with different transaction costs
# For each cost, a fresh agent is trained on the training split and backtested
# on the test split. Every iteration runs a full training pass with rl_config,
# so this cell takes roughly len(transaction_costs) times as long as the single
# training run above.
transaction_costs = [0.0001, 0.001, 0.005, 0.01]
results = []

for cost in transaction_costs:
    # Copy so the shared ENV_CONFIG is not modified between runs.
    env_config = ENV_CONFIG.copy()
    env_config['transaction_cost'] = cost

    test_agent = TradingAgent(rl_config, env_config)
    test_agent.train(train_data, verbose=0)
    # Use a sweep-specific name: the notebook-level `metrics` holds the main
    # agent's backtest results and is read by the comparison cell, so it must
    # not be overwritten here.
    cost_metrics = test_agent.backtest(test_data)

    results.append({
        'cost': cost,
        'return': cost_metrics['total_return'],
        'sharpe': cost_metrics['sharpe_ratio']
    })

# Plot results
# Two side-by-side panels over the swept transaction costs: total return on the
# left, Sharpe ratio on the right.
fig, axes = plt.subplots(1, 2, figsize=(15, 5))

costs = [r['cost'] for r in results]
returns = [r['return'] for r in results]
sharpes = [r['sharpe'] for r in results]

# (series, y-axis label, title, extra line style) for each panel, left to right.
panel_specs = [
    (returns, 'Return', 'Return vs Transaction Cost', {}),
    (sharpes, 'Sharpe Ratio', 'Sharpe Ratio vs Transaction Cost', {'color': 'orange'}),
]

for panel_ax, (series, y_label, panel_title, line_style) in zip(axes, panel_specs):
    panel_ax.plot(costs, series, 'o-', linewidth=2, **line_style)
    panel_ax.set_xlabel('Transaction Cost')
    panel_ax.set_ylabel(y_label)
    panel_ax.set_title(panel_title)
    panel_ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()