# Reinforcement Learning for Trading

This notebook demonstrates a simple Q-Learning agent learning to trade in a synthetic market environment.

## 1. Imports

In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np

from q_learning_agent import QLearningAgent
from trading_env import TradingEnv

%matplotlib inline

## 2. Training Loop

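We train the agent for a fixed number of episodes. Within each episode the agent interacts with the environment step by step and updates its Q-table after every step; at the end of each episode its exploration rate (epsilon) is decayed.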

In [None]:
# --- Configuration ---------------------------------------------------------
EPISODES = 1000
SEED = 42  # fixed seed so Restart Kernel -> Run All reproduces the same curves

# Seed both global RNGs before the environment and agent are created.
# NOTE(review): this only makes the run reproducible if TradingEnv and
# QLearningAgent draw from the global `random` / `numpy.random` generators --
# confirm against their implementations.
random.seed(SEED)
np.random.seed(SEED)

env = TradingEnv(max_steps=100)
agent = QLearningAgent(state_size=env.state_space_size, action_size=env.action_space_size)

portfolio_values = []  # info['net_worth'] from the last step of each episode
rewards_history = []   # sum of per-step rewards for each episode

for e in range(EPISODES):
    state = env.reset()
    total_reward = 0
    done = False

    # Roll out one episode: choose an action, step the environment, let the
    # agent learn from the transition, then advance to the next state.
    while not done:
        action = agent.get_action(state)
        next_state, reward, done, info = env.step(action)
        agent.learn(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

    # End of episode: decay exploration once per episode and record metrics.
    # `info` still holds the dict returned by the final step of this episode.
    agent.decay_epsilon()
    portfolio_values.append(info['net_worth'])
    rewards_history.append(total_reward)

    # Progress report every 100 episodes.
    if (e + 1) % 100 == 0:
        print(f"Episode {e+1}/{EPISODES} - Net Worth: {info['net_worth']:.2f} - Epsilon: {agent.epsilon:.2f}")

## 3. Results Visualization
Plot the final net worth at the end of each training episode to see whether the agent improves over time. The dashed red line marks the initial balance.

In [None]:
# Break-even reference level drawn as a dashed line on the chart.
# NOTE(review): assumed to equal the environment's starting balance --
# confirm against TradingEnv and keep the two in sync.
INITIAL_BALANCE = 10000

# Final net worth recorded at the end of every training episode, drawn with
# the explicit Axes interface rather than the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(portfolio_values)
ax.set_title('Agent Net Worth per Episode')
ax.set_xlabel('Episode')
ax.set_ylabel('Net Worth ($)')
ax.axhline(y=INITIAL_BALANCE, color='r', linestyle='--', label='Initial Balance')
ax.legend()
plt.show()

## 4. Test Run
Run one episode with exploration disabled (greedy policy, epsilon = 0) to see the learned behavior.

In [None]:
# Evaluate the agent on a fresh environment instance with a 200-step horizon.
test_env = TradingEnv(max_steps=200)
state = test_env.reset()
done = False

# Disable exploration so the agent acts on its learned values only.
# NOTE: this overwrites the agent's epsilon in place; the agent stays greedy
# for any later cell until the training cell is re-run.
agent.epsilon = 0

# Per-step traces collected from the env's info dict.
history = {'price': [], 'net_worth': []}

while not done:
    action = agent.get_action(state)
    # The reward is not needed for evaluation. It is bound to `_reward`
    # rather than `_` so IPython's last-output variable is not shadowed.
    state, _reward, done, info = test_env.step(action)
    history['price'].append(info['price'])
    history['net_worth'].append(info['net_worth'])

fig, ax1 = plt.subplots(figsize=(12, 6))

# Left axis: price.
ax1.set_title('Test Episode Performance')
ax1.set_xlabel('Step')
ax1.set_ylabel('Price', color='tab:blue')
ax1.plot(history['price'], color='tab:blue', label='Price')
ax1.tick_params(axis='y', labelcolor='tab:blue')

# Right axis: net worth, on its own scale but sharing the x axis.
ax2 = ax1.twinx()
ax2.set_ylabel('Net Worth', color='tab:green')
ax2.plot(history['net_worth'], color='tab:green', label='Net Worth', linestyle='--')
ax2.tick_params(axis='y', labelcolor='tab:green')

# The two lines live on different Axes, so merge their handles into a single
# legend. It is attached to ax2 (created last, drawn on top) so the plotted
# lines do not paint over it.
price_handles, price_labels = ax1.get_legend_handles_labels()
worth_handles, worth_labels = ax2.get_legend_handles_labels()
ax2.legend(price_handles + worth_handles, price_labels + worth_labels, loc='upper left')

fig.tight_layout()  # keep the right-hand y-label from being clipped
plt.show()