# Reinforcement Learning Based Trading Agent 

Follow the instructions step by step and fill in the TODOs.


## 1. Install and Import Libraries

In [4]:

# Uncomment only if needed
# !pip install yfinance numpy pandas matplotlib
!pip install yfinance
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt




## 2. Download Market Data 

In [3]:
# Ticker whose daily price history drives the rest of the notebook.
symbol = "AAPL"

# Daily bars between the start and end dates below.
data = yf.download(symbol, start="2023-01-01", end="2024-01-01")

# NOTE(review): yfinance appears to report a failed download (unknown ticker,
# no network, rate limiting) by returning an empty frame rather than raising.
# Stop here with a clear message instead of failing later on prices[0].
if data.empty:
    raise RuntimeError(f"No price data was downloaded for {symbol!r}.")

# Flatten the closing prices into a plain list of Python floats, one per row.
prices = data['Close'].values.flatten().tolist()

print("Trading days:", len(prices))
print("Sample price:", prices[0], type(prices[0]))

[*********************100%***********************]  1 of 1 completed

Trading days: 250
Sample price: 123.21119689941406 <class 'float'>





## 3. Trading Environment

In [5]:
import numpy as np

class TradingEnv:
    """Single-share trading simulator over a fixed sequence of prices.

    The agent holds either zero or one share. Actions are integers:
    ``1`` buys one share (only when no share is held and cash covers the
    current price), ``2`` sells the held share, and any other value leaves
    the position unchanged.

    Args:
        prices: Non-empty, indexable sequence of prices, one per time step.
        initial_cash: Cash balance at the start of every episode.

    Raises:
        ValueError: If ``prices`` is empty.
    """

    def __init__(self, prices, initial_cash=10000.0):
        if len(prices) == 0:
            raise ValueError("prices must contain at least one value")
        self.prices = prices
        self.initial_cash = float(initial_cash)
        self.reset()

    def reset(self):
        """Begin a new episode and return the initial state."""
        self.t = 0
        self.cash = self.initial_cash
        self.stock = 0
        # An episode ends once the last price is reached. A one-element
        # series is already there, so it must start out finished; otherwise
        # the first step would read past the end of `prices`.
        self.done = self.t >= len(self.prices) - 1
        return self._get_state()

    def _get_state(self):
        """Return ``[current price, shares held]`` as a float32 array."""
        return np.array([self.prices[self.t], self.stock], dtype=np.float32)

    def step(self, action):
        """Apply ``action`` at the current price and advance one time step.

        Args:
            action: ``1`` to buy, ``2`` to sell, anything else to hold.

        Returns:
            A ``(state, reward, done)`` tuple. ``reward`` is the total
            portfolio value after the step (cash plus any held share valued
            at the new price), not the change in value.

        Raises:
            RuntimeError: If the episode has already ended.
        """
        # Refuse to trade past the end instead of mutating cash/stock and
        # then failing on an out-of-range price lookup.
        if self.done:
            raise RuntimeError("episode has ended; call reset() before step()")

        current_price = self.prices[self.t]

        if action == 1 and self.stock == 0 and self.cash >= current_price:
            self.cash -= current_price
            self.stock = 1
        elif action == 2 and self.stock == 1:
            self.cash += current_price
            self.stock = 0

        self.t += 1

        # The final index is terminal: there is no later price to move to.
        if self.t >= len(self.prices) - 1:
            self.done = True

        reward = self.cash + (self.stock * self.prices[self.t])

        return self._get_state(), reward, self.done

## 4. Q-Learning Setup

In [6]:

# Learning rate, discount factor, and exploration probability.
alpha, gamma, epsilon = 0.1, 0.95, 0.1

# Action-value table: one row per trading day, one column per action
# (0, 1, 2), every entry starting at zero.
Q = np.zeros(shape=(len(prices), 3))

## 5. Train the Agent

In [7]:


# Fresh environment over the downloaded price series.
env = TradingEnv(prices)

episodes = 1000

# Dedicated, seeded generator: exploration no longer depends on the global
# NumPy random state, so training is repeatable for the same prices and the
# same starting Q-table.
rng = np.random.default_rng(42)

for episode in range(episodes):
    env.reset()

    while not env.done:
        # The Q-table is indexed by time step; capture it before stepping.
        t = env.t

        # Epsilon-greedy: explore with probability epsilon, otherwise take
        # the action with the highest current estimate for this time step.
        if rng.random() < epsilon:
            action = int(rng.integers(3))
        else:
            action = int(np.argmax(Q[t]))

        _, reward, done = env.step(action)

        # A terminal transition has no future value to bootstrap from.
        best_future_q = 0.0 if done else np.max(Q[t + 1])

        # Q-learning update toward reward + discounted best next value.
        Q[t, action] += alpha * (reward + gamma * best_future_q - Q[t, action])


print(f"Training completed over {episodes} episodes.")



Training completed over 1000 episodes.


## 6. Evaluate Trained Agent

In [8]:


env = TradingEnv(prices)
env.reset()

# Read the starting capital from the freshly reset environment so the
# profit figure cannot drift from the environment's own opening balance.
starting_cash = env.cash

# Greedy rollout: always take the highest-valued action, with no exploration.
while not env.done:
    t = env.t
    action = np.argmax(Q[t])

    # Only the environment's internal cash/stock/t matter here, so the
    # returned (state, reward, done) tuple is not needed.
    env.step(action)

# Final portfolio value: cash plus any held share valued at the price of the
# time step where the episode ended.
final_value = env.cash + (env.stock * env.prices[env.t])

print(f"Final Portfolio Value: ${final_value:,.2f}")
print(f"Total Profit/Loss: ${final_value - starting_cash:,.2f}")

Final Portfolio Value: $10,033.09
Total Profit/Loss: $33.09


## 7. Buy and Hold Baseline

In [9]:
# Buy-and-hold benchmark: starting from 10000 in cash, buy one share at the
# first price, never trade again, and value the share at the last price.
initial_cash = 10000
first_price, last_price = prices[0], prices[-1]

# Cash left idle after the day-one purchase.
remaining_cash = initial_cash - first_price

# Closing worth: the single share at the final price plus the idle cash.
buy_and_hold_value = last_price + remaining_cash

# Report the benchmark's final value and its gain or loss.
print(f"Buy-and-Hold Final Value: ${buy_and_hold_value:,.2f}")
print(f"Buy-and-Hold Profit/Loss: ${buy_and_hold_value - 10000:,.2f}")

Buy-and-Hold Final Value: $10,067.52
Buy-and-Hold Profit/Loss: $67.52
