In [1]:
import pandas as pd
import numpy as np

import plotly.express as px

import sys
sys.dont_write_bytecode = True

In [2]:
# Read the TSLA price CSV, discard the 'Adj Close' column and index the
# rows by the parsed 'Date' column, all in a single method chain.
data_path = '../data/TSLA_stock.csv'
df = (
    pd.read_csv(data_path)
    .drop(columns=['Adj Close'])
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

In [3]:
# Preview the prepared price frame; the rendered output is truncated to the
# first and last rows, with the columns Open, High, Low, Close and Volume.
display(df)

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2010-06-29,3.800000,5.000000,3.508000,4.778000,93831500
2010-06-30,5.158000,6.084000,4.660000,4.766000,85935500
2010-07-01,5.000000,5.184000,4.054000,4.392000,41094000
2010-07-02,4.600000,4.620000,3.742000,3.840000,25699000
2010-07-06,4.000000,4.000000,3.166000,3.222000,34334500
...,...,...,...,...,...
2022-03-18,874.489990,907.849976,867.390015,905.390015,33408500
2022-03-21,914.979980,942.849976,907.090027,921.159973,27327200
2022-03-22,930.000000,997.859985,921.750000,993.979980,35289500
2022-03-23,979.940002,1040.699951,976.400024,999.109985,40225400


In [4]:
from MultiArmedBanditTrader import MultiArmedBanditTrader
from TradingAgent import TradingAgent

from TradingEnvironment import TradingEnvironment
from DeepTradingEnvironment import DeepTradingEnvironment

##### **Regular Agent**

In [5]:
# Environment built over the full price frame, used by the bandit episodes below.
trading_env = TradingEnvironment(df)
# Bandit trader with epsilon=0.5 — presumably the exploration rate of an
# epsilon-greedy policy; confirm against MultiArmedBanditTrader.
agent = MultiArmedBanditTrader(epsilon=0.5)

In [6]:
# Simulate 100 episodes with the bandit agent and store the summed reward of
# every episode in `total_rewards`.
total_rewards = []

for episode in range(100): # nb of simulation episodes

    # Put the environment back to its starting position for this episode.
    trading_env.current_step, trading_env.balance, trading_env.shares = 0, 10000, 0

    # The agent's estimates and counters are wiped as well, so every episode
    # starts from scratch.
    agent.q_values = dict.fromkeys(agent.actions, 0.0)
    agent.action_counts = dict.fromkeys(agent.actions, 0)

    episode_reward = 0

    # Act / step / learn until the environment signals the end of the episode.
    while True:
        action = agent.choose_action()
        reward, done = trading_env.step(action)
        agent.update_q_values(action, reward)
        episode_reward += reward
        if done:
            break

    total_rewards.append(episode_reward)

##### **Random Agent**

In [7]:
# Rebind `trading_env` to a new environment over the same price frame.
trading_env = TradingEnvironment(df)
# Same bandit class with epsilon=1.0; the notebook labels this the "Random Agent"
# (presumably epsilon=1.0 means every action is exploratory — confirm in the class).
random_agent = MultiArmedBanditTrader(epsilon=1.0)

In [8]:
# Baseline run: 100 episodes with the epsilon=1.0 agent, collecting the
# summed reward of each episode in `random_rewards`.
random_rewards = []

for episode in range(100):

    # Restore the environment's step counter, balance and share count.
    trading_env.current_step, trading_env.balance, trading_env.shares = 0, 10000, 0

    # Clear the agent's value estimates and action counters.
    random_agent.q_values = dict.fromkeys(random_agent.actions, 0.0)
    random_agent.action_counts = dict.fromkeys(random_agent.actions, 0)

    episode_reward = 0

    # Keep stepping until the environment reports that the episode is done.
    while True:
        action = random_agent.choose_action()
        reward, done = trading_env.step(action)
        random_agent.update_q_values(action, reward)
        episode_reward += reward
        if done:
            break

    random_rewards.append(episode_reward)

##### **DQN Agent** 

In [9]:
# Rebind `trading_env` to the environment used by the DQN agent, configured with
# window_size=128; its reset() is described below as returning a window (slice)
# of the dataframe.
trading_env = DeepTradingEnvironment(df, window_size=128)

In [10]:
# Build the DQN agent: the state size is taken from the environment window,
# there are three actions, and the frame's column count is passed as the
# third constructor argument.
num_episodes = 100
action_size = 3 # Buy, Sell, Hold
state_size = trading_env.window_size
agent = TradingAgent(state_size, action_size, df.shape[1])

dqn_rewards = []

for episode in range(num_episodes):

    # reset() hands back the first observation: a window (slice) of the dataframe
    state = trading_env.reset()
    episode_reward = 0

    # The environment's own `done` attribute decides when the episode is over.
    while not trading_env.done:
        action = agent.choose_action(state)                      # pick an action for this state
        next_state, reward, done = trading_env.step(action)      # apply it to the environment
        agent.remember(state, action, reward, next_state, done)  # keep the transition
        episode_reward += reward
        state = next_state

    dqn_rewards.append(episode_reward)

    # One replay call per finished episode updates the model's weights; it runs
    # before the progress line so the printed epsilon is the post-replay value.
    agent.replay()
    print(f'Episode {episode+1}/{num_episodes}, Reward: {episode_reward:.2f}, Epsilon: {agent.epsilon:.2f}')

Episode 1/100, Reward: 425478.42, Epsilon: 0.99
Episode 2/100, Reward: 4968738.25, Epsilon: 0.98
Episode 3/100, Reward: 5459081.59, Epsilon: 0.97
Episode 4/100, Reward: 1128847.90, Epsilon: 0.96
Episode 5/100, Reward: 895190.69, Epsilon: 0.95
Episode 6/100, Reward: 292512.52, Epsilon: 0.94
Episode 7/100, Reward: 4505995.04, Epsilon: 0.93
Episode 8/100, Reward: 826828.44, Epsilon: 0.92
Episode 9/100, Reward: 647579.40, Epsilon: 0.91
Episode 10/100, Reward: 6166268.10, Epsilon: 0.90
Episode 11/100, Reward: 4456921.01, Epsilon: 0.90
Episode 12/100, Reward: 2260936.22, Epsilon: 0.89
Episode 13/100, Reward: 4581010.23, Epsilon: 0.88
Episode 14/100, Reward: 96840.27, Epsilon: 0.87
Episode 15/100, Reward: 1744473.15, Epsilon: 0.86
Episode 16/100, Reward: 3997879.96, Epsilon: 0.85
Episode 17/100, Reward: 105479.00, Epsilon: 0.84
Episode 18/100, Reward: 616978.56, Epsilon: 0.83
Episode 19/100, Reward: 11045225.04, Epsilon: 0.83
Episode 20/100, Reward: 1746359.07, Epsilon: 0.82
Episode 21/100, R

##### **Results**

In [11]:
# Collect the per-episode reward series of the three agents in one wide frame:
# one row per episode, one column per agent.
data = pd.DataFrame({'Episode': range(len(total_rewards)),
                     'Regular Agent': total_rewards,
                     'Random Agent': random_rewards,
                     'DQN Agent': dqn_rewards
                     })

# Wide-form line chart: passing a list for `y` makes plotly express name the
# melted columns 'variable' (legend title) and 'value' (y axis). Both are
# relabelled here; the former 'Total Reward' key matched no column and has
# been dropped.
fig = px.line(data, x='Episode', y=['Regular Agent', 'Random Agent', 'DQN Agent'],
              title='Trading performances among some agents',
              labels={'Episode': 'Runs', 'value': 'Total Rewards', 'variable': 'Agent'})

fig.show()

In [12]:
# Report the mean episode reward of each agent, formatted to two decimals.
for agent_label, rewards in (
    ('Regular Agent', total_rewards),
    ('Random Agent', random_rewards),
    ('DQN Agent', dqn_rewards),
):
    print(f'Average reward per season for {agent_label} {np.mean(rewards).item():.2f}')

Average reward per season for Regular Agent 124127.06
Average reward per season for Random Agent 197594.84
Average reward per season for DQN Agent 2325446.09
