In [45]:
import os
import pandas as pd
import shutil

# Paths
meta_file_path = '/kaggle/input/stock-market-dataset/symbols_valid_meta.csv'
stocks_folder_path = '/kaggle/input/stock-market-dataset/stocks'
final_stocks_folder_path = '/kaggle/working/Final stocks'

# Load the CSV file containing metadata
df = pd.read_csv(meta_file_path)

# Define technology-related keywords
tech_keywords = ["Technology", "Tech", "Software", "Semiconductor", "Electronics", "IT", "Computing"]

# Filter stocks that contain any of the technology-related keywords in their name
tech_stocks = df[df['Security Name'].str.contains('|'.join(tech_keywords), case=False, na=False)]

# Select the first 20 stocks that match the criteria
tech_stocks_sample = tech_stocks.head(20)

# Create the "Final stocks" folder if it doesn't exist
os.makedirs(final_stocks_folder_path, exist_ok=True)

# Copy the CSV files of the selected stocks to the "Final stocks" folder
for symbol in tech_stocks_sample['Symbol']:
    stock_file_name = f"{symbol}.csv"
    source_file_path = os.path.join(stocks_folder_path, stock_file_name)
    destination_file_path = os.path.join(final_stocks_folder_path, stock_file_name)
    
    if os.path.exists(source_file_path):
        shutil.copy(source_file_path, destination_file_path)

print(f"Copied {len(tech_stocks_sample)} stock files to '{final_stocks_folder_path}'")


Copied 20 stock files to '/kaggle/working/Final stocks'


Custom trading Env with no indicators

In [46]:
import gym
from gym import spaces
import numpy as np
import pandas as pd

class SimplifiedTradingEnv(gym.Env):
    """A custom trading environment with no technical indicators."""
    def __init__(self, data, initial_balance=10000):
        super(SimplifiedTradingEnv, self).__init__()
        self.initial_balance = initial_balance
        self.data = data.copy()
        self.current_step = 0
        self.balance = initial_balance
        self.stock_held = 0
        self.done = False
        
        # Define action and observation space
        self.action_space = spaces.Discrete(3)  # 0 = Sell, 1 = Hold, 2 = Buy
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)  # Reduced to balance, stock held, current price

    def reset(self):
        self.balance = self.initial_balance
        self.current_step = 0
        self.stock_held = 0
        self.done = False
        return self._get_observation()

    def _get_observation(self):
        current_price = self.data['Adj Close'].iloc[self.current_step]
        return np.array([self.balance, self.stock_held, current_price])

    def step(self, action):
        # Increment the current step
        self.current_step += 1

        # Ensure the current_step does not exceed the data length
        if self.current_step >= len(self.data):
            self.current_step = len(self.data) - 1
            self.done = True
        else:
            self.done = False

        # Get the current price
        current_price = self.data['Adj Close'].iloc[self.current_step]

        # Execute the action
        if action == 0:  # Sell
            if self.stock_held > 0:
                self.balance += self.stock_held * current_price
                self.stock_held = 0
                reward = self.stock_held * current_price * 0.01  # Reward based on a fraction of the sell value
            else:
                reward = -1  # Lower penalty for trying to sell with no stock held
        elif action == 2:  # Buy
            if self.balance >= current_price:
                self.stock_held = self.balance / current_price
                self.balance = 0
                reward = -1  # Lower penalty for each buy action
            else:
                reward = -1  # Lower penalty if unable to buy due to insufficient balance
        else:  # Hold
            reward = 0  # No reward or penalty for holding

        # Calculate the next portfolio value
        if self.current_step + 1 < len(self.data):
            next_price = self.data['Adj Close'].iloc[self.current_step + 1]
        else:
            next_price = current_price

        portfolio_value = self.balance + self.stock_held * next_price
        obs = self._get_observation()

        return obs, reward, self.done, {}



    def render(self, mode='human'):
        print(f'Step: {self.current_step}, Balance: {self.balance}, Stock Held: {self.stock_held}, Portfolio Value: {self.balance + self.stock_held * self.data["Adj Close"].iloc[self.current_step]}')

    def close(self):
        pass


In [47]:
import torch
import torch.nn as nn
import torch.optim as optim

class AlphaZeroNetwork(nn.Module):
    def __init__(self, input_dim, action_dim):
        super(AlphaZeroNetwork, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.fc2 = nn.Linear(256, 128)

        # Output for policy (action probabilities)
        self.policy_head = nn.Linear(128, action_dim)
        self.softmax = nn.Softmax(dim=-1)

        # Output for value (expected reward)
        self.value_head = nn.Linear(128, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))

        policy = self.softmax(self.policy_head(x))
        value = torch.tanh(self.value_head(x))

        return policy, value


In [48]:
from collections import defaultdict
import torch
import numpy as np

class MCTS:
    def __init__(self, network, env, num_simulations=100):
        self.network = network
        self.env = env
        self.num_simulations = num_simulations
        self.Q = defaultdict(float)  # Q-values
        self.N = defaultdict(int)    # Visit counts
        self.P = {}                  # Policy estimates from the network

    def run(self, state):
        for _ in range(self.num_simulations):
            self.simulate(state)
        
        # Convert state to a tuple for hashing
        state_key = tuple(state)
        
        # Select action with maximum visit count
        action_visits = {a: self.N[(state_key, a)] for a in range(self.env.action_space.n)}
        best_action = max(action_visits, key=action_visits.get)
        return best_action

    def simulate(self, state):
        path = []
        while True:
            # Convert state to a tuple for hashing
            state_key = tuple(state)
            
            # Move state_tensor to the same device as the network
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(next(self.network.parameters()).device)
            
            # Forward pass through the network
            policy, value = self.network(state_tensor)
            policy, value = policy.detach().cpu().numpy()[0], value.item()  # Detach and move to CPU for further processing

            total_N = sum(self.N[(state_key, a)] for a in range(self.env.action_space.n))
            ucb1 = {
                a: self.Q[(state_key, a)] + policy[a] * np.sqrt(total_N) / (1 + self.N[(state_key, a)])
                for a in range(self.env.action_space.n)
            }
            action = max(ucb1, key=ucb1.get)

            next_state, reward, done, _ = self.env.step(action)
            path.append((state_key, action))

            if done:
                break
            
            state = next_state

        # Backpropagate the value along the path
        for state_key, action in path:
            self.N[(state_key, action)] += 1
            self.Q[(state_key, action)] += (value - self.Q[(state_key, action)]) / self.N[(state_key, action)]


In [49]:
import torch

def train_alpha_zero(env, network, optimizer, num_episodes=1000, num_simulations=50, device=torch.device("cuda")):
    """
    Trains the AlphaZero agent in the provided environment.

    Args:
    - env: The trading environment.
    - network: The AlphaZero network that outputs both policy and value estimates.
    - optimizer: The optimizer used for training the network.
    - num_episodes: Number of training episodes.
    - num_simulations: Number of MCTS simulations to run per action decision.
    - device: The device (CPU or GPU) on which the computations will be performed.
    """
    network.to(device)  # Ensure the network is on the correct device

    for episode in range(num_episodes):
        state = env.reset()  # Reset the environment to the initial state
        done = False
        episode_memory = []
        mcts = MCTS(network, env, num_simulations=num_simulations)

        while not done:
            action = mcts.run(state)  # Use MCTS to decide the best action
            next_state, reward, done, _ = env.step(action)  # Take the action in the environment
            episode_memory.append((state, action, reward))  # Store the state, action, and reward
            state = next_state  # Move to the next state

        # Train the network based on the episode's experience
        train_on_episode(episode_memory, network, optimizer, device)
        print(f"Episode {episode + 1}/{num_episodes} complete.")

def train_on_episode(episode_memory, network, optimizer, device=torch.device("cuda")):
    """
    Trains the AlphaZero network on a single episode's experience.

    Args:
    - episode_memory: List of (state, action, reward) tuples from a single episode.
    - network: The AlphaZero network that outputs both policy and value estimates.
    - optimizer: The optimizer used for training the network.
    - device: The device (CPU or GPU) on which the computations will be performed.
    """
    states, actions, rewards = zip(*episode_memory)  # Unpack the episode's experience

    # Convert to tensors and move to the correct device
    states = torch.tensor(states, dtype=torch.float32).to(device)
    actions = torch.tensor(actions, dtype=torch.long).to(device)
    rewards = torch.tensor(rewards, dtype=torch.float32).to(device)

    optimizer.zero_grad()

    # Forward pass through the network
    policy, value = network(states)

    # Calculate the policy loss (cross-entropy loss)
    policy_loss = -torch.mean(torch.log(policy.gather(1, actions.unsqueeze(1)).squeeze()) * rewards)

    # Calculate the value loss (mean squared error)
    value_loss = torch.mean((value.squeeze() - rewards) ** 2)

    # Total loss
    loss = policy_loss + value_loss
    loss.backward()  # Backpropagate the loss
    optimizer.step()  # Update the network's weights


In [50]:
import numpy as np
import torch

def evaluate_alpha_zero(env, network, num_episodes=10, device=torch.device("cuda")):
    total_rewards = []
    total_profits = []
    
    for episode in range(num_episodes):
        state = env.reset()
        done = False
        total_reward = 0
        initial_net_worth = env.initial_balance
        total_profit = 0

        while not done:
            state_tensor = torch.tensor(state, dtype=torch.float32).unsqueeze(0).to(device)
            policy, _ = network(state_tensor)
            action = torch.argmax(policy).item()
            
            # Print action and state for diagnostic purposes

            state, reward, done, _ = env.step(action)
            total_reward += reward
            
            # Calculate the current net worth
            current_price = state[2]  # The third element in state is the current price
            current_net_worth = env.balance + env.stock_held * current_price
            profit = current_net_worth - initial_net_worth
            
            total_profit = profit  # Since profit is calculated relative to the initial balance

        total_rewards.append(total_reward)
        total_profits.append(total_profit)

    average_reward = np.mean(total_rewards)
    average_profit = np.mean(total_profits)
    
    print(f"Average Reward over {num_episodes} episodes: {average_reward}")
    print(f"Average Profit over {num_episodes} episodes: {average_profit}")
    
    return average_reward, average_profit


In [51]:
import os
import pandas as pd

# Paths
folder_path = '/kaggle/working/Final stocks'  # Path to the folder containing the stock CSVs

# Loop through each CSV file in the folder
for filename in os.listdir(folder_path):
    if filename.endswith('.csv'):
        # Construct the full file path
        file_path = os.path.join(folder_path, filename)
        
        # Read the CSV file
        df = pd.read_csv(file_path)
        
        # Convert the 'Date' column to datetime format
        df['Date'] = pd.to_datetime(df['Date'])
        
        # Create training data for 2018-2019
        train_df = df[(df['Date'] >= '2018-01-01') & (df['Date'] <= '2019-12-31')]
        
        # Create testing data for 2020
        test_df = df[(df['Date'] >= '2020-01-01') & (df['Date'] <= '2020-12-31')]
        
        # Get the stock symbol from the filename (e.g., 'AA' from 'AA.csv')
        symbol = os.path.splitext(filename)[0]
        
        # Dynamically create variable names for train and test DataFrames
        globals()[f"train_{symbol}_df"] = train_df
        globals()[f"test_{symbol}_df"] = test_df

        print(f"DataFrames for {symbol} created: train_{symbol}_df, test_{symbol}_df")


DataFrames for AB created: train_AB_df, test_AB_df
DataFrames for ABEV created: train_ABEV_df, test_ABEV_df
DataFrames for ACBI created: train_ACBI_df, test_ACBI_df
DataFrames for ACGLO created: train_ACGLO_df, test_ACGLO_df
DataFrames for AACG created: train_AACG_df, test_AACG_df
DataFrames for ACTTU created: train_ACTTU_df, test_ACTTU_df
DataFrames for ACH created: train_ACH_df, test_ACH_df
DataFrames for ACGL created: train_ACGL_df, test_ACGL_df
DataFrames for ACP created: train_ACP_df, test_ACP_df
DataFrames for AAOI created: train_AAOI_df, test_AAOI_df
DataFrames for ACTT created: train_ACTT_df, test_ACTT_df
DataFrames for ACAMU created: train_ACAMU_df, test_ACAMU_df
DataFrames for ACC created: train_ACC_df, test_ACC_df
DataFrames for ACGLP created: train_ACGLP_df, test_ACGLP_df
DataFrames for ACLS created: train_ACLS_df, test_ACLS_df
DataFrames for A created: train_A_df, test_A_df
DataFrames for ACAM created: train_ACAM_df, test_ACAM_df
DataFrames for AC created: train_AC_df, tes

In [36]:
print(device)

cuda


In [37]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_AB_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_AB_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [38]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_ABEV_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_ABEV_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [39]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_ACP_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_ACP_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [40]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_ACC_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_ACC_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [41]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_ACAM_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_ACAM_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [42]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_AACG_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_AACG_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [52]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_ACBI_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_ACBI_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [53]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_A_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_A_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [54]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_ACLS_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_ACLS_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl

In [55]:
import torch
import torch.optim as optim

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the training environment
train_env = SimplifiedTradingEnv(train_ACAMU_df)

# Initialize the AlphaZero network
input_dim = train_env.observation_space.shape[0]
action_dim = train_env.action_space.n
network = AlphaZeroNetwork(input_dim, action_dim).to(device)  # Move the network to GPU

# Set up the optimizer
optimizer = optim.Adam(network.parameters(), lr=0.0001)

# Train the AlphaZero agent
train_alpha_zero(train_env, network, optimizer, num_episodes=1000, num_simulations=50, device=device)

# Evaluate the AlphaZero agent on the training environment
average_reward = evaluate_alpha_zero(train_env, network, num_episodes=500)

# Test the AlphaZero agent on the test environment
test_env = SimplifiedTradingEnv(test_ACAMU_df)
average_reward = evaluate_alpha_zero(test_env, network, num_episodes=500)


Episode 1/1000 complete.
Episode 2/1000 complete.
Episode 3/1000 complete.
Episode 4/1000 complete.
Episode 5/1000 complete.
Episode 6/1000 complete.
Episode 7/1000 complete.
Episode 8/1000 complete.
Episode 9/1000 complete.
Episode 10/1000 complete.
Episode 11/1000 complete.
Episode 12/1000 complete.
Episode 13/1000 complete.
Episode 14/1000 complete.
Episode 15/1000 complete.
Episode 16/1000 complete.
Episode 17/1000 complete.
Episode 18/1000 complete.
Episode 19/1000 complete.
Episode 20/1000 complete.
Episode 21/1000 complete.
Episode 22/1000 complete.
Episode 23/1000 complete.
Episode 24/1000 complete.
Episode 25/1000 complete.
Episode 26/1000 complete.
Episode 27/1000 complete.
Episode 28/1000 complete.
Episode 29/1000 complete.
Episode 30/1000 complete.
Episode 31/1000 complete.
Episode 32/1000 complete.
Episode 33/1000 complete.
Episode 34/1000 complete.
Episode 35/1000 complete.
Episode 36/1000 complete.
Episode 37/1000 complete.
Episode 38/1000 complete.
Episode 39/1000 compl