In [110]:
import os

# Show where the kernel is running (first line) and what lives in that
# directory (second line); both values are emitted by a single print call.
print(os.getcwd(), os.listdir(), sep="\n")

/Users/tom1/Documents/GitHub/Blockhouse-Work-Trial/code
['data_intergration.ipynb', 'TomiCode.py', 'Readme.md', 'benchmark_costs_script.py']


In [111]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Load market data
file_path = '../data/AAPL_Quotes_Data.csv'
data = pd.read_csv(file_path)

# Verify the columns in the loaded DataFrame
print("Columns after loading data:", data.columns)

# Map capitalised headers onto the lower-case names used below.
# `DataFrame.rename` silently skips keys that are not present, so a single
# call replaces the per-column loop and avoids mutating the frame in place.
expected_columns = {'Close': 'close', 'Volume': 'volume', 'Ask_price_1': 'ask_price_1', 'Bid_price_1': 'bid_price_1'}
data = data.rename(columns=expected_columns)

# Print columns after renaming
print("Columns after renaming:", data.columns)

# Add placeholder columns for ask_price_1 and bid_price_1 if necessary
for placeholder in ('ask_price_1', 'bid_price_1'):
    if placeholder not in data.columns:
        data[placeholder] = np.nan  # or 0

# Print columns after adding placeholder columns
print("Columns after adding placeholders:", data.columns)

# Normalize the necessary columns.
# Previously a single missing column raised KeyError, the error was only
# printed, and *none* of the columns were scaled. Now the columns that exist
# (and hold at least one real value) are scaled and the missing ones are
# reported explicitly.
feature_columns = ['close', 'ask_price_1', 'bid_price_1', 'volume']
missing_columns = [name for name in feature_columns if name not in data.columns]
scale_columns = [
    name for name in feature_columns
    if name in data.columns and data[name].notna().any()  # skip all-NaN placeholders
]
if missing_columns:
    print("Error with columns:", missing_columns)
    print("Available columns at scaling:", data.columns)

scaler = MinMaxScaler()
if scale_columns:
    data[scale_columns] = scaler.fit_transform(data[scale_columns])

# Ensure data is sorted by 'Date'
# if 'Date' in data.columns:
#     data.sort_values(by='Date', inplace=True)
#     data.reset_index(drop=True, inplace=True)
# else:
#     raise KeyError("'Date' column not found to sort the DataFrame")

Columns after loading data: Index(['timestamp', 'bid_price_1', 'bid_price_2', 'bid_price_3', 'bid_price_4',
       'bid_price_5', 'bid_size_1', 'bid_size_2', 'bid_size_3', 'bid_size_4',
       'bid_size_5', 'ask_price_1', 'ask_price_2', 'ask_price_3',
       'ask_price_4', 'ask_price_5', 'ask_size_1', 'ask_size_2', 'ask_size_3',
       'ask_size_4', 'ask_size_5'],
      dtype='object')
Columns after renaming: Index(['timestamp', 'bid_price_1', 'bid_price_2', 'bid_price_3', 'bid_price_4',
       'bid_price_5', 'bid_size_1', 'bid_size_2', 'bid_size_3', 'bid_size_4',
       'bid_size_5', 'ask_price_1', 'ask_price_2', 'ask_price_3',
       'ask_price_4', 'ask_price_5', 'ask_size_1', 'ask_size_2', 'ask_size_3',
       'ask_size_4', 'ask_size_5'],
      dtype='object')
Columns after adding placeholders: Index(['timestamp', 'bid_price_1', 'bid_price_2', 'bid_price_3', 'bid_price_4',
       'bid_price_5', 'bid_size_1', 'bid_size_2', 'bid_size_3', 'bid_size_4',
       'bid_size_5', 'ask_price_1

In [112]:
import gym
from gym import spaces
import numpy as np

class CustomTradingEnv(gym.Env):
    """Environment that sells down an inventory while walking the rows of ``data``.

    Actions (``Discrete(3)``):
        0 -- sell_some: sell up to 10 shares
        1 -- sell_more: sell up to 20 shares
        2 -- do_nothing

    Observations are the raw values of the current row of ``data``.
    ``step`` reads the ``'close'`` column, so ``data`` must provide it.
    """

    def __init__(self, data, initial_inventory=1000):
        """Store the market data and the number of shares to liquidate.

        Args:
            data: pandas DataFrame with one row per time step.
            initial_inventory: shares held at the start of every episode.
        """
        super(CustomTradingEnv, self).__init__()
        self.data = data
        # Remembered so that reset() restores the configured amount rather
        # than a hard-coded 1000.
        self.initial_inventory = initial_inventory
        self.inventory = initial_inventory
        self.current_step = 0
        self.action_space = spaces.Discrete(3)  # Actions: sell_some, sell_more, do_nothing
        self.observation_space = spaces.Box(low=0, high=1, shape=(data.shape[1],), dtype=np.float32)

    def reset(self):
        """Rewind to the first row, restore the inventory, return the first observation."""
        self.current_step = 0
        self.inventory = self.initial_inventory
        return self.data.iloc[self.current_step].values

    def step(self, action):
        """Advance one row and execute the chosen sale.

        Args:
            action: 0 (sell_some), 1 (sell_more); anything else does nothing.

        Returns:
            Tuple ``(next_state, reward, done, info)`` where ``reward`` is the
            negated value of the shares traded and ``info`` is an empty dict.
        """
        self.current_step += 1

        # Define how much to sell based on action (never more than is held)
        if action == 0:  # sell_some
            traded_shares = min(10, self.inventory)
        elif action == 1:  # sell_more
            traded_shares = min(20, self.inventory)
        else:  # do_nothing
            traded_shares = 0

        # Simulate the trade execution at the price of the row just entered
        execution_price = self.data.iloc[self.current_step]['close']
        # NOTE(review): the reward is the negated traded value, so selling is
        # penalised -- confirm this sign is intended for a liquidation task.
        reward = - (execution_price * traded_shares)
        self.inventory -= traded_shares

        # Evaluate termination AFTER the trade so that an emptied inventory
        # ends the episode on this step instead of one step late.
        done = self.current_step >= len(self.data) - 1 or self.inventory <= 0

        next_state = self.data.iloc[self.current_step].values
        return next_state, reward, done, {}

    def render(self):
        """Rendering is not implemented."""
        pass

# Re-read the raw quote file and hand it to a fresh environment instance.
data = pd.read_csv(filepath_or_buffer='../data/AAPL_Quotes_Data.csv')
env = CustomTradingEnv(data=data)

In [113]:
import gymnasium as gym
from gymnasium import spaces
import numpy as np

class CustomTradingEnv(gym.Env):
    """
    Skeleton trading environment built on ``gymnasium.Env``.

    NOTE(review): ``reset`` returns only the observation and ``step`` returns
    a 4-tuple, which is the legacy Gym convention. Gymnasium documents
    ``reset -> (obs, info)`` and ``step -> (obs, reward, terminated,
    truncated, info)``; the return shapes are left as they were so existing
    callers keep working -- confirm before passing this class to a library
    that enforces the Gymnasium API.
    """

    def __init__(self, data):
        """
        Store the market data and declare the action/observation spaces.

        Args:
            data: pandas DataFrame or 2-D array with one row per time step.
        """
        super(CustomTradingEnv, self).__init__()
        self.data = data
        self.current_step = 0

        # Define action and observation space
        self.action_space = spaces.Discrete(3)  # Example: Buy, Hold, Sell
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(data.shape[1],), dtype=np.float32)  # Adjust based on observation structure

    def reset(self, seed=None, options=None):
        """
        Reset the environment to the initial state and return the initial observation.

        Args:
            seed (int, optional): Forwarded to ``gymnasium.Env.reset`` to seed
                the environment's random generator.
            options (dict, optional): Accepted for API compatibility; unused.

        Returns:
            observation (array): Observation for the first row of the data.
        """
        super().reset(seed=seed)
        self.current_step = 0
        return self._next_observation()

    def step(self, action):
        """
        Execute one time step within the environment.

        Args:
            action (int): The action taken by the agent.

        Returns:
            observation (array): Next observation.
            reward (float): Reward earned for the action.
            done (bool): Whether the episode has ended.
            info (dict): Additional information.
        """
        self.current_step += 1
        reward = 0  # Define the reward logic here
        done = self.current_step >= len(self.data)
        return self._next_observation(), reward, done, {}

    def _next_observation(self):
        """
        Return the row of ``self.data`` for the current step.

        The index is clamped to the last row: ``step`` sets ``done`` exactly
        when ``current_step`` reaches ``len(data)``, which would otherwise be
        an out-of-range read on the final transition.
        """
        row = min(self.current_step, len(self.data) - 1)
        if hasattr(self.data, "iloc"):
            # pandas: plain ``data[row]`` would be a column lookup by label,
            # not a positional row access.
            return self.data.iloc[row].values
        return self.data[row]

    def render(self, mode='human'):
        """
        Render the environment.
        """
        pass

    def close(self):
        """
        Clean up resources.
        """
        pass

In [114]:
import pandas as pd
import numpy as np
from stable_baselines3 import SAC
from stable_baselines3.common.env_checker import check_env
import gym

# Example data creation for demonstration purposes.
# A seeded generator keeps the synthetic series -- and therefore every cost
# printed further down -- identical across kernel restarts.
DEMO_SEED = 42
demo_rng = np.random.default_rng(DEMO_SEED)
data = pd.DataFrame({
    'close': demo_rng.random(100),  # This should be actual market data 'close' prices
    'volume': demo_rng.integers(1, 100, 100)  # This should be actual market data 'volume'
})

def twap_strategy(initial_inventory, num_steps):
    """Split ``initial_inventory`` into ``num_steps`` near-equal slices.

    Plain floor division drops the remainder (e.g. 10 shares over 3 steps
    would schedule only 9). Here the leftover whole shares are handed out one
    per slice starting from the first, so for integer inputs the schedule sums
    to ``initial_inventory``.

    Args:
        initial_inventory: Total number of shares to trade (integer expected;
            a fractional remainder of a float input is left unallocated).
        num_steps: Number of slices in the schedule.

    Returns:
        List of ``num_steps`` share quantities; empty when ``num_steps <= 0``.
    """
    if num_steps <= 0:
        return []
    base, remainder = divmod(initial_inventory, num_steps)
    extra = int(remainder)  # number of slices that receive one additional share
    return [base + 1 if step < extra else base for step in range(num_steps)]

def vwap_strategy(data, initial_inventory):
    """Allocate ``initial_inventory`` across rows in proportion to traded volume.

    Args:
        data: DataFrame with a ``'volume'`` column, one row per time step.
        initial_inventory: Total number of shares to trade.

    Returns:
        List with one (float) share quantity per row of ``data``. When the
        total volume is zero the weights are undefined, so the inventory is
        split evenly instead of returning NaN for every step. An empty
        ``data`` yields an empty list.
    """
    volumes = data['volume']
    num_steps = len(volumes)
    if num_steps == 0:
        return []

    total_volume = volumes.sum()
    if total_volume == 0:
        # No volume information at all: fall back to an even split.
        return [initial_inventory / num_steps] * num_steps

    # Vectorised equivalent of initial_inventory * (volume / total_volume) per row.
    return (initial_inventory * (volumes / total_volume)).tolist()

def backtest_strategy(strategy, data, initial_inventory):
    """Price a trading schedule against the ``'close'`` column of ``data``.

    Args:
        strategy: Iterable of share quantities; entry ``i`` is traded at the
            close of row ``i``.
        data: DataFrame with a ``'close'`` column.
        initial_inventory: Kept for interface compatibility; the cost does not
            depend on it.

    Returns:
        Sum of ``close * shares`` over the schedule (``0`` for an empty one).

    Raises:
        IndexError: If ``strategy`` has more entries than ``data`` has rows.
    """
    # Pull the price column once instead of materialising a whole row per step.
    closes = data['close'].to_numpy()
    total_cost = 0
    for step, shares_to_trade in enumerate(strategy):
        total_cost += closes[step] * shares_to_trade
    return total_cost

# Guard: both benchmarks are priced off the 'close' column.
assert 'close' in data.columns, "'close' column is missing from data"

# TWAP benchmark -- equal slices over every row, then priced.
twap_trades = twap_strategy(initial_inventory=1000, num_steps=len(data))
twap_cost = backtest_strategy(strategy=twap_trades, data=data, initial_inventory=1000)

# VWAP benchmark -- slices weighted by volume, then priced.
vwap_trades = vwap_strategy(data=data, initial_inventory=1000)
vwap_cost = backtest_strategy(strategy=vwap_trades, data=data, initial_inventory=1000)

# Define the custom environment for SAC
# NOTE(review): the environment checker run on this class reports that the
# environment must inherit from gymnasium.Env; this class still derives from
# `gym.Env` and uses the 4-tuple step API that the rollout loop relies on.
# NOTE(review): SAC in stable-baselines3 is documented for continuous (Box)
# action spaces -- confirm that a Discrete action space is usable here.
class CustomTradingEnv(gym.Env):
    """Environment that walks the rows of ``data`` one step at a time.

    The action space is ``Discrete(len(data))`` and the reward is the negated
    absolute difference between the action and the current ``'close'`` value.
    """

    def __init__(self, data):
        """Store ``data`` (DataFrame with a ``'close'`` column) and declare the spaces."""
        super(CustomTradingEnv, self).__init__()
        self.data = data
        self.action_space = gym.spaces.Discrete(len(data))
        # NOTE(review): bounds are [0, 1]; columns outside that range (e.g. a
        # raw 'volume') fall outside the declared space -- confirm scaling.
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(len(data.columns),), dtype=np.float32)
        self.current_step = 0

    def _observation(self):
        """Return the current row as float32, the dtype declared by the observation space."""
        return self.data.iloc[self.current_step].to_numpy(dtype=np.float32)

    def reset(self):
        """Rewind to the first row and return its observation."""
        self.current_step = 0
        return self._observation()

    def step(self, action):
        """Advance one row.

        Returns:
            Tuple ``(obs, reward, done, info)``. ``obs`` is always a valid row
            (the last one on the terminal step) rather than ``None``, and
            ``info`` is an empty dict.
        """
        last_step = len(self.data) - 1
        # Clamp so that a step taken after the episode ended does not index
        # past the final row.
        self.current_step = min(self.current_step + 1, last_step)
        done = self.current_step >= last_step
        obs = self._observation()
        reward = -abs(action - self.data['close'].iloc[self.current_step])
        return obs, reward, done, {}

# Instantiate the environment and run the stable-baselines3 checker on it
env = CustomTradingEnv(data=data)
check_env(env=env)

# Build a SAC agent with an MLP policy, then train it
model = SAC(policy='MlpPolicy', env=env, verbose=1)
model.learn(total_timesteps=10000)

# Optional: persist the trained agent to disk
model.save(path="sac_trading_model")

# Roll the trained policy through one episode, accumulating the rewards
sac_total_cost = 0
obs = env.reset()
done = False
while True:
    action, _states = model.predict(obs)
    obs, rewards, done, info = env.step(action)
    sac_total_cost += rewards
    if done:
        # Episode finished: stop stepping the environment.
        break

# Report the three results, one per line
print(
    f"TWAP Cost: {twap_cost}",
    f"VWAP Cost: {vwap_cost}",
    f"SAC Model Cost: {sac_total_cost}",
    sep="\n",
)

AssertionError: Your environment must inherit from the gymnasium.Env class cf. https://gymnasium.farama.org/api/env/

In [None]:
# NOTE: this cell needs `env`, `model` and `data` from the cells above; it
# fails with NameError if the training cell did not complete.
obs = env.reset()  # Reset the environment to the initial state
for _ in range(len(data)):
    action, _states = model.predict(obs, deterministic=True)  # Predict action based on current observation
    obs, reward, done, info = env.step(action)  # Execute action in the environment
    if done:
        break  # Break if the episode is done

# Print final results.
# Not every environment defined in this notebook tracks `inventory`, and none
# tracks `cash`, so fall back to 'n/a' instead of raising AttributeError.
print(f"Final inventory: {getattr(env, 'inventory', 'n/a')}")
print(f"Final cash: {getattr(env, 'cash', 'n/a')}")

NameError: name 'model' is not defined