## Importing Libraries

In [4]:
import pandas as pd
import numpy as np

import ta
from stable_baselines3 import PPO

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.utils.data import TensorDataset, DataLoader, random_split
from torch.nn.functional import mse_loss
from torch.optim.lr_scheduler import ReduceLROnPlateau

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer

from scipy.stats import norm

import gym
from gym import spaces


## Creation of Technical Indicators as specified in PPO

In [5]:
class TechnicalIndicators:
    def __init__(self, data):
        self.data = data

    def add_momentum_indicators(self):
        # Using ta.momentum.RSIIndicator
        rsi_indicator = ta.momentum.RSIIndicator(self.data['Close'], window=14)
        self.data['RSI'] = rsi_indicator.rsi()

        # Using ta.trend.MACD
        macd = ta.trend.MACD(self.data['Close'])
        self.data['MACD'] = macd.macd()
        self.data['MACD_signal'] = macd.macd_signal()
        self.data['MACD_hist'] = macd.macd_diff()

        # Using ta.momentum.StochasticOscillator
        stoch = ta.momentum.StochasticOscillator(self.data['High'], self.data['Low'], self.data['Close'],
                                                window=14, smooth_window=3)
        self.data['Stoch_k'] = stoch.stoch()
        self.data['Stoch_d'] = stoch.stoch_signal()

    def add_volume_indicators(self):
        # Using ta.volume.OnBalanceVolumeIndicator
        obv = ta.volume.OnBalanceVolumeIndicator(self.data['Close'], self.data['Volume'])
        self.data['OBV'] = obv.on_balance_volume()

    def add_volatility_indicators(self):
        # Using ta.volatility.BollingerBands
        bb = ta.volatility.BollingerBands(self.data['Close'], window=20)
        self.data['Upper_BB'] = bb.bollinger_hband()
        self.data['Middle_BB'] = bb.bollinger_mavg()
        self.data['Lower_BB'] = bb.bollinger_lband()

        # Using ta.volatility.AverageTrueRange
        atr = ta.volatility.AverageTrueRange(self.data['High'], self.data['Low'], self.data['Close'], window=1)
        self.data['ATR_1'] = atr.average_true_range()
        atr.window = 2
        self.data['ATR_2'] = atr.average_true_range()
        atr.window = 5
        self.data['ATR_5'] = atr.average_true_range()
        atr.window = 10
        self.data['ATR_10'] = atr.average_true_range()
        atr.window = 20
        self.data['ATR_20'] = atr.average_true_range()

    def add_trend_indicators(self):
        # Using ta.trend.ADXIndicator
        adx = ta.trend.ADXIndicator(self.data['High'], self.data['Low'], self.data['Close'], window=14)
        self.data['ADX'] = adx.adx()
        self.data['+DI'] = adx.adx_pos()
        self.data['-DI'] = adx.adx_neg()

        # Using ta.trend.CCIIndicator
        cci = ta.trend.CCIIndicator(self.data['High'], self.data['Low'], self.data['Close'], window=5)
        self.data['CCI'] = cci.cci()

    def add_other_indicators(self):
        # Calculate other custom indicators
        self.data['DLR'] = np.log(self.data['Close'] / self.data['Close'].shift(1))
        self.data['TWAP'] = self.data['Close'].expanding().mean()
        self.data['VWAP'] = (self.data['Volume'] * (self.data['High'] + self.data['Low']) / 2).cumsum() / self.data['Volume'].cumsum()

    def add_all_indicators(self):
        self.add_momentum_indicators()
        self.add_volume_indicators()
        self.add_volatility_indicators()
        self.add_trend_indicators()
        self.add_other_indicators()
        return self.data


## PreProcessing Data

In [9]:
data = pd.read_csv('/kaggle/input/trade-recommendations-dataset/xnas-itch-20230703.tbbo.csv')

data['transformer'] = 0

pd.set_option('display.max_columns', None)

# Preprocessing to create necessary columns
data['price']=data['price']/1e9
data['bid_px_00']=data['bid_px_00']/1e9
data['ask_px_00']=data['ask_px_00']/1e9

data['Close'] = data['price']
data['Volume'] = data['size']
data['High'] = data[['bid_px_00', 'ask_px_00']].max(axis=1)
data['Low'] = data[['bid_px_00', 'ask_px_00']].min(axis=1)
data['Open'] = data['Close'].shift(1).fillna(data['Close'])


ti = TechnicalIndicators(data)
df_with_indicators = ti.add_all_indicators()

In [10]:
df_with_indicators

Unnamed: 0,ts_recv,ts_event,rtype,publisher_id,instrument_id,action,side,depth,price,size,flags,ts_in_delta,sequence,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,transformer,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP
0,1688371200660869841,1688371200660704717,1,2,32,T,B,0,194.12,1,130,165124,303634,193.63,194.12,27,27,1,1,AAPL,0,194.12,1,194.12,193.63,194.12,,,,,,,1,,,,0.49,0.49,0.49,0.49,0.49,0.000000,0.000000,0.000000,,,194.120000,193.875000
1,1688371201201402566,1688371201201237816,1,2,32,T,B,0,194.11,2,130,164750,304724,193.90,194.11,5,400,1,1,AAPL,0,194.11,2,194.11,193.90,194.12,,,,,,,-1,,,,0.22,0.22,0.22,0.22,0.22,0.000000,0.000000,0.000000,,-0.000052,194.115000,193.961667
2,1688371201233688992,1688371201233524761,1,2,32,T,B,0,194.11,8,130,164231,304850,193.90,194.11,5,398,1,1,AAPL,0,194.11,8,194.11,193.90,194.11,,,,,,,7,,,,0.21,0.21,0.21,0.21,0.21,0.000000,0.000000,0.000000,,0.000000,194.113333,193.993182
3,1688371201317556361,1688371201317392163,1,2,32,T,B,0,194.11,2,130,164198,305101,193.90,194.11,5,390,1,1,AAPL,0,194.11,2,194.11,193.90,194.11,,,,,,,9,,,,0.21,0.21,0.21,0.21,0.21,0.000000,0.000000,0.000000,,0.000000,194.112500,193.995000
4,1688371201478520666,1688371201478356044,1,2,32,T,B,0,194.00,7,130,164622,306430,193.90,194.00,5,200,1,1,AAPL,0,194.00,7,194.00,193.90,194.11,,,,,,,2,,,,0.21,0.21,0.21,0.21,0.21,0.000000,0.000000,0.000000,-74.468085,-0.000567,194.090000,193.979250
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59266,1688417954514485218,1688417954514320323,1,2,32,T,B,0,192.44,6,130,164895,252532002,192.40,192.44,40,7,1,1,AAPL,0,192.44,6,192.44,192.40,192.40,63.061829,0.005002,0.006368,-0.001366,100.000000,33.333333,7305825,192.444977,192.4175,192.390023,0.04,0.04,0.04,0.04,0.04,15.474528,6.612534,3.765166,118.055556,0.000208,192.701135,192.722211
59267,1688417961020718430,1688417961020553920,1,2,32,T,B,0,192.44,1,130,164510,252532102,192.40,192.44,40,1,1,1,AAPL,0,192.44,1,192.44,192.40,192.44,63.061829,0.006355,0.006365,-0.000011,100.000000,66.666667,7305826,192.447411,192.4195,192.391589,0.04,0.04,0.04,0.04,0.04,16.329018,5.953254,3.389772,83.333333,0.000000,192.701131,192.722211
59268,1688417973297905504,1688417973297741235,1,2,32,T,A,0,192.40,5,130,164269,252532347,192.40,192.46,40,6,1,1,AAPL,0,192.40,5,192.46,192.40,192.44,44.499081,0.004151,0.005922,-0.001771,0.000000,66.666667,7305821,192.447411,192.4195,192.391589,0.06,0.06,0.06,0.06,0.06,19.013869,9.751295,2.919558,12.820513,-0.000208,192.701125,192.722211
59269,1688417996889779362,1688417996889614660,1,2,32,T,B,0,192.45,3,130,164702,252532944,192.40,192.45,35,16,1,1,AAPL,0,192.45,3,192.45,192.40,192.40,60.250043,0.006366,0.006011,0.000355,83.333333,61.111111,7305824,192.451394,192.4220,192.392606,0.05,0.05,0.05,0.05,0.05,21.506945,8.671762,2.596344,100.000000,0.000260,192.701121,192.722211


In [11]:
# Dropping irrelevant columns
df_with_indicators = df_with_indicators.drop(['rtype', 'publisher_id', 'instrument_id', 'action', 'depth', 'sequence'], axis=1)

In [12]:
print(df_with_indicators['side'].unique())
print(df_with_indicators['flags'].unique())

['B' 'A' 'N']
[130   0 128]


In [16]:
# Converting columns having categorical values to numeric
market_features_df = df_with_indicators[35:]

side_mapping = {'A': 1, 'B': 2, 'N': 0}
market_features_df['side'] = market_features_df['side'].map(side_mapping)

flag_mapping = {130: 1, 128: 2, 0: 0}
market_features_df['flags'] = market_features_df['flags'].map(flag_mapping)
market_features_df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  market_features_df['side'] = market_features_df['side'].map(side_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  market_features_df['flags'] = market_features_df['flags'].map(flag_mapping)


Unnamed: 0,ts_recv,ts_event,side,price,size,flags,ts_in_delta,bid_px_00,ask_px_00,bid_sz_00,ask_sz_00,bid_ct_00,ask_ct_00,symbol,transformer,Close,Volume,High,Low,Open,RSI,MACD,MACD_signal,MACD_hist,Stoch_k,Stoch_d,OBV,Upper_BB,Middle_BB,Lower_BB,ATR_1,ATR_2,ATR_5,ATR_10,ATR_20,ADX,+DI,-DI,CCI,DLR,TWAP,VWAP
35,1688371214386057385,1688371214385893078,0,194.05,50,1,164307,194.0,194.30,3101,19,4,10,AAPL,0,194.05,50,194.30,194.0,194.05,66.345998,0.004881,-0.002740,0.007621,24.242424,52.525253,1339,194.065621,194.0170,193.968379,0.30,0.30,0.30,0.30,0.30,97.054201,29.840825,0.207333,166.666667,0.000000,194.020000,194.021894
36,1688371214386063777,1688371214385899379,0,194.05,50,1,164398,194.0,194.30,3101,19,4,10,AAPL,0,194.05,50,194.30,194.0,194.05,66.345998,0.005738,-0.001045,0.006783,24.242424,38.383838,1389,194.068990,194.0200,193.971010,0.30,0.30,0.30,0.30,0.30,97.166043,22.648210,0.157359,83.333333,0.000000,194.020811,194.025188
37,1688371215804852019,1688371215804687301,2,194.21,10,1,164718,194.0,194.21,3101,29,4,1,AAPL,0,194.21,10,194.21,194.0,194.05,91.245439,0.019108,0.002986,0.016122,72.727273,40.404040,1399,194.125889,194.0305,193.935111,0.21,0.21,0.21,0.21,0.21,97.269897,19.165760,0.133163,79.268293,0.000824,194.025789,194.025596
38,1688371219671476629,1688371219671312224,0,194.14,10,1,164405,194.0,194.16,3101,400,4,1,AAPL,0,194.14,10,194.16,194.0,194.21,67.659853,0.023781,0.007145,0.016636,51.515152,49.494949,1389,194.142928,194.0375,193.932072,0.21,0.21,0.21,0.21,0.21,97.366332,16.442961,0.114245,-3.205128,-0.000360,194.028718,194.025873
39,1688371223368835585,1688371223368671235,2,194.13,10,1,164350,194.0,194.13,3101,400,4,1,AAPL,0,194.13,10,194.13,194.0,194.14,65.072133,0.026373,0.010990,0.015382,48.484848,57.575758,1379,194.155247,194.0440,193.932753,0.14,0.14,0.14,0.14,0.14,97.455880,14.921072,0.103671,-113.095238,-0.000052,194.031250,194.026071
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
59266,1688417954514485218,1688417954514320323,2,192.44,6,1,164895,192.4,192.44,40,7,1,1,AAPL,0,192.44,6,192.44,192.4,192.40,63.061829,0.005002,0.006368,-0.001366,100.000000,33.333333,7305825,192.444977,192.4175,192.390023,0.04,0.04,0.04,0.04,0.04,15.474528,6.612534,3.765166,118.055556,0.000208,192.701135,192.722211
59267,1688417961020718430,1688417961020553920,2,192.44,1,1,164510,192.4,192.44,40,1,1,1,AAPL,0,192.44,1,192.44,192.4,192.44,63.061829,0.006355,0.006365,-0.000011,100.000000,66.666667,7305826,192.447411,192.4195,192.391589,0.04,0.04,0.04,0.04,0.04,16.329018,5.953254,3.389772,83.333333,0.000000,192.701131,192.722211
59268,1688417973297905504,1688417973297741235,1,192.40,5,1,164269,192.4,192.46,40,6,1,1,AAPL,0,192.40,5,192.46,192.4,192.44,44.499081,0.004151,0.005922,-0.001771,0.000000,66.666667,7305821,192.447411,192.4195,192.391589,0.06,0.06,0.06,0.06,0.06,19.013869,9.751295,2.919558,12.820513,-0.000208,192.701125,192.722211
59269,1688417996889779362,1688417996889614660,2,192.45,3,1,164702,192.4,192.45,35,16,1,1,AAPL,0,192.45,3,192.45,192.4,192.40,60.250043,0.006366,0.006011,0.000355,83.333333,61.111111,7305824,192.451394,192.4220,192.392606,0.05,0.05,0.05,0.05,0.05,21.506945,8.671762,2.596344,100.000000,0.000260,192.701121,192.722211


In [17]:
features_to_use = [
    'price', 'size', 'side', 'Volume',
    'bid_px_00', 'ask_px_00', 'size', 'flags',
    'RSI', 'MACD', 'ATR_1', 'ATR_2', 'ATR_5', 'ATR_10', 'ATR_20',
     'CCI', 'DLR', 'TWAP', 'VWAP' 
]

In [23]:
# Taking only those columns which are useful
market_df = market_features_df[features_to_use]

# lists to hold input sequences and corresponding target prices
input_sequences = []
target_prices = []

# window size for sequence creation
window_size = 5

# Creating input sequences
for i in range(len(market_df) - window_size):
    sequence = market_df.iloc[i:i+window_size].to_numpy().flatten()
    input_sequences.append(sequence)
    # The target price is the 'price' value after the end of the sequence
    target_prices.append(market_df.iloc[i + window_size]['price'])

In [24]:
# column names for the DataFrame
column_names = []
for i in range(window_size):
    for feature in features_to_use:
        column_names.append(f"{feature}_{i+1}")

In [25]:
# DataFrame with the input sequences and their corresponding target prices
sequence_data_with_features = pd.DataFrame(input_sequences, columns=column_names)
sequence_data_with_features['next_price'] = target_prices

sequence_data_with_features.head()

Unnamed: 0,price_1,size_1,side_1,Volume_1,bid_px_00_1,ask_px_00_1,size_1.1,flags_1,RSI_1,MACD_1,ATR_1_1,ATR_2_1,ATR_5_1,ATR_10_1,ATR_20_1,CCI_1,DLR_1,TWAP_1,VWAP_1,price_2,size_2,side_2,Volume_2,bid_px_00_2,ask_px_00_2,size_2.1,flags_2,RSI_2,MACD_2,ATR_1_2,ATR_2_2,ATR_5_2,ATR_10_2,ATR_20_2,CCI_2,DLR_2,TWAP_2,VWAP_2,price_3,size_3,side_3,Volume_3,bid_px_00_3,ask_px_00_3,size_3.1,flags_3,RSI_3,MACD_3,ATR_1_3,ATR_2_3,ATR_5_3,ATR_10_3,ATR_20_3,CCI_3,DLR_3,TWAP_3,VWAP_3,price_4,size_4,side_4,Volume_4,bid_px_00_4,ask_px_00_4,size_4.1,flags_4,RSI_4,MACD_4,ATR_1_4,ATR_2_4,ATR_5_4,ATR_10_4,ATR_20_4,CCI_4,DLR_4,TWAP_4,VWAP_4,price_5,size_5,side_5,Volume_5,bid_px_00_5,ask_px_00_5,size_5.1,flags_5,RSI_5,MACD_5,ATR_1_5,ATR_2_5,ATR_5_5,ATR_10_5,ATR_20_5,CCI_5,DLR_5,TWAP_5,VWAP_5,next_price
0,194.05,50.0,0.0,50.0,194.0,194.3,50.0,1.0,66.345998,0.004881,0.3,0.3,0.3,0.3,0.3,166.666667,0.0,194.02,194.021894,194.05,50.0,0.0,50.0,194.0,194.3,50.0,1.0,66.345998,0.005738,0.3,0.3,0.3,0.3,0.3,83.333333,0.0,194.020811,194.025188,194.21,10.0,2.0,10.0,194.0,194.21,10.0,1.0,91.245439,0.019108,0.21,0.21,0.21,0.21,0.21,79.268293,0.000824,194.025789,194.025596,194.14,10.0,0.0,10.0,194.0,194.16,10.0,1.0,67.659853,0.023781,0.21,0.21,0.21,0.21,0.21,-3.205128,-0.00036,194.028718,194.025873,194.13,10.0,2.0,10.0,194.0,194.13,10.0,1.0,65.072133,0.026373,0.14,0.14,0.14,0.14,0.14,-113.095238,-5.2e-05,194.03125,194.026071,194.13
1,194.05,50.0,0.0,50.0,194.0,194.3,50.0,1.0,66.345998,0.005738,0.3,0.3,0.3,0.3,0.3,83.333333,0.0,194.020811,194.025188,194.21,10.0,2.0,10.0,194.0,194.21,10.0,1.0,91.245439,0.019108,0.21,0.21,0.21,0.21,0.21,79.268293,0.000824,194.025789,194.025596,194.14,10.0,0.0,10.0,194.0,194.16,10.0,1.0,67.659853,0.023781,0.21,0.21,0.21,0.21,0.21,-3.205128,-0.00036,194.028718,194.025873,194.13,10.0,2.0,10.0,194.0,194.13,10.0,1.0,65.072133,0.026373,0.14,0.14,0.14,0.14,0.14,-113.095238,-5.2e-05,194.03125,194.026071,194.13,100.0,2.0,100.0,194.01,194.13,100.0,1.0,65.072133,0.028103,0.12,0.12,0.12,0.12,0.12,-64.102564,0.0,194.033659,194.028188,194.02
2,194.21,10.0,2.0,10.0,194.0,194.21,10.0,1.0,91.245439,0.019108,0.21,0.21,0.21,0.21,0.21,79.268293,0.000824,194.025789,194.025596,194.14,10.0,0.0,10.0,194.0,194.16,10.0,1.0,67.659853,0.023781,0.21,0.21,0.21,0.21,0.21,-3.205128,-0.00036,194.028718,194.025873,194.13,10.0,2.0,10.0,194.0,194.13,10.0,1.0,65.072133,0.026373,0.14,0.14,0.14,0.14,0.14,-113.095238,-5.2e-05,194.03125,194.026071,194.13,100.0,2.0,100.0,194.01,194.13,100.0,1.0,65.072133,0.028103,0.12,0.12,0.12,0.12,0.12,-64.102564,0.0,194.033659,194.028188,194.02,10.0,0.0,10.0,194.01,194.16,10.0,0.0,43.733638,0.020364,0.15,0.15,0.15,0.15,0.15,-113.425926,-0.000567,194.033333,194.02846,194.01
3,194.14,10.0,0.0,10.0,194.0,194.16,10.0,1.0,67.659853,0.023781,0.21,0.21,0.21,0.21,0.21,-3.205128,-0.00036,194.028718,194.025873,194.13,10.0,2.0,10.0,194.0,194.13,10.0,1.0,65.072133,0.026373,0.14,0.14,0.14,0.14,0.14,-113.095238,-5.2e-05,194.03125,194.026071,194.13,100.0,2.0,100.0,194.01,194.13,100.0,1.0,65.072133,0.028103,0.12,0.12,0.12,0.12,0.12,-64.102564,0.0,194.033659,194.028188,194.02,10.0,0.0,10.0,194.01,194.16,10.0,0.0,43.733638,0.020364,0.15,0.15,0.15,0.15,0.15,-113.425926,-0.000567,194.033333,194.02846,194.01,1.0,1.0,1.0,194.01,194.16,1.0,0.0,42.373281,0.013271,0.15,0.15,0.15,0.15,0.15,-90.909091,-5.2e-05,194.032791,194.028488,194.01
4,194.13,10.0,2.0,10.0,194.0,194.13,10.0,1.0,65.072133,0.026373,0.14,0.14,0.14,0.14,0.14,-113.095238,-5.2e-05,194.03125,194.026071,194.13,100.0,2.0,100.0,194.01,194.13,100.0,1.0,65.072133,0.028103,0.12,0.12,0.12,0.12,0.12,-64.102564,0.0,194.033659,194.028188,194.02,10.0,0.0,10.0,194.01,194.16,10.0,0.0,43.733638,0.020364,0.15,0.15,0.15,0.15,0.15,-113.425926,-0.000567,194.033333,194.02846,194.01,1.0,1.0,1.0,194.01,194.16,1.0,0.0,42.373281,0.013271,0.15,0.15,0.15,0.15,0.15,-90.909091,-5.2e-05,194.032791,194.028488,194.01,100.0,1.0,100.0,194.01,194.16,100.0,1.0,42.373281,0.007562,0.15,0.15,0.15,0.15,0.15,-61.22449,0.0,194.032273,194.031073,194.0


In [26]:
# Separating features and target prices
features = sequence_data_with_features.drop(columns=['next_price']).values
prices = sequence_data_with_features['next_price'].values

In [27]:
# Normalizing the features using StandardScaler
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features)

In [28]:
# Convert to PyTorch tensors
features_tensor = torch.tensor(features_scaled, dtype=torch.float32)
prices_tensor = torch.tensor(prices, dtype=torch.float32).unsqueeze(1)  # Adding dimension for target

In [29]:
# Creating Train, Test and Validation data

dataset = TensorDataset(features_tensor, prices_tensor)
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

val_size = int(0.1 * len(train_dataset))
train_size = len(train_dataset) - val_size
train_dataset, val_dataset = random_split(train_dataset, [train_size, val_size])

# Creating DataLoaders 
batch_size = 64
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [30]:
features_tensor.size()

torch.Size([59231, 95])

## Transformer Model

In [31]:
# Define the Transformer Model
class TradeTransformer(nn.Module):

    def __init__(self, input_dim, num_heads, num_layers, dim_feedforward, output_dim):
        super().__init__()
        # Define the Transformer encoder with the specified parameters
        self.transformer_encoder = nn.TransformerEncoder(nn.TransformerEncoderLayer(d_model=input_dim, nhead=num_heads, dim_feedforward=dim_feedforward), num_layers=num_layers)
        # Define a fully connected layer to produce the output
        self.fc_out = nn.Linear(input_dim, output_dim)

    def forward(self, src):
        out = self.transformer_encoder(src)
        out = self.fc_out(out[:, -1, :])
        return out

# Instantiate the model
model = TradeTransformer(input_dim=features_tensor.shape[1], num_heads=5, num_layers=2, dim_feedforward=256, output_dim=1)
loss_fn = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)



## Fine-Tuning

In [14]:
# Function to train Transformer
def train(dataloader, model, loss_fn, optimizer):
    model.train()   # Setting model to training mode
    total_loss = 0

    for batch, (X, y) in enumerate(dataloader):
        optimizer.zero_grad()
        pred = model(X.unsqueeze(0)) 
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(dataloader)     # Return the avg loss over all batches

In [15]:
# function to evaluate transformer
def evaluate(dataloader, model, loss_fn):
    model.eval()
    total_loss = 0
    with torch.no_grad():
        for batch, (X, y) in enumerate(dataloader):
            pred = model(X.unsqueeze(0))  
            loss = loss_fn(pred, y)
            total_loss += loss.item()
    return total_loss / len(dataloader)

In [16]:
# Grid search to find hyperparameters
def grid_search(train_loader, val_loader, input_dim):
    best_model = None
    best_loss = float('inf')
    best_params = {}

    param_grid = {
        'batch_size': [64, 128],
        'lr': [1e-4, 1e-3],
        'num_heads': [5],
        'num_layers': [2, 3],
        'dim_feedforward': [256, 512]
    }

    for batch_size in param_grid['batch_size']:
        for lr in param_grid['lr']:
            for num_heads in param_grid['num_heads']:
                for num_layers in param_grid['num_layers']:
                    for dim_feedforward in param_grid['dim_feedforward']:
                        print(f'Training with batch_size={batch_size}, lr={lr}, num_heads={num_heads}, num_layers={num_layers}, dim_feedforward={dim_feedforward}')

                        # Model, criterion, optimizer, scheduler
                        model = TradeTransformer(input_dim=input_dim, num_heads=num_heads, num_layers=num_layers, dim_feedforward=dim_feedforward, output_dim=1)
                        criterion = nn.MSELoss()
                        optimizer = Adam(model.parameters(), lr=lr)
                        scheduler = ReduceLROnPlateau(optimizer, 'min', patience=5, factor=0.5)

                        # DataLoader
                        train_loader = DataLoader(train_loader.dataset, batch_size=batch_size, shuffle=True)
                        val_loader = DataLoader(val_loader.dataset, batch_size=batch_size, shuffle=False)

                        # Train model
                        best_val_loss = float('inf')
                        patience_counter = 0
                        for epoch in range(50):
                            train_loss = train(train_loader, model, criterion, optimizer)
                            val_loss = evaluate(val_loader, model, criterion)
                            scheduler.step(val_loss)

                            print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

                            if val_loss < best_val_loss:
                                best_val_loss = val_loss
                                patience_counter = 0
                            else:
                                patience_counter += 1

                            if patience_counter >= 10:  # Early stopping
                                print("Early stopping")
                                break

                        if best_val_loss < best_loss:
                            best_loss = best_val_loss
                            best_model = model
                            best_params = {
                                'batch_size': batch_size,
                                'lr': lr,
                                'num_heads': num_heads,
                                'num_layers': num_layers,
                                'dim_feedforward': dim_feedforward
                            }

    print('Best hyperparameters:', best_params)
    return best_model, best_params

In [17]:
best_model, best_params = grid_search(train_loader, val_loader, input_dim=features_tensor.shape[1])
print('Training complete with best hyperparameters:', best_params)

Training with batch_size=64, lr=0.0001, num_heads=5, num_layers=2, dim_feedforward=256

Epoch 1, Train Loss: 35238.3377, Val Loss: 33959.8274

Epoch 2, Train Loss: 32857.3110, Val Loss: 31407.8030

Epoch 3, Train Loss: 30000.2723, Val Loss: 28262.2780

Epoch 4, Train Loss: 26693.1941, Val Loss: 24809.2748

Epoch 5, Train Loss: 23169.5755, Val Loss: 21238.3834

Epoch 6, Train Loss: 19591.6958, Val Loss: 17678.1422

Epoch 7, Train Loss: 16075.9464, Val Loss: 14246.8369

Epoch 8, Train Loss: 12739.2882, Val Loss: 11042.1385

Epoch 9, Train Loss: 9675.5333, Val Loss: 8157.7662

Epoch 10, Train Loss: 6967.2916, Val Loss: 5670.5850

Epoch 11, Train Loss: 4688.1615, Val Loss: 3637.1538

Epoch 12, Train Loss: 2881.2767, Val Loss: 2090.9715

Epoch 13, Train Loss: 1565.0189, Val Loss: 1029.8614

Epoch 14, Train Loss: 712.1031, Val Loss: 403.7897

Epoch 15, Train Loss: 251.7169, Val Loss: 111.4498

Epoch 16, Train Loss: 61.4950, Val Loss: 17.3315

Epoch 17, Train Loss: 8.9618, Val Loss: 1.0358

E

In [18]:
model = best_model

In [19]:
model

TransformerModel(
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0-2): 3 x TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=65, out_features=65, bias=True)
        )
        (linear1): Linear(in_features=65, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=65, bias=True)
        (norm1): LayerNorm((65,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((65,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
    )
  )
  (fc_out): Linear(in_features=65, out_features=1, bias=True)
)

## PPO Integration

In [20]:
# Function to get price prediction from transformer model
def predict_price(model, features):
    model.eval()
    with torch.no_grad():
        features_tensor = torch.tensor(features, dtype=torch.float32)
        prediction = model(features_tensor).item()
    return prediction

In [21]:
class TradingEnvironment(gym.Env):
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit, transformer_model):
        super(TradingEnvironment, self).__init__()
        self.data = data
        self.transformer_model = transformer_model
        self.daily_trading_limit = daily_trading_limit
        self.current_step = 0
        
        self.transformer_features = features_to_use
        self.state_columns = features_to_use + ['transformer']

        # Initialize balance, shares held, and total shares traded
        self.balance = 10_000_000.0 
        self.shares_held = 0
        self.total_shares_traded = 0

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32)

    def reset(self):
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        return self._next_observation()

    def _next_observation(self):
        current_data = self.data[self.state_columns].iloc[self.current_step].values

        if self.current_step > 5:

            features = self.data[self.transformer_features].iloc[self.current_step-6:self.current_step-1].to_numpy().flatten()
          
            n_features = 95  
            n_samples = len(features) // n_features 

            if len(features) % n_features != 0:
                raise ValueError("The total number of elements in 'features' is not a multiple of 95")


            features_reshaped = features.reshape(n_samples, n_features)


            features_scaled = scaler.transform(features_reshaped)


            features_tensor = torch.tensor(features_scaled, dtype=torch.float32)


            features_tensor = features_tensor.unsqueeze(0)

            # Get prediction
            self.transformer_model.eval()  # Set the model to evaluation mode
            with torch.no_grad():
                predicted_price = self.transformer_model(features_tensor)

            current_data[13] = predicted_price[0][0]        

        return current_data

    def step(self, action):
        expected_price = self.data.iloc[self.current_step]['ask_px_00']
        actual_price = self.data.iloc[self.current_step]['price']
        transaction_time = self.data.iloc[self.current_step]['ts_in_delta']
        self._take_action(action)
        reward = 0
        
        if self.current_step >= len(self.data) - 1:
            self.current_step = 0
        
        if action != 0:
            transaction_cost= self._calculate_transaction_cost(self.data.iloc[self.current_step]['Volume'], 0.3, self.data['Volume'].mean())
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
        
            self.cumulative_reward += reward
        
            if self.trades:
                self.trades[-1]['reward'] = reward
                self.trades[-1]['transaction_cost'] = transaction_cost
                self.trades[-1]['slippage'] = expected_price - actual_price
                self.trades[-1]['time_penalty'] = 100*transaction_time/1e9

        done = self.current_step == len(self.data) - 1
        obs = self._next_observation()

        info = {
        'step': self.current_step,
        'action': action,
        'price': actual_price,
        'shares': self.trades[-1]['shares'] if self.trades else 0
    }
        self.current_step += 1

        return obs, reward, done, info

    def _take_action(self, action):
        current_price = self.data.iloc[self.current_step]['Close']
        current_time = pd.to_datetime(self.data.iloc[self.current_step]['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1 and self.total_shares_traded < self.daily_trading_limit:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if(shares_bought>0):
                self.trades.append(trade_info)
                
        elif action == 2 and self.total_shares_traded < self.daily_trading_limit:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if(shares_sold>0):
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        slippage = expected_price - actual_price
        time_penalty = 100*transaction_time/1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward
    
    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)
    
    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * self.data.iloc[self.current_step]["Close"]}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        # download all trades in a pandas dataframe using .csv
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_ppo.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")



In [22]:
# Define the daily trading limit (total number of shares to trade per day)
daily_trading_limit = 1000

ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Create the trading environment
env = TradingEnvironment(ticker_data, daily_trading_limit, model)

# Define the best hyperparameters
best_hyperparameters = {'learning_rate': 0.0009931989008886031,'n_steps': 512,'batch_size': 128, 'gamma': 0.9916829193042708,'clip_range': 0.21127653449387027,'n_epochs': 6} # type: ignore

# Create the RL model with the best hyperparameters
# model = DQN('MlpPolicy', env, verbose=1)
ppo_model = PPO('MlpPolicy', env, verbose=1, **best_hyperparameters)

# Train the model
ppo_model.learn(total_timesteps=10000)

# Save the model
ppo_model.save("transformer_PPO_model")

# Evaluate the model
obs = env.reset()
for _ in range(len(ticker_data)):
    action, _states = ppo_model.predict(obs)
    obs, rewards, done, info = env.step(action)
    if done:
        break

# Render the final state
env.render()


Using cuda device

Wrapping the env with a `Monitor` wrapper

Wrapping the env in a DummyVecEnv.

----------------------------

| time/              |     |

|    fps             | 120 |

|    iterations      | 1   |

|    time_elapsed    | 4   |

|    total_timesteps | 512 |

----------------------------

----------------------------------------

| time/                   |            |

|    fps                  | 122        |

|    iterations           | 2          |

|    time_elapsed         | 8          |

|    total_timesteps      | 1024       |

| train/                  |            |

|    approx_kl            | 0.01316618 |

|    clip_fraction        | 0.185      |

|    clip_range           | 0.211      |

|    entropy_loss         | -1.09      |

|    explained_variance   | -0.215     |

|    learning_rate        | 0.000993   |

|    loss                 | 0.132      |

|    n_updates            | 6          |

|    policy_gradient_loss | -0.0239    |

|    value_loss     

## Evaluating Model (Trade Blotter)

In [23]:
INITIAL_CASH = 10_000_000  # $10 million

def preprocess_data(df):
    df['liquidity'] = df['bid_sz_00'] * df['bid_px_00'] + df['ask_sz_00'] * df['ask_px_00']
    return df

def calculate_rsi(data, window=14):
    delta = data.diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
    rs = gain / loss
    rsi = 100 - (100 / (1 + rs))
    return rsi

def calculate_vol_and_liquidity(price_df, volume_df, window_size):
    # Calculate rolling statistics
    rolling_mean_vol = price_df.pct_change().rolling(window=window_size).mean()
    rolling_std_vol = price_df.pct_change().rolling(window=window_size).std()
    rolling_mean_liq = volume_df.rolling(window=window_size).mean()
    rolling_std_liq = volume_df.rolling(window=window_size).std()
    
    return rolling_mean_vol, rolling_std_vol, rolling_mean_liq, rolling_std_liq

def get_percentile(current_value, mean, std):
    if std > 0:
        z_score = (current_value - mean) / std
        percentile = norm.cdf(z_score)
    else:
        percentile = 0.5  # No variation
    return percentile

def get_trade_price(base_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction):
    vol_percentile = get_percentile(current_vol, mean_vol, std_vol)
    liq_percentile = get_percentile(current_liq, mean_liq, std_liq)

    # Define price adjustment scenarios based on market conditions
    if vol_percentile >= 0.9 and liq_percentile < 0.1:
        price_adjustment_percent = np.random.uniform(-0.25, -0.15)
    elif vol_percentile <= 0.1 and liq_percentile < 0.1:
        price_adjustment_percent = np.random.uniform(-0.10, -0.05)
    elif vol_percentile >= 0.9 and liq_percentile >= 0.9:
        price_adjustment_percent = np.random.uniform(-0.05, +0.10)
    else:
        price_adjustment_percent = np.random.uniform(-0.05, +0.05)  # Default for normal conditions

    # Adjust price based on trade direction
    if trade_direction == 'BUY':
        adjusted_price = base_price * (1 - price_adjustment_percent)
    else:  # SELL
        adjusted_price = base_price * (1 + price_adjustment_percent)
    
    return adjusted_price


In [24]:
class TradingEnvironmentwithBlotter:
    def __init__(self, data, daily_trading_limit, window_size):
        self.data = preprocess_data(data)
        self.daily_trading_limit = daily_trading_limit
        self.window_size = window_size
        self.state_columns = ['price', 'liquidity', 'RSI', 'MACD', 'MACD_signal', 'MACD_hist', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']
        self.reset()

    def reset(self):
        self.current_step = 0
        self.balance = INITIAL_CASH
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []
        self.portfolio = {'cash': self.balance, 'holdings': {ticker: 0 for ticker in self.data['symbol'].unique()}}
        self.data['RSI'] = calculate_rsi(self.data['price'])
        self.data['pct_change'] = self.data['price'].pct_change()
        self.data['rolling_mean_vol'], self.data['rolling_std_vol'], self.data['rolling_mean_liq'], self.data['rolling_std_liq'] = calculate_vol_and_liquidity(self.data['price'], self.data['liquidity'], self.window_size)

    def step(self):
        row = self.data.iloc[self.current_step]
        current_price = row['price']
        current_time = pd.to_datetime(row['ts_event'])
        current_rsi = row['RSI']
        current_vol = row['pct_change']
        current_liq = row['liquidity']
        mean_vol = row['rolling_mean_vol']
        std_vol = row['rolling_std_vol']
        mean_liq = row['rolling_mean_liq']
        std_liq = row['rolling_std_liq']

        if current_rsi < 30:  # Entry signal based on RSI
            trade_direction = 'BUY'
            trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
            trade_size = (self.portfolio['cash'] * np.random.uniform(0.001, 0.005)) / trade_price
            if self.portfolio['cash'] >= trade_size * trade_price:
                self.portfolio['cash'] -= trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] += trade_size
                trade_status = 'filled'
            else:
                trade_status = 'cancelled'
        elif current_rsi > 70:  # Exit signal based on RSI
            trade_direction = 'SELL'
            if self.portfolio['holdings'][row['symbol']] > 0:
                trade_size = min(self.portfolio['holdings'][row['symbol']], self.portfolio['cash']*np.random.uniform(0.001, 0.005) / current_price)
                trade_price = get_trade_price(current_price, current_vol, current_liq, mean_vol, std_vol, mean_liq, std_liq, trade_direction)
                self.portfolio['cash'] += trade_size * trade_price
                self.portfolio['holdings'][row['symbol']] -= trade_size
                trade_status = 'filled'
            else:
                trade_size = 0
                trade_status = 'cancelled'
        else:
            trade_direction = 'HOLD'
            trade_size = 0
            trade_price = current_price
            trade_status = 'skipped'

        if trade_size > 0:
            expected_price = row['ask_px_00']
            actual_price = row['price']
            transaction_time = row['ts_in_delta']
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self.data['Volume'].mean())
            slippage = expected_price - actual_price
            time_penalty = 1000 * transaction_time / 1e9
            reward = - (slippage + time_penalty + transaction_cost)
        
            self.cumulative_reward += reward
            self.trades.append({
                'step': self.current_step,
                'timestamp': current_time,
                'action': trade_direction,
                'price': trade_price,
                'shares': trade_size,
                'symbol': row['symbol'],
                'reward': reward,
                'transaction_cost': transaction_cost,
                'slippage': slippage,
                'time_penalty': time_penalty
            })

            

        self.current_step += 1
        if self.current_step >= len(self.data) - 1:
            done=True
            self.current_step = 0

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        return volatility * np.sqrt(volume / daily_volume)

    def run(self):
        self.reset()
        for _ in range(len(self.data)):
            self.step()
        return self.cumulative_reward, self.trades

    def render(self):
        print(f'Cumulative reward: {self.cumulative_reward}')
        row = self.data.iloc[self.current_step]
        print(f'Total portfolio value: {self.portfolio["cash"] + self.portfolio["holdings"][row["symbol"]]*row["Close"]}')
        # get trades in a pandas dataframe
        trades_df = pd.DataFrame(self.trades)
        # Save a csv
        trades_df.to_csv('trades_blotter.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Symbol: {trade['symbol']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")

In [25]:
# Filter data for the specified ticker
ticker = 'AAPL'  # Specify the ticker you want to trade
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

window_size = 60
daily_trading_limit = 1000
# Create the trading environment
env = TradingEnvironmentwithBlotter(ticker_data, daily_trading_limit=1000, window_size=window_size)  # Daily trading limit of 1000 shares

# Run the environment
cumulative_reward, trades = env.run()

# Render the results
env.render()

Cumulative reward: -12231.516067279417

Total portfolio value: 9907757.760285439

Step: 105, Timestamp: 2023-07-03 08:05:13.717864037, Action: BUY, Price: 185.66552587341758, Shares: 118.38063742644275, Symbol: AAPL, Reward: -0.38225036012723784, Transaction Cost: 0.08423636012724241, Slippage: 0.12999999999999545, Time Penalty: 0.168014

Step: 124, Timestamp: 2023-07-03 08:05:50.024437448, Action: SELL, Price: 189.00098642693018, Shares: 118.38063742644275, Symbol: AAPL, Reward: -0.27620100928928953, Transaction Cost: 0.07183700928926906, Slippage: 0.040000000000020464, Time Penalty: 0.164364

Step: 134, Timestamp: 2023-07-03 08:06:13.187564319, Action: BUY, Price: 203.3619701056925, Shares: 153.3453079896369, Symbol: AAPL, Reward: -0.2850712182043049, Transaction Cost: 0.025398218204301576, Slippage: 0.09000000000000341, Time Penalty: 0.169673

Step: 135, Timestamp: 2023-07-03 08:06:13.187564319, Action: BUY, Price: 190.17379393309585, Shares: 194.00449562516292, Symbol: AAPL, Reward