In [17]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
# Load the market feature table from the notebook's (Colab-style) data path.
market_features_df = pd.read_csv("/content/market_features_df_new.csv")

# Scaler instance; only referenced by the disabled global-scaling line below.
scaler = StandardScaler()

# Columns that make up one observation vector.
state_columns = [
    'Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'Stoch_k', 'Stoch_d', 'OBV',
    'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI',
]
# Global in-place scaling of the frame is currently disabled:
#market_features_df[state_columns] = scaler.fit_transform(market_features_df[state_columns])

### Transformer structure

In [61]:
import torch
import torch.nn as nn
import torch.optim as optim
class TransformerModel(nn.Module):
    """Transformer encoder that turns a feature sequence into ``output_dim`` scores.

    Every time step is linearly projected to ``model_dim``, passed through a stack
    of ``num_layers`` encoder layers with ``num_heads`` attention heads, averaged
    over the sequence axis and mapped to ``output_dim`` by a final linear layer.
    """

    def __init__(self, input_dim, model_dim, num_heads, num_layers, output_dim):
        super().__init__()
        # Per-step projection from raw features to the encoder width.
        self.embedding = nn.Linear(input_dim, model_dim)
        layer = nn.TransformerEncoderLayer(d_model=model_dim, nhead=num_heads)
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers=num_layers)
        # Output head applied to the sequence-averaged encoding.
        self.fc = nn.Linear(model_dim, output_dim)

    def forward(self, src):
        """Map ``src`` of shape (batch, seq_len, input_dim) to (batch, output_dim)."""
        embedded = self.embedding(src)
        # The encoder consumes (seq_len, batch, model_dim), so swap the first two axes.
        seq_first = embedded.permute(1, 0, 2)
        encoded = self.transformer_encoder(seq_first)
        # Mean-pool over the sequence axis before the output head.
        pooled = encoded.mean(dim=0)
        return self.fc(pooled)

# Transformer hyperparameters
input_dim = len(state_columns)  # one input feature per state column
model_dim, num_heads, num_layers = 64, 4, 2
output_dim = 3

### Trading environment for the transformer

In [62]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler

class TradingEnvironment(gym.Env):
    """Gym environment that replays a market-data frame one row per step.

    Actions are ``0`` (hold), ``1`` (buy) and ``2`` (sell). A saved
    ``TransformerModel`` is loaded at construction time and used by :meth:`run`
    to choose the action for every row.

    ``data`` must contain the state columns listed in ``self.state_columns`` plus
    ``ask_px_00``, ``price``, ``ts_in_delta`` and ``ts_event``.

    Review notes:
      * ``daily_trading_limit`` and ``seq_length`` are stored but not read by any
        method of this class.
      * The scaler is fitted on the frame passed to the constructor, i.e. on the
        data being replayed. NOTE(review): if the model was trained with a
        different scaler its inputs will not match - confirm.
      * :meth:`run` feeds the model one row at a time (sequence length 1).
      * This notebook defines ``TradingEnvironment`` in several cells; the cell
        executed last is the definition in effect.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit, transformer_model_path, seq_length=50,
                 model_dim=64, num_heads=4, num_layers=2):
        """Build the environment and load the transformer weights.

        Args:
            data: DataFrame of market rows to replay (index is reset internally).
            daily_trading_limit: stored on the instance; not used by this class.
            transformer_model_path: path of a ``state_dict`` saved for ``TransformerModel``.
            seq_length: stored on the instance; not used by this class.
            model_dim, num_heads, num_layers: architecture of the saved model;
                the defaults are the values that used to be hard-coded here.

        Raises:
            ValueError: if ``data`` has no rows.
        """
        super().__init__()
        if len(data) == 0:
            raise ValueError("data must contain at least one row")
        self.data = data.reset_index(drop=True)  # Ensure the data index is reset
        self.daily_trading_limit = daily_trading_limit
        self.seq_length = seq_length

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

        # Cache per-episode invariants so step() does not rebuild them on every row.
        self._state_frame = self.data[self.state_columns]
        self._mean_volume = self.data['Volume'].mean()

        # Fit the scaler on the frame that will be replayed
        self.scaler = StandardScaler()
        self.scaler.fit(self._state_frame)

        # Load the transformer model (3 outputs: one score per action)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = TransformerModel(len(self.state_columns), model_dim, num_heads, num_layers, 3).to(self.device)
        # NOTE(security): torch.load unpickles the file; only load checkpoints from
        # a trusted source (consider weights_only=True where the torch version supports it).
        self.model.load_state_dict(torch.load(transformer_model_path, map_location=self.device))
        self.model.eval()

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

        # Initialise every piece of episode state so step()/render() work before reset().
        self._reset_episode_state()

    def _reset_episode_state(self):
        """Set the step counter, portfolio, reward total and trade log to their start values."""
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []

    def _row_index(self):
        """Return ``current_step`` clamped to the last valid row of ``self.data``."""
        return min(self.current_step, len(self.data) - 1)

    def reset(self):
        """Start a new episode and return the first (unscaled) observation."""
        self._reset_episode_state()
        return self._next_observation()

    def _next_observation(self):
        """Return the unscaled state-column values of the current row.

        After the final step the index is clamped, so the last row is returned
        instead of raising ``IndexError``.
        """
        return self._state_frame.iloc[self._row_index()].values

    def _get_scaled_observation(self):
        """Return the current row's state columns standardised with ``self.scaler`` (1-D array)."""
        # Slice with a list so a one-row DataFrame (with column names) reaches the scaler.
        obs_df = self._state_frame.iloc[[self._row_index()]]
        scaled_obs = self.scaler.transform(obs_df).flatten()
        return scaled_obs

    def step(self, action):
        """Apply ``action`` to the current row and advance by one row.

        Returns:
            ``(observation, reward, done, info)``. ``reward`` is 0 for hold and the
            (negative) cost from :meth:`_calculate_reward` for buy/sell. ``done`` is
            True when the row just processed was the last one. ``info['shares']``
            is the size of the trade recorded at this step, or 0 if none was.

        Raises:
            IndexError: if called again after the last row has been processed.
        """
        if self.current_step >= len(self.data):
            raise IndexError("step() called after the last row; call reset() to start a new episode")

        row = self.data.iloc[self.current_step]
        expected_price = row['ask_px_00']
        actual_price = row['price']
        transaction_time = row['ts_in_delta']

        trades_before = len(self.trades)
        self._take_action(action)
        # A trade is only logged when at least one share changed hands.
        executed = len(self.trades) > trades_before

        reward = 0
        done = self.current_step >= len(self.data) - 1

        if action != 0:
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self._mean_volume)
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if executed:
                # Annotate only the trade made at this step; never an earlier one.
                trade = self.trades[-1]
                trade['reward'] = reward
                trade['transaction_cost'] = transaction_cost
                trade['slippage'] = expected_price - actual_price
                trade['time_penalty'] = 100 * transaction_time / 1e9

        info = {
            'step': self.current_step,
            'action': action,
            'price': actual_price,
            'shares': self.trades[-1]['shares'] if executed else 0
        }
        self.current_step += 1

        return self._next_observation(), reward, done, info

    def _take_action(self, action):
        """Execute a buy (1) or sell (2) at the current row's ``Close``; other actions do nothing.

        The order size is a random 0.1%-0.5% of the cash balance divided by the
        price (whole shares); sells are additionally capped by the shares held.
        A trade record is appended to ``self.trades`` only when the size is > 0.
        """
        if action not in (1, 2):
            return

        row = self.data.iloc[self.current_step]
        current_price = row['Close']
        # An unusable price would turn balance/holdings into NaN or inf; skip the trade.
        if not (np.isfinite(current_price) and current_price > 0):
            return

        current_time = pd.to_datetime(row['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if shares_bought > 0:
                self.trades.append(trade_info)
        else:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            # NOTE(review): sells are subtracted, so this tracks the net position
            # (it always equals shares_held), not gross traded volume - confirm intent.
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if shares_sold > 0:
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        """Return the negated sum of slippage, time penalty and transaction cost.

        NOTE(review): slippage is ``expected - actual`` for buys and sells alike;
        confirm the sign convention. ``transaction_time`` is divided by 1e9,
        presumably converting nanoseconds to seconds - confirm.
        """
        slippage = expected_price - actual_price
        time_penalty = 100 * transaction_time / 1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        """Return ``volatility * sqrt(volume / daily_volume)``."""
        return volatility * np.sqrt(volume / daily_volume)

    def run(self, verbose=True):
        """Replay the whole frame, letting the loaded model choose every action.

        Args:
            verbose: print one line per step (the original behaviour) when True.

        Returns:
            ``(cumulative_reward, trades)`` for the episode.
        """
        self.reset()
        for _ in range(len(self.data) - 1):
            scaled_obs = self._get_scaled_observation()
            with torch.no_grad():
                obs_tensor = torch.tensor(scaled_obs, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(self.device)  # Add batch and sequence dimension
                action_probs = self.model(obs_tensor)
                action = torch.argmax(action_probs, dim=1).item()
            obs, reward, done, info = self.step(action)
            if verbose:
                print(f"Step: {info['step']}, Action: {info['action']}, Price: {info['price']}, Shares: {info['shares']}, Reward: {reward}")
            if done:
                break
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        """Print the portfolio summary and write the trade log via :meth:`print_trades`."""
        # Clamp so rendering after the final step values holdings at the last row.
        close_price = self.data.iloc[self._row_index()]["Close"]
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * close_price}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        """Write the recorded trades to ``trades_transformer.csv`` (nothing is printed)."""
        trades_df = pd.DataFrame(self.trades)
        trades_df.to_csv('trades_transformer.csv', index=False)


### Run the transformer in the trading environment

In [60]:
# Evaluation settings
ticker = 'AAPL'  # symbol whose rows are replayed
daily_trading_limit = 1000  # total number of shares to trade per day
transformer_model_path = 'transformer_model_v1.pth'

# Load the feature table and keep only the rows of the chosen ticker
market_features_df = pd.read_csv("/content/market_features_df_new.csv")
ticker_data = market_features_df[market_features_df['symbol'] == ticker]

# Build the environment around that ticker's rows
env = TradingEnvironment(ticker_data, daily_trading_limit, transformer_model_path, seq_length=50)

# Let the transformer pick one action per row until the data is exhausted
obs = env.reset()
for _ in range(len(ticker_data) - 1):
    scaled_obs = env._get_scaled_observation()
    with torch.no_grad():
        # Shape (1, 1, n_features): batch and sequence dimensions added in front
        obs_tensor = torch.tensor(scaled_obs, dtype=torch.float32)[None, None, :].to(env.device)
        action_probs = env.model(obs_tensor)
        action = action_probs.argmax(dim=1).item()
    obs, reward, done, info = env.step(action)
    #print(f"Step: {info['step']}, Action: {info['action']}, Price: {info['price']}, Shares: {info['shares']}, Reward: {reward}")
    if done:
        break

# Show the final portfolio state (this also writes the trade log)
env.render()




Step: 59235
Balance: 9226381.18500003
Shares held: 4007.0
Total shares traded: 4007.0
Total portfolio value: 9997528.33500003
Cumulative reward: -12027.286438573909


# Playground

In [53]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler

class TradingEnvironment(gym.Env):
    """Gym environment that replays a market-data frame one row per step.

    Actions are ``0`` (hold), ``1`` (buy) and ``2`` (sell). A saved
    ``TransformerModel`` is loaded at construction time and used by :meth:`run`
    to choose the action for every row.

    ``data`` must contain the state columns listed in ``self.state_columns`` plus
    ``ask_px_00``, ``price``, ``ts_in_delta`` and ``ts_event``.

    Review notes:
      * ``daily_trading_limit`` and ``seq_length`` are stored but not read by any
        method of this class.
      * The scaler is fitted on the frame passed to the constructor, i.e. on the
        data being replayed. NOTE(review): if the model was trained with a
        different scaler its inputs will not match - confirm.
      * :meth:`run` feeds the model one row at a time (sequence length 1).
      * This notebook defines ``TradingEnvironment`` in several cells; the cell
        executed last is the definition in effect.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit, transformer_model_path, seq_length=50,
                 model_dim=64, num_heads=4, num_layers=2):
        """Build the environment and load the transformer weights.

        Args:
            data: DataFrame of market rows to replay (index is reset internally).
            daily_trading_limit: stored on the instance; not used by this class.
            transformer_model_path: path of a ``state_dict`` saved for ``TransformerModel``.
            seq_length: stored on the instance; not used by this class.
            model_dim, num_heads, num_layers: architecture of the saved model;
                the defaults are the values that used to be hard-coded here.

        Raises:
            ValueError: if ``data`` has no rows.
        """
        super().__init__()
        if len(data) == 0:
            raise ValueError("data must contain at least one row")
        self.data = data.reset_index(drop=True)  # Ensure the data index is reset
        self.daily_trading_limit = daily_trading_limit
        self.seq_length = seq_length

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

        # Cache per-episode invariants so step() does not rebuild them on every row.
        self._state_frame = self.data[self.state_columns]
        self._mean_volume = self.data['Volume'].mean()

        # Fit the scaler on the frame that will be replayed
        self.scaler = StandardScaler()
        self.scaler.fit(self._state_frame)

        # Load the transformer model (3 outputs: one score per action)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = TransformerModel(len(self.state_columns), model_dim, num_heads, num_layers, 3).to(self.device)
        # NOTE(security): torch.load unpickles the file; only load checkpoints from
        # a trusted source (consider weights_only=True where the torch version supports it).
        self.model.load_state_dict(torch.load(transformer_model_path, map_location=self.device))
        self.model.eval()

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

        # Initialise every piece of episode state so step()/render() work before reset().
        self._reset_episode_state()

    def _reset_episode_state(self):
        """Set the step counter, portfolio, reward total and trade log to their start values."""
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []

    def _row_index(self):
        """Return ``current_step`` clamped to the last valid row of ``self.data``."""
        return min(self.current_step, len(self.data) - 1)

    def reset(self):
        """Start a new episode and return the first (unscaled) observation."""
        self._reset_episode_state()
        return self._next_observation()

    def _next_observation(self):
        """Return the unscaled state-column values of the current row.

        After the final step the index is clamped, so the last row is returned
        instead of raising ``IndexError``.
        """
        return self._state_frame.iloc[self._row_index()].values

    def _get_scaled_observation(self):
        """Return the current row's state columns standardised with ``self.scaler`` (1-D array)."""
        # The scaler was fitted on a DataFrame, so hand it a one-row DataFrame
        # (list-style iloc) rather than a bare ndarray; this keeps the column
        # names and avoids a feature-name warning on every step.
        obs_df = self._state_frame.iloc[[self._row_index()]]
        scaled_obs = self.scaler.transform(obs_df).flatten()
        return scaled_obs

    def step(self, action):
        """Apply ``action`` to the current row and advance by one row.

        Returns:
            ``(observation, reward, done, info)``. ``reward`` is 0 for hold and the
            (negative) cost from :meth:`_calculate_reward` for buy/sell. ``done`` is
            True when the row just processed was the last one. ``info['shares']``
            is the size of the trade recorded at this step, or 0 if none was.

        Raises:
            IndexError: if called again after the last row has been processed.
        """
        if self.current_step >= len(self.data):
            raise IndexError("step() called after the last row; call reset() to start a new episode")

        row = self.data.iloc[self.current_step]
        expected_price = row['ask_px_00']
        actual_price = row['price']
        transaction_time = row['ts_in_delta']

        trades_before = len(self.trades)
        self._take_action(action)
        # A trade is only logged when at least one share changed hands.
        executed = len(self.trades) > trades_before

        reward = 0
        done = self.current_step >= len(self.data) - 1

        if action != 0:
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self._mean_volume)
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if executed:
                # Annotate only the trade made at this step; never an earlier one.
                trade = self.trades[-1]
                trade['reward'] = reward
                trade['transaction_cost'] = transaction_cost
                trade['slippage'] = expected_price - actual_price
                trade['time_penalty'] = 100 * transaction_time / 1e9

        info = {
            'step': self.current_step,
            'action': action,
            'price': actual_price,
            'shares': self.trades[-1]['shares'] if executed else 0
        }
        self.current_step += 1

        return self._next_observation(), reward, done, info

    def _take_action(self, action):
        """Execute a buy (1) or sell (2) at the current row's ``Close``; other actions do nothing.

        The order size is a random 0.1%-0.5% of the cash balance divided by the
        price (whole shares); sells are additionally capped by the shares held.
        A trade record is appended to ``self.trades`` only when the size is > 0.
        """
        if action not in (1, 2):
            return

        row = self.data.iloc[self.current_step]
        current_price = row['Close']
        # An unusable price would turn balance/holdings into NaN or inf; skip the trade.
        if not (np.isfinite(current_price) and current_price > 0):
            return

        current_time = pd.to_datetime(row['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if shares_bought > 0:
                self.trades.append(trade_info)
        else:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            # NOTE(review): sells are subtracted, so this tracks the net position
            # (it always equals shares_held), not gross traded volume - confirm intent.
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if shares_sold > 0:
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        """Return the negated sum of slippage, time penalty and transaction cost.

        NOTE(review): slippage is ``expected - actual`` for buys and sells alike;
        confirm the sign convention. ``transaction_time`` is divided by 1e9,
        presumably converting nanoseconds to seconds - confirm.
        """
        slippage = expected_price - actual_price
        time_penalty = 100 * transaction_time / 1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        """Return ``volatility * sqrt(volume / daily_volume)``."""
        return volatility * np.sqrt(volume / daily_volume)

    def run(self, verbose=True):
        """Replay the whole frame, letting the loaded model choose every action.

        Args:
            verbose: print one line per step (the original behaviour) when True.

        Returns:
            ``(cumulative_reward, trades)`` for the episode.
        """
        self.reset()
        for _ in range(len(self.data) - 1):
            scaled_obs = self._get_scaled_observation()
            with torch.no_grad():
                obs_tensor = torch.tensor(scaled_obs, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(self.device)  # Add batch and sequence dimension
                action_probs = self.model(obs_tensor)
                action = torch.argmax(action_probs, dim=1).item()
            obs, reward, done, info = self.step(action)
            if verbose:
                print(f"Step: {info['step']}, Action: {info['action']}, Price: {info['price']}, Shares: {info['shares']}, Reward: {reward}")
            if done:
                break
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        """Print the portfolio summary and then the trade log via :meth:`print_trades`."""
        # Clamp so rendering after the final step values holdings at the last row.
        close_price = self.data.iloc[self._row_index()]["Close"]
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * close_price}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        """Write the recorded trades to ``trades_transformer.csv`` and print one line per trade."""
        trades_df = pd.DataFrame(self.trades)
        trades_df.to_csv('trades_transformer.csv', index=False)
        for trade in self.trades:
            print(f"Step: {trade['step']}, Timestamp: {trade['timestamp']}, Action: {trade['action']}, Price: {trade['price']}, Shares: {trade['shares']}, Reward: {trade['reward']}, Transaction Cost: {trade['transaction_cost']}, Slippage: {trade['slippage']}, Time Penalty: {trade['time_penalty']}")


In [59]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import StandardScaler

class TradingEnvironment(gym.Env):
    """Gym environment that replays a market-data frame one row per step.

    Actions are ``0`` (hold), ``1`` (buy) and ``2`` (sell). A saved
    ``TransformerModel`` is loaded at construction time and used by :meth:`run`
    to choose the action for every row.

    ``data`` must contain the state columns listed in ``self.state_columns`` plus
    ``ask_px_00``, ``price``, ``ts_in_delta`` and ``ts_event``.

    Review notes:
      * ``daily_trading_limit`` and ``seq_length`` are stored but not read by any
        method of this class.
      * The scaler is fitted on the frame passed to the constructor, i.e. on the
        data being replayed. NOTE(review): if the model was trained with a
        different scaler its inputs will not match - confirm.
      * :meth:`run` feeds the model one row at a time (sequence length 1).
      * This notebook defines ``TradingEnvironment`` in several cells; the cell
        executed last is the definition in effect.
    """
    metadata = {'render.modes': ['human']}

    def __init__(self, data, daily_trading_limit, transformer_model_path, seq_length=50,
                 model_dim=64, num_heads=4, num_layers=2):
        """Build the environment and load the transformer weights.

        Args:
            data: DataFrame of market rows to replay (index is reset internally).
            daily_trading_limit: stored on the instance; not used by this class.
            transformer_model_path: path of a ``state_dict`` saved for ``TransformerModel``.
            seq_length: stored on the instance; not used by this class.
            model_dim, num_heads, num_layers: architecture of the saved model;
                the defaults are the values that used to be hard-coded here.

        Raises:
            ValueError: if ``data`` has no rows.
        """
        super().__init__()
        if len(data) == 0:
            raise ValueError("data must contain at least one row")
        self.data = data.reset_index(drop=True)  # Ensure the data index is reset
        self.daily_trading_limit = daily_trading_limit
        self.seq_length = seq_length

        # Extract state columns
        self.state_columns = ['Close', 'Volume', 'RSI', 'MACD', 'MACD_signal', 'Stoch_k', 'Stoch_d',
                              'OBV', 'Upper_BB', 'Middle_BB', 'Lower_BB', 'ATR_1', 'ADX', '+DI', '-DI', 'CCI']

        # Cache per-episode invariants so step() does not rebuild them on every row.
        self._state_frame = self.data[self.state_columns]
        self._mean_volume = self.data['Volume'].mean()

        # Fit the scaler on the frame that will be replayed
        self.scaler = StandardScaler()
        self.scaler.fit(self._state_frame)

        # Load the transformer model (3 outputs: one score per action)
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = TransformerModel(len(self.state_columns), model_dim, num_heads, num_layers, 3).to(self.device)
        # NOTE(security): torch.load unpickles the file; only load checkpoints from
        # a trusted source (consider weights_only=True where the torch version supports it).
        self.model.load_state_dict(torch.load(transformer_model_path, map_location=self.device))
        self.model.eval()

        # Define action space: [Hold, Buy, Sell]
        self.action_space = spaces.Discrete(3)

        # Define observation space based on state columns
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(len(self.state_columns),), dtype=np.float32
        )

        # Initialise every piece of episode state so step()/render() work before reset().
        self._reset_episode_state()

    def _reset_episode_state(self):
        """Set the step counter, portfolio, reward total and trade log to their start values."""
        self.current_step = 0
        self.balance = 10_000_000.0  # $10 million
        self.shares_held = 0
        self.total_shares_traded = 0
        self.cumulative_reward = 0
        self.trades = []

    def _row_index(self):
        """Return ``current_step`` clamped to the last valid row of ``self.data``."""
        return min(self.current_step, len(self.data) - 1)

    def reset(self):
        """Start a new episode and return the first (unscaled) observation."""
        self._reset_episode_state()
        return self._next_observation()

    def _next_observation(self):
        """Return the unscaled state-column values of the current row.

        After the final step the index is clamped, so the last row is returned
        instead of raising ``IndexError``.
        """
        return self._state_frame.iloc[self._row_index()].values

    def _get_scaled_observation(self):
        """Return the current row's state columns standardised with ``self.scaler`` (1-D array)."""
        # Slice with a list so a one-row DataFrame (with column names) reaches the scaler.
        obs_df = self._state_frame.iloc[[self._row_index()]]
        scaled_obs = self.scaler.transform(obs_df).flatten()
        return scaled_obs

    def step(self, action):
        """Apply ``action`` to the current row and advance by one row.

        Returns:
            ``(observation, reward, done, info)``. ``reward`` is 0 for hold and the
            (negative) cost from :meth:`_calculate_reward` for buy/sell. ``done`` is
            True when the row just processed was the last one. ``info['shares']``
            is the size of the trade recorded at this step, or 0 if none was.

        Raises:
            IndexError: if called again after the last row has been processed.
        """
        if self.current_step >= len(self.data):
            raise IndexError("step() called after the last row; call reset() to start a new episode")

        row = self.data.iloc[self.current_step]
        expected_price = row['ask_px_00']
        actual_price = row['price']
        transaction_time = row['ts_in_delta']

        trades_before = len(self.trades)
        self._take_action(action)
        # A trade is only logged when at least one share changed hands.
        executed = len(self.trades) > trades_before

        reward = 0
        done = self.current_step >= len(self.data) - 1

        if action != 0:
            transaction_cost = self._calculate_transaction_cost(row['Volume'], 0.3, self._mean_volume)
            reward = self._calculate_reward(expected_price, actual_price, transaction_time, transaction_cost)
            self.cumulative_reward += reward
            if executed:
                # Annotate only the trade made at this step; never an earlier one.
                trade = self.trades[-1]
                trade['reward'] = reward
                trade['transaction_cost'] = transaction_cost
                trade['slippage'] = expected_price - actual_price
                trade['time_penalty'] = 100 * transaction_time / 1e9

        info = {
            'step': self.current_step,
            'action': action,
            'price': actual_price,
            'shares': self.trades[-1]['shares'] if executed else 0
        }
        self.current_step += 1

        return self._next_observation(), reward, done, info

    def _take_action(self, action):
        """Execute a buy (1) or sell (2) at the current row's ``Close``; other actions do nothing.

        The order size is a random 0.1%-0.5% of the cash balance divided by the
        price (whole shares); sells are additionally capped by the shares held.
        A trade record is appended to ``self.trades`` only when the size is > 0.
        """
        if action not in (1, 2):
            return

        row = self.data.iloc[self.current_step]
        current_price = row['Close']
        # An unusable price would turn balance/holdings into NaN or inf; skip the trade.
        if not (np.isfinite(current_price) and current_price > 0):
            return

        current_time = pd.to_datetime(row['ts_event'])
        trade_info = {'step': self.current_step, 'timestamp': current_time, 'action': action, 'price': current_price, 'shares': 0, 'reward': 0, 'transaction_cost': 0, 'slippage': 0, 'time_penalty': 0}

        if action == 1:  # Buy
            shares_bought = (self.balance * np.random.uniform(0.001, 0.005)) // current_price
            self.balance -= shares_bought * current_price
            self.shares_held += shares_bought
            self.total_shares_traded += shares_bought
            trade_info['shares'] = shares_bought
            if shares_bought > 0:
                self.trades.append(trade_info)
        else:  # Sell
            shares_sold = min((self.balance * np.random.uniform(0.001, 0.005)) // current_price, self.shares_held)
            self.balance += shares_sold * current_price
            self.shares_held -= shares_sold
            # NOTE(review): sells are subtracted, so this tracks the net position
            # (it always equals shares_held), not gross traded volume - confirm intent.
            self.total_shares_traded -= shares_sold
            trade_info['shares'] = shares_sold
            if shares_sold > 0:
                self.trades.append(trade_info)

    def _calculate_reward(self, expected_price, actual_price, transaction_time, transaction_cost):
        """Return the negated sum of slippage, time penalty and transaction cost.

        NOTE(review): slippage is ``expected - actual`` for buys and sells alike;
        confirm the sign convention. ``transaction_time`` is divided by 1e9,
        presumably converting nanoseconds to seconds - confirm.
        """
        slippage = expected_price - actual_price
        time_penalty = 100 * transaction_time / 1e9
        reward = - (slippage + time_penalty + transaction_cost)
        return reward

    def _calculate_transaction_cost(self, volume, volatility, daily_volume):
        """Return ``volatility * sqrt(volume / daily_volume)``."""
        return volatility * np.sqrt(volume / daily_volume)

    def run(self, verbose=True):
        """Replay the whole frame, letting the loaded model choose every action.

        Args:
            verbose: print one line per step (the original behaviour) when True.

        Returns:
            ``(cumulative_reward, trades)`` for the episode.
        """
        self.reset()
        for _ in range(len(self.data) - 1):
            scaled_obs = self._get_scaled_observation()
            with torch.no_grad():
                obs_tensor = torch.tensor(scaled_obs, dtype=torch.float32).unsqueeze(0).unsqueeze(0).to(self.device)  # Add batch and sequence dimension
                action_probs = self.model(obs_tensor)
                action = torch.argmax(action_probs, dim=1).item()
            obs, reward, done, info = self.step(action)
            if verbose:
                print(f"Step: {info['step']}, Action: {info['action']}, Price: {info['price']}, Shares: {info['shares']}, Reward: {reward}")
            if done:
                break
        return self.cumulative_reward, self.trades

    def render(self, mode='human', close=False):
        """Print the portfolio summary and write the trade log via :meth:`print_trades`."""
        # Clamp so rendering after the final step values holdings at the last row.
        close_price = self.data.iloc[self._row_index()]["Close"]
        print(f'Step: {self.current_step}')
        print(f'Balance: {self.balance}')
        print(f'Shares held: {self.shares_held}')
        print(f'Total shares traded: {self.total_shares_traded}')
        print(f'Total portfolio value: {self.balance + self.shares_held * close_price}')
        print(f'Cumulative reward: {self.cumulative_reward}')
        self.print_trades()

    def print_trades(self):
        """Write the recorded trades to ``trades_transformer.csv`` (nothing is printed)."""
        trades_df = pd.DataFrame(self.trades)
        trades_df.to_csv('trades_transformer.csv', index=False)
