In [None]:
import gym
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from sklearn.preprocessing import StandardScaler
import random

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import random

In [4]:
# Load the 000300.SH table; the first CSV column becomes the (date-parsed) index.
CSV_PATH = "000300.SH.csv"
data = pd.read_csv(CSV_PATH, index_col=0, parse_dates=True)
data.tail()

Unnamed: 0,OPEN,HIGH,LOW,CLOSE,VWAP,VOLUME,AMT,TURN,TOTAL_SHARES,FREE_FLOAT_SHARES,MKT_CAP_ARD,MKT_FREESHARES,PE_TTM,VAL_PB_WGT,DIVIDENDYIELD2
2024-10-25,3931.3205,3992.8918,3921.9396,3956.421,19.565324,21069488500,412231000000.0,0.6671,4175790000000.0,1075110000000.0,56171950000000.0,18350750000000.0,12.8649,1.3826,2.924
2024-10-28,3953.1063,3964.1569,3917.2042,3964.1569,18.062983,21616041500,390450000000.0,0.6844,4175870000000.0,1075170000000.0,56158900000000.0,18387280000000.0,12.8785,1.378,2.8955
2024-10-29,3970.008,3991.3091,3921.5795,3924.649,19.771602,21729398400,429625000000.0,0.688,4175840000000.0,1076030000000.0,55665600000000.0,18200500000000.0,12.8074,1.3629,2.9242
2024-10-30,3904.1595,3926.0581,3864.3051,3889.4487,17.545411,21943662600,385011000000.0,0.6948,4175850000000.0,1074650000000.0,55079500000000.0,18039530000000.0,12.6339,1.3415,2.9597
2024-10-31,3888.4229,3923.8997,3859.1079,3891.0396,17.148066,30063671800,515534000000.0,0.9519,4175850000000.0,1081020000000.0,55029500000000.0,18052510000000.0,12.391,1.3163,2.9679


In [5]:
# Rolling window lengths in rows, keyed by the label used in the new column names.
WINDOWS = {"3y": 756, "1y": 252, "6m": 126, "3m": 63, "1m": 21, "2w": 10}


def _clipped_rolling_z(series, span):
    """Return the z-score of `series` against its trailing `span`-row mean/std, clipped to [-10, 10]."""
    trailing = series.rolling(window=span)
    standardized = (series - trailing.mean()) / trailing.std()
    return standardized.clip(lower=-10, upper=10)


# One "<column>_<label>_Z" feature per (original column, window) pair,
# generated column-major so the resulting column order is stable.
new_columns = {
    f"{col}_{window_name}_Z": _clipped_rolling_z(data[col], window)
    for col in data.columns
    for window_name, window in WINDOWS.items()
}

# Attach all z-score columns in a single concat rather than one insert per column.
data = pd.concat([data, pd.DataFrame(new_columns)], axis=1)

# Open-to-open percentage change and simple differences of return and volume.
data["DAILY_RETURN"] = data["OPEN"].pct_change()
data["DAILY_RETURN_DIFF1"] = data["DAILY_RETURN"].diff(1)
data["VOLUME_DIFF1"] = data["VOLUME"].diff(1)
data["VOLUME_DIFF2"] = data["VOLUME"].diff(2)

In [6]:
def _max_dailyized_returns(prices, n):
    """Best per-day compounded return over any sub-window of the last `n` steps.

    For every position ``i >= n`` this scans all sub-windows of length
    ``k`` (1..n) that start at or after ``i - n`` and end at or before ``i``,
    computes ``(p_end / p_start) ** (1 / k) - 1`` and keeps the maximum.
    Only prices up to and including position ``i`` are read.

    Args:
        prices: Indexable sequence of prices (e.g. a 1-D numpy array).
        n: Look-back length in rows.

    Returns:
        A list with one value per price. The first `n` entries are NaN, and
        so is any entry for which no sub-window had a positive start price.
    """
    results = []
    for i in range(len(prices)):
        if i < n:
            # Not enough history yet for a full n-row look-back.
            results.append(np.nan)
            continue

        best = float("-inf")
        for k in range(1, n + 1):  # sub-window length
            # i >= n guarantees i - n >= 0, so every offset is a valid index.
            for offset in range(i - n, i - k + 1):
                p_start = prices[offset]
                if p_start <= 0:
                    # A non-positive base price has no meaningful return.
                    continue
                dailyized_ret = (prices[offset + k] / p_start) ** (1 / k) - 1
                best = max(best, dailyized_ret)

        # Report "no valid window" as NaN instead of leaking the -inf sentinel
        # into the feature table, where it would survive dropna().
        results.append(best if best > float("-inf") else np.nan)
    return results


open_prices = data["OPEN"].values

for n in [3, 5, 7, 10, 15]:
    data[f"MAX_DAILYIZED_{n}"] = _max_dailyized_returns(open_prices, n)

# Drop every row that still has a NaN in any feature (rolling warm-up, diffs,
# look-back warm-up). Rebinding keeps the cell free of in-place mutation.
data = data.dropna()

In [8]:
# Show the last rows of the feature table built by the preceding cells.
data.tail()

Unnamed: 0,OPEN,HIGH,LOW,CLOSE,VWAP,VOLUME,AMT,TURN,TOTAL_SHARES,FREE_FLOAT_SHARES,...,DIVIDENDYIELD2_2w_Z,DAILY_RETURN,DAILY_RETURN_DIFF1,VOLUME_DIFF1,VOLUME_DIFF2,MAX_DAILYIZED_3,MAX_DAILYIZED_5,MAX_DAILYIZED_7,MAX_DAILYIZED_10,MAX_DAILYIZED_15
2024-10-25,3931.3205,3992.8918,3921.9396,3956.421,19.565324,21069488500,412231000000.0,0.6671,4175790000000.0,1075110000000.0,...,0.157109,-0.004955,-0.003838,3597082000.0,-3517906000.0,0.008047,0.044067,0.044067,0.044067,0.156946
2024-10-28,3953.1063,3964.1569,3917.2042,3964.1569,18.062983,21616041500,390450000000.0,0.6844,4175870000000.0,1075170000000.0,...,-1.22925,0.005542,0.010497,546553000.0,4143635000.0,0.005542,0.008047,0.044067,0.044067,0.156946
2024-10-29,3970.008,3991.3091,3921.5795,3924.649,19.771602,21729398400,429625000000.0,0.688,4175840000000.0,1076030000000.0,...,0.053689,0.004276,-0.001266,113356900.0,659909900.0,0.005542,0.008047,0.044067,0.044067,0.044067
2024-10-30,3904.1595,3926.0581,3864.3051,3889.4487,17.545411,21943662600,385011000000.0,0.6948,4175850000000.0,1074650000000.0,...,1.41724,-0.016586,-0.020862,214264200.0,327621100.0,0.005542,0.005542,0.008047,0.044067,0.044067
2024-10-31,3888.4229,3923.8997,3859.1079,3891.0396,17.148066,30063671800,515534000000.0,0.9519,4175850000000.0,1081020000000.0,...,1.825202,-0.004031,0.012556,8120009000.0,8334273000.0,0.004276,0.005542,0.008047,0.044067,0.044067


<br><br><br><br><br><br><br><br><br><br>

# CSI 300 Trading Environment

In [None]:
class CSI300TradingEnv(Env):
    """Gym-style trading environment over a feature DataFrame.

    The constructor splits the rows by position into ``train`` (first 75%),
    ``test`` (next 15%) and ``oos`` (last 10%) segments, standardizes every
    column with a ``StandardScaler`` fitted on the train segment only, and
    exposes the segment selected by ``mode`` as ``self.data``.

    The remaining methods (``_set_data_for_mode``, ``set_mode``, ``reset``,
    ...) are attached to the class in later notebook cells; they must be
    attached before the class is instantiated because ``__init__`` calls
    ``_set_data_for_mode``.
    """

    def __init__(
        self,
        data: pd.DataFrame,
        window_size: int = 1,                # Number of past observations for a filtration
        lookback: int = 50,                  # Number of past actions and returns to consider for reward calculation
        ignored: int = 3,                    # Number of most recent actions/returns to ignore in reward calculation
        mode: str = "train",                 # 'train', 'test', 'oos'
        transaction_cost: float = 0.0015,
        gamma: float = 0.99,
        reward_phase: int = 1,
    ):
        """Store the configuration, scale the data and select the active split.

        Args:
            data: Feature table; every column is passed to the scaler.
            window_size: Number of rows that make up one observation.
            lookback: Number of past actions/returns used by the reward.
            ignored: Number of most recent actions/returns the reward skips.
            mode: Which split to expose: 'train', 'test' or 'oos'.
            transaction_cost: Amount subtracted from the reward on a position change.
            gamma: Discount factor used by the reward.
            reward_phase: Reward scheme selector.

        Raises:
            KeyError: If `mode` is not one of the split names.
        """
        # Overwritten with the scaled split by _set_data_for_mode() below.
        self.data = data
        self.window_size = window_size
        self.lookback = lookback
        self.ignored = ignored
        self.mode = mode
        self.transaction_cost = transaction_cost
        self.gamma = gamma
        self.reward_phase = reward_phase

        # Two discrete actions; action_mapping translates them to positions.
        self.action_space = Discrete(2)
        self.action_mapping = {0: -1, 1: 1}
        self.action_label = {0: 'liquidate', 1: 'enter'}

        # One observation is `window_size` rows flattened into a float32
        # vector, which is what _get_observation() returns.
        self.observation_space = Box(
            low=-np.inf,
            high=np.inf,
            shape=(self.window_size * data.shape[1],),
            dtype=np.float32,
        )

        # Episode state; reset() restores exactly these values.
        self.index_position = self.window_size
        self.position = -1
        self.done = False

        self.position_history = []
        self.price_history = []
        self.next_price = None
        self.cumulative_log_return = 0.0

        # Positional split boundaries over a private copy of the input.
        self.data_all = data.copy(deep = True)
        self.splits = {
            'train': (0, int(0.75 * len(self.data_all))),
            'test': (int(0.75 * len(self.data_all)), int(0.9 * len(self.data_all))),
            'oos': (int(0.9 * len(self.data_all)), len(self.data_all))
        }

        # Fit the scaler on the train rows only, then transform all rows, so
        # test/oos statistics never influence the scaling.
        # NOTE(review): every column is standardized, including raw prices;
        # the reward takes ratios/logs of prices, so those presumably need to
        # come from unscaled values - confirm when step() is implemented.
        train_start, train_end = self.splits['train']
        scaler = StandardScaler()
        scaler.fit(self.data_all.iloc[train_start:train_end])
        scaled_values = scaler.transform(self.data_all)
        self.data_all = pd.DataFrame(scaled_values, columns=self.data_all.columns, index=self.data_all.index)

        self._set_data_for_mode()

        # debug
        self.price_history_dated = []
        self.position_history_dated = []

<br>

In [None]:
def _set_data_for_mode(self):
    start, end = self.splits[self.mode]
    self.data = self.data_all.iloc[start:end]

CSI300TradingEnv._set_data_for_mode = _set_data_for_mode

<br>

In [None]:
def set_mode(self, mode: str):
    """Switch the environment to another data split and start a fresh episode.

    Args:
        mode: Name of the split to activate; must be a key of ``self.splits``.

    Raises:
        ValueError: If `mode` is not a known split name. The current mode is
            left untouched in that case.
    """
    if mode in self.splits:
        self.mode = mode
        # Re-slice the data first so reset() builds its observation from the new split.
        self._set_data_for_mode()
        self.reset()
        return

    raise ValueError(f"Invalid mode: {mode}. Must be one of {list(self.splits.keys())}")

CSI300TradingEnv.set_mode = set_mode

<br>

In [None]:
def set_lookback(self, new_lookback: int):
    """Replace the reward look-back length.

    Args:
        new_lookback: New value for ``self.lookback``.

    Raises:
        ValueError: If `new_lookback` is smaller than 1; ``self.lookback`` is
            not modified in that case.
    """
    below_minimum = new_lookback < 1
    if below_minimum:
        raise ValueError("Lookback must be at least 1")

    self.lookback = new_lookback

CSI300TradingEnv.set_lookback = set_lookback

<br>

In [None]:
def reset(self):
    """Restore the start-of-episode state and return the first observation.

    The cursor is placed at ``window_size`` so a full observation window is
    available, the position returns to -1, and all histories, the pending
    next price and the cumulative log return are cleared.

    Returns:
        Whatever ``self._get_observation()`` yields for the reset state.
    """
    # Cursor and flags.
    self.done = False
    self.position = -1
    self.index_position = self.window_size

    # Per-episode accumulators; each history gets its own fresh list.
    self.cumulative_log_return = 0.0
    self.next_price = None
    self.position_history, self.price_history = [], []

    # Debug traces.
    self.price_history_dated, self.position_history_dated = [], []

    first_observation = self._get_observation()
    return first_observation

CSI300TradingEnv.reset = reset

<br>

In [None]:
def _get_observation(self):
    indices = self.data.index[self.index_position - self.window_size : self.index_position]
    window = self.data.loc[indices]
    return window.values.flatten().astype(np.float32)

CSI300TradingEnv._get_observation = _get_observation

<br>

In [None]:
def reward(self, reward_phase: int):
    """Compute the reward for the current step under one of three schemes.

    Reads ``self.price_history``, ``self.position_history``, ``self.position``
    and ``self.next_price``; no state is modified.

    Phases:
        1: ``position * log(next_price / latest_price)``, minus
           ``transaction_cost`` when the second-to-last recorded position
           differs from the current position.
        2: Discounted sum of position-signed simple returns from each price
           in the look-back window to ``next_price``. The window is the most
           recent ``lookback`` history entries with the newest ``ignored``
           entries dropped; the newest retained entry has weight 1 and each
           older one picks up another factor of ``gamma``.
        3: Not implemented yet; always 0.0.

    Args:
        reward_phase: Scheme selector, one of 1, 2 or 3.

    Returns:
        The reward as a float; 0.0 when there is not enough history or
        ``next_price`` has not been set.

    Raises:
        ValueError: If `reward_phase` is not 1, 2 or 3.
    """
    if reward_phase not in {1, 2, 3}:
        raise ValueError(f"Invalid reward_phase: {reward_phase}. Must be 1, 2, or 3.")

    # Phase 1 needs a single past price; phases 2 and 3 need a full look-back.
    required_history = 1 if reward_phase == 1 else self.lookback
    if len(self.price_history) < required_history or self.next_price is None:
        return 0.0

    # Phase 1
    if reward_phase == 1:
        log_ret = np.log(self.next_price / self.price_history[-1])
        value = self.position * log_ret

        # Charge the cost only when the position actually changed.
        # Assumes position_history[-1] is the current position - confirm in step().
        if len(self.position_history) >= 2 and self.position_history[-2] != self.position:
            value -= self.transaction_cost
        return value

    # Phase 2
    if reward_phase == 2:
        reward_window = self.lookback - self.ignored

        # Anchor the window to the END of the history: the last `lookback`
        # entries minus the newest `ignored`. Indexing from 0 is only correct
        # while the history holds exactly `lookback` entries; once it grows it
        # would score the oldest entries of the episode instead.
        # Assumes position_history is aligned index-for-index with price_history.
        start = len(self.price_history) - self.lookback

        returns = [
            ((self.next_price / self.price_history[i]) - 1) * self.position_history[i]
            for i in range(start, start + reward_window)
        ]

        # Oldest entry gets gamma ** (window - 1), newest retained entry gets 1.
        discounts = [self.gamma ** i for i in range(reward_window)][::-1]
        return sum(r * d for r, d in zip(returns, discounts))

    # Phase 3
    # TODO: reward scheme not defined yet.
    return 0.0

CSI300TradingEnv.reward = reward

<br>

In [None]:
def step(self, action):
    """Advance the environment by one action (not implemented yet).

    The definition previously had no body, which made the cell a syntax
    error. Until the transition logic is written this fails loudly instead.

    NOTE(review): unlike the other helpers in this notebook, this function is
    not yet attached to ``CSI300TradingEnv``; bind it once it is implemented.

    Args:
        action: Action chosen by the agent.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError("CSI300TradingEnv.step is not implemented yet")


<br><br><br><br><br><br><br><br><br><br>

# Shallow NN Trader

In [1]:
class Shallow_NN_Agent:
    """Small feed-forward Bernoulli policy trained with a policy-gradient update.

    The network maps an observation vector to a single sigmoid output that is
    used as the probability of sampling action 1. Each call to ``act`` records
    the sampled action's log-probability; ``update`` turns the recorded
    log-probabilities and one scalar reward into a loss and takes one
    optimizer step.
    """

    def __init__(self, input_dim, hidden_dims = (128, 64, 32), action_dim = 3, lr = 1e-3, gamma = 0.99, lookback = 50, ignored = 5):
        """Build the network and optimizer.

        Args:
            input_dim: Length of the observation vector.
            hidden_dims: Sizes of the hidden ReLU layers, in order. Any
                iterable of ints works; the default is a tuple so it cannot
                be mutated between instances.
            action_dim: Accepted for interface compatibility but not used;
                the output layer always has a single unit.
            lr: Adam learning rate.
            gamma: Per-step discount applied in ``update``.
            lookback: Number of recorded steps required before ``update`` acts.
            ignored: Number of steps cut from the end of the first
                ``lookback`` recorded steps when building the loss.
        """
        self.gamma = gamma
        self.lookback = lookback
        self.ignored = ignored
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        layers = []
        prev_dim = input_dim

        for hidden_dim in hidden_dims:
            layers.append(nn.Linear(prev_dim, hidden_dim))
            layers.append(nn.ReLU())
            prev_dim = hidden_dim

        # Single sigmoid unit: probability of choosing action 1.
        layers.append(nn.Linear(prev_dim, 1))
        layers.append(nn.Sigmoid())

        self.net = nn.Sequential(*layers).to(self.device)
        self.optimizer = optim.Adam(self.net.parameters(), lr = lr)

        # (input tensor, sampled action, log-prob of that action) per act() call.
        self.history = []

    def act(self, filtration, do_debug = False):
        """Sample an action for one observation and record it for training.

        Args:
            filtration: Observation vector (anything ``torch.tensor`` accepts).
            do_debug: When true, print the action probability and the sample.

        Returns:
            The sampled action as an int (0 or 1).
        """
        x = torch.tensor(filtration, dtype=torch.float32, device=self.device)
        prob = self.net(x).squeeze()
        dist = torch.distributions.Bernoulli(probs=prob)
        action = dist.sample()

        if do_debug:
            print(f"Probs: {prob.detach().cpu().numpy()}, Action: {action.item()}")

        # The log-prob keeps its autograd graph so update() can backpropagate.
        self.history.append((x, action, dist.log_prob(action)))

        return int(action.item())

    def update(self, reward):
        """Take one policy-gradient step from the recorded history.

        Does nothing until at least ``lookback`` steps are recorded. Then the
        slice ``history[:lookback - ignored]`` is used: every retained step
        contributes ``-log_prob * reward * gamma ** age``, where the last
        retained step has age 0. The whole history is cleared afterwards.

        Args:
            reward: Scalar reward applied to all retained steps.
        """
        if len(self.history) < self.lookback:
            return

        useful = self.history[:self.lookback - self.ignored]
        if not useful:
            # lookback == ignored leaves nothing to learn from; without this
            # guard the loss would stay a plain int and .backward() would fail.
            self.history.clear()
            return

        n = len(useful)
        terms = [
            log_prob * (reward * (self.gamma ** (n - 1 - t)))
            for t, (_, _, log_prob) in enumerate(useful)
        ]
        loss = -torch.stack(terms).sum()

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        self.history.clear()