In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import warnings
# NOTE(review): this silences every warning category for the whole session, including
# deprecation and numerical warnings raised by the libraries used below; consider
# narrowing the filter to the specific categories that are known to be noise.
warnings.filterwarnings("ignore")

In [3]:
%matplotlib inline
import matplotlib.pyplot as plt
# Default (width, height) for every figure created in this notebook.
plt.rcParams["figure.figsize"] = (15,5)
# Axes grid lines are off by default.
plt.rcParams['axes.grid'] = False
import seaborn as sns
# Seaborn "whitegrid" style with its grid explicitly disabled, matching the rcParams above.
sns.set_style("whitegrid", {'axes.grid' : False})

In [4]:
from tqdm import tqdm
from functools import partial
from Utils.add_features import add_fisher, add_inverse_fisher, add_constance_brown
from Utils.add_features import max_over_lookback, min_over_lookback, sma, x, shift
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from Optimisers.Optimiser import Optimiser

In [5]:
from Data.data_retrieval import get_data
# Load the price frame used throughout the notebook; later cells read its "Close" column.
# NOTE(review): '.NSEI' looks like an index ticker and 'D' like a daily-frequency code —
# confirm both against Data.data_retrieval.get_data.
data = get_data('.NSEI', 'D')

Adding target forward returns

In [6]:
# Forward-looking horizons, counted in rows of `data`, for which target columns are built.
lookforwards = [3,5,10,21,45]

In [7]:
# Forward return i rows ahead, Close[t+i] / Close[t] - 1, for every horizon up to AND
# including the longest look-forward. range() is end-exclusive: the previous
# range(1, max(lookforwards)) never created FReturn45 (although the drop filter below
# is written to keep it), and range(1, lookforward) made every Max/Min window stop one
# row short of its nominal horizon.
longest_lookforward = max(lookforwards)
for i in range(1, longest_lookforward + 1):
    data[f"FReturn{i}"] = data["Close"].shift(-i)/data["Close"]-1
for lookforward in lookforwards:
    # Best / worst forward return reachable within the next `lookforward` rows.
    window_columns = [f"FReturn{i}" for i in range(1, lookforward + 1)]
    data[f"MaxFReturn{lookforward}"] = data[window_columns].max(axis=1)
    data[f"MinFReturn{lookforward}"] = data[window_columns].min(axis=1)
# Keep only the point-to-point returns at the requested horizons.
data = data.drop(columns=[f"FReturn{i}" for i in range(1, longest_lookforward + 1) if i not in lookforwards])
# The trailing rows have no future close for the longest horizon; drop every incomplete row.
# Re-assignment instead of inplace=True keeps the cell's data flow explicit.
data = data.dropna()

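Defining the candidate transform sets F (indicator functions applied to the price data) and G (lookback operators applied to the output of F)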

In [8]:
# Largest lookback (in rows) accepted by prior() for either transform; alpha() also trims
# this many rows after each transform.
max_lookback = 400
# Candidate first-stage transforms F; alpha() calls the chosen one as f([df, lookback]).
f_functions = [add_fisher, add_inverse_fisher, add_constance_brown]
# Candidate second-stage transforms G; alpha() calls the chosen one as g(df, column, lookback).
g_functions = [x, max_over_lookback, min_over_lookback, sma, shift]

MCMC

In [9]:
def prior(self, params):
    """Flat prior over the four search parameters: 1 inside the valid box, 0 outside.

    Parameters
    ----------
    self : object
        Unused here. NOTE(review): presumably the optimiser instance the prior gets
        attached to by ``define_prior`` — confirm against Optimisers.Optimiser.
    params : sequence of four numbers
        ``[f_index, f_lookback, g_index, g_lookback]``, the layout ``alpha`` reads.

    Returns
    -------
    int
        1 when both indices address an existing entry of ``f_functions`` /
        ``g_functions`` and both lookbacks lie in ``[1, max_lookback]``; otherwise 0.
    """
    f_index, f_lookback, g_index, g_lookback = params[0], params[1], params[2], params[3]
    # Valid list positions are 0 .. len - 1. The previous upper bound `> len(...)` accepted
    # an index equal to len(...), which is out of range as soon as it is used for indexing.
    # Chained comparisons also reject NaN, which the old `<` / `>` pair let through.
    if not 0 <= f_index <= len(f_functions) - 1:
        return 0
    if not 1 <= f_lookback <= max_lookback:
        return 0
    if not 0 <= g_index <= len(g_functions) - 1:
        return 0
    if not 1 <= g_lookback <= max_lookback:
        return 0
    return 1

def alpha(args):
    """Build the candidate signal g(f(data)) for one parameter vector.

    Parameters
    ----------
    args : sequence of four numbers
        ``[f_index, f_lookback, g_index, g_lookback]``. All four are rounded to the
        nearest integer, so float proposals from the sampler are accepted.

    Returns
    -------
    numpy.ndarray
        The last column produced by the chosen G transform, with ``max_lookback`` rows
        trimmed after F and again after G. Its length is therefore
        ``len(data) - 2 * max_lookback``.

    Notes
    -----
    Works on a copy of the module-level ``data``; the global frame is not modified here.
    Relies on each transform returning a frame whose LAST column is its output.
    """
    # List indexing needs a true int. Previously only the lookbacks were rounded, so a
    # float index (e.g. 1.0) raised TypeError.
    f_function = f_functions[int(round(args[0]))]
    f_lookback = int(round(args[1]))
    g_function = g_functions[int(round(args[2]))]
    g_lookback = int(round(args[3]))

    df = data.copy()

    # Stage 1: F takes a single [frame, lookback] list argument.
    df = f_function([df, f_lookback])
    f = df.columns[-1]
    # NOTE(review): the scaler is fitted on the full column, so every scaled value depends
    # on the minimum/maximum of the whole series, later rows included.
    df[f] = MinMaxScaler().fit_transform(df[[f]])
    # Drop a fixed max_lookback rows (not f_lookback) so the output length does not depend
    # on the parameters.
    df = df.iloc[max_lookback:].reset_index(drop=True)

    # Stage 2: G is applied to the scaled F column.
    df = g_function(df, f, g_lookback)
    g = df.columns[-1]
    df = df.iloc[max_lookback:].reset_index(drop=True)
    return df[g].to_numpy()

In [10]:
# NOTE(review): every guess below has 4 entries, yet guess_length is 3. It is only used by
# the result-collection cells to decide how many parameters to read back — confirm whether
# 4 was intended.
guess_length = 3
# Ten random starting points [f_index, f_lookback, g_index, g_lookback].
# np.random.randint excludes its upper bound. Lookbacks are drawn from 1..max_lookback:
# the previous lower bound of 0 could yield a start that prior() scores as 0.
guess_list = [[np.random.randint(0,len(f_functions)), np.random.randint(1,max_lookback+1),
               np.random.randint(0,len(g_functions)), np.random.randint(1,max_lookback+1)] for _ in range(10)]


In [11]:
# Use the first random guess as the starting point of the single trial run.
guess = guess_list[0]
# Name of the column of `data` used as the optimisation target.
target = "MaxFReturn45"

In [12]:
# Display the selected starting point.
guess

[1, 93, 2, 86]

In [13]:
# Single MCMC trial run starting from `guess`.
# NOTE(review): Optimiser is defined in Optimisers.Optimiser and is not visible here; the
# comments below describe only what this cell passes in. The saved output of this cell
# ends in "IndexError: list index out of range".
opt = Optimiser(method="MCMC")
opt.define_alpha_function(alpha)
opt.define_prior(prior)
opt.define_guess(guess=guess)
opt.define_iterations(5)
opt.define_optim_function(None)
# alpha() trims max_lookback rows twice, so the target is trimmed by 2*max_lookback rows
# to have the same length as alpha's output.
opt.define_target(data[target].iloc[2*max_lookback:].to_numpy())
# NOTE(review): one (0, max_lookback+1) range is passed although two of the four
# parameters are list indices with far smaller valid ranges — confirm how the optimiser
# applies these limits per parameter.
opt.define_lower_and_upper_limit(0, max_lookback+1)
mc, rs = opt.optimise()

[(0, (-1000, 1000)), (1, (-1000, 1000)), (2, (-1000, 1000)), (3, (-1000, 1000)), ...]
[((0, 1, 2, 3, 4, 5), 100000), ...]
[0.2, 2.0, 0.2, 2.0, ...]


IndexError: list index out of range

In [None]:
# Flatten the top-100 results of the last run into one row per result: the first
# `guess_length` parameter values keyed by name, plus the score under "NMIS".
# NOTE(review): `params` and `res` are not defined by any earlier cell (`res` is created
# by the following cell), so this cell raises NameError on a fresh kernel. It duplicates
# the inner part of the multi-guess loop in the following cell.
# analyse_results is now called once instead of twice per inner iteration. This assumes
# it is a pure read of `rs` — TODO confirm against the MCMC implementation.
analysis = mc.analyse_results(rs, top_n=100)
res_iter = []
for i in range(100):
    d = {params[j]: analysis[0][i][j] for j in range(guess_length)}
    d['NMIS'] = analysis[1][i]
    res_iter.append(d)
res_iter = pd.DataFrame(res_iter)
res = pd.concat([res, res_iter], axis=0)


In [None]:
# Run one MCMC search per starting guess, pool the top-100 results of every run, and pick
# the parameter set with the highest score.
# NOTE(review): `params`, `alpha_alias`, `df` and `threshold` are not defined by any
# earlier cell in the visible notebook, so this cell raises NameError on a fresh kernel.
# The target is also not trimmed by 2*max_lookback and the limits are (0, 1), unlike the
# single trial run earlier — confirm which configuration is intended.
res_records = []
for guess in guess_list:
    opt = Optimiser(method="MCMC")
    opt.define_alpha_function(alpha_alias)
    opt.define_prior(prior)
    opt.define_guess(guess=guess)
    opt.define_iterations(100)
    opt.define_optim_function(None)
    opt.define_target(df[target].to_numpy())
    opt.define_lower_and_upper_limit(0, 1)
    mc, rs = opt.optimise()
    # One call per run instead of two per inner iteration. This assumes analyse_results is
    # a pure read of `rs` — TODO confirm against the MCMC implementation.
    analysis = mc.analyse_results(rs, top_n=100)
    for i in range(100):
        d = {params[j]: analysis[0][i][j] for j in range(guess_length)}
        d['NMIS'] = analysis[1][i]
        res_records.append(d)

# Build the frame once from the collected records instead of growing it with pd.concat
# inside the loop.
res = pd.DataFrame(res_records, columns=params + ["NMIS"])

res = res.sort_values("NMIS", axis=0, ascending=False).reset_index(drop=True)
max_val = res.loc[0, 'NMIS']
print(f" The maximum NMI is {max_val}")
# Below the threshold no parameter set is considered informative: fall back to all zeros.
if max_val < threshold:
    chosen_weights = pd.DataFrame([0 for param in params])
else:
    chosen_weights = pd.DataFrame([res.iloc[0][param] for param in params])


Add function F

In [None]:
# Materialise every F transform at every lookback in 5..399 as columns of `data`.
f_functions = [add_fisher, add_inverse_fisher, add_constance_brown]
# A stray backtick after the `[]` on this line made the whole cell a SyntaxError.
f_columns = []
for lookback in tqdm(range(5,400,1)):
    for t in f_functions:
        data = t([data, lookback])
        # Each transform is expected to return the frame with its output as the last column.
        f_columns.append((data.columns[-1]))
# Standardise (z-score) the "ConstanceBrown" column.
# NOTE(review): assumes add_constance_brown writes a column with exactly this name — confirm.
data["ConstanceBrown"] = (data["ConstanceBrown"]-data["ConstanceBrown"].mean())/data["ConstanceBrown"].std()
# `lookback` still holds the last loop value (399): drop that many leading rows.
data = data.iloc[lookback:].reset_index(drop=True)

Add function G

In [None]:
# Apply every G transform to every F column at every lookback in 5..399.
# NOTE(review): this makes 5 * len(f_columns) * 395 calls, each re-binding `data`;
# presumably every call appends one column — confirm the resulting frame size is intended.
g_functions = [x, max_over_lookback, min_over_lookback, sma, shift]
for t in g_functions:
    for f in tqdm(f_columns):
        for lookback in range(5,400,1):
            data = t(data, f, lookback)
# `lookback` is the value left behind by the innermost loop (399 when the loops ran);
# a later cell also reads this leaked variable.
data = data.iloc[lookback:].reset_index(drop=True)

In [None]:
import pickle
# Persist the feature frame to Features.pkl in the current working directory.
with open('Features.pkl', 'wb') as handle:
    pickle.dump(data, handle)

Defining MCMC functions

In [None]:
def add_feature(args):
    """Placeholder that was left unfinished in the notebook.

    The original cell stopped at a dangling ``f_lookback =``, which is a SyntaxError and
    prevents the cell from running at all. It is replaced by an explicit stub so the
    notebook parses and any accidental call fails loudly.

    NOTE(review): ``alpha`` earlier in the notebook appears to be the completed version
    of this function — confirm, then delete this cell.

    Raises
    ------
    NotImplementedError
        Always.
    """
    raise NotImplementedError("add_feature was left unfinished in this notebook")


Adding f(x): Fisher

Adding g(x):

In [None]:
from tqdm import tqdm
from Utils.add_features import max_over_lookback, min_over_lookback, sma, x


In [None]:
# NOTE(review): MCMC, alpha_fn, alpha_fn_params_0, num_iters, optimize_fn, lower_limit and
# upper_limit are neither imported nor defined anywhere in the visible notebook, so this
# cell raises NameError on a fresh kernel. The Optimiser(method="MCMC") cells earlier in
# the notebook appear to cover the same step.
mc = MCMC(alpha_fn=alpha_fn, alpha_fn_params_0=alpha_fn_params_0, target=target, num_iters=num_iters, prior=prior,
              optimize_fn=optimize_fn, lower_limit=lower_limit, upper_limit=upper_limit)
rs = mc.optimize()

In [None]:
# Drop the first `lookback` rows. `lookback` is whatever value the last feature loop left
# behind, so re-running this cell trims the frame again.
data = data.iloc[lookback:].reset_index(drop=True)

In [None]:
# Every column whose name contains "Fisher" is treated as a candidate feature.
features = [column_name for column_name in data.columns if "Fisher" in column_name]

In [None]:
from Utils.utils import calc_NMI
# Score every candidate feature against the forward-return column; one dict per feature.
# The RL cell at the end of the notebook adds an "RL_Test_Return" key to these dicts.
res = []
for feature in tqdm(features):
    # NOTE(review): no visible cell creates a column named exactly "FReturn" (the target
    # cell creates FReturn3, FReturn5, ...), so this lookup may raise KeyError — confirm
    # which horizon is the intended target.
    res.append({"feature": feature, "NMI": calc_NMI(data[feature], data["FReturn"])})

Reinforcement Learning Returns

In [None]:
from stable_baselines3 import A2C, PPO
from stable_baselines3.common.vec_env import DummyVecEnv
import gym
from gym import spaces
import numpy as np
import random

In [None]:
class StockTradingEnv(gym.Env):
    """Long-only, all-in trading environment over a single price series.

    Time is the row position in ``df``. At every step the agent picks one of two actions
    (``spaces.Discrete(2)``):

    * while flat:  1 opens a long position with the entire balance, 0 stays flat;
    * while long:  1 keeps holding, 0 closes the position.

    The observation is the ``window_shape`` rows of ``features`` that precede the current
    step. The reward returned by ``step`` is the current net worth itself (not its change).
    Uses the classic gym API: ``reset() -> obs`` and ``step() -> (obs, reward, done, info)``.
    """

    def __init__(self, df, features, init_account_balance, window_shape, random_start = False):
        """
        Parameters
        ----------
        df : pandas.DataFrame
            Price data; must contain a "Close" column.
        features : pandas.DataFrame
            Signal columns, row-aligned with ``df`` (row i of both describes the same step).
        init_account_balance : float
            Cash at the start of every episode.
        window_shape : int
            Number of feature rows per observation.
        random_start : bool
            When True every episode starts at a random step instead of ``window_shape``.
        """
        super().__init__()
        self.df = df
        self.init_account_balance = init_account_balance
        self.window_shape = window_shape
        self.random_start = random_start

        # 0 / 1, interpreted relative to the current position (see class docstring).
        self.action_space = spaces.Discrete(2)

        self.signal_features = features
        self.process_data()

        self._reset_state()

    def _reset_state(self):
        """Put account, position and step counter back to their start-of-episode values."""
        self.balance = self.init_account_balance
        self.net_worth = self.init_account_balance
        if self.random_start:
            self.current_step = random.randint(self.window_shape, len(self.df) - self.window_shape)
        else:
            self.current_step = self.window_shape
        self.in_position = False
        self.position_value = 0.0
        self.price_bought = 0.0
        self.bet_bought = 0.0
        # Defined up front so render() works before the first step; previously the
        # attribute only existed after take_action() had run once (AttributeError).
        self.current_price = None

    def reset(self):
        """Start a new episode and return its first observation."""
        self._reset_state()
        return self.get_observation()

    def process_data(self):
        """Slice the working frames and declare the observation space."""
        self.start, self.end = self.window_shape, len(self.df)
        self.prices = self.df.iloc[self.start:self.end]
        # Keep the `window_shape` rows before `start` so that positions in signal_features
        # stay aligned with positions in self.df. Slicing from `start` (as before) shifted
        # the features by window_shape rows, so the window read at step t covered rows
        # t .. t+window_shape-1 of the original frame, i.e. the current and FUTURE rows.
        self.signal_features = self.signal_features.iloc[self.start - self.window_shape:self.end]
        self.observation_space = spaces.Box(
            low=-1, high=1, shape=(self.window_shape, self.signal_features.shape[-1]), dtype=np.float16
        )

    def get_observation(self):
        """Return the feature rows [current_step - window_shape, current_step) as an array."""
        market_state = self.signal_features.iloc[self.current_step-self.window_shape:self.current_step]
        # Return an ndarray in the declared dtype rather than a DataFrame, so the
        # observation matches the type described by observation_space.
        return np.asarray(market_state, dtype=self.observation_space.dtype)

    def step(self, action):
        """Apply ``action`` at the current step, advance one row, and report the outcome.

        Returns ``(observation, reward, done, info)`` where reward is the net worth after
        the action and info is an empty dict.
        """
        self.take_action(action)
        self.current_step += 1

        # Wrap around if stepping continues past the last usable row.
        if self.current_step > len(self.df) - self.window_shape:
            self.current_step = self.window_shape

        reward = self.net_worth

        # The episode ends window_shape rows before the end of the data.
        done = False
        if (self.current_step >= (len(self.df) - self.window_shape)):
            done = True

        obs = self.get_observation()
        return obs, reward, done, {}

    def take_action(self, action):
        """Update balance / position for ``action`` at the close price of the current row."""
        current_price = self.df.iloc[self.current_step]["Close"]
        self.current_price = current_price

        if not self.in_position:
            if action == 1: # OPEN LONG with the whole balance
                self.in_position = True
                self.price_bought = current_price
                self.bet_bought = self.balance
                self.balance -= self.bet_bought
                self.position_value = self.bet_bought
            else: # KEEP LOOKING
                pass
        else:
            # Return of the open position since entry.
            market_return = ((current_price - self.price_bought) / self.price_bought)
            if action == 1: # HOLD LONG: mark the position to market
                self.position_value = self.bet_bought * (1.0 + market_return)
            else: # CLOSE LONG: realise the position back into cash
                self.balance += self.bet_bought * (1.0 + market_return)
                self.in_position = False
                self.price_bought = 0.0
                self.bet_bought = 0.0
                self.position_value = 0.0

        self.net_worth = self.balance + self.position_value

    def render(self, mode='human'):
        """Return a snapshot of the account state as a dict (nothing is drawn)."""
        return {
            'step': self.current_step,
            'price': self.current_price,
            'balance': self.balance,
            'position': self.position_value,
            'net_worth': self.net_worth,
            'profit': self.net_worth - self.init_account_balance
        }

In [None]:
def evaluate_agent(env, df, model, historical_states=None):
    """Roll a trained model through ``env`` once and record the account state per step.

    Parameters
    ----------
    env : vectorised environment
        Must provide ``reset()``, ``step(action)`` and ``render()``. ``done`` and
        ``action`` are indexed with ``[0]``, i.e. a single wrapped environment is assumed.
    df : sized object
        Only ``len(df)`` is used, to bound the number of steps.
    model : object
        Must provide ``predict(obs, deterministic=True) -> (action, state)``.
    historical_states : int, optional
        Number of rows subtracted from ``len(df)`` to get the step limit. Defaults to the
        notebook-level ``HISTORICAL_STATES`` so existing three-argument calls behave as
        before; passing it explicitly removes the dependency on that global, which is only
        defined by a later cell.

    Returns
    -------
    (dict, numpy.ndarray)
        ``history`` with the lists 'balance', 'action', 'position', 'net_worth' and
        'price' (one entry per completed step), and the actions as an array.
    """
    if historical_states is None:
        historical_states = HISTORICAL_STATES

    history = {
        'balance': [],
        'action': [],
        'position': [],
        'net_worth': [],
        'price': []
    }

    obs = env.reset()
    for _ in range(len(df) - historical_states):
        action, _ = model.predict(obs, deterministic=True)
        obs, _reward, done, _info = env.step(action)
        # Stop before recording: the step that ends the episode is not added to history.
        if done[0]:
            break
        state = env.render()
        history['action'].append(action[0])
        history['balance'].append(state['balance'])
        history['net_worth'].append(state['net_worth'])
        history['position'].append(state['position'])
        history['price'].append(state['price'])

    actions_over_time = np.array(history['action'])
    return history, actions_over_time


In [None]:
# Train and evaluate one A2C agent per candidate feature, and attach the test-set return
# to the matching entry of `res`.
df = data.copy()
for feature in tqdm(features):
    print("*"*100)
    print(feature)

    # Run configuration (re-bound on every pass; evaluate_agent reads HISTORICAL_STATES
    # as a module-level name).
    INIT_NET_WORTH = 10000
    HISTORICAL_STATES = 5
    LR = 0.001
    RANDOM_SEED = 11111
    train_percent = 0.8

    # Single-column frame holding just this feature, on the same index as df.
    signal_features = df[[feature]].copy()

    # Chronological split: the first train_percent of the rows train, the rest test.
    split_at = int(train_percent*len(df))
    train_df = df.iloc[:split_at].reset_index(drop=True)
    test_df = df.iloc[split_at:].reset_index(drop=True)
    train_signal_features = signal_features.iloc[:split_at].reset_index(drop=True)
    test_signal_features = signal_features.iloc[split_at:].reset_index(drop=True)
    N_TIME_STEPS = len(train_df) - HISTORICAL_STATES

    # Train on the first segment.
    env_train = DummyVecEnv([partial(StockTradingEnv, train_df, train_signal_features, INIT_NET_WORTH, HISTORICAL_STATES)])
    model = A2C('MlpPolicy', env_train, verbose=0, learning_rate=LR, seed=RANDOM_SEED)
    model.learn(total_timesteps=N_TIME_STEPS, log_interval=100)
    history, actions_over_time = evaluate_agent(env_train, train_df, model)

    # Evaluate the trained model on the held-out segment.
    env_test = DummyVecEnv([partial(StockTradingEnv, test_df, test_signal_features, INIT_NET_WORTH, HISTORICAL_STATES)])
    history_test, actions_over_time_test = evaluate_agent(env_test, test_df, model)

    # Relative change of net worth between the first and last recorded test step.
    test_net_worth = history_test['net_worth']
    for r in res:
        if r['feature']==feature:
            r['RL_Test_Return'] = test_net_worth[-1]/test_net_worth[0]-1