In [4]:
%load_ext autoreload
%autoreload 2

In [5]:
import warnings
warnings.filterwarnings("ignore")

In [6]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (15,5)
plt.rcParams['axes.grid'] = False
import seaborn as sns
sns.set_style("whitegrid", {'axes.grid' : False})


In [7]:
import warnings
warnings.filterwarnings("ignore")

import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (15,5)
plt.rcParams['axes.grid'] = False
import seaborn as sns
sns.set_style("whitegrid", {'axes.grid' : False})

import numpy as np
import itertools
import pandas as pd
from tqdm import tqdm
from keras.models import Sequential
from keras.layers import InputLayer
from keras.layers import Dense
from keras_visualizer import visualizer
import neptune.new as neptune
from datetime import datetime

import os
os.environ["PATH"] += os.pathsep + 'C:/Users/suprabhashsahu/Desktop/StrategyResearch/venv/Graphviz/bin/'

from Utils.add_features import add_fisher
from Data.data_retrieval import get_data
from Utils.neptune_ai_api_key import API_KEY
from sklearn.preprocessing import MinMaxScaler
np.random.seed(12)

In [8]:
def get_stock_data(symbol):
    if symbol == 'sinx':
        df = get_data(".NSEI", 'D')
        df.drop(columns=["Volume"], inplace=True)
        df["Close"] = df["Open"] = df["High"] = df["Low"] = np.sin(df.index / 10 ) +2
    else:
        df = get_data(symbol, 'D')
    df.set_index("Datetime", inplace=True)
    df.dropna(inplace=True)
    return df

def scale(series, lookback, train_pct):
    scaler = MinMaxScaler(feature_range=(-1, 1))
    scaler.fit(series.iloc[lookback:int(df.shape[0]*train_pct)])
    return scaler.transform(series)

def add_features(df, features):
    lookbacks = []
    for feature, lookback in [(feature["feature"], feature["lookback"]) for feature in features]:
        lookbacks.append(lookback)
        if feature.startswith("Fisher"):
            df[feature] = add_fisher([df, lookback])[[f"Fisher{lookback}"]]
        if feature.startswith("Momentum"):
            df[feature] = df["Close"].diff(lookback)
        df[f"{feature}_normalized"] = scale(df[[feature]], lookback, 0.8)
    df = df.iloc[max(lookbacks):]
    return df


# Data Prep

Get data

In [9]:
df = get_stock_data('sinx')
train_len = int(df.shape[0]*0.8)

Add Features

In [10]:
features = [
        {"feature": "Close", "lookback": 0},
        {"feature": "Momentum", "lookback": 1}
    ]

In [11]:
df = add_features(df, features)

In [12]:
train_df = df.iloc[:train_len, :]
test_df = df.iloc[train_len:, :]

# MLP as a function approximation for QMatrix

Define all actions

In [13]:
all_actions = {0: 'neutral', 1: 'long'}

In [14]:
train_df

Unnamed: 0_level_0,Close,High,Low,Open,Close_normalized,Momentum,Momentum_normalized
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2007-09-18,2.099833,2.099833,2.099833,2.099833,0.099829,0.099833,0.998750
2007-09-19,2.198669,2.198669,2.198669,2.198669,0.198665,0.098836,0.988771
2007-09-20,2.295520,2.295520,2.295520,2.295520,0.295517,0.096851,0.968912
2007-09-21,2.389418,2.389418,2.389418,2.389418,0.389415,0.093898,0.939373
2007-09-24,2.479426,2.479426,2.479426,2.479426,0.479423,0.090007,0.900447
...,...,...,...,...,...,...,...
2019-05-30,2.073410,2.073410,2.073410,2.073410,0.073405,0.099931,0.999725
2019-05-31,2.172607,2.172607,2.172607,2.172607,0.172603,0.099197,0.992387
2019-06-03,2.270080,2.270080,2.270080,2.270080,0.270076,0.097473,0.975133
2019-06-04,2.364854,2.364854,2.364854,2.364854,0.364851,0.094774,0.948136


In [15]:
def plot_performance(prices, actions_history, equity_curve):
    fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(15, 7))
    ax1.plot(prices, label='Close')
    ax1_copy = ax1.twinx()
    ax1_copy.plot(actions_history, label='Actions')
    ax2.plot(actions_history, label='Actions')
    ax2_copy = ax2.twinx()
    for feature in [feature["feature"] for feature in features]:
        ax2_copy.plot(df[feature], label=feature, color='green', ls='dotted')
    ax2_copy.axhline(0.0, ls='--', color='grey')
    ax3.plot(equity_curve, label='Net worth')
    ax3.plot([price*10000 / prices[0] for price in prices], label='Benchmark')
    ax1.legend()
    ax2.legend()
    ax3.legend()
    plt.show()

In [104]:
def get_model(num_features, state_lookback):
    model = Sequential()
    model.add(Dense(5, input_dim=num_features*state_lookback, activation='relu'))
    model.add(Dense(5, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [126]:
def act(features, model, threshold=0.2, actions_size=2):
    if np.random.uniform(0, 1) < threshold:
        action_priority = np.arange(0,actions_size)
        np.random.shuffle(action_priority)
    else:
        action = model.predict(features)   #The actual value can be used as a probability/strength of signal
        if action>0.5:
            action_priority = np.array([1,0])
        else:
            action_priority = np.array([0,1])
    return action_priority

In [127]:
def train_q_learning(train, features, state_lookback, model, alpha, epsilon, gamma, episodes, metric="absolute"):
    train_df = train.copy()
    train_data = train_df[["Close"]+[f"{feature['feature']}_normalized" for feature in features]]
    max_episode_return = 0
    model_best = None

    for ii in tqdm(range(episodes)):

        #Backtester initialisation
        balance = 10000
        net_worth = balance
        in_position = False
        position_value = 0.0
        price_bought = 0.0
        bet_bought = 0.0
        actions_history = []
        equity_curve = []
        rewards = []
        states = []
        next_states = []
        prices = []

        for i in range(len(train_data)):
            if i<state_lookback:
                continue
            current_adj_close = train_data.iloc[i]["Close"]
            prices.append(current_adj_close)
            prev_adj_close = train_data.iloc[i-1]["Close"]
            features_inp = np.expand_dims(np.array(train_data.iloc[i-state_lookback+1:i+1][[f"{feature['feature']}_normalized" for feature in features]]).reshape(1,-1)[0], axis=0)
            # decide action
            if epsilon > 0.1:
                epsilon = epsilon / 1.2

            action_priority = act(features_inp, model, threshold=epsilon, actions_size=2)
            action = action_priority[0]
            actions_history.append(action)

            if not in_position:
                if action == 1:  # OPEN LONG
                    in_position = True
                    price_bought = current_adj_close
                    bet_bought = balance
                    balance -= bet_bought
                    position_value = bet_bought
                    rewards.append(0)
                else:  # KEEP LOOKING
                    rewards.append(0)
            else:
                market_return = ((current_adj_close - price_bought) / price_bought)
                if action == 1:  # HOLD LONG
                    position_value = bet_bought * (1.0 + market_return)
                    if metric=="absolute":
                        rewards.append(bet_bought*market_return)
                    else:
                        rewards.append(market_return)
                else:  # CLOSE LONG
                    balance += bet_bought * (1.0 + market_return)
                    in_position = False
                    price_bought = 0.0
                    bet_bought = 0.0
                    position_value = 0.0
                    rewards.append(0)

            net_worth = balance + position_value
            equity_curve.append(net_worth)

            try:
                next_states.append(np.expand_dims(np.array(train_data.iloc[i-state_lookback+2:i+2][[f"{feature['feature']}_normalized" for feature in features]]).reshape(1,-1)[0], axis=0))
            except:
                break

        for state, action, reward, next_state in zip(states, actions_history, rewards, next_states):
            # update q table
            q.loc[state, action] = (1. - alpha) * q.loc[state, action] + alpha * (reward + gamma * (q.loc[next_state].max()))

            target = reward + gamma * np.max(model.predict(np.identity(env.observation_space.n)[new_state:new_state + 1]))
            target_vector = model.predict(np.identity(env.observation_space.n)[state:state + 1])[0]
            target_vector[action] = target
            model.fit(np.identity(env.observation_space.n)[state:state + 1], target_vec.reshape(-1, env.action_space.n), epochs=1, verbose=0)




    #     episode_return = equity_curve[-1]/equity_curve[0]-1
    #     if episode_return>max_episode_return:
    #         max_episode_return = episode_return
    #         q_best = q
    #     print(f"Episode Number: {ii+1}, Total return of episode: {equity_curve[-1]/equity_curve[0]-1}")
    #     plot_performance(prices, actions_history, equity_curve)
    #
    # return q_best


In [None]:
np.argmax(model.predict(np.identity(env.observation_space.n)[state:state + 1]))

In [138]:
np.identity(2)

array([[0., 1.]])

In [128]:
alpha = 0.1
epsilon = 0.1
gamma = 0.1
episodes = 10
metric = "percent"
state_lookback = 3

In [None]:
model = Sequential()
model.add(InputLayer(batch_input_shape=(1, )))
model.add(Dense(20, activation='relu'))
model.add(Dense(env.action_space.n, activation='linear'))
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

In [129]:
#Print model
model = get_model(len(features), state_lookback)
# visualizer(model, format='png', view=True)

In [130]:
train_q_learning(train_df, features, state_lookback, model, alpha, epsilon, gamma, episodes, metric)

  0%|          | 0/10 [00:00<?, ?it/s]

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
1
1
1
0
1
0
0
0
0
1
1
1
1
0
0
1
1
1
1
1
1
1
1
1


Exception ignored in: <function ScopedTFGraph.__del__ at 0x000001F016456820>
Traceback (most recent call last):
  File "c:\users\suprabhashsahu\desktop\strategyresearch\venv\lib\site-packages\tensorflow\python\framework\c_api_util.py", line 54, in __del__
    self.deleter(self.graph)
KeyboardInterrupt: 


1
1
1
1
1
1


  0%|          | 0/10 [00:02<?, ?it/s]


1
1
1
0
1



KeyboardInterrupt



In [None]:
def eval_q_learning(test_data, q, state_lookback):

    test_data = test_data[["Close", "state"]]

    #Backtester initialisation
    balance = 10000
    net_worth = balance
    in_position = False
    position_value = 0.0
    price_bought = 0.0
    bet_bought = 0.0
    actions_history = []
    equity_curve = []
    rewards = []
    states = []
    prices = []

    for i, val in enumerate(np.array(test_data)):
        if i<state_lookback:
            continue
        current_adj_close, state = val
        prices.append(current_adj_close)
        prev_adj_close, _ = np.array(test_data)[i - 1]
        states.append(''.join(list(test_data.iloc[i - state_lookback:i]["state"])[::-1]))

        action_priority = act(state, q, threshold=0, actions_size=2)
        action = action_priority[0]
        actions_history.append(action)

        if not in_position:
            if action == 1:  # OPEN LONG
                in_position = True
                price_bought = current_adj_close
                bet_bought = balance
                balance -= bet_bought
                position_value = bet_bought
                rewards.append(0)
            else:  # KEEP LOOKING
                rewards.append(0)
        else:
            market_return = ((current_adj_close - price_bought) / price_bought)
            if action == 1:  # HOLD LONG
                position_value = bet_bought * (1.0 + market_return)
                if metric=="absolute":
                    rewards.append(bet_bought*market_return)
                else:
                    rewards.append(market_return)
            else:  # CLOSE LONG
                balance += bet_bought * (1.0 + market_return)
                in_position = False
                price_bought = 0.0
                bet_bought = 0.0
                position_value = 0.0
                rewards.append(0)

        net_worth = balance + position_value
        equity_curve.append(net_worth)
    plot_performance(prices, actions_history, equity_curve)


In [None]:
eval_q_learning(test_df, q, state_lookback)
