In [1]:
import pandas as pd
%load_ext autoreload
%autoreload 2

In [2]:
# Install a catch-all "ignore" filter so no Python warning is shown for the
# rest of the session.
# NOTE(review): this blanket filter also hides deprecation and numerical
# warnings raised by the libraries used below -- consider narrowing it.
import warnings
warnings.filterwarnings("ignore")

In [3]:

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
import matplotlib.pyplot as plt
# Default figure size, (width, height) in inches.
plt.rcParams["figure.figsize"] = (15,5)
# Disable axes grid lines by default.
plt.rcParams['axes.grid'] = False
import seaborn as sns
# Apply seaborn's "whitegrid" style, overriding it so the grid stays off.
sns.set_style("whitegrid", {'axes.grid' : False})

In [4]:
from tqdm import tqdm
import numpy as np
from V8Suprabhash import get_stock_data, add_features,  plot_performance

In [5]:
from keras.models import Sequential
from keras.layers import InputLayer
from keras.layers import Dense
########################## Changes made here ##################################
def get_model(num_features, num_actions, num_dense_layers, neurons_per_layer):
    """Build and compile a small fully-connected Keras network.

    The input is a single vector of width ``num_features + num_actions``;
    it is followed by ``num_dense_layers`` ReLU layers of
    ``neurons_per_layer`` units each and a sigmoid output layer with
    ``num_actions`` units. The model is compiled with MSE loss, the Adam
    optimizer and MAE as a metric.

    Args:
        num_features: Number of state-feature inputs.
        num_actions: Number of action inputs (also the output width).
        num_dense_layers: How many hidden ReLU layers to stack.
        neurons_per_layer: Units in each hidden layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    input_width = num_features + num_actions

    # NOTE(review): the batch dimension is pinned to 1 here; confirm this is
    # intended if the model is later fed multi-row arrays.
    stack = [InputLayer(batch_input_shape=(1, input_width))]
    stack.extend(
        Dense(neurons_per_layer, activation='relu')
        for _ in range(num_dense_layers)
    )
    # Sigmoid keeps every output inside (0, 1).
    stack.append(Dense(num_actions, activation='sigmoid'))

    network = Sequential()
    for layer in stack:
        network.add(layer)
    network.compile(loss='mse', optimizer='adam', metrics=['mae'])
    return network
###############################################################################

In [6]:
# Feature specifications handed to add_features; each entry names a feature
# column and carries a "lookback" setting.
features = [
    {"feature": "Close_as_a_feature", "lookback": 0},
    {"feature": "diff_of_close", "lookback": 1},
]

#Experiment params
ticker = 'sinx'#'NIFc1'
tune = False
train_percent = 0.8          # fraction of rows used for training
state_lookback = 1
num_dense_layers = 2
num_dense_layers_by_num_features = 2   # hidden-layer width as a multiple of the state width
alpha = 0.5                  # learning rate used when blending old Q with the new target
epsilon = 0.1                # probability of taking a random action
gamma = 0.1                  # discount applied to the next state's Q-value
episodes = 100
metric = "percent"           # "absolute" or anything else for percentage rewards

############################################################################################################################################

#Get data
df = get_stock_data(ticker)
# NOTE(review): the literal 0.8 below is presumably the same training fraction
# as train_percent -- confirm against V8Suprabhash.add_features before tying
# the two together.
df = add_features(df, features, state_lookback, 0.8)

# Split chronologically using train_percent (previously a hard-coded 0.8 that
# silently ignored the parameter above).
split_index = int(train_percent * len(df))
train_df = df.iloc[:split_index, :]
test_df = df.iloc[split_index:, :]


########################## Changes made here ##################################
all_actions = {0: 'percentage_invested_in_equity'}
###############################################################################

# Width of the state vector fed to the network.
num_state_features = len(features)*state_lookback
model = get_model(num_state_features, len(all_actions), num_dense_layers, num_state_features*num_dense_layers_by_num_features)
# visualizer(model, format='png', view=True)

In [None]:
def train_q_learning(train, state_lookback, model, alpha, epsilon, gamma, episodes, all_actions, metric, features, number_of_random_samples, plot=True):
    """Train ``model`` as a Q-function over a continuous "fraction invested" action.

    For every row of ``train`` a fixed set of ``number_of_random_samples``
    candidate actions is drawn uniformly from [0, 1). At the start of each
    episode the model scores every (state, candidate action) pair; the
    candidate with the highest score is that state's greedy action and its
    score is the state's Q-value. The episode then walks through the rows
    (starting at row 1), choosing the greedy action or, with probability
    ``epsilon``, a fresh uniform random action, and rebalances a simulated
    portfolio (starting cash 10000, no trading costs) so that the chosen
    fraction of net worth is invested at the row's ``Close`` price.

    The reward of an action is the change in net worth observed at the next
    row (absolute if ``metric == "absolute"``, otherwise relative). The last
    row's action has no observed outcome and is not used for fitting. Fit
    targets are ``(1 - alpha) * Q(s) + alpha * (reward + gamma * Q(s_next))``
    where ``Q(s)`` is the state's best candidate score.

    Args:
        train: DataFrame with a ``Close`` column and the feature columns named
            in ``features`` (plus ``<name>_shift<i>`` columns for
            ``1 <= i < state_lookback``).
        state_lookback: Number of lookback steps encoded in the state columns.
        model: Object exposing ``predict``, ``fit`` and ``get_weights``; it is
            fed rows of ``[state..., action]`` and must return one value per row.
        alpha: Blend weight between the old Q-value and the new target.
        epsilon: Exploration probability; divided by 1.2 per step while > 0.1.
        gamma: Discount applied to the next state's Q-value.
        episodes: Number of passes over ``train``.
        all_actions: Mapping describing the actions; exactly one is supported.
        metric: ``"absolute"`` for absolute rewards, anything else for relative.
        features: List of dicts whose ``"feature"`` key names a state column.
        number_of_random_samples: Candidate actions drawn per state (>= 1).
        plot: When true, call ``plot_performance`` after every episode.

    Returns:
        ``(model, returns_vs_episodes, weights_best_performing)`` where
        ``returns_vs_episodes`` holds each episode's total return and
        ``weights_best_performing`` is ``model.get_weights()`` captured after
        the best episode, or ``None`` if no episode returned more than 0.

    Raises:
        ValueError: If ``number_of_random_samples < 1``, if ``all_actions``
            does not contain exactly one action, or if ``train`` has fewer
            than 3 rows (no complete transition can be formed).
    """
    if number_of_random_samples < 1:
        raise ValueError("number_of_random_samples must be at least 1")
    if len(all_actions) != 1:
        raise ValueError("train_q_learning supports exactly one continuous action")

    train_data = train.copy()
    if len(train_data) < 3:
        raise ValueError("train must contain at least 3 rows")

    returns_vs_episodes = []
    best_episode_return = 0
    weights_best_performing = None

    # State matrix: one row per time step, one column per (feature, shift).
    feature_names = [feature['feature'] for feature in features]
    state_columns = feature_names + [f"{col}_shift{i}" for col in feature_names for i in range(1, state_lookback)]
    state_matrix = train_data[state_columns].values
    closes = train_data["Close"].values
    n_states = len(state_matrix)
    state_index = np.arange(n_states)

    # Candidate actions, shape (n_states, samples); row i belongs to state i.
    candidate_actions = np.random.uniform(0, 1, size=(n_states, number_of_random_samples))
    # Model input: every state repeated once per candidate, action appended
    # as the last column. Row i * samples + j is (state i, candidate j).
    candidates = np.hstack((
        np.repeat(state_matrix, number_of_random_samples, axis=0),
        candidate_actions.reshape(-1, 1),
    ))

    for ii in tqdm(range(episodes)):

        #Backtester initialisation
        balance = 10000
        units_held = 0.0
        actions_history = []
        equity_curve = []
        rewards = []
        states = []
        prices = []
        current_q_all_states = []
        next_q_all_states = []

        # Score every (state, candidate) pair once per episode.
        q_values = np.asarray(model.predict(candidates)).reshape(n_states, number_of_random_samples)
        best_sample = q_values.argmax(axis=1)
        current_qs = q_values[state_index, best_sample]
        greedy_actions = candidate_actions[state_index, best_sample]
        # Q-value of the following state; the final state has no successor,
        # so its continuation value is 0.
        next_qs = np.append(current_qs[1:], 0.0)

        for i in range(1, n_states):
            price = closes[i]

            # Mark the holdings chosen at the previous step to market.
            net_worth = balance + units_held * price
            equity_curve.append(net_worth)
            if len(equity_curve) > 1:
                # This change is the outcome of the previous step's action,
                # so rewards[k] lines up with actions_history[k].
                change = equity_curve[-1] - equity_curve[-2]
                if metric == "absolute":
                    rewards.append(change)
                else:
                    rewards.append(change / equity_curve[-2])

            # decide action
            if epsilon > 0.1:
                epsilon = epsilon / 1.2

            if np.random.uniform(0, 1) < epsilon:
                action = np.random.uniform(0, 1)
            else:
                action = float(greedy_actions[i])

            # Rebalance so that `action` of net worth is invested at `price`.
            units_held = action * net_worth / price
            balance = net_worth - units_held * price

            prices.append(price)
            states.append(state_matrix[i])
            actions_history.append(action)
            current_q_all_states.append(current_qs[i])
            next_q_all_states.append(next_qs[i])

        # The last action has no observed reward; keep only full transitions.
        n_transitions = len(rewards)
        fit_states = np.asarray(states[:n_transitions], dtype=float)
        fit_actions = np.asarray(actions_history[:n_transitions], dtype=float).reshape(-1, 1)
        cq = np.asarray(current_q_all_states[:n_transitions], dtype=float)
        nq = np.asarray(next_q_all_states[:n_transitions], dtype=float)
        targets = ((1. - alpha) * cq) + alpha * (np.asarray(rewards, dtype=float) + gamma * nq)

        arr_fit_X = np.hstack((fit_states, fit_actions))
        arr_fit_Y = targets.reshape(-1, 1)
        model.fit(arr_fit_X,arr_fit_Y,epochs=30, verbose=0)

        episode_return = equity_curve[-1]/equity_curve[0]-1
        print(f"Episode Number: {ii+1}, Total return of episode: {episode_return}")
        if plot:
            plot_performance(train_data, prices, features, actions_history, equity_curve)

        if episode_return>best_episode_return:
            weights_best_performing = model.get_weights()
            best_episode_return = episode_return

        returns_vs_episodes.append(episode_return)

    return model, returns_vs_episodes, weights_best_performing

In [None]:
# Number of random candidate actions passed to train_q_learning.
number_of_random_samples = 10

# Train on the training split; rebinds `model` and collects per-episode
# returns plus the weights of the best-performing episode.
model, returns_vs_episodes, weights = train_q_learning(
    train_df,
    state_lookback,
    model,
    alpha,
    epsilon,
    gamma,
    episodes,
    all_actions,
    metric,
    features,
    number_of_random_samples,
    plot=True,
)





In [None]:
# Continue training the current model. train_q_learning requires
# number_of_random_samples as a positional argument; it was missing here,
# which raised a TypeError.
model, returns_vs_episodes, weights = train_q_learning(train_df, state_lookback, model, alpha, epsilon, gamma, episodes, all_actions, metric, features, number_of_random_samples, plot=True)

In [None]:
# Feature specifications handed to add_features; each entry names a feature
# column and carries a "lookback" setting.
features = [
    {"feature": "Close_as_a_feature", "lookback": 0},
    {"feature": "diff_of_close", "lookback": 1},
]

#Experiment params
ticker = 'sinx'#'NIFc1'
tune = False
train_percent = 0.8          # fraction of rows used for training
state_lookback = 1
num_dense_layers = 2
num_dense_layers_by_num_features = 2   # hidden-layer width as a multiple of the state width
alpha = 0.5
epsilon = 0.1
gamma = 0.1
episodes = 100
metric = "percent"

############################################################################################################################################

#Get data
df = get_stock_data(ticker)

# `save` is not defined anywhere in the visible notebook; create it on first
# use so this cell does not raise NameError on a fresh kernel.
if "save" not in globals():
    save = {}
save["features"] = features

# NOTE(review): the literal 0.8 below is presumably the same training fraction
# as train_percent -- confirm against V8Suprabhash.add_features.
df = add_features(df, features, state_lookback, 0.8)

# Split chronologically using train_percent instead of a second hard-coded 0.8.
split_index = int(train_percent * len(df))
train_df = df.iloc[:split_index, :]
test_df = df.iloc[split_index:, :]


########################## Changes made here ##################################
all_actions = {0: 'percentage_invested_in_equity'}
###############################################################################

# Width of the state vector fed to the network.
num_state_features = len(features)*state_lookback
model = get_model(num_state_features, len(all_actions), num_dense_layers, num_state_features*num_dense_layers_by_num_features)
# visualizer(model, format='png', view=True)

# number_of_random_samples (set in an earlier cell) is a required positional
# argument of train_q_learning; omitting it raised a TypeError.
model, returns_vs_episodes, weights = train_q_learning(train_df, state_lookback, model, alpha, epsilon, gamma, episodes, all_actions, metric, features, number_of_random_samples, plot=True)

In [None]:
# NOTE(review): this cell repeats the previous experiment cell verbatim;
# consider wrapping the setup in a function instead of copying it.
# Feature specifications handed to add_features; each entry names a feature
# column and carries a "lookback" setting.
features = [
    {"feature": "Close_as_a_feature", "lookback": 0},
    {"feature": "diff_of_close", "lookback": 1},
]

#Experiment params
ticker = 'sinx'#'NIFc1'
tune = False
train_percent = 0.8          # fraction of rows used for training
state_lookback = 1
num_dense_layers = 2
num_dense_layers_by_num_features = 2   # hidden-layer width as a multiple of the state width
alpha = 0.5
epsilon = 0.1
gamma = 0.1
episodes = 100
metric = "percent"

############################################################################################################################################

#Get data
df = get_stock_data(ticker)

# `save` is not defined anywhere in the visible notebook; create it on first
# use so this cell does not raise NameError on a fresh kernel.
if "save" not in globals():
    save = {}
save["features"] = features

# NOTE(review): the literal 0.8 below is presumably the same training fraction
# as train_percent -- confirm against V8Suprabhash.add_features.
df = add_features(df, features, state_lookback, 0.8)

# Split chronologically using train_percent instead of a second hard-coded 0.8.
split_index = int(train_percent * len(df))
train_df = df.iloc[:split_index, :]
test_df = df.iloc[split_index:, :]


########################## Changes made here ##################################
all_actions = {0: 'percentage_invested_in_equity'}
###############################################################################

# Width of the state vector fed to the network.
num_state_features = len(features)*state_lookback
model = get_model(num_state_features, len(all_actions), num_dense_layers, num_state_features*num_dense_layers_by_num_features)
# visualizer(model, format='png', view=True)

# number_of_random_samples (set in an earlier cell) is a required positional
# argument of train_q_learning; omitting it raised a TypeError.
model, returns_vs_episodes, weights = train_q_learning(train_df, state_lookback, model, alpha, epsilon, gamma, episodes, all_actions, metric, features, number_of_random_samples, plot=True)