In [23]:
#CAPM model
#Weight => as in from asset allocation 
# rPortfolio = summationi(weighti (betai*rMarket)+ alphai)
#Technical indicators
# n is window size (20)
# 1. Momentum -> (price[t]/price[t-n]) -1
# 2. Simple moving average(SMA) -> (price[t]/price[t-n:t].mean())-1
# 3. Bollinger band -> (price[t] - SMA[t]/2*std[t])-1
# +2 std => value >1.0
# -2 std => value <1.0

# Range of these technical indicators
# SMA => -0.5 to +0.5
# MOMENTUM => -0.5 to +0.5
# BB -> -1.0 to +1.0
# PE ratio -> 1 to 300 (Fundamental)

# These factors need to be normalized before using the combination of these indicators
# normed = (values - mean)/values.std()

# Stock split causes price changes. Data anomoly. So we use Adjusted price.

# one should use survival bias free data

In [24]:
# ML dataset
# X- axis => price momentum, Bolinger value, current price , PE ratio
# Y- axis => future price, future return

In [25]:
import pandas as pd
import numpy as np
import math
import random
from random import sample 
import time

# import envs import TradingEnv


import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

In [26]:
def test_run(symbols,date):
    
    dates= pd.date_range(date[0],date[1])
    
    #create empty dataframe
    df1 = pd.DataFrame(index=dates)

    for symbol in symbols:
        df_temp=pd.read_csv("data/{}.csv".format(symbol),index_col="Date",
                        parse_dates=True,usecols=['Date','Adj Close'],
                        na_values=['nan'])
        #rename to prevent clash
        df_temp=df_temp.rename(columns={'Adj Close':symbol})
        df1=df1.join(df_temp,how="inner")
    return df1

In [27]:
def compute_daily_returns(df):
    daily_returns = df.copy()
    daily_returns[1:] = (df[1:]/df[:-1].values) -1
    daily_returns.iloc[0,:]=0
    return daily_returns
def feature(df,window):
    momentum = df.copy()
    for i in range(len(df)):
        if i < window:
            momentum.iloc[i]=0
        else:
            momentum.iloc[i] = (df.iloc[i]/df.iloc[i-window]) -1
    momentum = (momentum - momentum.mean())/momentum.std()
    
    SMA = df.copy()
    for i in range(len(df)):
        if i < window:
            SMA.iloc[i]=0
        else:
            SMA.iloc[i] = (df.iloc[i]/df.iloc[i-window:i].mean()) -1
    
    SMA_std = df.copy()
    for i in range(len(df)):
        if i < window:
            SMA_std.iloc[i]=0
        else:
            SMA_std.iloc[i] = (df.iloc[i]/df.iloc[i-window:i].std()) -1
    
    BBU = SMA + SMA_std*2
    BBL = SMA - SMA_std*2
    BBU = (BBU - BBU.mean())/BBU.std()
    BBL = (BBL - BBL.mean())/BBL.std()
    
    #Calculating adjusted_close / SMA
    ad_SMA = df.copy()
    for i in range(len(df)):
        if i < window:
            ad_SMA.iloc[i]=0
        else:
            ad_SMA.iloc[i] = (df.iloc[i]/SMA.iloc[i]) -1
    return ad_SMA, momentum, BBU, BBL

In [28]:
symbols=['ITC.NS']
date=["2017-4-26","2018-10-9"]
windows = 20
df = test_run(symbols,date)
df = df.fillna(method='ffill')
df = df.fillna(method='bfill')
# print(df)

df['ad_SMA'], df['momentum'], df['BBU'], df['BBL'] = feature(df,windows)
df = df.iloc[windows:]
print(df)
print(df.shape)

                ITC.NS       ad_SMA  momentum       BBU       BBL
2017-05-25  284.594421  4584.250083  0.393744 -0.747375  0.749432
2017-05-26  293.178284  3174.252695  1.182025 -0.785800  0.788902
2017-05-29  299.580597  2683.180942  2.042001 -0.911207  0.914971
2017-05-30  295.407257  3322.968907  1.717024 -1.054889  1.057864
2017-05-31  295.739197  3520.358921  1.860412 -1.111014  1.113819
...                ...          ...       ...       ...       ...
2018-10-03  289.651154 -9946.804827 -1.314721 -0.415955  0.414872
2018-10-04  281.703400 -5426.739724 -1.413367 -0.278994  0.277128
2018-10-05  271.302643 -3270.811862 -1.784571 -0.434445  0.431503
2018-10-08  268.064667 -3015.816451 -1.981481 -0.742765  0.739610
2018-10-09  263.453003 -2661.508489 -2.244085 -0.929183  0.925674

[342 rows x 5 columns]
(342, 5)


In [46]:
%run envs.ipynb
length = len(df)
train_dataset = df[:math.floor((length*0.8))]
test_dataset = df[math.ceil((length*0.8)):]
# env = TradingEnv(train_dataset)
train_dataset.iloc[0,0]

env = TradingEnv(train_dataset)
print(env._get_val())
print(env._get_obs())

20000.0
[0, 285.0, 20000]


In [47]:
def create_model(state_shape,out_shape):
    
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(100, activation='relu', input_shape=state_shape),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Dense(100, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Dense(out_shape, activation=None)
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.005),
        loss="mean_squared_error")
    return model

In [48]:
def DQN(env_name,count_episode,count_per_episode,discount_rate,exploration_decay):
    #Generate Environment
    # action 0 Hold 1 Buy/sell depending upon holding
    state_shape = (5,)
    action_space = [0,1]
    action_shape = len(action_space)
    
    #Create initial and target Model
    model= create_model(state_shape,action_shape)
    target_model = create_model(state_shape,action_shape)
    
    #Initialize lists
    print_reward = []
    memoization_list = []
    
    #setting up exploration rates
    exploration_rate = 1
    min_exploration_rate = 0.01
    
    #Running for every episode
    for episode in range(count_episode):
        env = TradingEnv(train_dataset)
        state = [df.iloc[0]['ad_SMA'],df.iloc[0]['momentum'],df.iloc[0]['BBU'],0,0]
        state = np.array(state).reshape(1,state_shape[0])
        holding = 0
        
        #Dacaying the exploration rate after every episode
        exploration_rate =  exploration_rate * exploration_decay
        exploration_rate = max(exploration_rate,min_exploration_rate)
        
        #Total Reward will be the learning curve
        totalReward = 0
        
        for step in range(count_per_episode):

            #explore or exploit
            tmp = random.uniform(0,1)
            if tmp<exploration_rate:
                action = random.sample(action_space, 1)
#                 print("random",action)
            else:
                action = np.argmax(model.predict(state)[0])
#                 print("predicted",action)
            
            #Apply action on environment
            new_state,obs, reward,done,holding = env._step(action,holding)
#             print(holding)
            print(obs)

            new_state = np.array(new_state).reshape(1,state_shape[0])
            
            #store state action information
#             print(reward)
            totalReward +=reward
            memoization_list.append([state,action,reward,new_state,done])
            
            #Fit into model
            if(len(memoization_list)>20):
                samples = sample(memoization_list,20)
    #             eachSample = memoization_list[step]
                for eachSample in samples:
                    old_state, action, reward, new_state, done = eachSample
                    target = target_model.predict(old_state)
                    if done:
                        target[0][action] = reward
                    else:
                        expected_reward = max(target_model.predict(new_state)[0])
                        target[0][action] = reward + expected_reward * discount_rate
                    model.fit(old_state, target, epochs=1, verbose=0)
            
            #Updating weights into target model
            state = new_state
            weights = model.get_weights()
            target_weights = target_model.get_weights()
            for i in range(len(target_weights)):
                target_weights[i] = weights[i] * 0.1 + target_weights[i] * 0.9
            target_model.set_weights(target_weights)
            if done:
                break
        print_reward.append(totalReward)
        print(totalReward)
    return print_reward

In [49]:
DQN("MountainCar-v0",100,len(train_dataset)-1,0.99,0.95)

[68, 293.0, 76.0]
[0, 300.0, 20476.0]
[69, 295.0, 121.0]
[0, 296.0, 20545.0]
[68, 298.0, 281.0]
[0, 303.0, 20885.0]
[68, 306.0, 77.0]
[0, 299.0, 20409.0]
[67, 301.0, 242.0]
[0, 300.0, 20342.0]
[68, 295.0, 282.0]
[0, 293.0, 20206.0]
[68, 294.0, 214.0]
[0, 289.0, 19866.0]
[68, 291.0, 78.0]
[0, 295.0, 20138.0]
[67, 299.0, 105.0]
[0, 297.0, 20004.0]
[67, 297.0, 105.0]
[0, 300.0, 20205.0]
[67, 299.0, 172.0]
[0, 301.0, 20339.0]
[68, 297.0, 143.0]
[0, 300.0, 20543.0]
[65, 312.0, 263.0]
[0, 330.0, 21713.0]
[66, 325.0, 263.0]
[0, 319.0, 21317.0]
[65, 325.0, 192.0]
[0, 322.0, 21122.0]
[65, 321.0, 257.0]
[0, 318.0, 20927.0]
[66, 317.0, 5.0]
[0, 326.0, 21521.0]
[66, 325.0, 71.0]
[0, 313.0, 20729.0]
[75, 274.0, 179.0]
[0, 281.0, 21254.0]
[76, 279.0, 50.0]
[0, 278.0, 21178.0]
[75, 282.0, 28.0]
[0, 281.0, 21103.0]
[74, 283.0, 161.0]
[0, 278.0, 20733.0]
[73, 281.0, 220.0]
[0, 275.0, 20295.0]
[73, 277.0, 74.0]
[0, 275.0, 20149.0]
[74, 271.0, 95.0]
[0, 270.0, 20075.0]
[74, 269.0, 169.0]
[0, 264.0, 19705

[0, 251.0, 15820.0]
[62, 252.0, 196.0]
[0, 253.0, 15882.0]
[62, 256.0, 10.0]
[0, 260.0, 16130.0]
[62, 259.0, 72.0]
[0, 260.0, 16192.0]
[62, 258.0, 196.0]
[0, 258.0, 16192.0]
[64, 252.0, 64.0]
[0, 256.0, 16448.0]
[62, 263.0, 142.0]
[0, 264.0, 16510.0]
[62, 263.0, 204.0]
[0, 266.0, 16696.0]
[61, 271.0, 165.0]
[0, 271.0, 16696.0]
[63, 265.0, 1.0]
[0, 265.0, 16696.0]
[0, 261.0, 16696.0]
[63, 265.0, 1.0]
[0, 265.0, 16696.0]
[62, 269.0, 18.0]
[0, 263.0, 16324.0]
[61, 265.0, 159.0]
[0, 264.0, 16263.0]
[62, 261.0, 81.0]
[0, 260.0, 16201.0]
[63, 257.0, 10.0]
[0, 258.0, 16264.0]
[63, 257.0, 73.0]
[0, 254.0, 16075.0]
[63, 253.0, 136.0]
[0, 258.0, 16390.0]
[0, 257.0, 16390.0]
[63, 259.0, 73.0]
[0, 257.0, 16264.0]
[63, 256.0, 136.0]
[0, 255.0, 16201.0]
[63, 254.0, 199.0]
[0, 251.0, 16012.0]
[64, 247.0, 204.0]
[0, 250.0, 16204.0]
[65, 249.0, 19.0]
[0, 250.0, 16269.0]
[62, 260.0, 149.0]
[0, 259.0, 16207.0]
[62, 258.0, 211.0]
[0, 256.0, 16083.0]
[64, 251.0, 19.0]
[0, 250.0, 16019.0]
[64, 250.0, 19.0]


KeyboardInterrupt: 