In [1]:
#CAPM model
#Weight => as in from asset allocation 
# rPortfolio = summationi(weighti (betai*rMarket)+ alphai)
#Technical indicators
# n is window size (20)
# 1. Momentum -> (price[t]/price[t-n]) -1
# 2. Simple moving average(SMA) -> (price[t]/price[t-n:t].mean())-1
# 3. Bollinger band -> (price[t] - SMA[t]/2*std[t])-1
# +2 std => value >1.0
# -2 std => value <1.0

# Range of these technical indicators
# SMA => -0.5 to +0.5
# MOMENTUM => -0.5 to +0.5
# BB -> -1.0 to +1.0
# PE ratio -> 1 to 300 (Fundamental)

# These factors need to be normalized before using the combination of these indicators
# normed = (values - mean)/values.std()

# Stock split causes price changes. Data anomoly. So we use Adjusted price.

# one should use survival bias free data

In [2]:
# ML dataset
# X- axis => price momentum, Bolinger value, current price , PE ratio
# Y- axis => future price, future return

In [4]:
import pandas as pd
import numpy as np
import math
import random
from random import sample 
import time

# import envs import TradingEnv


import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

In [5]:
def test_run(symbols,date):
    
    dates= pd.date_range(date[0],date[1])
    
    #create empty dataframe
    df1 = pd.DataFrame(index=dates)

    for symbol in symbols:
        df_temp=pd.read_csv("data/{}.csv".format(symbol),index_col="Date",
                        parse_dates=True,usecols=['Date','Adj Close'],
                        na_values=['nan'])
        #rename to prevent clash
        df_temp=df_temp.rename(columns={'Adj Close':symbol})
        df1=df1.join(df_temp,how="inner")
    return df1

In [6]:
def compute_daily_returns(df):
    daily_returns = df.copy()
    daily_returns[1:] = (df[1:]/df[:-1].values) -1
    daily_returns.iloc[0,:]=0
    return daily_returns
def feature(df,window):
    momentum = df.copy()
    for i in range(len(df)):
        if i < window:
            momentum.iloc[i]=0
        else:
            momentum.iloc[i] = (df.iloc[i]/df.iloc[i-window]) -1
    momentum = (momentum - momentum.mean())/momentum.std()
    
    SMA = df.copy()
    for i in range(len(df)):
        if i < window:
            SMA.iloc[i]=0
        else:
            SMA.iloc[i] = (df.iloc[i]/df.iloc[i-window:i].mean()) -1
    
    SMA_std = df.copy()
    for i in range(len(df)):
        if i < window:
            SMA_std.iloc[i]=0
        else:
            SMA_std.iloc[i] = (df.iloc[i]/df.iloc[i-window:i].std()) -1
    
    BBU = SMA + SMA_std*2
    BBL = SMA - SMA_std*2
    BBU = (BBU - BBU.mean())/BBU.std()
    BBL = (BBL - BBL.mean())/BBL.std()
    
    #Calculating adjusted_close / SMA
    ad_SMA = df.copy()
    for i in range(len(df)):
        if i < window:
            ad_SMA.iloc[i]=0
        else:
            ad_SMA.iloc[i] = (df.iloc[i]/SMA.iloc[i]) -1
    return ad_SMA, momentum, BBU, BBL

In [24]:
symbols=['ITC.NS']
date=["2011-4-26","2018-10-9"]
windows = 20
df = test_run(symbols,date)
df = df.fillna(method='ffill')
df = df.fillna(method='bfill')
# print(df)

df['ad_SMA'], df['momentum'], df['BBU'], df['BBL'] = feature(df,windows)
df = df.iloc[windows:]
print(df)
print(df.shape)

                ITC.NS        ad_SMA  momentum       BBU       BBL
2011-05-24  101.002144 -19659.685019 -0.635267  0.327114 -0.327598
2011-05-25  102.164253  13496.396798 -0.617832  0.457756 -0.457674
2011-05-26  101.029694 -41904.551441 -0.647868  0.730958 -0.731301
2011-05-27  102.083214  10978.958091 -0.519811  0.998936 -0.998751
2011-05-30  101.353844  33823.451982 -0.534894  1.339930 -1.340004
...                ...           ...       ...       ...       ...
2018-10-03  289.651154  -9946.804827 -1.519744 -0.415711  0.414134
2018-10-04  281.703400  -5426.739724 -1.625232 -0.241255  0.238684
2018-10-05  271.302643  -3270.811862 -2.022178 -0.439263  0.435317
2018-10-08  268.064667  -3015.816451 -2.232745 -0.831990  0.827763
2018-10-09  263.453003  -2661.508489 -2.513560 -1.069444  1.064760

[1824 rows x 5 columns]
(1824, 5)


In [25]:
%run envs.ipynb
length = len(df)
train_dataset = df[:math.floor((length*0.8))]
test_dataset = df[math.ceil((length*0.8)):]
# env = TradingEnv(train_dataset)
train_dataset.iloc[0,0]

env = TradingEnv(train_dataset)
print(env._get_val())
print(env._get_obs())

20000.0
[0, 101.0, 20000]


In [26]:
def create_model(state_shape,out_shape):
    
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(100, activation='relu', input_shape=state_shape),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Dense(100, activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Dropout(0.25),
        tf.keras.layers.Dense(out_shape, activation=None)
    ])
    model.compile(
        optimizer=tf.keras.optimizers.Adam(0.005),
        loss="mean_squared_error")
    return model

In [27]:
def DQN(env_name,count_episode,count_per_episode,discount_rate,exploration_decay):
    #Generate Environment
    # action 0 Hold 1 Buy/sell depending upon holding
    state_shape = (5,)
    action_space = [0,1]
    action_shape = len(action_space)
    
    #Create initial and target Model
    model= create_model(state_shape,action_shape)
    target_model = create_model(state_shape,action_shape)
    
    #Initialize lists
    print_reward = []
    memoization_list = []
    
    #setting up exploration rates
    exploration_rate = 1
    min_exploration_rate = 0.01
    
    #Running for every episode
    for episode in range(count_episode):
        env = TradingEnv(train_dataset)
        state = [df.iloc[0]['ad_SMA'],df.iloc[0]['momentum'],df.iloc[0]['BBU'],0,0]
        state = np.array(state).reshape(1,state_shape[0])
        holding = 0
        
        #Dacaying the exploration rate after every episode
        exploration_rate =  exploration_rate * exploration_decay
        exploration_rate = max(exploration_rate,min_exploration_rate)
        
        #Total Reward will be the learning curve
        totalReward = 0
        
        for step in range(count_per_episode):

            #explore or exploit
            tmp = random.uniform(0,1)
            if tmp<exploration_rate:
                action = random.sample(action_space, 1)
#                 print("random",action)
            else:
                action = [np.argmax(model.predict(state)[0])]
#                 print("predicted",action)
            
            #Apply action on environment
            new_state,obs, reward,done,holding = env._step(action,holding)
#             print(holding)
            print(obs)

            new_state = np.array(new_state).reshape(1,state_shape[0])
            
            #store state action information
#             print(reward)
            totalReward +=reward
            memoization_list.append([state,action,reward,new_state,done])
            
            #Fit into model
            batch_size =20
            if(len(memoization_list)>batch_size):
                """ vectorized implementation; 30x speed up compared with for loop """
                minibatch = random.sample(memoization_list, batch_size)
                old_state = np.array([tup[0][0] for tup in minibatch])
                actions = np.array([tup[1] for tup in minibatch])
                rewards = np.array([tup[2] for tup in minibatch])
                next_states = np.array([tup[3][0] for tup in minibatch])
                done = np.array([tup[4] for tup in minibatch])
                
                # Q(s', a)
                target = rewards + discount_rate * np.amax(target_model.predict(next_states), axis=1)
                # end state target is reward itself (no lookahead)
                target[done] = rewards[done]

                # Q(s, a)
                target_f = target_model.predict(old_state)
                # make the agent to approximately map the current state to future discounted reward
                target_f[range(batch_size), actions] = target

                model.fit(old_state, target_f, epochs=1, verbose=0)
                
            #Updating weights into target model
            state = new_state
            weights = model.get_weights()
            target_weights = target_model.get_weights()
            for i in range(len(target_weights)):
                target_weights[i] = weights[i] * 0.1 + target_weights[i] * 0.9
            target_model.set_weights(target_weights)
        print_reward.append(totalReward)
        print(totalReward)
    return print_reward

In [None]:
start= time.time()
reward_list = DQN("MountainCar-v0",500,len(train_dataset)-1,0.99,0.95)
finish = time.time()
print(f'time saved by parallelizing: {finish-start}')

[0, 102.0, 20000]
[198, 101.0, 2.0]
[198, 102.0, 2.0]
[0, 101.0, 20000.0]
[192, 104.0, 32.0]
[192, 105.0, 32.0]
[192, 106.0, 32.0]
[0, 105.0, 20192.0]
[0, 104.0, 20192.0]
[192, 105.0, 32.0]
[0, 105.0, 20192.0]
[192, 105.0, 32.0]
[192, 106.0, 32.0]
[192, 105.0, 32.0]
[0, 107.0, 20576.0]
[192, 107.0, 32.0]
[0, 107.0, 20576.0]
[194, 106.0, 12.0]
[0, 103.0, 19994.0]
[196, 102.0, 2.0]
[196, 102.0, 2.0]
[0, 106.0, 20778.0]
[0, 108.0, 20778.0]
[0, 107.0, 20778.0]
[194, 107.0, 20.0]
[0, 110.0, 21360.0]
[190, 112.0, 80.0]
[0, 111.0, 21170.0]
[0, 110.0, 21170.0]
[0, 108.0, 21170.0]
[194, 109.0, 24.0]
[0, 112.0, 21752.0]
[0, 111.0, 21752.0]
[194, 112.0, 24.0]
[194, 111.0, 24.0]
[194, 113.0, 24.0]
[194, 112.0, 24.0]
[0, 111.0, 21558.0]
[194, 111.0, 24.0]
[194, 113.0, 24.0]
[0, 114.0, 22140.0]
[194, 114.0, 24.0]
[194, 115.0, 24.0]
[0, 113.0, 21946.0]
[197, 111.0, 79.0]
[0, 111.0, 21946.0]
[192, 114.0, 58.0]
[192, 115.0, 58.0]
[192, 115.0, 58.0]
[0, 113.0, 21754.0]
[0, 114.0, 21754.0]
[197, 110.0, 8

In [None]:
import matplotlib.pyplot as plt 
  
# x axis values 
x = range(0,len(reward_list),5) 
# corresponding y axis values 
y=[]

for i in range(int(np.floor(len(reward_list)/5))):
    y.append(reward_list[i*5])
        
# y = reward_list 
  
# plotting the points  
plt.plot(x, y) 
  
# naming the x axis 
plt.xlabel('x - axis') 
# naming the y axis 
plt.ylabel('y - axis') 
  
# giving a title to my graph 
plt.title('My first graph!') 
  
# function to show the plot 
plt.show()