In [None]:
#CAPM model
# Weight => each asset's share of the portfolio (from asset allocation)
# r_portfolio = sum_i( weight_i * (beta_i * r_market + alpha_i) )
#Technical indicators
# n is window size (20)
# 1. Momentum -> (price[t]/price[t-n]) -1
# 2. Simple moving average(SMA) -> (price[t]/price[t-n:t].mean())-1
# 3. Bollinger band -> (price[t] - mean[t]) / (2*std[t]), where mean[t] = price[t-n:t].mean()
# price above the +2 std band => value > 1.0
# price below the -2 std band => value < -1.0

# Range of these technical indicators
# SMA => -0.5 to +0.5
# MOMENTUM => -0.5 to +0.5
# BB -> -1.0 to +1.0
# PE ratio -> 1 to 300 (Fundamental)

# These factors need to be normalized before using the combination of these indicators
# normed = (values - values.mean())/values.std()

# A stock split causes an artificial price change (a data anomaly), so we use the adjusted price.

# One should use survivorship-bias-free data.

In [None]:
# ML dataset
# X-axis => price momentum, Bollinger value, current price, PE ratio
# Y- axis => future price, future return

In [None]:
import pandas as pd
import numpy as np
import math
import random
from random import sample 
import time

# import envs import TradingEnv


import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.optimizers import Adam

In [None]:
def test_run(symbols,date):
    
    dates= pd.date_range(date[0],date[1])
    
    #create empty dataframe
    df1 = pd.DataFrame(index=dates)

    for symbol in symbols:
        df_temp=pd.read_csv("data/{}.csv".format(symbol),index_col="Date",
                        parse_dates=True,usecols=['Date','Adj Close'],
                        na_values=['nan'])
        #rename to prevent clash
        df_temp=df_temp.rename(columns={'Adj Close':symbol})
        df1=df1.join(df_temp,how="inner")
    return df1

In [None]:
def compute_daily_returns(df):
    """Return the day-over-day fractional change of every column.

    Row ``t`` holds ``df[t] / df[t-1] - 1``; the first row has no
    predecessor and is set to 0. The result is a new frame (the input is
    not modified) and is computed in floating point, so integer-valued
    price frames are handled too. An empty frame is returned unchanged in
    shape instead of raising.
    """
    # shift(1) aligns every row with its predecessor, so the division is
    # the same element-wise ratio as df[1:] / df[:-1].values.
    daily_returns = df / df.shift(1) - 1
    if len(daily_returns) > 0:
        # The first row is NaN after the shift; define its return as 0.
        daily_returns.iloc[0,:] = 0
    return daily_returns
def feature(df,window):
    """Compute technical-indicator features from a price frame.

    Parameters
    ----------
    df : pandas.DataFrame
        Prices, one column per symbol, rows in chronological order.
    window : int
        Look-back length in rows; must be >= 1.

    Returns
    -------
    (ad_SMA, momentum, BBU, BBL) : tuple of pandas.DataFrame
        Each has the shape of ``df``.
        * ad_SMA   - price[t] / mean(price[t-window:t]) - 1 (not normalised).
        * momentum - price[t] / price[t-window] - 1, then z-scored per column.
        * BBU/BBL  - (price/mean - 1) +/- 2 * (price/std - 1), z-scored.
        The first ``window`` rows have no full look-back and are set to 0
        before any z-scoring, so they take part in the mean/std used for
        normalisation.

    Raises
    ------
    ValueError
        If ``window`` is smaller than 1.
    """
    if window < 1:
        raise ValueError("window must be a positive integer")

    def _zero_warmup(frame):
        # Rows without a complete look-back window carry no signal.
        frame.iloc[:window] = 0
        return frame

    def _zscore(frame):
        return (frame - frame.mean())/frame.std()

    # Statistics over the `window` rows strictly before each row: shifting
    # by one excludes the current price, matching df.iloc[i-window:i].
    # min_periods=1 keeps the NaN-skipping behaviour of DataFrame.mean/std.
    trailing = df.shift(1).rolling(window,min_periods=1)
    trailing_mean = trailing.mean()
    trailing_std = trailing.std()

    momentum = _zscore(_zero_warmup(df/df.shift(window) - 1))

    price_to_sma = _zero_warmup(df/trailing_mean - 1)
    price_to_std = _zero_warmup(df/trailing_std - 1)

    # NOTE(review): these combine price/mean and price/std ratios, which is
    # not the textbook Bollinger band (mean +/- 2*std); the formula is kept
    # unchanged here - confirm the intended definition.
    BBU = _zscore(price_to_sma + price_to_std*2)
    BBL = _zscore(price_to_sma - price_to_std*2)

    # Adjusted close / SMA - 1. The divisor must be the moving average
    # itself; dividing by the price/SMA-1 indicator (as was done before)
    # blows up to inf whenever the price equals its moving average.
    ad_SMA = price_to_sma.copy()
    return ad_SMA, momentum, BBU, BBL

In [None]:
symbols=['ITC.NS']
date=["2017-4-26","2018-10-9"]
windows = 20
df = test_run(symbols,date)
# Fill gaps forward first, then backward so leading gaps are covered too.
# ffill()/bfill() replace the deprecated fillna(method=...) spelling.
df = df.ffill().bfill()

# feature() returns one single-column frame per indicator (one symbol here).
df['ad_SMA'], df['momentum'], df['BBU'], df['BBL'] = feature(df,windows)
# The first `windows` rows are zero-filled by feature(); drop them.
df = df.iloc[windows:]
print(df)
print(df.shape)

In [None]:
%run envs.ipynb
length = len(df)
train_dataset = df[:math.floor((length*0.8))]
test_dataset = df[math.ceil((length*0.8)):]
# env = TradingEnv(train_dataset)
train_dataset.iloc[0,0]

In [None]:
def create_model(state_shape,out_shape):
    """Build and compile the fully connected network used by DQN().

    Two hidden stages (Dense(100, relu) -> BatchNormalization ->
    Dropout(0.25)) are followed by a linear Dense layer with ``out_shape``
    units. The model is compiled with Adam (learning rate 0.005) and a
    mean-squared-error loss.

    state_shape : tuple given to the first layer as ``input_shape``.
    out_shape   : number of output units.
    """
    keras_layers = tf.keras.layers
    network = tf.keras.Sequential()

    # Only the first hidden stage declares the input shape.
    for first_layer_kwargs in ({'input_shape': state_shape}, {}):
        network.add(keras_layers.Dense(100, activation='relu', **first_layer_kwargs))
        network.add(keras_layers.BatchNormalization())
        network.add(keras_layers.Dropout(0.25))

    # Linear output head.
    network.add(keras_layers.Dense(out_shape, activation=None))

    optimizer = tf.keras.optimizers.Adam(0.005)
    network.compile(optimizer=optimizer, loss="mean_squared_error")
    return network

In [None]:
def DQN(env_name,count_episode,count_per_episode,discount_rate,exploration_decay,
        batch_size=20,tau=0.1):
    """Train a DQN agent on the notebook's training data.

    Relies on notebook-level names: ``TradingEnv``, ``train_dataset`` and
    ``create_model``.

    Parameters
    ----------
    env_name : str
        Unused; kept so existing calls keep working.
    count_episode : int
        Number of episodes to run.
    count_per_episode : int
        Maximum number of environment steps per episode.
    discount_rate : float
        Factor applied to the target network's best next-state value.
    exploration_decay : float
        Multiplier applied to the exploration rate at the start of every
        episode (floored at 0.01).
    batch_size : int, optional
        Number of stored transitions replayed after each step, once more
        than ``batch_size`` transitions are stored. Default 20.
    tau : float, optional
        Soft-update factor: target = tau * online + (1 - tau) * target.
        Default 0.1.

    Returns
    -------
    list
        Total reward collected in each episode.
    """
    # action 0 = hold, 1 = buy/sell depending on the current holding
    state_shape = (5,)
    action_space = [0,1]
    action_shape = len(action_space)

    # Online network and target network share the same architecture.
    model = create_model(state_shape,action_shape)
    target_model = create_model(state_shape,action_shape)

    print_reward = []
    memoization_list = []

    exploration_rate = 1
    min_exploration_rate = 0.01

    for episode in range(count_episode):
        env = TradingEnv(train_dataset)
        # Start from the first row of the data the environment is built on.
        first_row = train_dataset.iloc[0]
        state = [first_row['ad_SMA'],first_row['momentum'],first_row['BBU'],0,0]
        state = np.array(state).reshape(1,state_shape[0])
        holding = 0

        # Decay the exploration rate once per episode.
        exploration_rate = max(exploration_rate * exploration_decay,min_exploration_rate)

        # Total reward per episode is the learning curve.
        totalReward = 0

        for step in range(count_per_episode):

            # Explore or exploit. Both branches yield a plain int so the
            # environment and the replay code always see the same type
            # (random.sample(..., 1) used to hand back a one-element list).
            if random.uniform(0,1) < exploration_rate:
                action = random.choice(action_space)
            else:
                action = int(np.argmax(model.predict(state)[0]))

            # Apply the action to the environment.
            new_state,obs,reward,done,holding = env._step(action,holding)
            print(obs)

            new_state = np.array(new_state).reshape(1,state_shape[0])

            totalReward += reward
            memoization_list.append([state,action,reward,new_state,done])

            # Replay a random batch of stored transitions. The sampled
            # values get their own names: reusing new_state/done here used
            # to overwrite the live step, so the agent continued from a
            # random stored state and could end the episode on a sampled
            # `done` flag.
            if len(memoization_list) > batch_size:
                for old_state,past_action,past_reward,next_state,past_done in sample(memoization_list,batch_size):
                    target = target_model.predict(old_state)
                    if past_done:
                        target[0][past_action] = past_reward
                    else:
                        expected_reward = max(target_model.predict(next_state)[0])
                        target[0][past_action] = past_reward + expected_reward * discount_rate
                    model.fit(old_state, target, epochs=1, verbose=0)

            state = new_state

            # Soft-update the target network towards the online network.
            weights = model.get_weights()
            target_weights = target_model.get_weights()
            for i in range(len(target_weights)):
                target_weights[i] = weights[i] * tau + target_weights[i] * (1 - tau)
            target_model.set_weights(target_weights)

            if done:
                break
        print_reward.append(totalReward)
        print(totalReward)
    return print_reward

In [None]:
# Train on the training split; the cell displays the per-episode rewards.
DQN(
    env_name="MountainCar-v0",
    count_episode=100,
    count_per_episode=len(train_dataset)-1,
    discount_rate=0.99,
    exploration_decay=0.95,
)