In [1]:
from data.fetch_data import fetch_data

import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam
from IPython.core.debugger import set_trace

import math
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import random
from collections import deque

## Load Data

In [105]:
tickers = ['AAPL', 'KO', 'IBM']
start_date = "2009-12-21"
end_date = "2017-12-31"

data = fetch_data(tickers, start_date, end_date)

[*********************100%***********************]  3 of 3 completed


In [106]:
data

Ticker,AAPL,IBM,KO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-12-21 00:00:00+00:00,6.304612,74.284225,18.337631
2009-12-28 00:00:00+00:00,6.355581,74.472000,18.197166
2010-01-04 00:00:00+00:00,6.393282,74.443504,17.606556
2010-01-11 00:00:00+00:00,6.210813,74.972626,17.970490
2010-01-18 00:00:00+00:00,5.964107,71.399780,17.316038
...,...,...,...
2017-11-27 00:00:00+00:00,40.283970,107.885979,37.115158
2017-12-04 00:00:00+00:00,39.888309,107.920822,36.882210
2017-12-11 00:00:00+00:00,40.971649,106.310455,37.598522
2017-12-18 00:00:00+00:00,41.216587,106.310455,37.110130


## Build Agent

In [2]:
class Agent:
    def __init__(self, state_size, is_eval=False, model_name=""):
        #State size depends and is equal to the the window size, n previous days
        self.state_size = state_size # normalized previous days, 
        self.action_size = 27 # hold, buy, sell for three different stocks
        self.memory = deque(maxlen=1000)
        self.inventory = {'AAPL': [], 'IBM': [], 'KO': []}
        self.model_name = model_name
        self.is_eval = is_eval

        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        #self.epsilon_decay = 0.9
        
        #self.model = self._model()

        self.model = load_model(model_name) if is_eval else self._model()

    #Deep Q Learning model- returns the q-value when given state as input 
    def _model(self):
        model = Sequential()
        #Input Layer
        model.add(Dense(units=64, input_dim=self.state_size, activation="relu"))
        #Hidden Layers
        model.add(Dense(units=32, activation="relu"))
        model.add(Dense(units=8, activation="relu"))
        #Output Layer 
        model.add(Dense(self.action_size, activation="linear"))
        model.compile(loss="mse", optimizer=Adam(learning_rate=0.001))
        return model
    
    #Return the action on the value function
    #With probability (1-$\epsilon$) choose the action which has the highest Q-value.
    #With probability ($\epsilon$) choose any action at random.
    #Intitially high epsilon-more random, later less
    #The trained agents were evaluated by different initial random condition
    #and an e-greedy policy with epsilon 0.05. This procedure is adopted to minimize the possibility of overfitting during evaluation.
 
    def act(self, state): 
        #If it is test and self.epsilon is still very high, once the epsilon become low, there are no random
        #actions suggested.
        if not self.is_eval and random.random() <= self.epsilon:
            return random.randrange(self.action_size)        
        options = self.model.predict(state)
        #set_trace()
        #action is based on the action that has the highest value from the q-value function.
        return np.argmax(options[0])

    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.memory)
        for i in range(l - batch_size + 1, l):
            mini_batch.append(self.memory[i])
        
        # the memory during the training phase. 
        for state, action, reward, next_state, done in mini_batch:
            target = reward # reward or Q at time t    
            #update the Q table based on Q table equation
            #set_trace()
            if not done:
                #set_trace()
                #max of the array of the predicted. 
                target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])     
                
            # Q-value of the state currently from the table    
            target_f = self.model.predict(state)
            # Update the output Q table for the given action in the table     
            target_f[0][action] = target
            #train and fit the model where state is X and target_f is Y, where the target is updated. 
            self.model.fit(state, target_f, epochs=1, verbose=0)
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
    
    def map_real_to_pos_state(self, real_number):
        # Convert the real number to base-3 (get three digits)
        d1 = real_number // 9   # Corresponds to the first die
        d2 = (real_number % 9) // 3   # Corresponds to the second die
        d3 = real_number % 3   # Corresponds to the third die

        # Map base-3 digits (0, 1, 2) back to dice values (1, 2, 3)
        return np.array([d1, d2, d3])
    
    def labeled_actions_to_action_state(self, labeled_actions):
        sorted_list = sorted(labeled_actions, key=lambda x: x[1])
        legal_action_list = [v[0] for v in sorted_list]
        legal_action = np.array(legal_action_list)
        return legal_action
    
    def map_pos_state_to_real(self, pos_state):
        real = ((pos_state[0]) * 9) + ((pos_state[1]) * 3) + (pos_state[2])
        return real
    
    def calc_port_value(self, data, t):
        aapl_pos = len(self.inventory['AAPL'])
        if self.inventory['AAPL']:
            if self.inventory['AAPL'][0] < 0:
                aapl_pos = -1 * aapl_pos

        ibm_pos = len(self.inventory['IBM'])
        if self.inventory['IBM']:
            if self.inventory['IBM'][0] < 0:
                ibm_pos = -1 * ibm_pos

        ko_pos = len(self.inventory['KO'])
        if self.inventory['KO']:
            if self.inventory['KO'][0] < 0:
                ko_pos = -1 * ko_pos

        port_value = (aapl_pos * data['AAPL'].iloc[t]) + (ibm_pos * data['IBM'].iloc[t]) + (ko_pos * data['KO'].iloc[t])
        return port_value

## Helper Functions

In [3]:
# prints formatted price
def formatPrice(n):
    return ("-$" if n < 0 else "$") + "{0:.2f}".format(abs(n))

# # returns the vector containing stock data from a fixed file 
# def getStockData(key):
#     vec = []
#     lines = open("data/" + key + ".csv", "r").read().splitlines()

#     for line in lines[1:]:
#         vec.append(float(line.split(",")[4])) #Only Close column

#     return vec

# returns the sigmoid
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# returns an an n-day state representation ending at time t

def getState(data, t, n):    
    d = t - n + 1
    block = data.iloc[d:t + 1, :] if d >= 0 else -d * [data.iloc[0, :]] + data.iloc[0:t + 1, :] # pad with t0
    print(block)
    #block is which is the for [1283.27002, 1283.27002]
    res = []
    for i in range(n - 1):
        res.append(sigmoid(block.iloc[i + 1, :] - block.iloc[i, :]))
    return np.array(res)

# Plots the behavior of the output
def plot_behavior(data_input, states_buy, states_sell, profit):
    fig = plt.figure(figsize = (15,5))
    plt.plot(data_input, color='r', lw=2.)
    plt.plot(data_input, '^', markersize=10, color='m', label = 'Buying signal', markevery = states_buy)
    plt.plot(data_input, 'v', markersize=10, color='k', label = 'Selling signal', markevery = states_sell)
    plt.title('Total gains: %f'%(profit))
    plt.legend()
    #plt.savefig('output/'+name+'.png')
    plt.show()

## Train Model

In [7]:
window_size = 1
agent = Agent(window_size * len(data.columns))
#In this step we feed the closing value of the stock price 
n_steps = len(data) - 1
#
batch_size = 32
#An episode represents a complete pass over the data.
episode_count = 12

for e in range(episode_count + 1):
    print("Running episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 1, window_size + 1) # Applies sigmoid function to last week's return
    #set_trace()
    total_profit = 0 
    cash = 100
    states_sell = {'AAPL': [], 'IBM': [], 'KO': []}
    states_buy = {'AAPL': [], 'IBM': [], 'KO': []}
    agent.memory = deque(maxlen=1000)  
    
    for t in range(1, n_steps):
        action = agent.act(state)
        action_arr = agent.map_real_to_pos_state(action)
        # sit
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0

        labeled_actions = []
        for i in zip(action_arr, tickers):
            labeled_actions.append(list(i))
        labeled_actions.sort(key=lambda x: -x[0])

        for i in range(3):
            stock_price = data[labeled_actions[i][1]].iloc[t]

            if labeled_actions[i][0] == 1: # buy
                if cash - stock_price > 0:
                    agent.inventory[labeled_actions[i][1]].append(stock_price)
                    states_buy[labeled_actions[i][1]].append(t)
                    cash -= stock_price
                    #print("Buy: " + formatPrice(data[t]))
                else:
                    labeled_actions[i][0] = 0

            elif labeled_actions[i][0] == 2:
                if len(agent.inventory[labeled_actions[i][1]]) > 0: # sell
                    bought_price = agent.inventory[labeled_actions[i][1]].pop(0)      
                    reward = max(stock_price - bought_price, 0)
                    total_profit += stock_price - bought_price
                    states_sell[labeled_actions[i][1]].append(t)
                    cash += stock_price
                    #print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
                else:
                    labeled_actions[i][0] = 0
        
        action = agent.labeled_actions_to_action_state(labeled_actions)
        action = agent.map_pos_state_to_real(action)

        done = True if t == n_steps - 1 else False
        #appends the details of the state action etc in the memory, which is used further by the exeReply function
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
            break

        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)
            agent.memory = deque(maxlen=1000)  
            

    if e % 2 == 0:
        agent.model.save("model_ep" + str(e))

Running episode 0/10
Ticker                         AAPL        IBM         KO
Date                                                     
2009-12-21 00:00:00+00:00  6.304612  74.284225  18.337631
2009-12-28 00:00:00+00:00  6.355581  74.472000  18.197166
Ticker                         AAPL        IBM         KO
Date                                                     
2009-12-28 00:00:00+00:00  6.355581  74.472000  18.197166
2010-01-04 00:00:00+00:00  6.393282  74.443504  17.606556
Ticker                         AAPL        IBM         KO
Date                                                     
2010-01-04 00:00:00+00:00  6.393282  74.443504  17.606556
2010-01-11 00:00:00+00:00  6.210813  74.972626  17.970490
Ticker                         AAPL        IBM         KO
Date                                                     
2010-01-11 00:00:00+00:00  6.210813  74.972626  17.970490
2010-01-18 00:00:00+00:00  5.964107  71.399780  17.316038
Ticker                         AAPL       IBM      

## Train Model with Short-Selling Allowed

In [109]:
window_size = 1
agent = Agent(window_size * len(data.columns))
#In this step we feed the closing value of the stock price 
n_steps = len(data) - 1
#
batch_size = 32
#An episode represents a complete pass over the data.
episode_count = 10

for e in range(episode_count + 1):
    print("Running episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 1, window_size + 1) # Applies sigmoid function to last week's return
    #set_trace()
    total_profit = 0 
    cash = 100
    states_sell = {'AAPL': [], 'IBM': [], 'KO': []}
    states_buy = {'AAPL': [], 'IBM': [], 'KO': []}
    agent.memory = deque(maxlen=1000)  
    
    for t in range(1, n_steps):
        action = agent.act(state)
        action_arr = agent.map_real_to_pos_state(action)

        next_state = getState(data, t + 1, window_size + 1)
        reward = 0

        labeled_actions = []
        for i in zip(action_arr, tickers):
            labeled_actions.append(list(i))
        labeled_actions.sort(key=lambda x: -x[0])

        for i in range(3):
            ticker = labeled_actions[i][1]
            stock_price = data[ticker].iloc[t]
            
            if agent.inventory[ticker]:
                position = 'long' if agent.inventory[ticker][0] > 0 else 'short'
            else:
                position = None

            if labeled_actions[i][0] == 1: # buy
                if cash - stock_price > 0:
                    if position == 'short':
                        sold_price = -1 * agent.inventory[ticker].pop(0)
                        reward = max(sold_price - stock_price, 0)
                        total_profit += sold_price - stock_price
                        states_buy[ticker].append(t)
                        cash -= stock_price
                    
                    else:
                        agent.inventory[ticker].append(stock_price)
                        states_buy[ticker].append(t)
                        cash -= stock_price
                        #print("Buy: " + formatPrice(data[t]))
                else:
                    labeled_actions[i][0] = 0

            elif labeled_actions[i][0] == 2:
                if position == 'long': # sell
                    bought_price = agent.inventory[ticker].pop(0)      
                    reward = max(stock_price - bought_price, 0)
                    total_profit += stock_price - bought_price
                    states_sell[ticker].append(t)
                    cash += stock_price
                    #print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
                else:
                    agent.inventory[ticker].append(-1 * stock_price)
                    states_sell[ticker].append(t)
                    cash += stock_price
        
        action = agent.labeled_actions_to_action_state(labeled_actions)
        action = agent.map_pos_state_to_real(action)

        done = True if t == n_steps - 1 else False
        #appends the details of the state action etc in the memory, which is used further by the exeReply function
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")
            break

        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)
            agent.memory = deque(maxlen=1000)  
            

    if e % 2 == 0:
        agent.model.save("model_ep" + str(e))

Running episode 0/10
Ticker                         AAPL        IBM         KO
Date                                                     
2009-12-21 00:00:00+00:00  6.304612  74.284225  18.337631
2009-12-28 00:00:00+00:00  6.355581  74.472000  18.197166
Ticker                         AAPL        IBM         KO
Date                                                     
2009-12-28 00:00:00+00:00  6.355581  74.472000  18.197166
2010-01-04 00:00:00+00:00  6.393282  74.443504  17.606556
Ticker                         AAPL        IBM         KO
Date                                                     
2010-01-04 00:00:00+00:00  6.393282  74.443504  17.606556
2010-01-11 00:00:00+00:00  6.210813  74.972626  17.970490
Ticker                         AAPL        IBM         KO
Date                                                     
2010-01-11 00:00:00+00:00  6.210813  74.972626  17.970490
2010-01-18 00:00:00+00:00  5.964107  71.399780  17.316038
Ticker                         AAPL       IBM      

## Test

In [110]:
tickers = ['AAPL', 'KO', 'IBM']
start_date = "2017-12-28"
end_date = "2018-12-31"

test_data = fetch_data(tickers, start_date, end_date)
test_data.tail(5)

[*********************100%***********************]  3 of 3 completed


Ticker,AAPL,IBM,KO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2018-11-26 00:00:00+00:00,42.688766,90.549332,42.111389
2018-12-03 00:00:00+00:00,40.276798,86.957092,41.341179
2018-12-10 00:00:00+00:00,39.557259,87.365128,41.551708
2018-12-17 00:00:00+00:00,36.031342,80.836418,40.061108
2018-12-24 00:00:00+00:00,37.346096,82.359306,39.749508


In [111]:
# Initialize agent for test
n_test = len(test_data) - 1
state = getState(test_data, 1, window_size + 1)
total_profit = 0
cash = 100
is_eval = True
done = False
states_sell_test = {'AAPL': [], 'IBM': [], 'KO': []}
states_buy_test = {'AAPL': [], 'IBM': [], 'KO': []}
tracker = {}

#Get the trained model
model_name = "model_ep"+str(episode_count)
agent = Agent(window_size, is_eval, model_name)

Ticker                          AAPL         IBM         KO
Date                                                       
2017-12-25 00:00:00+00:00  39.855324  106.951828  37.346188
2018-01-01 00:00:00+00:00  41.214226  113.274696  37.500851


In [27]:
for t in range(1, n_test):
    action = agent.act(state)
    action_arr = agent.map_real_to_pos_state(action)
    #print(action)
    #set_trace()
    next_state = getState(test_data, t + 1, window_size + 1)
    reward = 0

    labeled_actions = []
    for i in zip(action_arr, tickers):
        labeled_actions.append(list(i))
    labeled_actions.sort(key=lambda x: -x[0])

    for i in range(3):
        ticker = labeled_actions[i][1]
        stock_price = test_data[ticker].iloc[t]
        if agent.inventory[ticker]:
            position = 'long' if agent.inventory[ticker][0] > 0 else 'short'
        else:
            position = None

        if labeled_actions[i][0] == 1: # buy
            if cash - stock_price > 0:
                if position == 'short':
                agent.inventory[labeled_actions[i][1]].append(stock_price)
                states_buy_test[labeled_actions[i][1]].append(t)
                cash -= stock_price
                #print("Buy: " + formatPrice(data[t]))
            else:
                labeled_actions[i][0] = 0

        elif labeled_actions[i][0] == 2:
            if len(agent.inventory[labeled_actions[i][1]]) > 0: # sell
                bought_price = agent.inventory[labeled_actions[i][1]].pop(0)
                total_profit += stock_price - bought_price
                states_sell_test[labeled_actions[i][1]].append(t)
                cash += stock_price
                #print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
            else:
                labeled_actions[i][0] = 0
                
    action = agent.labeled_actions_to_action_state(labeled_actions)
    action = agent.map_pos_state_to_real(action)
    
    done = True if t == n_steps - 1 else False
    agent.memory.append((state, action, reward, next_state, done))

    aapl_pos = len(agent.inventory['AAPL'])
    ibm_pos = len(agent.inventory['IBM'])
    ko_pos = len(agent.inventory['KO'])
    port_value = (aapl_pos * test_data['AAPL'].iloc[t]) + (ibm_pos * test_data['IBM'].iloc[t]) + (ko_pos * test_data['KO'].iloc[t])

    net_worth = cash + port_value
    if net_worth < 0:
        print('Dead!')
        break

    tracker[t] = (cash, port_value, net_worth, action)

    state = next_state

    if done:
        print("------------------------------------------")
        print("Total Profit: " + formatPrice(total_profit))
        print("------------------------------------------")

Ticker                          AAPL         IBM         KO
Date                                                       
2018-01-01 00:00:00+00:00  41.214226  113.274696  37.500851
2018-01-08 00:00:00+00:00  41.706440  113.727806  37.565979
Ticker                          AAPL         IBM         KO
Date                                                       
2018-01-08 00:00:00+00:00  41.706440  113.727806  37.565979
2018-01-15 00:00:00+00:00  42.029099  113.191002  38.388107
Ticker                          AAPL         IBM         KO
Date                                                       
2018-01-15 00:00:00+00:00  42.029099  113.191002  38.388107
2018-01-22 00:00:00+00:00  40.392303  116.655708  39.503281
Ticker                          AAPL         IBM         KO
Date                                                       
2018-01-22 00:00:00+00:00  40.392303  116.655708  39.503281
2018-01-29 00:00:00+00:00  37.799339  110.862686  38.038086
Ticker                          AAPL    

In [28]:
tracker

{1: (58.78577423095703, 41.21422576904297, 100.0, -4),
 2: (17.079334259033203, 83.41287994384766, 100.49221420288086, -4),
 3: (17.079334259033203, 84.05819702148438, 101.13753128051758, -13),
 4: (17.079334259033203, 80.78460693359375, 97.86394119262695, -13),
 5: (17.079334259033203, 75.59867858886719, 92.67801284790039, -13),
 6: (17.079334259033203, 73.67221069335938, 90.75154495239258, -13),
 7: (17.079334259033203, 81.54906463623047, 98.62839889526367, -13),
 8: (17.079334259033203, 83.00100708007812, 100.08034133911133, -13),
 9: (17.079334259033203, 83.33678436279297, 100.41611862182617, -13),
 10: (17.079334259033203, 85.11976623535156, 102.19910049438477, -13),
 11: (17.079334259033203, 84.19281005859375, 101.27214431762695, -13),
 12: (17.079334259033203, 78.00676727294922, 95.08610153198242, -13),
 13: (17.079334259033203, 79.3498764038086, 96.4292106628418, -13),
 14: (17.079334259033203, 79.63365936279297, 96.71299362182617, -13),
 15: (17.079334259033203, 82.63683319091

In [15]:
getState(test_data, 1, window_size + 1)

Ticker                          AAPL         IBM         KO
Date                                                       
2017-12-25 00:00:00+00:00  39.855324  106.951828  37.346188
2018-01-01 00:00:00+00:00  41.214226  113.274696  37.500851


array([[0.79558118, 0.99820843, 0.53858888]])

In [19]:
getState(test_data, 1, window_size + 1)

Ticker                          AAPL         IBM         KO
Date                                                       
2017-12-25 00:00:00+00:00  39.855324  106.951828  37.346188
2018-01-01 00:00:00+00:00  41.214226  113.274696  37.500851


array([[0.79558118, 0.99820843, 0.53858888]])

In [38]:
action = agent.act(state)
print(action)
action_arr = agent.map_real_to_pos_state(action)
print(action_arr)
agent.map_pos_state_to_real(np.array([3, 3, 3]))

15
[1 2 0]


26

In [39]:
agent.map_real_to_pos_state(26)

array([2, 2, 2])

## Test Model With Short-Selling Allowed

In [113]:
for t in range(1, n_test):
    action = agent.act(state)
    action_arr = agent.map_real_to_pos_state(action)
    #print(action)
    #set_trace()
    next_state = getState(test_data, t + 1, window_size + 1)
    reward = 0

    labeled_actions = []
    for i in zip(action_arr, tickers):
        labeled_actions.append(list(i))
    labeled_actions.sort(key=lambda x: -x[0])

    for i in range(3):
        ticker = labeled_actions[i][1]
        stock_price = test_data[ticker].iloc[t]
        if agent.inventory[ticker]:
            position = 'long' if agent.inventory[ticker][0] > 0 else 'short'
        else:
            position = None

        if labeled_actions[i][0] == 1: # buy
            if cash - stock_price > 0:
                if position == 'short':
                    sold_price = -1 * agent.inventory[ticker].pop(0)
                    total_profit += sold_price - stock_price
                    states_buy_test[ticker].append(t)
                    cash -= stock_price
                
                else:
                    agent.inventory[ticker].append(stock_price)
                    states_buy_test[ticker].append(t)
                    cash -= stock_price
                    #print("Buy: " + formatPrice(data[t]))
            else:
                labeled_actions[i][0] = 0

        elif labeled_actions[i][0] == 2:
            if position == 'long': # sell
                bought_price = agent.inventory[ticker].pop(0)
                total_profit += stock_price - bought_price
                states_sell_test[ticker].append(t)
                cash += stock_price
                #print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
            else:
                agent.inventory[ticker].append(-1 * stock_price)
                states_buy_test[ticker].append(t)
                cash += stock_price
                
                
    action = agent.labeled_actions_to_action_state(labeled_actions)
    action = agent.map_pos_state_to_real(action)
    
    done = True if t == n_steps - 1 else False
    agent.memory.append((state, action, reward, next_state, done))
    
    port_value = agent.calc_port_value(test_data, t)

    net_worth = cash + port_value
    if net_worth < 0:
        print('Dead!')
        break

    tracker[t] = (cash, port_value, net_worth, action)

    state = next_state

    if done:
        print("------------------------------------------")
        print("Total Profit: " + formatPrice(total_profit))
        print("------------------------------------------")

Ticker                          AAPL         IBM         KO
Date                                                       
2018-01-01 00:00:00+00:00  41.214226  113.274696  37.500851
2018-01-08 00:00:00+00:00  41.706440  113.727806  37.565979
Ticker                          AAPL         IBM         KO
Date                                                       
2018-01-08 00:00:00+00:00  41.706440  113.727806  37.565979
2018-01-15 00:00:00+00:00  42.029099  113.191002  38.388107
Ticker                          AAPL         IBM         KO
Date                                                       
2018-01-15 00:00:00+00:00  42.029099  113.191002  38.388107
2018-01-22 00:00:00+00:00  40.392303  116.655708  39.503281
Ticker                          AAPL         IBM         KO
Date                                                       
2018-01-22 00:00:00+00:00  40.392303  116.655708  39.503281
2018-01-29 00:00:00+00:00  37.799339  110.862686  38.038086
Ticker                          AAPL    

In [114]:
tracker

{1: (62.499149322509766, 37.500850677490234, 100.0, 1),
 2: (24.933170318603516, 75.1319580078125, 100.06512832641602, 1),
 3: (24.933170318603516, 76.77621459960938, 101.70938491821289, 0),
 4: (24.933170318603516, 79.00656127929688, 103.93973159790039, 0),
 5: (24.933170318603516, 76.076171875, 101.00934219360352, 0),
 6: (24.933170318603516, 70.21540069580078, 95.1485710144043, 0),
 7: (24.933170318603516, 73.22718048095703, 98.16035079956055, 0),
 8: (24.933170318603516, 71.69686889648438, 96.63003921508789, 0),
 9: (24.933170318603516, 71.17591094970703, 96.10908126831055, 0),
 10: (24.933170318603516, 72.96672058105469, 97.8998908996582, 0),
 11: (24.933170318603516, 70.75262451171875, 95.68579483032227, 0),
 12: (24.933170318603516, 69.52133178710938, 94.45450210571289, 0),
 13: (24.933170318603516, 71.32794189453125, 96.26111221313477, 0),
 14: (24.933170318603516, 72.13267517089844, 97.06584548950195, 0),
 15: (24.933170318603516, 73.10167694091797, 98.03484725952148, 0),
 16:

## Test Model on Full

In [4]:
tickers = ['AAPL', 'KO', 'IBM']
start_date = "2009-12-28"
end_date = "2018-12-31"

test_data = fetch_data(tickers, start_date, end_date)
test_data

[*********************100%***********************]  3 of 3 completed


Ticker,AAPL,IBM,KO
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2009-12-28 00:00:00+00:00,6.355581,74.471954,18.197166
2010-01-04 00:00:00+00:00,6.393281,74.443535,17.606552
2010-01-11 00:00:00+00:00,6.210814,74.972618,17.970493
2010-01-18 00:00:00+00:00,5.964107,71.399780,17.316042
2010-01-25 00:00:00+00:00,5.792500,69.630417,17.319229
...,...,...,...
2018-11-26 00:00:00+00:00,42.688766,90.549339,42.111393
2018-12-03 00:00:00+00:00,40.276802,86.957092,41.341164
2018-12-10 00:00:00+00:00,39.557259,87.365135,41.551708
2018-12-17 00:00:00+00:00,36.031349,80.836411,40.061108


In [5]:
# Initialize agent for test
window_size = 1
n_test = len(test_data) - 1
state = getState(test_data, 1, window_size + 1)
total_profit = 0
cash = 100
is_eval = True
done = False
states_sell_test = {'AAPL': [], 'IBM': [], 'KO': []}
states_buy_test = {'AAPL': [], 'IBM': [], 'KO': []}
tracker = {}

#Get the trained model
model_name = "model_ep"+str(10)
agent = Agent(window_size, is_eval, model_name)

Ticker                         AAPL        IBM         KO
Date                                                     
2009-12-28 00:00:00+00:00  6.355581  74.471954  18.197166
2010-01-04 00:00:00+00:00  6.393281  74.443535  17.606552


In [6]:
for t in range(1, n_test):
    action = agent.act(state)
    action_arr = agent.map_real_to_pos_state(action)
    #set_trace()
    next_state = getState(test_data, t + 1, window_size + 1)
    reward = 0

    labeled_actions = []
    for i in zip(action_arr, tickers):
        labeled_actions.append(list(i))
    labeled_actions.sort(key=lambda x: -x[0])

    for i in range(3):
        ticker = labeled_actions[i][1]
        stock_price = test_data[ticker].iloc[t]
        if agent.inventory[ticker]:
            position = 'long' if agent.inventory[ticker][0] > 0 else 'short'
        else:
            position = None

        if labeled_actions[i][0] == 1: # buy
            if cash - stock_price > 0:
                if position == 'short':
                    sold_price = -1 * agent.inventory[ticker].pop(0)
                    total_profit += sold_price - stock_price
                    states_buy_test[ticker].append(t)
                    cash -= stock_price
                
                else:
                    agent.inventory[ticker].append(stock_price)
                    states_buy_test[ticker].append(t)
                    cash -= stock_price
                    #print("Buy: " + formatPrice(data[t]))
            else:
                labeled_actions[i][0] = 0

        elif labeled_actions[i][0] == 2:
            if position == 'long': # sell
                bought_price = agent.inventory[ticker].pop(0)
                total_profit += stock_price - bought_price
                states_sell_test[ticker].append(t)
                cash += stock_price
                #print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(data[t] - bought_price))
            else:
                agent.inventory[ticker].append(-1 * stock_price)
                states_buy_test[ticker].append(t)
                cash += stock_price
                
                
    action = agent.labeled_actions_to_action_state(labeled_actions)
    action = agent.map_pos_state_to_real(action)
    
    done = True if t == n_test - 1 else False
    agent.memory.append((state, action, reward, next_state, done))
    
    port_value = agent.calc_port_value(test_data, t)

    net_worth = cash + port_value
    if net_worth < 0:
        print('Dead!')
        break

    tracker[t] = (cash, port_value, net_worth, action)

    state = next_state

    if done:
        print("------------------------------------------")
        print("Total Profit: " + formatPrice(total_profit))
        print("------------------------------------------")

Ticker                         AAPL        IBM         KO
Date                                                     
2010-01-04 00:00:00+00:00  6.393281  74.443535  17.606552
2010-01-11 00:00:00+00:00  6.210814  74.972618  17.970493
Ticker                         AAPL        IBM         KO
Date                                                     
2010-01-11 00:00:00+00:00  6.210814  74.972618  17.970493
2010-01-18 00:00:00+00:00  5.964107  71.399780  17.316042
Ticker                         AAPL        IBM         KO
Date                                                     
2010-01-18 00:00:00+00:00  5.964107  71.399780  17.316042
2010-01-25 00:00:00+00:00  5.792500  69.630417  17.319229
Ticker                         AAPL        IBM         KO
Date                                                     
2010-01-25 00:00:00+00:00  5.792500  69.630417  17.319229
2010-02-01 00:00:00+00:00  5.895041  70.273308  16.948906
Ticker                         AAPL        IBM         KO
Date          

In [7]:
tracker

{1: (7.949913024902344, 92.05008697509766, 100.0, 4),
 2: (7.949913024902344, 92.94311141967773, 100.89302444458008, 0),
 3: (7.949913024902344, 88.71582221984863, 96.66573524475098, 0),
 4: (7.949913024902344, 86.94964599609375, 94.8995590209961, 0),
 5: (7.949913024902344, 87.22221374511719, 95.17212677001953, 0),
 6: (7.949913024902344, 87.7794361114502, 95.72934913635254, 0),
 7: (7.949913024902344, 90.47343826293945, 98.4233512878418, 0),
 8: (7.949913024902344, 89.49857521057129, 97.44848823547363, 0),
 9: (7.949913024902344, 90.18209457397461, 98.13200759887695, 0),
 10: (7.949913024902344, 90.14542198181152, 98.09533500671387, 0),
 11: (7.949913024902344, 90.60396575927734, 98.55387878417969, 0),
 12: (7.949913024902344, 91.45760726928711, 99.40752029418945, 0),
 13: (7.949913024902344, 91.08962059020996, 99.0395336151123, 0),
 14: (7.949913024902344, 91.15252304077148, 99.10243606567383, 0),
 15: (7.949913024902344, 92.3434944152832, 100.29340744018555, 0),
 16: (7.94991302490

In [None]:
# Convert tracker dictionary to DataFrame
tracker_df = pd.DataFrame.from_dict(tracker, orient='index', columns=['Cash', 'Portfolio Value', 'Net Worth', 'Action'])
tracker_df

In [11]:
# convert tracker to weekly, monthly and yearly 

start_date = pd.to_datetime("2010-01-04")   
date_range = pd.date_range(start=start_date, periods=len(tracker_df), freq='5B')  # 'B' represents business days
tracker_df.index = date_range

# extract year
tracker_df['Year'] = tracker_df.index.year
tracker_df['Month'] = tracker_df.index.month

In [12]:
tracker_df

Unnamed: 0,Cash,Portfolio Value,Net Worth,Action,Year,Month
2010-01-04,7.949913,92.050087,100.000000,4,2010,1
2010-01-11,7.949913,92.943111,100.893024,0,2010,1
2010-01-18,7.949913,88.715822,96.665735,0,2010,1
2010-01-25,7.949913,86.949646,94.899559,0,2010,1
2010-02-01,7.949913,87.222214,95.172127,0,2010,2
...,...,...,...,...,...,...
2018-11-19,7.949913,126.348824,134.298737,0,2018,11
2018-11-26,7.949913,132.660732,140.610645,0,2018,11
2018-12-03,7.949913,128.298256,136.248169,0,2018,12
2018-12-10,7.949913,128.916843,136.866756,0,2018,12
