#  Stock Market Dataset

In this kernel, we will train and evaluate the model we will use in our Dash web app to predict optimal decision to take for future investments in a company given the last few day's stock prices. We will develop a Deep Q-Learning Agent.

In [1]:
import numpy as np
import pandas as pd
import time

from agent import Agent

Using TensorFlow backend.


## Pre-Processing

Functions to load and process the data.

In [2]:
def get_data(path):
    """
    Get a company's stock price history, extract the Closing price and index it by day.
    
    :path: path to the company's data 
    
    :return: data
    """
    data = pd.read_csv(path)
    data['Date'] = pd.to_datetime(data['Date'])
    return data.set_index('Date')["Close"]
    
    
def process_data(df, memory_len, training):
        """
        Create individual series of 10 Closing prices.
        
        :df: raw data
        :memory_len: size of the series
        :training: boolean to determine whether to give training or testing data.
        
        :return: array of series of stock prices
        """
        LSTM_inputs = []
        for i in range(len(df) - memory_len):
            LSTM_inputs.append(df[i:(i+memory_len)])

        LSTM_inputs = [np.array(LSTM_input) for LSTM_input in LSTM_inputs]
        LSTM_inputs = np.array(LSTM_inputs)
        LSTM_inputs = np.reshape(LSTM_inputs, (LSTM_inputs.shape[0], 1, LSTM_inputs.shape[1]))
        # Select the correct data
        if (training):
            return LSTM_inputs[0:int(len(df)*0.5)]
        else:
            return LSTM_inputs[int(len(df)*0.5)+1:]

## Training



In [3]:
def train_agent(memory_len=100, epochs=50, model_name="dqn"):
    """
    Function responsible for training the model.
    
    :memory_len: number of previous stock prices use for analysis
    :epochs: number of episodes of learning
    :model_name: path of the saved model
    
    :return: agent
    """
    # Get the training data
    agent = Agent(process_data(get_data('Stocks/goog.us.txt'), memory_len, True), False, model_name, memory_len)
    
    l = len(agent.data) - 1
    total_rewards = []
    total_errors = []
    show_log_freq = 5
    print('\t'.join(map(str, ["Epochs", "Epsilon    ", "Profits     ", "Reward     ", "Error     ", "Elapsed time"])))

    start = time.time()
    # Repeat for a number of epochs
    for e in range(epochs):
        total_error = total_reward = 0
        # Reset the agent at the begining of each epoch
        agent.reset()
        # For each data point
        for t in range(l):
            # Get the last few stock prices
            state = agent.data[t]
            next_state = agent.data[t+1]
            # Make a decision
            decision = agent.decision(state)
            # Perform the action
            reward = agent.step(decision)
            # Save the observations
            agent.memory.append((state, next_state, decision, reward, agent.done))
            # Learn after a certain number of iterations
            if (e + 1) * (t + 1) % agent.batch_size == 0:
                total_error += agent.learn()

            total_reward += reward

        total_rewards.append(total_reward)
        total_errors.append(total_error)

        # Save the model
        if (e+1) % 10 == 0:
            agent.model.save(agent.model_name + str(e+1))

        # Log
        if (e+1) % show_log_freq == 0:
            # Average the last few (show_log_freq) rewards and errors
            log_reward = sum(total_rewards[((e + 1) - show_log_freq):]) / show_log_freq
            log_error = sum(total_errors[((e + 1) - show_log_freq):]) / show_log_freq
            elapsed_time = time.time()-start
            print('\t'.join(map(str, [e+1, "{0:02f}".format(agent.epsilon), "{0:02f}".format(agent.profit),
                        "{0:02f}".format(log_reward), "{0:02f}".format(log_error), "{0:02f}".format(elapsed_time)])))
            start = time.time()
            
    return agent

agent = train_agent()

Epochs	Epsilon    	Profits     	Reward     	Error     	Elapsed time
5	0.974600	2590.530000	18.000000	83240.059994	8.970923
10	0.935000	1418.550000	12.200000	1385.228470	11.469996
15	0.909600	4591.740000	32.400000	516.165615	7.420840
20	0.841400	1947.810000	9.400000	671.667101	20.870218
25	0.804600	3.590000	26.400000	454.090466	10.738450
30	0.776400	2749.430000	24.400000	465.102871	8.443142
35	0.671000	2853.330000	14.600000	222962.216214	30.407993
40	0.625600	1485.890000	2.600000	175.142849	13.537200
45	0.600200	1677.520000	14.000000	30.024094	7.945068
50	0.537800	2466.400000	0.600000	38.064943	18.606562


## Testing

In [4]:
def evaluate_agent():
    """
    Function responsible for testing the model.
    """
    # Get the training data
    agent.data = process_data(get_data('Stocks/goog.us.txt'), agent.input_space, False)
    
    l = len(agent.data) - 1
    # Set testing mode
    agent.reset()

    # For each data point
    for t in range(l):
        # Get the last few stock prices
        state = agent.data[t]
        # Make a decision
        decision = agent.decision(state)
        # Perform the action
        reward = agent.step(decision)
        
    print("--------------------------------")
    print(agent.profit)
    print("--------------------------------")
    
evaluate_agent()

--------------------------------
3488.199999999999
--------------------------------
