In [None]:
from utils import *
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from collections import deque
import random

: 

# Configure Modeling Parameters and Fetch Data

Enter the ticker and date range you would like to build the model on. This model takes a single ticker's data. Also enter a training size: the proportion of the data to include in the training set versus the test set.

In [3]:
# --- Stock selection -------------------------------------------------------
# Symbols are passed as a list; the data-fetch cell reads only ticker[0].
ticker = ["TSLA"]
start_date = "2015-04-01"
end_date = "2024-04-05"

# --- Modeling parameters ---------------------------------------------------
# Proportion of the data assigned to the training set (remainder is the test set).
train_size = 0.8

# Number of days we want to look into the future based on the past days.
n_future = 1
# Number of past days we want to use to predict the future.
n_past = 30

In [4]:
# Data Fetching
# Fetch the history for the configured symbols and keep the frame for the first
# one. The pipeline builds a new frame instead of mutating the object returned
# by `fetch_stock_data` in place:
#   1. move the index into a regular column (the code below expects it to be named 'Date'),
#   2. parse 'Date' as datetimes and drop any timezone information.
data = (
    fetch_stock_data(ticker, start_date, end_date)[ticker[0]]
    .reset_index(drop=False)
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']).dt.tz_localize(None))
)

print(data.shape)
included_days = len(data)  # number of rows (one per fetched trading day)
data.head()

(2268, 8)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2015-04-01,12.58,12.82,12.403333,12.506,56919000,0.0,0.0
1,2015-04-02,12.682,12.882,12.666667,12.733333,75156000,0.0,0.0
2,2015-04-06,13.2,13.85,13.166667,13.54,186837000,0.0,0.0
3,2015-04-07,13.500667,13.670667,13.409333,13.55,65218500,0.0,0.0
4,2015-04-08,13.88,14.06,13.724667,13.844667,94546500,0.0,0.0


# Model Build
Here I build an actor-critic agent that signals actions in a trading environment, trained and tested on your given ticker.

In [35]:
class Actor(tf.keras.Model):
    """Two-layer dense network that maps a state to `output_size` raw outputs.

    Args:
        name: Label used for the TensorFlow name scope the layers are created in.
        input_size: Accepted for signature symmetry; not used by this class.
        output_size: Number of units in the final (unactivated) layer.
        size_layer: Number of units in the hidden ReLU layer.
    """

    def __init__(self, name, input_size, output_size, size_layer):
        super().__init__()
        with tf.name_scope(name):
            # Hidden ReLU layer followed by a linear output layer.
            self.dense1 = tf.keras.layers.Dense(units=size_layer, activation='relu')
            self.dense2 = tf.keras.layers.Dense(units=output_size)

    def call(self, x):
        """Run `x` through the hidden layer and return the output layer's logits."""
        hidden = self.dense1(x)
        return self.dense2(hidden)

class Critic(tf.keras.Model):
    """Dense network that scores a state together with an actor output.

    The state is projected to `output_size` features, the second input is added
    element-wise, and the sum is reduced to a single scalar per sample.

    Args:
        name: Label used for the TensorFlow name scope the layers are created in.
        input_size: Accepted for signature symmetry; not used by this class.
        output_size: Width of the projection that is summed with `y`.
        size_layer: Width of the first hidden layer (the third uses half of it).
    """

    def __init__(self, name, input_size, output_size, size_layer):
        super().__init__()
        with tf.name_scope(name):
            self.dense1 = tf.keras.layers.Dense(units=size_layer, activation='relu')
            self.dense2 = tf.keras.layers.Dense(units=output_size, activation='relu')
            self.dense3 = tf.keras.layers.Dense(units=size_layer // 2, activation='relu')
            self.dense4 = tf.keras.layers.Dense(units=1)

    def call(self, x, y):
        """Return one value per sample for state batch `x` and actor output `y`."""
        # Project the state, then merge with `y` by element-wise addition.
        state_features = self.dense2(self.dense1(x))
        merged = self.dense3(state_features + y)
        return self.dense4(merged)

In [41]:
class Agent:
    """Epsilon-greedy actor-critic trading agent for a single price series.

    The agent observes the last ``window_size`` one-step price differences and
    picks one of ``OUTPUT_SIZE`` actions: 1 buys one unit, 2 sells the oldest
    held unit, and any other action leaves the position unchanged.

    Upper-case class attributes are hyper-parameters. ``EPSILON`` and ``T_COPY``
    start from the class defaults and are rebound on the instance during
    training. The replay buffer ``MEMORIES`` is created per instance.
    """

    LEARNING_RATE = 0.001   # Adam step size for both actor and critic
    BATCH_SIZE = 32         # maximum number of transitions sampled per update
    LAYER_SIZE = 256        # hidden width handed to Actor / Critic
    OUTPUT_SIZE = 3         # number of discrete actions
    EPSILON = 0.5           # initial probability of taking a random action
    DECAY_RATE = 0.005      # exponential decay rate of epsilon per epoch
    MIN_EPSILON = 0.1       # lower bound of the epsilon schedule
    GAMMA = 0.99            # discount factor for bootstrapped targets
    MEMORY_SIZE = 300       # replay-buffer capacity
    COPY = 1000             # training steps between target-network syncs
    T_COPY = 0              # training-step counter driving the sync

    def __init__(self, state_size, window_size, trend, skip):
        """Build the networks, optimizers and an empty replay buffer.

        Args:
            state_size: Forwarded to Actor / Critic as their ``input_size``.
            window_size: Number of price differences in one state vector.
            trend: Price series; ``get_state`` indexes and slices it.
            skip: Stride of the time loop in ``train`` and ``buy``.
        """
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        # Per-instance buffer: a class-level deque would be shared by every
        # Agent and would leak transitions from earlier agents into new ones.
        self.MEMORIES = deque()
        self.actor = Actor('actor-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
        self.actor_target = Actor('actor-target', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
        self.critic = Critic('critic-original', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
        self.critic_target = Critic('critic-target', self.state_size, self.OUTPUT_SIZE, self.LAYER_SIZE)
        self.actor_optimizer = tf.keras.optimizers.Adam(self.LEARNING_RATE)
        self.critic_optimizer = tf.keras.optimizers.Adam(self.LEARNING_RATE)

    def _assign(self, from_model, to_model):
        """Copy weights layer by layer from ``from_model`` into ``to_model``."""
        for from_layer, to_layer in zip(from_model.layers, to_model.layers):
            to_layer.set_weights(from_layer.get_weights())

    def _memorize(self, state, action, reward, new_state, dead):
        """Append one transition and drop the oldest once capacity is exceeded."""
        self.MEMORIES.append((state, action, reward, new_state, dead))
        if len(self.MEMORIES) > self.MEMORY_SIZE:
            self.MEMORIES.popleft()

    def _select_action(self, state):
        """Return a random action with probability ``EPSILON``, else the actor's argmax."""
        if np.random.rand() < self.EPSILON:
            return np.random.randint(self.OUTPUT_SIZE)
        state_tensor = tf.convert_to_tensor([state], dtype=tf.float32)
        logits = self.actor(state_tensor)
        return tf.argmax(logits[0]).numpy()

    @staticmethod
    def _td_targets(rewards, next_values, dones, gamma):
        """Compute ``reward + gamma * next_value`` for each transition.

        Transitions whose ``dones`` flag is true keep the plain reward (no
        bootstrapping). All inputs are reshaped to column vectors.

        Returns:
            ``float32`` array of shape ``(batch, 1)``.
        """
        rewards = np.asarray(rewards, dtype=np.float32).reshape((-1, 1))
        next_values = np.asarray(next_values, dtype=np.float32).reshape((-1, 1))
        keep = 1.0 - np.asarray(dones, dtype=np.float32).reshape((-1, 1))
        return rewards + gamma * keep * next_values

    def _construct_memories_and_train(self, replay):
        """Run one critic update and one actor update on a sampled batch.

        Args:
            replay: Sequence of ``(state, action, reward, new_state, dead)`` tuples.

        Returns:
            The critic's mean-squared-error loss for this batch.
        """
        states = np.array([m[0] for m in replay], dtype=np.float32)
        new_states = np.array([m[3] for m in replay], dtype=np.float32)
        rewards = [m[2] for m in replay]
        dones = [m[4] for m in replay]

        Q = self.actor(states)
        # NOTE(review): the target actor is evaluated on `states` while the
        # target critic is evaluated on `new_states`; the usual formulation
        # feeds the next state to both -- confirm this is intentional.
        Q_target = self.actor_target(states)
        next_values = self.critic_target(new_states, Q_target)

        # Each sample is gated by its OWN terminal flag (the previous code
        # looked at replay[0] for every sample in the batch).
        targets = tf.convert_to_tensor(
            self._td_targets(rewards, next_values.numpy(), dones, self.GAMMA)
        )

        with tf.GradientTape() as tape:
            q_values = self.critic(states, Q)
            critic_loss = tf.reduce_mean(tf.square(targets - q_values))
        critic_grad = tape.gradient(critic_loss, self.critic.trainable_variables)
        self.critic_optimizer.apply_gradients(zip(critic_grad, self.critic.trainable_variables))

        with tf.GradientTape() as tape:
            logits = self.actor(states)
            # Maximize the critic's score of the actor output by minimizing its negative.
            actor_loss = -tf.reduce_mean(self.critic(states, logits))
        actor_grad = tape.gradient(actor_loss, self.actor.trainable_variables)
        self.actor_optimizer.apply_gradients(zip(actor_grad, self.actor.trainable_variables))

        return critic_loss

    def get_state(self, t):
        """Return the ``window_size`` one-step price differences ending at index ``t``.

        When fewer than ``window_size + 1`` prices precede ``t``, the window is
        left-padded with the first price, so the padded differences are zero.
        """
        span = self.window_size + 1
        start = t - span + 1
        if start >= 0:
            block = self.trend[start : t + 1]
        else:
            block = [self.trend[0]] * -start + list(self.trend[0 : t + 1])
        return np.diff(np.asarray(block))

    def buy(self, initial_money):
        """Replay the current policy over ``trend`` and log every trade.

        Actions come from ``_select_action``, so they remain epsilon-greedy
        with the agent's current ``EPSILON``.

        Args:
            initial_money: Starting cash balance.

        Returns:
            ``(states_buy, states_sell, total_gains, invest)``: time indices of
            buys and sells, the absolute change in balance, and that change as
            a percentage of ``initial_money``.
        """
        starting_money = initial_money
        states_sell = []
        states_buy = []
        inventory = []
        state = self.get_state(0)
        for t in range(0, len(self.trend) - 1, self.skip):
            action = self._select_action(state)
            next_state = self.get_state(t + 1)
            price = self.trend[t]

            if action == 1 and initial_money >= price:
                inventory.append(price)
                initial_money -= price
                states_buy.append(t)
                print('day %d: buy 1 unit at price %f, total balance %f'% (t, price, initial_money))

            elif action == 2 and len(inventory):
                bought_price = inventory.pop(0)  # sell the oldest unit first
                initial_money += price
                states_sell.append(t)
                try:
                    invest = ((price - bought_price) / bought_price) * 100
                except ZeroDivisionError:
                    # A unit bought at price 0 has no defined percentage return.
                    invest = 0
                print(
                    'day %d, sell 1 unit at price %f, investment %f %%, total balance %f,'
                    % (t, price, invest, initial_money)
                )

            state = next_state
        invest = ((initial_money - starting_money) / starting_money) * 100
        total_gains = initial_money - starting_money
        return states_buy, states_sell, total_gains, invest

    def train(self, iterations, checkpoint, initial_money):
        """Train the actor and critic by repeatedly trading through ``trend``.

        Args:
            iterations: Number of passes (epochs) over the price series.
            checkpoint: Print a progress line every ``checkpoint`` epochs.
            initial_money: Cash balance at the start of every epoch.
        """
        for i in range(iterations):
            total_profit = 0
            inventory = []
            state = self.get_state(0)
            balance = initial_money
            cost = float('nan')  # stays NaN when the series is too short for a single step
            for t in range(0, len(self.trend) - 1, self.skip):
                if (self.T_COPY + 1) % self.COPY == 0:
                    self._assign(self.actor, self.actor_target)
                    self._assign(self.critic, self.critic_target)

                action = self._select_action(state)
                next_state = self.get_state(t + 1)
                price = self.trend[t]

                if action == 1 and balance >= price:
                    inventory.append(price)
                    balance -= price

                elif action == 2 and len(inventory) > 0:
                    bought_price = inventory.pop(0)
                    total_profit += price - bought_price
                    balance += price

                # Reward: relative change of the cash balance since the epoch start.
                invest = ((balance - initial_money) / initial_money)

                self._memorize(state, action, invest, next_state, balance < initial_money)
                state = next_state
                batch_size = min(len(self.MEMORIES), self.BATCH_SIZE)
                replay = random.sample(self.MEMORIES, batch_size)
                cost = self._construct_memories_and_train(replay)
                self.T_COPY += 1
                # Epsilon depends on the epoch index only, decaying towards MIN_EPSILON.
                self.EPSILON = self.MIN_EPSILON + (1.0 - self.MIN_EPSILON) * np.exp(-self.DECAY_RATE * i)
            if (i + 1) % checkpoint == 0:
                print('epoch: %d, total rewards: %.3f, cost: %f, total money: %f'
                      % (i + 1, total_profit, cost, balance))

In [None]:
# Seed every random source the agent draws from so repeated runs are comparable:
# `random.sample` (replay batches), `np.random` (epsilon-greedy exploration)
# and TensorFlow (layer weight initialisation).
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

close = data.Close.values.tolist()  # the agent expects a plain Python list of prices
initial_money = 10000
window_size = 30
skip = 1
batch_size = 32  # NOTE: not passed to Agent, which samples using Agent.BATCH_SIZE
agent = Agent(
    state_size=window_size,
    window_size=window_size,
    trend=close,
    skip=skip,
)
agent.train(iterations=10, checkpoint=1, initial_money=initial_money)

In [None]:
# Replay the trained agent over its own price series (the same `close` list it
# was trained on) and collect the buy/sell time indices and overall return.
states_buy, states_sell, total_gains, invest = agent.buy(initial_money=initial_money)

In [None]:
# Price curve with the agent's buy (up-triangle) and sell (down-triangle) markers.
fig, ax = plt.subplots(figsize=(15, 5))
ax.plot(close, color='r', lw=2.)
ax.plot(close, '^', markersize=10, color='m', label='buying signal', markevery=states_buy)
ax.plot(close, 'v', markersize=10, color='k', label='selling signal', markevery=states_sell)
ax.set_title('total gains %f, total investment %f%%' % (total_gains, invest))
ax.legend()
plt.show()