<a href="https://www.kaggle.com/code/nsff591/failed-stock-bot-dqn-project?scriptVersionId=97815469" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

# Predicting Buy and Sell opportunities - StockBot

Disclaimer: This project failed and the bot does not work. I started it thinking that a simple Deep Q-Network (DQN) would be good enough for training a stock investment bot. Through trial and error I found that it is not the best solution: there are some extra nuances that need to be addressed. A working approach to the same problem is described here: https://towardsdatascience.com/deep-reinforcement-learning-for-automated-stock-trading-f1dad0126a02

If you still want to run this code, you will need to get an API key from the Alpha Vantage website: https://www.alphavantage.co/

Alternatively, the yfinance library might also work, with some tweaking of the data format.

# Libraries

Using the **Alpha Vantage API** to retrieve the most recent stock data
<img src="https://miro.medium.com/max/1400/1*hniu4saxhfVhhKdE4aMnMg.png" width="650" align="center"/>

In [None]:
pip install alpha_vantage

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten, LSTM
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf

import requests #Used to fetch the data from Alpha Vantage API

from alpha_vantage.timeseries import TimeSeries 
from alpha_vantage.fundamentaldata import FundamentalData

from collections import deque
import time
import random
import os
import cv2
from PIL import Image

from tqdm import tqdm

# Global Constants

In [None]:
# --- Alpha Vantage access and data selection ---
# The key is read from the ALPHA_VANTAGE_API_KEY environment variable so it does
# not have to be stored in the notebook. The fallback is the original empty
# string; replace it with your key if you prefer to paste it here.
API_KEY = os.environ.get("ALPHA_VANTAGE_API_KEY", "")
STOCK_SYMBOL = "AAPL"
ALPHA_VANTAGE_OUTPUT_SIZE = "full"  # passed as `outputsize` to TimeSeries.get_daily
WINDOW_SIZE = 30  # number of consecutive closing prices in one observation
EPOCH_NUM = 20  # only referenced by a commented-out loop condition in the training cell

# --- DQN / replay-memory settings ---
REPLAY_MEMORY_SIZE = 200  # maxlen of the agent's replay deque
MIN_REPLAY_MEMORY_SIZE = 50  # training is skipped until this many transitions are stored
MINIBATCH_SIZE = 16  # transitions sampled per training step
MODEL_NAME = "2x4"
GAMMA = 0.97  # discount applied to the target network's best future Q-value
MIN_REWARD = -200  # For model save
UPDATE_TARGET_EVERY = 30  # the target network is synced once the train-step counter exceeds this

# Environment settings
EPISODES = 10

# Exploration settings
epsilon = 1  # not a constant, going to be decayed
EPSILON_DECAY = 0.99975  # multiplied into epsilon after every environment step
MIN_EPSILON = 0.001  # epsilon is never decayed below this

#  Stats settings
AGGREGATE_STATS_EVERY = 50  # episodes
SHOW_PREVIEW = False

# Importing the Data

In [None]:
# Download the daily time series for STOCK_SYMBOL from Alpha Vantage.
ts = TimeSeries(key=API_KEY)
data, meta_data = ts.get_daily(STOCK_SYMBOL, outputsize=ALPHA_VANTAGE_OUTPUT_SIZE)

## ALL DATES ##
# The response keys, flipped end-to-start.
# NOTE(review): presumably the API answers newest-first, so this gives oldest-first — confirm.
data_date = list(data.keys())[::-1]

## ALL PRICES ##
# The '4. close' field of every entry as a float, flipped the same way as the dates.
data_close_price = [float(day_values['4. close']) for day_values in data.values()][::-1]

In [None]:
# print(data_date)
print(data_close_price)

# Keep only the last 1000 entries. The dates are trimmed with the same slice so
# that data_date[i] still belongs to data_close_price[i].
data_date = data_date[-1000:]
data_close_price = data_close_price[-1000:]

In [None]:
# Scale the data with a default-configured MinMaxScaler.
scaler = MinMaxScaler()

# The scaler is given the prices as a single column (n_samples, 1) and the
# result is flattened back to 1-D.
# The target shape is passed positionally: the `newshape=` keyword of np.reshape
# is deprecated in NumPy 2.1+.
prices_column = np.asarray(data_close_price, dtype=float).reshape(-1, 1)
data_close_price_scaled = scaler.fit_transform(prices_column).flatten()

print(data_close_price_scaled)

In [None]:
# Build the neural-network inputs: a rolling window of WINDOW_SIZE consecutive
# scaled prices. The window for end index `end` covers [end - WINDOW_SIZE, end).
X_train = [
    data_close_price_scaled[end - WINDOW_SIZE:end]
    for end in range(WINDOW_SIZE, len(data_close_price_scaled))
]

np.shape(X_train)

# Defining the Bot

We are going to use a Deep Q-Network (DQN).

In [None]:
class StockBot:
  """Tracks a single open position and the profit realised when it is closed."""

  def __init__(self):
    self.reset()

  def reset(self):
    """Clear the open position and the last realised profit."""
    self.position_price = 0 # price when bought
    self.position_status = 0 # 0 = waiting 1 = bought
    self.profit = 0

  def buy(self, position):
    """Open a position, remembering `position` as the purchase price."""
    self.position_price = position
    self.position_status = 1 # bought

  def sell(self, current_price):
    """Close the position at `current_price` and store the realised profit.

    Profit is the sale price minus the purchase price, so selling above the
    purchase price gives a positive value and selling below it a negative one.
    """
    self.profit = current_price - self.position_price
    self.position_price = 0
    self.position_status = 0 # waiting

# Defining the Environment

In [None]:
class StockEnvironment:
  """Walks through a sequence of observations, one entry of `data` per time step.

  The last item of the current observation is used as the trade price when the
  bot buys or sells.
  """

  def __init__(self,data):
    self.data = data
    self.data_len = len(self.data)
    self.stock_bot = StockBot()
    self.reset()

  def reset(self):
    """Rewind to the first observation and clear profit and position state.

    Returns:
      The first observation, which is also stored in `self.new_observation`.
    """
    self.total_profit = 0
    self.t = 0
    self.done = False
    self.new_observation = self.data[self.t]
    self.stock_bot.reset()
    return self.new_observation

  def step(self, action):
    """Apply `action` at the current time step, then advance by one step.

    Args:
      action: 0 = waiting; 1 = buying; 2 = selling. Buying while a position is
        already open and selling while none is open are ignored.

    Returns:
      A tuple (new_observation, reward, done). The reward is non-zero only when
      a sell is executed and is the sign (-1 or 1) of the bot's reported profit.
      Once the end of the data is reached, `done` is True and the last
      observation is returned again.
    """
    reward = 0

    # 0 = waiting; 1 = buying; 2 = selling

    if action == 1: # buying
      if self.stock_bot.position_status == 0:
        self.stock_bot.buy(self.data[self.t][-1])
    elif action == 2:
      if self.stock_bot.position_status == 1: # if bought, you can sell, otherwise nothing happens
        self.stock_bot.sell(self.data[self.t][-1])
        self.total_profit += self.stock_bot.profit
        reward += self.stock_bot.profit

    # clipping reward
    if reward > 0:
      reward = 1
    elif reward < 0:
      reward = -1

    self.t += 1 # increasing t to the next day

    if self.t >= self.data_len: # if the end of the data is reached
      self.done = True
      new_observation = self.data[self.data_len - 1]
    else:
      new_observation = self.data[self.t] # the next observation

    # Keep the attribute in step with what is handed back to the caller, so it
    # does not keep pointing at the observation from the last reset.
    self.new_observation = new_observation

    return new_observation, reward, self.done



# Defining the agent

In [None]:
class DQNAgent:
  """DQN agent with a bounded replay memory and a separate target network."""

  def __init__(self):
    # main model that gets trained every step
    self.model = self.create_model()

    # Target model used to estimate future Q-values; it starts as a copy of the
    # main model and is re-synced from it inside train().
    self.target_model = self.create_model()
    self.target_model.set_weights(self.model.get_weights())

    # The deque drops its oldest transition by itself once maxlen is reached.
    self.replay_memory = deque(maxlen = REPLAY_MEMORY_SIZE)

    self.target_update_counter = 0

  def create_model(self):
    """Build and compile the Q-network.

    Returns:
      A compiled Sequential model taking input of shape (WINDOW_SIZE, 1) and
      producing one value per action.
    """
    model = Sequential()
    # input: a window of WINDOW_SIZE prices, one feature per time step
    model.add(LSTM(units = 64, input_shape= (WINDOW_SIZE, 1)))
    model.add(Dense(units = 32, activation = 'relu'))
    model.add(Dropout(0.25))
    model.add(Dense(units = 16, activation = 'relu'))
    # One Q-value per action index (0 = wait, 1 = buy, 2 = sell). The output is
    # linear: a softmax would force the three values into probabilities summing
    # to 1, which cannot represent targets of the form reward + GAMMA * max Q.
    model.add(Dense(units = 3, activation = 'linear'))

    model.compile(optimizer= 'adam', loss='mse', metrics=['mse'])

    return model

  def update_replay_memory(self, transition):
    """Store one (state, action, reward, new_state, done) transition."""
    self.replay_memory.append(transition)

  # Queries main network for Q values given current observation space (environment state)
  def get_qs(self, state):
    """Return the main network's Q-values for a single state.

    Args:
      state: one observation window; it is reshaped to (1, timesteps, 1) to
        match the model's input.

    Returns:
      The prediction for a batch of one, i.e. an array with one row of Q-values.
    """
    batch_of_one = np.asarray(state).reshape(1, -1, 1)
    return self.model.predict(batch_of_one, verbose = 0)

  def train(self, terminal_state, step):
    """Fit the main network on one minibatch sampled from the replay memory.

    Does nothing until MIN_REPLAY_MEMORY_SIZE transitions have been stored.

    Args:
      terminal_state: unused; kept so existing callers keep working.
      step: unused; kept so existing callers keep working.
    """
    if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
      return

    minibatch = random.sample(self.replay_memory, MINIBATCH_SIZE)

    # Shape both state batches explicitly as (batch, timesteps, 1) for the LSTM.
    current_states = np.array([transition[0] for transition in minibatch])
    current_states = current_states.reshape(len(minibatch), -1, 1)
    current_qs_list = self.model.predict(current_states, verbose = 0)

    next_states = np.array([transition[3] for transition in minibatch])
    next_states = next_states.reshape(len(minibatch), -1, 1)
    future_qs_list = self.target_model.predict(next_states, verbose = 0)

    # Overwrite only the Q-value of the action that was actually taken; the
    # other outputs keep the network's own prediction as their target.
    for index, (_state, action, reward, _next_state, done) in enumerate(minibatch):
      if not done:
        new_q = reward + GAMMA * np.max(future_qs_list[index])
      else:
        new_q = reward
      current_qs_list[index][action] = new_q

    self.model.fit(current_states, current_qs_list, batch_size = MINIBATCH_SIZE, verbose = 0, shuffle = False)

    # updating to determine if we want to update target_model yet
    self.target_update_counter += 1

    if self.target_update_counter > UPDATE_TARGET_EVERY:
      self.target_model.set_weights(self.model.get_weights())
      self.target_update_counter = 0

# Training the agent

In [None]:
env = StockEnvironment(X_train)
# Rewind the environment and show the first observation it exposes.
# The attribute is printed rather than the return value of reset(), so the
# output is the observation itself.
env.reset()
print(env.new_observation)
# short example of what StockEnvironment returns (the next observation, the reward, the end goal reached?)
# for _ in range(3):
#     pact = np.random.randint(3)
#     print(pact)
#     print(env.step(pact))

In [None]:
ep_rewards = [-200]

agent = DQNAgent()

# Each episode is one pass of the agent through the environment.
for episode in tqdm(range(1, EPISODES + 1), ascii=True, unit='episodes'):

    # Per-episode bookkeeping
    episode_reward = 0
    step = 1

    print(step)
    # Rewind the environment and pick up its first observation
    env.reset()
    current_state = env.new_observation

    done = False
    while not done:
    #while step < EPOCH_NUM:

        # Epsilon-greedy choice: explore with probability epsilon, otherwise
        # take the action with the highest predicted Q-value.
        if np.random.random() <= epsilon:
            action = np.random.randint(3)
        else:
            action = np.argmax(agent.get_qs(current_state))

        new_state, reward, done = env.step(action)
        episode_reward += reward

        # if SHOW_PREVIEW and not episode % AGGREGATE_STATS_EVERY:
        #     env.render()

        # Store the transition, then train the main network on a sampled minibatch
        agent.update_replay_memory((current_state, action, reward, new_state, done))
        agent.train(done, step)

        # Progress trace every 25 steps
        if not step % 25:
            print(step)

        current_state = new_state
        step += 1

        # Decay epsilon, never going below MIN_EPSILON
        if epsilon > MIN_EPSILON:
            epsilon = max(MIN_EPSILON, epsilon * EPSILON_DECAY)

    # Record the total reward collected in this episode
    ep_rewards.append(episode_reward)
    # if not episode % AGGREGATE_STATS_EVERY or episode == 1:
    #     average_reward = sum(ep_rewards[-AGGREGATE_STATS_EVERY:])/len(ep_rewards[-AGGREGATE_STATS_EVERY:])
    #     min_reward = min(ep_rewards[-AGGREGATE_STATS_EVERY:])
    #     max_reward = max(ep_rewards[-AGGREGATE_STATS_EVERY:])

        # Save model, but only when min reward is greater or equal a set value
        # if min_reward >= MIN_REWARD:
        #     agent.model.save(f'models/{MODEL_NAME}__{max_reward:_>7.2f}max_{average_reward:_>7.2f}avg_{min_reward:_>7.2f}min__{int(time.time())}.model')

# Persist the trained main network once all episodes are finished
agent.model.save("./model.h5")