## Stage 1: Installing dependencies and environment setup for stock trading

In [0]:
## Google just rolled out a Colab-only successor to the 2.0.0 beta install (see the commented-out magic below).
# Might have to fix this later to keep things working.
# The version still outputs 2.0.0-beta1, so I'll stick with the standard pip install for now.
# That way I can, in theory, move the notebook to plain Jupyter later if need be.

# Install the pinned TensorFlow 2.0 beta build, then import it.
# NOTE(review): `!pip` is an IPython shell escape, so a failed install presumably does not raise a
# Python exception here; the try/except would only matter if the Colab magic is re-enabled — confirm.
try:
  #%tensorflow_version 2.x  # Colab only.
  !pip install -q tensorflow-gpu==2.0.0-beta1
except Exception:
  pass
import tensorflow as tf
# pandas-datareader is imported below as `data_reader` and used to download price data.
!pip install pandas-datareader

## Step 2 Import Dependencies. 

In [0]:
import math #need for sigmoid functions
import random # generating random numbers
import numpy as np # helps with arrays and matrices
import pandas as pd #reading dataframes and reading the csv we will use
import tensorflow as tf # the big guns
import matplotlib.pyplot as plt #data visualization
import pandas_datareader as data_reader #download and use stock info

from tqdm import tqdm_notebook, tqdm #visualizes our progress
from collections import deque # implements experience replay.
tf.__version__

## Step 3 building an AI network

In [0]:
class AI_Trader():
  """Q-learning trading agent backed by a small dense Keras network.

  The network maps a state vector of length `state_size` to one Q-value per
  action. Actions are integer indices in `range(action_space)`; the training
  loop in this notebook treats 1 as "buy", 2 as "sell" and anything else as
  "stay".
  """

  def __init__(self, state_size, action_space = 3, model_name = "AITrader"):
    """Set up replay memory, exploration parameters and the Q-network.

    Args:
      state_size: length of the state vector fed to the network.
      action_space: number of discrete actions (default 3: stay, buy, sell).
      model_name: label stored on the instance as `model_name`.
    """
    self.state_size = state_size
    self.action_space = action_space

    # Experience replay memory: a bounded FIFO, the oldest entries are dropped
    # once 2000 transitions are stored.
    self.memory = deque(maxlen=2000)
    # Purchase prices of the stocks currently held.
    self.inventory = []
    self.model_name = model_name

    # Discount factor applied to the predicted value of the next state.
    self.gamma = 0.95
    # Probability of taking a random action; starts at 1.0 (fully random)
    # because the model is untrained.
    self.epsilon = 1.0
    # Lower bound for epsilon; decay stops once it is reached.
    self.epsilon_final = 0.01
    # Multiplicative decay applied after every batch_train call (must be < 1).
    self.epsilon_decay = 0.995

    self.model = self.model_builder()

  def model_builder(self):
    """Build and compile the Q-network.

    Returns:
      A compiled `tf.keras` Sequential model with three ReLU hidden layers
      (32, 64, 128 units) and a linear output of `action_space` units.
    """
    model = tf.keras.models.Sequential()

    # Hidden layers.
    model.add(tf.keras.layers.Dense(units = 32, activation='relu', input_dim = self.state_size))
    model.add(tf.keras.layers.Dense(units = 64, activation='relu'))
    model.add(tf.keras.layers.Dense(units = 128, activation='relu'))

    # Output layer: linear activation because the outputs are regressed
    # Q-values trained with mean squared error.
    model.add(tf.keras.layers.Dense(units = self.action_space, activation = 'linear'))

    # Regression, not classification, so no accuracy metric is requested.
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    model.compile(loss='mse', optimizer = tf.keras.optimizers.Adam(learning_rate=0.001))

    return model

  def trade(self, state):
    """Choose an action for `state` using an epsilon-greedy policy.

    Args:
      state: model input for a single state (a batch of one).

    Returns:
      A random action index with probability `epsilon`, otherwise the index of
      the largest predicted Q-value.
    """
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)

    # verbose=0 keeps newer Keras versions from printing a progress bar per call.
    actions = self.model.predict(state, verbose=0)
    return np.argmax(actions[0])

  def batch_train(self, batch_size):
    """Fit the model on the most recent `batch_size` transitions in memory.

    Each transition is `(state, action, reward, next_state, done)`. The
    training target for the taken action is the reward, plus the discounted
    best predicted Q-value of `next_state` when the transition is not terminal.
    Epsilon is decayed once per call and never drops below `epsilon_final`.

    Args:
      batch_size: number of most recent transitions to train on. If fewer are
        stored, all stored transitions are used.
    """
    memory_size = len(self.memory)
    # Take exactly the last `batch_size` entries; clamp at 0 so a short memory
    # does not produce negative indices that wrap around and duplicate samples.
    first_index = max(0, memory_size - batch_size)
    batch = [self.memory[i] for i in range(first_index, memory_size)]

    for state, action, reward, next_state, done in batch:
      # Terminal transitions use the raw reward as the target.
      target_value = reward
      if not done:
        # Non-terminal: add the discounted best Q-value of the next state.
        target_value = reward + self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])

      # Start from the current predictions so only the taken action's Q-value changes.
      target = self.model.predict(state, verbose=0)
      target[0][action] = target_value

      self.model.fit(state, target, epochs=1, verbose=0)

    # Decay exploration, but never below the configured floor.
    if self.epsilon > self.epsilon_final:
      self.epsilon = max(self.epsilon_final, self.epsilon * self.epsilon_decay)

## Step 4 Dataset Preprocessing

### Sigmoid

In [0]:
# Squashes any real number into the range 0..1 (not just the two endpoints).
def sigmoid(x):
  """Return the logistic sigmoid of `x`, i.e. 1 / (1 + e^-x).

  Used to scale day-to-day price differences into a bounded range.

  Args:
    x: a real number.

  Returns:
    A float between 0 and 1. Very large negative inputs return 0.0 instead of
    raising OverflowError.
  """
  if x >= 0:
    return 1 / (1 + math.exp(-x))
  # For negative x, exp(-x) overflows once x is below roughly -709; the
  # algebraically equivalent form below only ever exponentiates a negative number.
  exp_x = math.exp(x)
  return exp_x / (1 + exp_x)

### Price format function

In [0]:
def stocks_price_format(n):
  """Format a price or profit as a dollar string with two decimals.

  Args:
    n: numeric amount; may be negative (e.g. a loss).

  Returns:
    "- $ <abs(n)>" for negative amounts, "$ <n>" otherwise, rounded to cents.
  """
  # ".2f" fixes the precision at two decimals; the previous "2f" only set a
  # minimum width of 2 and printed six decimals.
  if n < 0:
    return "- $ {0:.2f}".format(abs(n))
  return "$ {0:.2f}".format(abs(n))

### Dataset loader

In [0]:
#dataset = data_reader.DataReader("AAPL", data_source='yahoo')

In [0]:
def dataset_loader(stock_name):
  """Download price data for `stock_name` and return its 'Close' column.

  Args:
    stock_name: ticker symbol passed to `data_reader.DataReader`.

  Returns:
    The 'Close' column of the frame returned by DataReader with
    data_source='yahoo'.
  """
  dataset = data_reader.DataReader(stock_name, data_source='yahoo')

  # The first/last index dates were previously computed here but never used,
  # so they have been dropped.
  return dataset['Close']

In [0]:
# Download the unfiltered DataReader result for AAPL and preview its first rows.
dataset = data_reader.DataReader("AAPL", data_source='yahoo')
dataset.head()

### State creator

In [0]:
def state_creator(data, timestep, window_size):
  """Build the model input for `timestep` from a window of past prices.

  The window covers the `window_size` prices ending at `timestep` (inclusive).
  When fewer prices are available, the window is left-padded with the first
  price. Each consecutive price difference is passed through `sigmoid`.

  Args:
    data: price sequence (list, NumPy array or pandas Series).
    timestep: position of the most recent price in the window.
    window_size: number of prices in the window.

  Returns:
    NumPy array of shape (1, window_size - 1) holding the squashed differences.
  """
  starting_id = timestep - window_size + 1

  # Materialise the window as a plain list so the element lookups below are
  # always positional. Integer keys on a date-indexed pandas Series rely on a
  # deprecated positional fallback.
  if starting_id >= 0:
    windowed_data = list(data[starting_id: timestep+1])
  else:
    first_price = data.iloc[0] if hasattr(data, "iloc") else data[0]
    # Not enough history yet: repeat the first price to fill the window.
    windowed_data = -starting_id * [first_price] + list(data[0: timestep+1])

  state = [
    sigmoid(windowed_data[i+1] - windowed_data[i])
    for i in range(window_size - 1)
  ]

  return np.array([state])

### Loading a dataset

In [0]:
stock_name = "AAPL" # ticker symbol passed to dataset_loader
data = dataset_loader(stock_name) # the 'Close' column returned by dataset_loader

## Stage 5: Training the AI Trader

### Setting hyper parameters

In [0]:
window_size = 10 # number of price differences per state; also passed to AI_Trader as state_size
episodes = 1000 # upper bound used by the training loop's episode range

batch_size = 32 # value passed to trader.batch_train; training starts once memory holds more than this
data_samples = len(data) - 1 # timesteps per episode; the loop reads data[t] and builds the next state from t+1

### Defining the Trader model.

In [0]:
# Instantiate the agent class defined above; only state_size needs to be given, and it is our window_size.
trader = AI_Trader(window_size)

In [0]:
# Show the summary of the Keras model built by AI_Trader.model_builder.
trader.model.summary()

### Training Loop

In [0]:
# Work on a positional array of prices: integer lookups like data[t] on a
# date-indexed pandas Series rely on a deprecated positional fallback.
prices = np.asarray(data)

# Run exactly `episodes` episodes, numbered 1..episodes (previously 0..episodes,
# i.e. one episode too many).
for episode in range(1, episodes + 1):

  print(f"Episode: {episode}/{episodes}")

  # Initial state at timestep 0. window_size + 1 prices give window_size differences.
  state = state_creator(prices, 0, window_size + 1)

  total_profit = 0
  # Start every episode with an empty inventory of bought stocks.
  trader.inventory = []

  # tqdm renders a progress bar over the timesteps.
  for t in tqdm(range(data_samples)):
    # Action chosen by the agent (random or model-driven, depending on epsilon).
    action = trader.trade(state)

    next_state = state_creator(prices, t+1, window_size + 1)

    reward = 0
    current_price = prices[t]

    if action == 1: # buying
      trader.inventory.append(current_price)
      print("AI Trader bought: ", stocks_price_format(current_price))

    elif action == 2 and len(trader.inventory) > 0: # selling the oldest held stock
      buy_price = trader.inventory.pop(0)
      print("AI Trader sold: ", stocks_price_format(current_price), " Profit: " + stocks_price_format(current_price - buy_price) )

      # Losses are not penalised: the reward is floored at 0, while
      # total_profit tracks the real gain or loss.
      reward = max(current_price - buy_price, 0)
      total_profit += current_price - buy_price

    # The last timestep of the episode is the terminal state.
    done = t == data_samples - 1

    # Store the transition in the experience replay memory.
    trader.memory.append((state, action, reward, next_state, done))

    state = next_state

    if done:
      print("######################")
      print(f"TOTAL PROFIT: {total_profit}")
      print("######################")

    # Train once the replay memory holds more than one batch of transitions.
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)

  # Checkpoint every 10 episodes. The filename must end in ".h5" with no
  # trailing space for Keras to recognise the HDF5 extension.
  if episode % 10 == 0:
    trader.model.save(f"ai_trader_{episode}.h5")