## Stage 1: Installing dependencies and environment setup


In [34]:
!pip install tensorflow



In [35]:
!pip install pandas-datareader



## Stage 2: Importing project dependencies

In [36]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader #to read trading data

from tqdm import tqdm_notebook, tqdm
from collections import deque #deque data structure

In [37]:
tf.__version__

'2.17.0'

## Stage 3: Building the AI Trader network

In [38]:
class AI_Trader():

  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell

    self.state_size = state_size #state_size
    self.action_space = action_space #actionspace
    #Experience Replay memory
    self.memory = deque(maxlen=2000)#We want to store 2000 elements
    self.inventory = [] #this list will hold all bought stocks since we cannot sell a stock we haven't bought before.
    self.model_name = model_name #model name

    #action selection parameters
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995


    self.model = self.model_builder() #calling the model function

  #model building or brain
  def model_builder(self):

    model = tf.keras.models.Sequential() #sequential layers

    #Use 32 in first layer, then 64 in second and 128 in third layer

    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size)) #state_size is the input dimension here

    model.add(tf.keras.layers.Dense(units=64, activation='relu'))

    model.add(tf.keras.layers.Dense(units=128, activation='relu'))

    #output layer will have same number of neuron as  the action space (buy/sell/stay)
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear')) #as we will use mean_square_error as  our loss, we use  linear as activation.
    #compiling
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)) #meanSquaredError used; also use the Adam optimizer but with a small learning rate rather than the default one
    #as it's a regression task rather than a classification, we won't use metrics

    return model



  def trade(self, state):

    #random action
    if random.random() <= self.epsilon: #depending on the epsilon value, we choose random action
      return random.randrange(self.action_space) #return random action

    #use model to choose an action
    actions = self.model.predict(state)

    #we are going to use our NP dot argmax to return only an action which has the highest probability as the argument for the argmax.
    return np.argmax(actions[0]) #We will put actions of zero because of the output shape



  def batch_train(self, batch_size):

    batch = [] # select the data from the experience replay memory.
    #Then we have to iterate through the memory.
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)): #sample from the end of the memory
      batch.append(self.memory[i]) #Append element from the memory itself


    #At this point, we have the batch of data and now it's time to iterate through it and to train the model for each sample from that batch
    for state, action, reward, next_state, done in batch:
      reward = reward
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])

      target = self.model.predict(state)
      target[0][action] = reward

      #fit the model
      self.model.fit(state, target, epochs=1, verbose=0) #For our target epochs.Just set to one because we will train the model very often on each sample from our batch.
      #We don't want to print all of these training results, so just put verbose is equal to zero.

    #At the end of this function, let's decrease the epsilon parameter so we can stop performing random actions at one point.
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

## Stage 4: Dataset preprocessing

### Defining helper functions

#### Sigmoid

In [39]:
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

#### Price format function

In [40]:
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

#### Dataset loader

In [41]:
!pip install yfinance --upgrade
!pip install pandas-datareader --upgrade

import yfinance as yf
import pandas_datareader.data as data_reader

#Experiment your stock name instead of "AAPL"
#dataset = yf.download("AAPL", progress=False)
#dataset



In [42]:
def dataset_loader(stock_name):

  # Using yfinance to download data
  dataset = yf.download(stock_name, progress=False)

  #We use the starting date (dataset.index[0]) and end date (dataset.index[-1])
  start_date = str(dataset.index[0]).split()[0]
  end_date = str(dataset.index[-1]).split()[0]

  close = dataset['Close'] #we are working with the Close value from the dataset

  return close

### State creator

In [50]:
def state_creator(data, timestep, window_size):
  starting_id = timestep - window_size + 1

  if starting_id >= 0:
    windowed_data = data.iloc[starting_id:timestep+1, :].values # Convert to numpy array for consistency
  else:
    windowed_data = - starting_id * [data.iloc[0, :].values] + list(data.iloc[0:timestep+1, :].values) # Convert to numpy array

  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))

  return np.array([state]) # Return state as a numpy array

### Loading a dataset

In [44]:
stock_name = "AAPL"
data = dataset_loader(stock_name)

In [45]:
data

Ticker,AAPL
Date,Unnamed: 1_level_1
1980-12-12 00:00:00+00:00,0.128348
1980-12-15 00:00:00+00:00,0.121652
1980-12-16 00:00:00+00:00,0.112723
1980-12-17 00:00:00+00:00,0.115513
1980-12-18 00:00:00+00:00,0.118862
...,...
2024-10-29 00:00:00+00:00,233.669998
2024-10-30 00:00:00+00:00,230.100006
2024-10-31 00:00:00+00:00,225.910004
2024-11-01 00:00:00+00:00,222.910004


## Stage 5: Training the AI Trader

### Setting hyper parameters

In [46]:
window_size = 10
episodes = 1000

batch_size = 32
data_samples = len(data) - 1

### Defining the Trader model

In [47]:
trader = AI_Trader(window_size)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [48]:
trader.model.summary()

### Training loop

In [None]:
for episode in range(1, episodes + 1):

  print("Episode: {}/{}".format(episode, episodes))

  state = state_creator(data, 0, window_size + 1)

  total_profit = 0
  trader.inventory = []

  for t in tqdm(range(data_samples)):

    action = trader.trade(state)

    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0

    if action == 1: #Buying
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))

    elif action == 2 and len(trader.inventory) > 0: #Selling
      buy_price = trader.inventory.pop(0)

      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )

    if t == data_samples - 1:
      done = True
    else:
      done = False

    trader.memory.append((state, action, reward, next_state, done))

    state = next_state

    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")

    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)

  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))

  return 1 / (1 + math.exp(-x))


Episode: 1/1000


  0%|          | 0/11065 [00:00<?, ?it/s]

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 26ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27