In [None]:
    !pip install tensorflow-gpu==2.4.1
     
    !pip install pandas-datareader
     
    import math
    import random
    import numpy as np
    import pandas as pd
    import tensorflow as tf
    import matplotlib.pyplot as plt
    import pandas_datareader as data_reader
     
    from tqdm import tqdm_notebook, tqdm
    from collections import deque
     
    class AITrader:
      """Epsilon-greedy deep Q-learning agent with three actions.

      Holds a bounded replay memory of (state, action, reward, next_state, done)
      transitions, an inventory list that the training loop fills, and a dense
      Keras network that outputs one Q-value per action.
      """

      def __init__(self, state_size, action_space=3, model_name="AITrader"):
        """
          Parameters:
            state_size: int - number of features in one state vector
            action_space: int - number of actions (0 = stay, 1 = buy, 2 = sell)
            model_name: string - label stored on the instance
        """
        self.state_size = state_size
        self.action_space = action_space  # Stay, Buy, Sell
        # Bounded replay memory: once 2000 transitions are stored, appending a
        # new one silently drops the oldest.
        self.memory = deque(maxlen=2000)
        self.inventory = []
        self.model_name = model_name

        self.gamma = 0.95           # discount applied to the next state's best Q-value
        self.epsilon = 1.0          # probability of taking a random action
        self.epsilon_final = 0.01   # epsilon stops decaying once it is at or below this
        self.epsilon_decay = 0.995  # multiplicative decay applied per batch_train call

        # A compiled Sequential model from Keras
        self.model = self.model_builder()

      def model_builder(self):
        """
          Build and compile the Q-network.

          Returns: compiled tf.keras Sequential model taking `state_size` inputs
                   and producing `action_space` linear outputs
        """
        model = tf.keras.models.Sequential()

        # Input layer followed by two progressively wider hidden layers
        model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
        model.add(tf.keras.layers.Dense(units=64, activation='relu'))
        model.add(tf.keras.layers.Dense(units=128, activation='relu'))

        # Output layer: one unit per action, linear because the outputs are
        # regression targets (Q-values), not class probabilities.
        model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))

        model.compile(
          loss='mse',
          optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
        )
        return model

      def trade(self, state):
        """
          Pick an action for `state` using the epsilon-greedy rule.

          Parameters: state - model input for a single step (one row)
          Returns: int - index of the chosen action
        """
        # Explore: with probability epsilon pick any action uniformly at random
        if random.random() <= self.epsilon:
          return random.randrange(self.action_space)

        # Exploit: pick the action with the highest predicted Q-value
        actions = self.model.predict(state)
        return np.argmax(actions[0])

      def batch_train(self, batch_size):
        """
          Fit the Q-network on the most recent `batch_size` transitions in memory,
          then decay epsilon once.

          Parameters: batch_size - int, number of latest transitions to train on
          Returns: None
        """
        # Take exactly the last `batch_size` entries (fewer if memory is shorter).
        # The deque is copied to a list because deques cannot be sliced.
        recent = list(self.memory)
        batch = recent[max(len(recent) - batch_size, 0):]
        if not batch:
          return

        for state, action, reward, next_state, done in batch:
          # Q(s, a) = R(s, a) + gamma * max_a' Q(s', a'); terminal steps have no
          # future term.
          q_target = reward
          if not done:
            next_q = self.model.predict(next_state)[0]
            q_target = reward + self.gamma * np.amax(next_q)

          # Only the taken action's output is moved toward the target; the other
          # outputs keep their current predictions so they contribute no loss.
          target = self.model.predict(state)
          target[0][action] = q_target
          self.model.fit(state, target, epochs=1, verbose=0)

        # Decay exploration once per training call. Decaying once per sample
        # would shrink epsilon by roughly 0.995**batch_size each call and push it
        # to its floor after only a few dozen steps.
        if self.epsilon > self.epsilon_final:
          self.epsilon *= self.epsilon_decay
     
    # Helper functions
    def sigmoid(x):
      """
        Logistic function 1 / (1 + e^-x), evaluated without overflowing.

        Parameters: x - number (int or float)
        Returns: float in the range [0, 1]
      """
      if x >= 0:
        return 1 / (1 + math.exp(-x))
      # For negative x, exp(-x) grows without bound and math.exp raises
      # OverflowError past roughly x = -709. The algebraically equal form
      # e^x / (1 + e^x) only ever exponentiates a negative number.
      exp_x = math.exp(x)
      return exp_x / (1 + exp_x)
     
    def stocks_price_format(n):
      """
        Render an amount as a dollar string with two decimals.

        Parameters: n - number (int or float), may be negative
        Returns: string - "$ 12.34", or "- $ 12.34" when n is below zero
      """
      magnitude = abs(n)
      return f"- $ {magnitude:0.2f}" if n < 0 else f"$ {magnitude:0.2f}"
     
    def dataset_loader(stock_name):
      """
        Download the price history for a ticker and return its closing prices.

        Parameters: stock_name - string, ticker symbol handed to the data reader
        Returns: Pandas Series - the 'Close' column of the downloaded DataFrame
      """
      # Dataset is a Pandas DataFrame
      dataset = data_reader.DataReader(stock_name, data_source="yahoo")

      # The first and last index entries (dataset.index[0], dataset.index[-1])
      # give the covered date range if it is ever needed. They are deliberately
      # not computed here: nothing used them, and indexing them raises
      # IndexError when the download comes back empty.
      return dataset['Close']
     
    def state_creator(data, timestep, window_size):
      """
        Build the model input for one timestep: the sigmoid of each difference
        between consecutive prices in the window that ends at `timestep`.

        Parameters:
          data: price sequence (Pandas Series, list or array), read by position
          timestep: int - position of the last price included in the window
          window_size: int - number of prices in the window
        Returns: NumPy array of shape (1, window_size - 1)
      """
      # Work on a plain float array so every lookup below is positional. Indexing
      # a Pandas Series with bare integers is label-based when the index holds
      # integers and a deprecated fallback otherwise.
      values = np.asarray(data, dtype=float)

      # Position of the first price in the window
      starting_id = timestep - window_size + 1
      if starting_id >= 0:
        windowed_data = values[starting_id:timestep + 1]
      else:
        # The window would start before the data does: pad the front with
        # copies of the first price so the window still has window_size entries.
        padding = np.full(-starting_id, values[0])
        windowed_data = np.concatenate((padding, values[:timestep + 1]))

      # Squash each day-to-day change into (0, 1); window_size prices yield
      # window_size - 1 differences.
      state = [
        sigmoid(windowed_data[i + 1] - windowed_data[i])
        for i in range(window_size - 1)
      ]
      # Wrapped in an outer list so the result is a single-row batch
      return np.array([state])
     
    stock_name = 'AAPL'
    data = dataset_loader(stock_name)

    print(data)

    # Exploratory calls kept from learning how state_creator behaves: timestep 0
    # needs front padding, timestep 20 has a full window available.
    state_creator(data, 0, 11)
    state_creator(data, 20, 11)

    window_size = 10
    episodes = 1000
    batch_size = 32
    data_samples = len(data) - 1
    # Plain float array of the closing prices so each day's price is read by
    # position; integer indexing on the Series itself is label-based or a
    # deprecated fallback depending on the index type.
    prices = np.asarray(data, dtype=float)

    # state_creator is always called with window_size + 1 prices, which yields
    # window_size differences - the input width the network is built with.
    trader = AITrader(window_size)

    trader.model.summary()

    # Start at 1 so exactly `episodes` episodes run and the banner reads
    # 1/1000 ... 1000/1000.
    for episode in range(1, episodes + 1):
      print(f"Episode: {episode}/{episodes}")
      # At timestep 0 the whole window is padding plus the first price
      state = state_creator(data, 0, window_size + 1)
      total_profit = 0
      trader.inventory = []
      # tqdm wraps the range and draws a progress bar as we iterate
      for t in tqdm(range(data_samples)):
        price = prices[t]
        # What will our next action be?
        action = trader.trade(state)
        # The state the agent will observe after this step; until t+1 reaches
        # the window length the front of the window is padded.
        next_state = state_creator(data, t+1, window_size + 1)
        reward = 0

        if action == 1: # Buying
          trader.inventory.append(price)
          print("AI Trader bought: ", stocks_price_format(price))

        elif action == 2 and len(trader.inventory) > 0: # Selling
          # N.B.: Selling can only occur if trader has anything to sell
          # pop(0) sells the OLDEST purchase still held (first in, first out)
          buy_price = trader.inventory.pop(0)
          # The reward is the profit on this sale; losses are clipped to zero
          reward = max(price - buy_price, 0)
          # The running total tracks the real result, losses included
          total_profit += price - buy_price
          print("AI Trader sold: ", stocks_price_format(price), " Profit: " + stocks_price_format(price - buy_price) )

        # The episode ends on the last sample
        done = t == data_samples - 1

        # Store the transition so batch_train can replay it
        trader.memory.append((state, action, reward, next_state, done))
        # The date has moved forward: the next state becomes the current one
        state = next_state

        if done:
          print("########################")
          print("TOTAL PROFIT: {}".format(total_profit))
          print("########################")

        # Train once enough transitions have been collected
        if len(trader.memory) > batch_size:
          trader.batch_train(batch_size)