### Arhum Zafar
<br>
<br>
A brief attempt of using reinforcement learning strategies to make a quick trading agent.

In [1]:
# %watermark

In [2]:
#!pip install tensorflow-gpu==2.0.0.alpha0|
from src.config.config import Config
from src.db_writer.db import DB

### Dependencies

In [3]:
import math
import random
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas_datareader as data_reader

from tqdm import tqdm_notebook, tqdm
from collections import deque

In [4]:
tf.__version__

'2.9.0'

#### Building the trading network

In [5]:
class AI_Trader():
  
  def __init__(self, state_size, action_space=3, model_name="AITrader"): #Stay, Buy, Sell
    
    self.state_size = state_size
    self.action_space = action_space
    self.memory = deque(maxlen=2000)
    self.inventory = []
    self.model_name = model_name
    
    self.gamma = 0.95
    self.epsilon = 1.0
    self.epsilon_final = 0.01
    self.epsilon_decay = 0.995
    
    self.model = self.model_builder()
    
  def model_builder(self):
    
    model = tf.keras.models.Sequential()
    
    model.add(tf.keras.layers.Dense(units=32, activation='relu', input_dim=self.state_size))
    
    model.add(tf.keras.layers.Dense(units=64, activation='relu'))
    
    model.add(tf.keras.layers.Dense(units=128, activation='relu'))
    
    model.add(tf.keras.layers.Dense(units=self.action_space, activation='linear'))
    
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(lr=0.001))
    
    return model
  
  def trade(self, state):
    
    if random.random() <= self.epsilon:
      return random.randrange(self.action_space)
    
    actions = self.model.predict(state)
    return np.argmax(actions[0])
  
  
  def batch_train(self, batch_size):
    
    batch = []
    for i in range(len(self.memory) - batch_size + 1, len(self.memory)):
      batch.append(self.memory[i])
      
    for state, action, reward, next_state, done in batch:
      reward = reward
      if not done:
        reward = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
        
      target = self.model.predict(state)
      target[0][action] = reward
      
      self.model.fit(state, target, epochs=1, verbose=0)
      
    if self.epsilon > self.epsilon_final:
      self.epsilon *= self.epsilon_decay

#### Preprocessing

In [6]:
#Sigmoid
def sigmoid(x):
  return 1 / (1 + math.exp(-x))

#Price Format Function
def stocks_price_format(n):
  if n < 0:
    return "- $ {0:2f}".format(abs(n))
  else:
    return "$ {0:2f}".format(abs(n))

#### Load Dataset

In [7]:
def dataset_loader(stock_name):
  
  #Complete the dataset loader function
  config = Config()
  db = DB(config)
  funding_data = pd.read_sql('SELECT * FROM `binance`.funding_data;', con=db.con)
  funding_data = funding_data.sort_values(by='timestamp')
  funding_data

  # dataset = data_reader.DataReader(stock_name, data_source="yahoo")
  
  # start_date = str(dataset.index[0]).split()[0]
  # end_date = str(dataset.index[-1]).split()[0]
  
  close = funding_data['mark_price']
  return close

#### State Creator

In [30]:
def state_creator(data, timestep, window_size):
  
  starting_id = timestep - window_size + 1
  #TODO some shit when starting_id == -1 .KeyError: 0
  print(starting_id)
  print(data[0])
  if starting_id >= 0:
    windowed_data = data[starting_id:timestep+1]
  else:
    windowed_data = - starting_id * [data[0]] + list(data[0:timestep+1])
  windowed_data = list(windowed_data)
  state = []
  for i in range(window_size - 1):
    state.append(sigmoid(windowed_data[i+1] - windowed_data[i]))
    
  return np.array([state])

#### Loading a dataset for a stock

In [31]:
stock_name = "AAPL"
data = dataset_loader(stock_name)

### Training

In [32]:
# hyperparameters

window_size = 10
episodes = 1000

batch_size = 32
data_samples = len(data) - 1 - window_size

In [33]:
trader = AI_Trader(window_size)
trader.model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 32)                352       
                                                                 
 dense_13 (Dense)            (None, 64)                2112      
                                                                 
 dense_14 (Dense)            (None, 128)               8320      
                                                                 
 dense_15 (Dense)            (None, 3)                 387       
                                                                 
Total params: 11,171
Trainable params: 11,171
Non-trainable params: 0
_________________________________________________________________


In [34]:
for episode in range(1, episodes + 1):
  
  print("Episode: {}/{}".format(episode, episodes))
  
  state = state_creator(data, 0, window_size + 1)
  
  total_profit = 0
  trader.inventory = []
  
  for t in tqdm(range(data_samples)):
    
    action = trader.trade(state)
    print((t+1, window_size + 1))
    next_state = state_creator(data, t+1, window_size + 1)
    reward = 0
    
    if action == 1: #Buying
      trader.inventory.append(data[t])
      print("AI Trader bought: ", stocks_price_format(data[t]))
      
    elif action == 2 and len(trader.inventory) > 0: #Selling
      buy_price = trader.inventory.pop(0)
      
      reward = max(data[t] - buy_price, 0)
      total_profit += data[t] - buy_price
      print("AI Trader sold: ", stocks_price_format(data[t]), " Profit: " + stocks_price_format(data[t] - buy_price) )
      
    if t == data_samples - 1:
      done = True
    else:
      done = False
      
    trader.memory.append((state, action, reward, next_state, done))
    
    state = next_state
    
    if done:
      print("########################")
      print("TOTAL PROFIT: {}".format(total_profit))
      print("########################")
    
    if len(trader.memory) > batch_size:
      trader.batch_train(batch_size)
      
  if episode % 10 == 0:
    trader.model.save("ai_trader_{}.h5".format(episode))
    

Episode: 1/1000
-10
22418.8


  0%|          | 0/116824 [00:00<?, ?it/s]

(1, 11)
-9
22418.8
(2, 11)
-8
22418.8
(3, 11)
-7
22418.8
(4, 11)
-6
22418.8
(5, 11)
-5
22418.8
(6, 11)
-4
22418.8
AI Trader bought:  $ 22339.300000
(7, 11)
-3
22418.8
AI Trader bought:  $ 22343.200000
(8, 11)
-2
22418.8
AI Trader sold:  $ 22347.800000  Profit: $ 8.500000
(9, 11)
-1
22418.8
AI Trader bought:  $ 22346.500000
(10, 11)
0
22418.8
AI Trader bought:  $ 22347.200000
(11, 11)
1
22418.8
AI Trader bought:  $ 22342.000000
(12, 11)
2
22418.8
(13, 11)
3
22418.8
AI Trader sold:  $ 22345.400000  Profit: $ 2.200000
(14, 11)
4
22418.8
AI Trader bought:  $ 22345.400000
(15, 11)
5
22418.8
(16, 11)
6
22418.8
AI Trader bought:  $ 22345.500000
(17, 11)
7
22418.8
AI Trader bought:  $ 22336.900000
(18, 11)
8
22418.8
AI Trader sold:  $ 22337.700000  Profit: - $ 8.800000
(19, 11)
9
22418.8
AI Trader sold:  $ 22337.700000  Profit: - $ 9.500000
(20, 11)
10
22418.8
AI Trader sold:  $ 22335.900000  Profit: - $ 6.100000
(21, 11)
11
22418.8
AI Trader sold:  $ 22338.000000  Profit: - $ 7.400000
(22, 11

  0%|          | 33/116824 [00:09<9:06:19,  3.56it/s]

(34, 11)
24
22418.8
AI Trader bought:  $ 22330.800000


  0%|          | 34/116824 [00:18<21:28:26,  1.51it/s]

(35, 11)
25
22418.8
AI Trader sold:  $ 22329.600000  Profit: - $ 4.900000


  0%|          | 35/116824 [00:28<37:49:43,  1.17s/it]

(36, 11)
26
22418.8


  0%|          | 36/116824 [00:37<57:20:24,  1.77s/it]

(37, 11)
27
22418.8
AI Trader bought:  $ 22329.700000


  0%|          | 37/116824 [00:46<79:54:07,  2.46s/it]

(38, 11)
28
22418.8
AI Trader bought:  $ 22328.900000


  0%|          | 38/116824 [00:53<96:19:31,  2.97s/it]

(39, 11)
29
22418.8
AI Trader bought:  $ 22330.400000


  0%|          | 39/116824 [00:58<107:21:47,  3.31s/it]

(40, 11)
30
22418.8
AI Trader bought:  $ 22321.600000


  0%|          | 40/116824 [01:06<131:31:05,  4.05s/it]

(41, 11)
31
22418.8


  0%|          | 41/116824 [01:15<164:24:04,  5.07s/it]

(42, 11)
32
22418.8
AI Trader sold:  $ 22329.900000  Profit: - $ 5.900000


  0%|          | 42/116824 [01:24<193:26:12,  5.96s/it]

(43, 11)
33
22418.8
AI Trader sold:  $ 22329.900000  Profit: - $ 2.300000


  0%|          | 43/116824 [01:33<219:45:58,  6.77s/it]

(44, 11)
34
22418.8


  0%|          | 44/116824 [01:43<240:44:07,  7.42s/it]

(45, 11)
35
22418.8


  0%|          | 45/116824 [01:52<254:34:39,  7.85s/it]

(46, 11)
36
22418.8
AI Trader sold:  $ 22318.000000  Profit: - $ 12.800000


  0%|          | 46/116824 [02:01<266:28:48,  8.21s/it]

(47, 11)
37
22418.8
AI Trader bought:  $ 22327.000000


  0%|          | 47/116824 [02:10<274:32:09,  8.46s/it]

(48, 11)
38
22418.8
AI Trader bought:  $ 22327.800000


  0%|          | 48/116824 [02:20<284:41:41,  8.78s/it]

(49, 11)
39
22418.8
AI Trader bought:  $ 22328.700000


  0%|          | 49/116824 [02:29<289:36:30,  8.93s/it]

(50, 11)
40
22418.8
AI Trader bought:  $ 22328.600000


  0%|          | 50/116824 [02:39<298:30:47,  9.20s/it]

(51, 11)
41
22418.8
AI Trader sold:  $ 22334.800000  Profit: $ 5.100000


  0%|          | 51/116824 [02:48<302:01:03,  9.31s/it]

(52, 11)
42
22418.8


  0%|          | 52/116824 [02:57<300:32:36,  9.27s/it]

(53, 11)
43
22418.8


  0%|          | 53/116824 [03:07<298:43:48,  9.21s/it]

(54, 11)
44
22418.8
AI Trader bought:  $ 22337.800000


  0%|          | 54/116824 [03:16<298:49:17,  9.21s/it]

(55, 11)
45
22418.8
AI Trader bought:  $ 22336.000000


  0%|          | 55/116824 [03:25<298:38:42,  9.21s/it]

(56, 11)
46
22418.8


  0%|          | 56/116824 [03:34<298:35:36,  9.21s/it]

(57, 11)
47
22418.8
AI Trader sold:  $ 22329.200000  Profit: $ 0.300000


  0%|          | 57/116824 [03:43<299:20:22,  9.23s/it]

(58, 11)
48
22418.8


  0%|          | 58/116824 [03:53<300:49:17,  9.27s/it]

(59, 11)
49
22418.8


  0%|          | 59/116824 [04:02<302:14:02,  9.32s/it]

(60, 11)
50
22418.8
AI Trader bought:  $ 22325.900000


  0%|          | 60/116824 [04:07<259:50:59,  8.01s/it]

(61, 11)
51
22418.8
AI Trader sold:  $ 22322.200000  Profit: - $ 8.200000


  0%|          | 61/116824 [04:12<228:23:26,  7.04s/it]

(62, 11)
52
22418.8
AI Trader sold:  $ 22321.800000  Profit: $ 0.200000


  0%|          | 62/116824 [04:17<207:25:13,  6.40s/it]

(63, 11)
53
22418.8
AI Trader bought:  $ 22323.800000


  0%|          | 63/116824 [04:22<190:11:52,  5.86s/it]

(64, 11)
54
22418.8
AI Trader sold:  $ 22324.300000  Profit: - $ 2.700000


  0%|          | 64/116824 [04:26<181:32:40,  5.60s/it]

(65, 11)
55
22418.8
AI Trader sold:  $ 22324.300000  Profit: - $ 3.500000


Depending on whether you have a GPU or not, the loop time will vary. As the loop iterates, you will notice how the network makes stronger predictions over time; reinforcing its behavior over while iterating, yielding greater profit.