In [1]:
import keras
from keras.models import Sequential
from keras.models import load_model
from keras.layers import LSTM
from keras.layers import Dense
from keras.optimizers import Adam
import math
import numpy as np
import random
from collections import deque
import pandas as pd

In [2]:
class Agent:
   def __init__(self, state_size, is_eval=False, model_name=""):
        self.state_size = state_size # normalized previous minites
        self.action_size = 3 # sit, buy, sell
        self.memory = deque(maxlen=1000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = load_model(model_name) if is_eval else self._model()
   
   def _model(self):
      model = Sequential()
      model.add(Dense(units=100, input_dim=self.state_size, activation="relu"))
      model.add(Dense(units=50, activation="relu"))
      model.add(Dense(units=25, activation="relu"))
      model.add(Dense(self.action_size, activation="linear"))
      model.compile(loss="mse", optimizer=Adam(lr=0.001))
      return model
   
   def act(self, state):
      if not self.is_eval and random.random()<= self.epsilon:
          return random.randrange(self.action_size)
      options = self.model.predict(state)
      return np.argmax(options[0])
   
   def expReplay(self, batch_size):
      mini_batch = []
      l = len(self.memory)
      for i in range(l - batch_size + 1, l):
          mini_batch.append(self.memory[i])
      for state, action, reward, next_state, done in mini_batch:
          target = reward
          if not done:
              target = reward + self.gamma * np.amax(self.model.predict(next_state)[0])
          target_f = self.model.predict(state)
          target_f[0][action] = target
          self.model.fit(state, target_f, epochs=1, verbose=0)
      if self.epsilon > self.epsilon_min:
          self.epsilon *= self.epsilon_decay

In [3]:
def formatPrice(n):
  return("-USD." if n<0 else "USD.")+"{0:.2f}".format(abs(n))

def getStockDataVec():
  vec = []
  df = pd.read_csv("Train.csv")
  df.values.tolist()
  return vec 

def sigmoid(x):
  return 1/(1+math.exp(-x))

def getState(data, t, n):
  d = t - n + 1
  block = data[d:t + 1] if d >= 0 else -d * [data[0]] + data[0:t + 1] # pad with t0
  res = []
  for i in range(n - 1):
      res.append(sigmoid(block[i + 1] - block[i]))
  return np.array([res])

In [4]:
import sys

window_size = 0

episode_count = 2000

agent = Agent(window_size)

data = getStockDataVec()

l = len(data) - 1

batch_size = 32

for e in range(episode_count + 1):

    print("Episode " + str(e) + "/" + str(episode_count))

    state = getState(data, 0, window_size + 1)

    total_profit = 0

    agent.inventory = []

    for t in range(l):

        action = agent.act(state)

        # sit

        next_state = getState(data, t + 1, window_size + 1)

        reward = 0

        if action == 1: # buy

            agent.inventory.append(data[t])


        elif action == 2 and len(agent.inventory) > 0: # sell

            bought_price = window_size_price = agent.inventory.pop(0)

            reward = max(data[t] - bought_price, 0)

            total_profit += data[t] - bought_price

        done = True if t == l - 1 else False

        agent.memory.append((state, action, reward, next_state, done))

        state = next_state

        if done:

            print("--------------------------------")

            print("Total Profit: " + formatPrice(total_profit))

            print("--------------------------------")

        if len(agent.memory) > batch_size:

            agent.expReplay(batch_size)

Episode 0/2000
Episode 1/2000
Episode 2/2000
Episode 3/2000
Episode 4/2000
Episode 5/2000
Episode 6/2000
Episode 7/2000
Episode 8/2000
Episode 9/2000
Episode 10/2000
Episode 11/2000
Episode 12/2000
Episode 13/2000
Episode 14/2000
Episode 15/2000
Episode 16/2000
Episode 17/2000
Episode 18/2000
Episode 19/2000
Episode 20/2000
Episode 21/2000
Episode 22/2000
Episode 23/2000
Episode 24/2000
Episode 25/2000
Episode 26/2000
Episode 27/2000
Episode 28/2000
Episode 29/2000
Episode 30/2000
Episode 31/2000
Episode 32/2000
Episode 33/2000
Episode 34/2000
Episode 35/2000
Episode 36/2000
Episode 37/2000
Episode 38/2000
Episode 39/2000
Episode 40/2000
Episode 41/2000
Episode 42/2000
Episode 43/2000
Episode 44/2000
Episode 45/2000
Episode 46/2000
Episode 47/2000
Episode 48/2000
Episode 49/2000
Episode 50/2000
Episode 51/2000
Episode 52/2000
Episode 53/2000
Episode 54/2000
Episode 55/2000
Episode 56/2000
Episode 57/2000
Episode 58/2000
Episode 59/2000
Episode 60/2000
Episode 61/2000
Episode 62/2000
Ep

In [5]:
data = pd.read_csv("Train.csv").values.tolist()

In [6]:
results = []

In [7]:
i = 0
while i<=500:
  window_size = agent.model.layers[0].input.shape.as_list()[1]

  l = len(data) - 1

  batch_size = 32

  state = getState(data, 0, window_size + 1)

  print(state)

  total_profit = 0

  agent.inventory = []

  for t in range(l):
    
    action = agent.act(state)

      # sit

    next_state = getState(data, t + 1, window_size + 1)

    reward = 0

    if action == 1: # buy
      
      agent.inventory.append(data[t])

    elif action == 2 and len(agent.inventory) > 0: # sell

      bought_price = agent.inventory.pop(0)

      reward = max(data[t][4] - bought_price[4], 0)

      total_profit += data[t][4] - bought_price[4]

    done = True if t == l - 1 else False

    agent.memory.append((state, action, reward, next_state, done))

    state = next_state

    if done:
      
      print("--------------------------------")

      print("AMZN Total Profit: " + formatPrice(total_profit))

      print("--------------------------------")

      print ("Total profit is:",formatPrice(total_profit))

  results.append(total_profit)
  i+=1

[]
--------------------------------
AMZN Total Profit: USD.1396765.55
--------------------------------
Total profit is: USD.1396765.55
[]
--------------------------------
AMZN Total Profit: USD.1188429.73
--------------------------------
Total profit is: USD.1188429.73
[]
--------------------------------
AMZN Total Profit: USD.422111.03
--------------------------------
Total profit is: USD.422111.03
[]
--------------------------------
AMZN Total Profit: USD.510749.93
--------------------------------
Total profit is: USD.510749.93
[]
--------------------------------
AMZN Total Profit: USD.925539.63
--------------------------------
Total profit is: USD.925539.63
[]
--------------------------------
AMZN Total Profit: USD.602297.49
--------------------------------
Total profit is: USD.602297.49
[]
--------------------------------
AMZN Total Profit: USD.333193.62
--------------------------------
Total profit is: USD.333193.62
[]
--------------------------------
AMZN Total Profit: USD.954882

In [None]:
window_size = agent.model.layers[0].input.shape.as_list()[1]

data = pd.read_csv("Test.csv").values.tolist()

l = len(data) - 1

batch_size = 32

state = getState(data, 0, window_size + 1)

total_profit = 0

agent.inventory = []

for t in range(l):
    
  action = agent.act(state)


    # sit

  next_state = getState(data, t + 1, window_size + 1)

  reward = 0

  if action == 1: # buy
      
    agent.inventory.append(data[t])

  elif action == 2 and len(agent.inventory) > 0: # sell

    bought_price = agent.inventory.pop(0)

    reward = max(data[t][4] - bought_price[4], 0)

    total_profit += data[t][4] - bought_price[4]

  done = True if t == l - 1 else False

  agent.memory.append((state, action, reward, next_state, done))

  state = next_state

  if done:
    print("--------------------------------")
    
    print("AMZN Total Profit: " + formatPrice(total_profit))
    
    print("--------------------------------")
    
    print ("Total profit is:",formatPrice(total_profit))

  if len(agent.memory) > batch_size:
        agent.expReplay(batch_size)

In [None]:
results