In [1]:
import sys,os,math,random
import pandas as pd
import tensorflow as tf
import numpy as np
from collections import deque

# Select the GPU: order devices by PCI bus ID and expose only device "0" to CUDA.
# NOTE(review): these variables are set after `import tensorflow` above; they
# presumably only take effect if CUDA has not been initialised yet -- consider
# moving them above the TensorFlow import (TODO confirm).
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [2]:
class Agent:
    """Deep-Q trading agent backed by a small stacked-LSTM Q-network.

    The agent chooses between three actions (0 = sit, 1 = buy, 2 = sell) with
    an epsilon-greedy policy and learns from the transitions stored in
    ``memory``.

    Args:
        state_size: number of time steps in one state window; the network
            input has shape ``(state_size, 1)``.
        is_eval: when True, load a saved model from ``models/<model_name>``
            and never explore; when False, build a fresh network.
        model_name: file/directory name under ``models/`` (only used when
            ``is_eval`` is True).
    """

    def __init__(self, state_size, is_eval=False, model_name=""):
        self.state_size = state_size # normalized previous days
        self.action_size = 3 # sit, buy, sell
        self.memory = deque(maxlen=1000)  # replay buffer; oldest entries are dropped
        self.inventory = []  # purchase prices of the positions currently held
        self.model_name = model_name
        self.is_eval = is_eval

        self.gamma = 0.95           # discount factor for future rewards
        self.epsilon = 1.0          # current exploration probability
        self.epsilon_min = 0.01     # exploration floor
        self.epsilon_decay = 0.995  # multiplicative decay applied per replay call

        if is_eval:
            # `load_model` was never imported as a bare name; use the
            # fully-qualified Keras loader so evaluation mode works.
            self.model = tf.keras.models.load_model("models/" + model_name)
        else:
            self.model = self._model()

    @staticmethod
    def _as_model_input(state):
        """Reshape a ``(batch, steps)`` or ``(batch, steps, 1)`` array to ``(batch, steps, 1)``."""
        return np.reshape(state, (state.shape[0], state.shape[1], 1))

    def _model(self):
        """Build and compile the Q-network.

        Returns:
            A compiled Keras model mapping ``(batch, state_size, 1)`` inputs to
            ``(batch, action_size)`` Q-values.
        """
        inputs = tf.keras.layers.Input(shape=(self.state_size, 1), name='Input')
        lstm1 = tf.keras.layers.LSTM(32, activation='relu', return_sequences=True, name='lstm1')(inputs)
        # The last recurrent layer must return only its final step; otherwise
        # the Dense head emits one Q-vector per time step and argmax over the
        # flattened output no longer yields a valid action index.
        lstm2 = tf.keras.layers.LSTM(16, activation='relu', return_sequences=False, name='lstm2')(lstm1)
        y = tf.keras.layers.Dense(self.action_size, name='y')(lstm2)
        model = tf.keras.models.Model(inputs, y)
        # `lr` is a deprecated alias; `learning_rate` is the supported keyword.
        optimizer = tf.keras.optimizers.Adam(learning_rate=0.0050)
        model.compile(optimizer=optimizer, loss='mse')
        # print summary
        model.summary()
        return model

    def act(self, state):
        """Pick an action for ``state`` using an epsilon-greedy policy.

        Args:
            state: array of shape ``(1, steps)`` or ``(1, steps, 1)``.

        Returns:
            int: action index in ``range(action_size)``.
        """
        if not self.is_eval and np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)

        options = self.model.predict(self._as_model_input(state))
        action = int(np.argmax(options[0]))
        print("predicted action from the model is ===>", action)
        return action

    def expReplay(self, batch_size):
        """Fit the Q-network on the most recent ``batch_size`` transitions.

        For each stored ``(state, action, reward, next_state, done)`` tuple the
        Q-value of the taken action is moved towards
        ``reward + gamma * max(Q(next_state))`` (or just ``reward`` when the
        episode ended). Afterwards epsilon is decayed towards ``epsilon_min``.

        Args:
            batch_size: number of most recent transitions to replay.
        """
        # Take the newest `batch_size` transitions (the previous index range
        # yielded only batch_size - 1 of them).
        mini_batch = list(self.memory)[-batch_size:] if batch_size > 0 else []

        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                next_q = self.model.predict(self._as_model_input(next_state))
                target = reward + self.gamma * np.amax(next_q)

            model_input = self._as_model_input(state)
            target_f = self.model.predict(model_input)
            # Only the Q-value of the action actually taken is corrected; the
            # other outputs keep the network's own prediction. Without this
            # assignment the model would be fitted to its own output.
            target_f[0][action] = target
            self.model.fit(model_input, target_f, epochs=1, verbose=0)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [3]:
#functions

def formatPrice(n):
    """Return ``n`` formatted as a dollar amount with two decimals.

    Negative values get a leading minus sign in front of the dollar sign,
    e.g. ``-3.5`` becomes ``"-$3.50"``.
    """
    prefix = "$"
    if n < 0:
        prefix = "-$"
    magnitude = "{0:.2f}".format(abs(n))
    return prefix + magnitude

def getStockDataVec(key):
    """Load one numeric column from ``./data/<key>.csv`` as a list of floats.

    The first line is treated as a header and skipped. From every following
    non-blank line the fifth comma-separated field (index 4) is parsed as a
    float -- presumably the closing price in Yahoo-style exports (confirm
    against the data files).

    Args:
        key: file name without the ``.csv`` extension.

    Returns:
        list[float]: one value per data row, in file order.
    """
    vec = []
    # Use a context manager so the file handle is closed even if parsing fails.
    with open("./data/" + key + ".csv", "r") as handle:
        lines = handle.read().splitlines()

    for line in lines[1:]:
        if not line.strip():
            # Tolerate blank lines instead of failing with an IndexError.
            continue
        vec.append(float(line.split(",")[4]))

    return vec

def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + e**-x)`` of ``x``.

    Evaluated in a numerically stable way: the exponential is always taken of
    a non-positive number, so large negative inputs (for example a big price
    drop) return a value near 0 instead of raising ``OverflowError``.
    """
    if x >= 0:
        return 1 / (1 + math.exp(-x))
    # For negative x rewrite as e**x / (1 + e**x) so exp() cannot overflow.
    exp_x = math.exp(x)
    return exp_x / (1 + exp_x)

def getState(data, t, n):
    """Return an n-day state representation ending at time ``t``.

    A window of ``n`` values ending at index ``t`` is taken from ``data``;
    when the window would start before index 0 it is left-padded with
    ``data[0]``. The state holds the sigmoid of each consecutive difference
    inside that window.

    Args:
        data: list of numeric values (list concatenation is used for padding).
        t: index of the last value in the window.
        n: window length.

    Returns:
        numpy array of shape ``(1, n - 1)``.
    """
    start = t - n + 1
    if start >= 0:
        window = data[start:t + 1]
    else:
        # pad with t0
        window = -start * [data[0]] + data[0:t + 1]

    deltas = [sigmoid(window[k + 1] - window[k]) for k in range(n - 1)]
    return np.array([deltas])


In [11]:
#Train.py
# Trains the agent on one price series, logging every closed trade per episode.

main_df = pd.DataFrame()  # trade log across all finished episodes
empty_list = []           # trade records of the episode currently running
episode_frames = []       # one trade-log DataFrame per finished episode

stock_name, window_size, episode_count = "^GSPC",10,1

# Create the folders written to below up front, so a long run does not fail
# at the very end because a directory is missing.
os.makedirs("./output", exist_ok=True)
os.makedirs("./models", exist_ok=True)

agent = Agent(window_size)
data = getStockDataVec(stock_name)
print(type(data))

l = len(data) - 1
batch_size = 32

for e in range(episode_count + 1):
    print("Episode " + str(e) + "/" + str(episode_count))
    state = getState(data, 0, window_size + 1)

    total_profit = 0
    agent.inventory = []
    # Start a fresh trade log so each episode's CSV (and main_df) does not
    # repeat the trades of earlier episodes.
    empty_list = []

    for t in range(l):
        # The network expects (batch, steps, 1); getState returns (1, steps).
        state = np.reshape(state, (state.shape[0], state.shape[1], 1))
        action = agent.act(state)

        # sit
        next_state = getState(data, t + 1, window_size + 1)
        reward = 0

        if action == 1: # buy
            agent.inventory.append(data[t])
            print("Buy: " + formatPrice(data[t]))

        elif action == 2 and len(agent.inventory) > 0: # sell
            # Positions are closed first-in, first-out.
            bought_price = agent.inventory.pop(0)
            profit = data[t] - bought_price
            # Losses are not penalised: the reward is clipped at zero.
            reward = max(profit, 0)
            total_profit += profit
            empty_list.append({'Buy': bought_price, 'Sell': data[t], 'Profit': profit})
            print("Sell: " + formatPrice(data[t]) + " | Profit: " + formatPrice(profit))

        done = t == l - 1
        agent.memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            df1 = pd.DataFrame(empty_list, columns=['Buy','Sell','Profit'])
            path = './output/episode' + str(e) + '.csv'
            df1.to_csv(path)
            # DataFrame.append was removed in pandas 2.0; concatenate the
            # per-episode frames instead.
            episode_frames.append(df1)
            main_df = pd.concat(episode_frames)
            print("--------------------------------")
            print("Total Profit: " + formatPrice(total_profit))
            print("--------------------------------")

        if len(agent.memory) > batch_size:
            agent.expReplay(batch_size)

    if e % 10 == 0:
        agent.model.save("./models/model_LSTM_epi" + str(e))





Model: "model_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
Input (InputLayer)           [(None, 10, 1)]           0         
_________________________________________________________________
lstm1 (LSTM)                 (None, 10, 32)            4352      
_________________________________________________________________
lstm2 (LSTM)                 (None, 10, 64)            24832     
_________________________________________________________________
lstm3 (LSTM)                 (None, 10, 128)           98816     
_________________________________________________________________
y (Dense)                    (None, 10, 1)             129       
Total params: 128,129
Trainable params: 128,129
Non-trainable params: 0
_________________________________________________________________
<class 'list'>
Episode 0/1
Buy: $1295.86
Buy: $1313.27
Buy: $1326.82
Buy: $1318.55
Sell: $1342.54 | Profit: $46.68
Buy:

InvalidArgumentError:  ValueError: TypeError: len() of unsized object
Traceback (most recent call last):

  File "cupy/core/core.pyx", line 1151, in cupy.core.core.ndarray.__len__

TypeError: len() of unsized object


Traceback (most recent call last):

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/ops/script_ops.py", line 234, in __call__
    return func(device, token, args)

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/ops/script_ops.py", line 135, in __call__
    for (x, dtype) in zip(ret, self._out_dtypes)

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/ops/script_ops.py", line 135, in <listcomp>
    for (x, dtype) in zip(ret, self._out_dtypes)

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/ops/script_ops.py", line 112, in _convert
    return ops.convert_to_tensor(value, dtype=dtype)

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/framework/ops.py", line 1314, in convert_to_tensor
    ret = conversion_func(value, dtype=dtype, name=name, as_ref=as_ref)

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/framework/constant_op.py", line 317, in _constant_tensor_conversion_function
    return constant(v, dtype=dtype, name=name)

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/framework/constant_op.py", line 258, in constant
    allow_broadcast=True)

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/framework/constant_op.py", line 266, in _constant_impl
    t = convert_to_eager_tensor(value, ctx, dtype)

  File "/conda/envs/data-science-stack-2.3.0/lib/python3.7/site-packages/tensorflow_core/python/framework/constant_op.py", line 96, in convert_to_eager_tensor
    return ops.EagerTensor(value, ctx.device_name, dtype)

ValueError: TypeError: len() of unsized object
Traceback (most recent call last):

  File "cupy/core/core.pyx", line 1151, in cupy.core.core.ndarray.__len__

TypeError: len() of unsized object




	 [[{{node EagerPyFunc}}]]
	 [[IteratorGetNext]] [Op:__inference_distributed_function_5417]

Function call stack:
distributed_function


In [12]:
# Debug check: show the shape of the `state` variable left in the kernel by the
# training cell (the loop assigns `state = next_state`, the output of getState).
print(state.shape)


(1, 10)
