In [8]:
import os
from os import path
import pandas as pd
import numpy as np
import collections
import time
import random
from matplotlib import pyplot as plt

# Import Tensorflow libraries

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Activation
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.optimizers import Adam

from tensorflow.keras.callbacks import TensorBoard
tensorboard = TensorBoard(log_dir="./logs")

###### Tensorflow-GPU ########
os.environ["KERAS_BACKEND"] = "tensorflow"
os.environ["TF_ENABLE_GPU_GARBAGE_COLLECTION"] = 'false'
physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [9]:
### some variables
datapath = "/home/joren/Coding/cryptodata/normalized/"

REPLAY_MEMORY_CAPACITY = 256
EPSILON = 0.9
DECAY = 0.9
MIN_EPSILON = 0.1
DISCOUNT = 1
UPDATE_TARGET_INTERVAL = 10
POSSIBLE_ACTIONS = [0, 1, 2] #BUY HOLD SELL
BATCH_SIZE = 10

input_size = 120 # amount of candles given to the model

In [10]:
### Loading Data
dataset_train = [pd.read_csv(path.join(datapath, filename), index_col=0) for filename in os.listdir(datapath)[:1]]
print(dataset_train[0])

                  open     close      high       low    volume  \
event_time                                                       
1.567742e+12  0.292194 -0.077700 -0.183333  0.250000 -0.540235   
1.567743e+12 -0.035205  0.030142  0.026837  0.029222  0.119566   
1.567743e+12  0.079564  0.041996  0.002087  0.047717 -0.301701   
1.567743e+12 -0.026900 -0.019596 -0.018148 -0.000515 -0.787394   
1.567743e+12  0.017264 -0.034206 -0.000202 -0.033608  1.114660   
...                ...       ...       ...       ...       ...   
1.632852e+12  0.000000 -0.003256 -0.000407 -0.002850  2.420520   
1.632852e+12 -0.002850  0.000817 -0.000814  0.000000 -0.852874   
1.632852e+12  0.000000 -0.002856 -0.001222 -0.002042  0.877959   
1.632852e+12 -0.002042  0.000818  0.000408 -0.000818  6.948060   
1.632852e+12  0.000818 -0.000818 -0.001631  0.000819 -0.905006   

              differencelowhigh  differenceopenclose  maxprofitclose  \
event_time                                                           

In [11]:
X_train = []

for dataset in dataset_train:  
    X_train= [*X_train, *dataset.drop(columns=["maxprofitclose", "maxprofitlowhigh"]).values.tolist()]

X_train = np.array(X_train)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1]))

print(X_train[1])

[-0.0352047  0.030142   0.0268367  0.0292222  0.119566   0.0863651
  0.0256936]


In [27]:
### lstm agent class
class DQLSTMAgent:

    def __init__(self, replayCapacity, inputShape):
        ## Initialize replay memory
        self.capacity = replayCapacity
        self.memory = collections.deque(maxlen=self.capacity)
        self.populated = False
        ## Policiy model
        self.policy_model = self.buildNetwork()

        ## Target model
        self.target_model = self.buildNetwork()
        self.target_model.set_weights(self.policy_model.get_weights())

    def addToReplayMemory(self, step):
        self.step = step
        self.memory.append(self.step)

    def sampleFromReplayMemory(self, batchSize):
        self.batchSize = batchSize
        if self.batchSize > len(self.memory):
            self.populated = False
            return self.populated
        else:
            return random.sample(self.memory, self.batchSize)

    def buildNetwork(self):
        model = Sequential()

        model.add(LSTM(units = 256, return_sequences = True, input_shape = (X_train.shape[1], input_size)))
        model.add(Dropout(0.2))

        model.add(LSTM(units = 256, return_sequences = True))
        model.add(Dropout(0.2))

        model.add(LSTM(units = 128, return_sequences = True))
        model.add(Dropout(0.2))

        model.add(LSTM(units = 64))
        model.add(Dropout(0.2))

        model.add(Dense(units = 3)) # BUY HOLD SELL

        model.compile(loss='mse', optimizer=Adam(lr = 0.001), metrics=['MeanSquaredError'])
        return model

    def policy_network_fit(self,batch, batchSize):
        self.batchSize = batchSize
        self.batch = batch

    def policy_network_predict(self, state):
        self.state = state
        self.qPolicy = self.policy_model.predict(self.state)
        return self.qPolicy

    def target_network_predict(self, state):
        self.state = state
        self.qTarget = self.target_model.predict(self.state)
        return self.qTarget

    def update_target_network(self):
        self.target_model.set_weights(self.policy_model.get_weights())


In [28]:
agent = DQLSTMAgent(replayCapacity= REPLAY_MEMORY_CAPACITY, inputShape= X_train.shape)

  super(Adam, self).__init__(name, **kwargs)


In [32]:
def calculateStep(action):
    global position, positionPrice, profit, episodeReward, state

    NewState = X_train[candle+1-input_size:candle+1]

    if not position:
        if action == 0:
            position = True
            positionPrice = state[-1][1]

        return NewState, 0

    
    elif action == 1:
        return NewState, 0
    elif position and action == 2:
        position = False
        currentClose = state[-1][1]
        profit += currentClose / positionPrice
        reward = profit / state[-1][-1]

        return NewState, reward
    
    else:
        return NewState, 0
    

In [43]:
# Fill the replay memory with the first batch of samples

rewardHistory = []

for episode in range(1000):
    position = False
    positionPrice = 0
    profit = 0
    episodeReward = 0
    stepCounter = 0  # count the number of successful steps within the episode

    
    print(X_train.shape)
    for candle in range(input_size, X_train.shape[0]):
        state = X_train[candle-input_size:candle]
        state = np.reshape(state, (1, 7, state.shape[0]))

        r = random.random()

        if r <= EPSILON:
            action = random.sample(POSSIBLE_ACTIONS, 1)[0]
        else:
            qValues = agent.policy_network_predict(state) # insert correct data here
            action = np.argmax(qValues[0])

        # print(action)

        newState, reward = calculateStep(action) # calculate correct reward here
        print(reward)

        stepCounter +=1
        
        # store step in replay memory
        step = (state, action, reward, newState)
        agent.addToReplayMemory(step)
        # state = newState
        episodeReward += reward
        #print('episodeReward = ',episodeReward)

        # When enough steps in replay memory -> train policy network
        if len(agent.memory) >= (BATCH_SIZE):
            EPSILON = DECAY * EPSILON
            if EPSILON < MIN_EPSILON:
                EPSILON = MIN_EPSILON

            # sample minibatch from replay memory
            miniBatch = agent.sampleFromReplayMemory(BATCH_SIZE)
            print(np.asarray(list(zip(*miniBatch)), dtype=object)[0])
            miniBatch_states = np.asarray(list(zip(*miniBatch))[0],dtype=float)
            miniBatch_actions = np.asarray(list(zip(*miniBatch))[1], dtype = int)
            miniBatch_rewards = np.asarray(list(zip(*miniBatch))[2], dtype = float)
            miniBatch_next_state = np.asarray(list(zip(*miniBatch))[3],dtype=float)

            current_state_q_values = agent.policy_network_predict(miniBatch_states)
            y = current_state_q_values
            #print(y.shape)
            #miniBatch_next_state = np.squeeze(miniBatch_next_state, axis =1)

            next_state_q_values = agent.target_network_predict(miniBatch_next_state)
            #print(next_state_q_values.shape)
            max_q_next_state = np.max(next_state_q_values,axis=1)

            for i in range(BATCH_SIZE):
                y[i,miniBatch_actions[i]] = miniBatch_rewards[i] + DISCOUNT *  max_q_next_state[i]

            agent.policy_model.fit(miniBatch_states, y, batch_size=BATCH_SIZE, verbose = 0, callbacks = [tensorboard])
            #print(y)

        else:
            continue
        if updateCounter == UPDATE_TARGET_INTERVAL:
            agent.update_target_network()
            print('target updated')
            updateCounter = 0
        updateCounter += 1
    print('episodeReward for episode ', episode, '= ', episodeReward, 'with epsilon = ', EPSILON)
    rewardHistory.append(episodeReward)

plt.plot(rewardHistory)
plt.show()


#actions = agent.policy_network_predict(state)

#action = np.argmax(actions)
#print(action)

#state, reward, done, info = env.step(action)
#print(reward)




(1082773, 7)
0
[array([[[ 1.72644e-02, -3.42056e-02, -2.02000e-04, -3.36082e-02,
           1.11466e+00,  5.60060e-02, -5.30357e-02, -4.64693e-02,
          -1.96288e-02, -4.64693e-02, -2.99765e-02,  8.58487e-01,
           3.80512e-02, -2.63799e-02, -2.74393e-02, -1.08814e-02,
           5.29717e-04, -3.29924e-04, -4.25876e-01,  3.89439e-02,
          -9.80392e-03, -1.93900e-02, -5.00550e-02, -3.94960e-02,
          -5.00550e-02,  1.23093e+00,  5.04922e-02, -4.07687e-02,
          -4.64341e-02, -5.39664e-02, -4.81755e-02, -7.00637e-02,
           5.37665e-01,  7.52179e-02, -4.83458e-02, -4.82293e-02,
           2.82776e-02, -2.03845e-02,  1.74346e-02, -8.10507e-01,
           3.52509e-02,  2.81518e-02,  2.81518e-02,  2.03571e-02,
           1.34784e-02,  2.52142e-02,  2.55839e-01,  2.34002e-02,
           2.03571e-02,  2.29762e-02,  2.03010e-02,  3.82641e-02,
           2.59074e-02,  1.49530e+00,  3.57268e-02,  1.76888e-02,
           3.14209e-03, -7.89022e-03, -3.48315e-03,  3.14209e

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).

In [None]:
##### TESTING OF MODEL #####
dataset_test = pd.read_csv(path.join(datapath, 'ADABNB.csv'))
real_stock_price = dataset_test.iloc[:, 1:2].values

dataset_total = pd.concat((dataset_train['Open'], dataset_test['Open']), axis = 0)
inputs = dataset_total[len(dataset_total) - len(dataset_test) - 60:].values
inputs = inputs.reshape(-1,1)
# inputs = sc.transform(inputs)
X_test = []
for i in range(60, 76):
    X_test.append(inputs[i-60:i, 0])
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predicted_stock_price = agent.target_model.predict(X_test)
# predicted_stock_price = sc.inverse_transform(predicted_stock_price)