In [None]:
import gym
import gym_anytrading
import matplotlib.pyplot as plt
import pandas as pd
import os, time, random
import numpy as np
import datetime

from collections import deque
import tensorflow as tf
from tensorflow.keras import Input, Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv1D, Dropout
from tensorflow.keras.optimizers import Adam

from tensorflow.python.framework.ops import disable_eager_execution
disable_eager_execution()

physical_devices = tf.config.experimental.list_physical_devices('GPU')
tf.config.experimental.set_memory_growth(physical_devices[0], True)

In [11]:
columnNames = ["Symbol", "Timestamp", "Open", "High", "Low", "Close", "Volume"]
data_len = 7 # slice of data in in days (1 min interval) (1 week is 2100 datapoints)

df = pd.read_csv('./Data/Prices/AAPL.csv')
df.columns = columnNames
df["Timestamp"] = pd.to_datetime(df["Timestamp"])
df = df[(df["Timestamp"] >= df["Timestamp"].max()-datetime.timedelta(days=data_len)) & (df["Timestamp"] <= df["Timestamp"].max())]

window_size = 60
start_index = window_size
end_index = len(df)

env = gym.make('stocks-v0', df = df, window_size = window_size, frame_bound = (start_index, end_index))
print(f"Actions: {env.action_space.n}, Observation space: {env.observation_space.shape[0]*env.observation_space.shape[1]}")
print(env.observation_space.shape)


Actions: 2, Observation space: 120
(60, 2)


In [54]:
class DQSN:
    def __init__(self, epsilon, gamma, epsilon_min, lr, epsilon_decay, batch_size = 60, copy_interval = 1,  action_space = env.action_space.n, state_space = env.observation_space.shape[0]*env.observation_space.shape[1], sarsa = False):
        self.epsilon = epsilon
        self.gamma = gamma
        self.batch_size = batch_size
        self.epsilon_min = epsilon_min
        self.lr = lr
        self.epsilon_decay = epsilon_decay
        self.memory = deque(maxlen=50000)
        self.copy_interval = copy_interval
        self.fit_counter = 0
        self.action_space = action_space
        self.state_space = state_space
        self.model = self.build_model()
        self.target_model = self.build_model()
        self.sarsa = sarsa
        if self.sarsa:
            self.model_path = f'./Models/dsn.h5'
        else:
            self.model_path = f'./Models/dqn.h5'

    def build_model(self):
        model = Sequential()
        model.add(Dense(24, input_dim=self.state_space, activation='relu'))
        model.add(Dense(24, activation='relu'))
        model.add(Dense(self.action_space, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=self.lr))
        return model

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_space)
        print(state.shape)
        print(state[0], state[119])
        print(state)
        act_values = self.target_model.predict(state)
        return np.argmax(act_values[0])

    def replay(self):
        if len(self.memory) < self.batch_size:
            return

        minibatch = random.sample(self.memory, self.batch_size)
        states = np.array([i[0] for i in minibatch])
        actions = np.array([i[1] for i in minibatch])
        rewards = np.array([i[2] for i in minibatch])
        next_states = np.array([i[3] for i in minibatch])
        dones = np.array([i[4] for i in minibatch])

        states = np.squeeze(states)
        next_states = np.squeeze(next_states)

        if self.sarsa:
            targets = rewards + self.gamma * (self.model.predict_on_batch(next_states).shape[0]) * (1-dones)
        else:
            targets = rewards + self.gamma * (np.amax(self.target_model.predict_on_batch(next_states), axis=1))*(1-dones)
        targets_full = self.model.predict_on_batch(states)

        ind = np.array([i for i in range(self.batch_size)])
        targets_full[[ind], [actions]] = targets

        self.model.fit(states, targets_full, epochs=1, verbose=0)
        self.fit_counter += 1
        
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
        if self.fit_counter % self.copy_interval == 0:
            self.target_model.set_weights(self.model.get_weights())
            # print("  Target network updated")

    def save_model(self):
        self.model.save(self.model_path)
        #print('model saved')
        
    def load_model(self):
        if os.path.isfile(self.model_path):            
            self.model.load_weights(self.model_path)
            self.target_model.load_weights(self.model_path)
            print('model loaded')

In [55]:
def train_dqsn(episodes, sarsa, render):
    rewardlist = []
    agent = DQSN(epsilon, gamma, epsilon_min, learning_rate, epsilon_decay, sarsa = sarsa)
    agent.load_model()
    
    for e in range(episodes):
        state = env.reset()
        done = False
        state = state.flatten()                    # reshape naar aantal states variabelen
        score = 0
        i = 0
        
        while not done:
            action = agent.act(state)
            if render:
                env.render()
            next_state, reward, done, _ = env.step(action)
            next_state = next_state.flatten()       # reshape naar aantal states variabelen
                        
            score += reward
            
            agent.remember(state, action, reward, next_state, done)
            state = next_state
            agent.replay()
            i += 1
            
        if e % 10 == 0:
            agent.save_model()
            
        rewardlist.append(score)
        
    return loss

In [56]:
if __name__ == '__main__':
    gamma = .95
    learning_rate = 0.01
    epsilon = 1
    epsilon_min = 0.01
    epsilon_decay = .995
    SARSA = False
    render = False
    
    try:
        rewardlist = train_dqsn(1000, SARSA, render)
    except KeyboardInterrupt as e:
        env.close()

(120,)
127.4 0.04549999999998988
[ 1.274000e+02  2.000000e-02  1.274446e+02  4.460000e-02  1.276250e+02
  1.804000e-01  1.277038e+02  7.880000e-02  1.277400e+02  3.620000e-02
  1.277250e+02 -1.500000e-02  1.277100e+02 -1.500000e-02  1.277535e+02
  4.350000e-02  1.277138e+02 -3.970000e-02  1.277900e+02  7.620000e-02
  1.277538e+02 -3.620000e-02  1.277950e+02  4.120000e-02  1.277900e+02
 -5.000000e-03  1.277993e+02  9.300000e-03  1.276700e+02 -1.293000e-01
  1.277020e+02  3.200000e-02  1.277237e+02  2.170000e-02  1.278600e+02
  1.363000e-01  1.279000e+02  4.000000e-02  1.279210e+02  2.100000e-02
  1.279200e+02 -1.000000e-03  1.278211e+02 -9.890000e-02  1.277201e+02
 -1.010000e-01  1.275900e+02 -1.301000e-01  1.276260e+02  3.600000e-02
  1.276350e+02  9.000000e-03  1.276050e+02 -3.000000e-02  1.275992e+02
 -5.800000e-03  1.275910e+02 -8.200000e-03  1.276250e+02  3.400000e-02
  1.276300e+02  5.000000e-03  1.275900e+02 -4.000000e-02  1.274748e+02
 -1.152000e-01  1.275800e+02  1.052000e-01  

ValueError: Error when checking input: expected dense_105_input to have shape (120,) but got array with shape (1,)