In [20]:
### Market Environment Jupyter File

In [15]:
!pip install keras
!pip install tensorflow




[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip




In [16]:
import os 

os.chdir(os.getcwd())
print(os.getcwd())




c:\Users\Nico\Documents\Uni\RL\Trading_Example


In [17]:
import numpy as np 
import pandas as pd 


class Market:
    def __init__(self, window_size, stock_name):
        self.data = self.get_stock_data(stock_name)
        self.states = self.get_all_window_prices_diff(self.data, window_size)
        self.index = -1
        self.last_data_index = len(self.data) - 1 

    def get_stock_data(self, stock_name):
        vec = []
        # creating list of lines - splitted
        lines = open(str(os.getcwd()) + "/data/" + stock_name + ".csv", "r").read().splitlines()
        for line in lines[1:]:
            vec.append(float(line.split(',')[4]))

        return vec

    def get_all_window_prices_diff(self, data, window_size):
        # Preprocsssing data to create list of states 

        processed_data = []

        for t in range(len(data)):
            state = self.get_window(data, t, window_size + 1)
            processed_data.append(state)
        
        return processed_data
    
    def get_window(self, data, t, n):
        # t - index of data
        # n - window size + 1 

        d = t - n + 1 # window block for each day 
        block = data[d:t+1] if d >= 0 else -d * [data[0]] + data[0:t+1]
        res = []
        for i in range(n-1):
            res.append(block[i+1]-block[i])
        
        return np.array([res])

    def reset(self):
        self.index = -1 
        return self.states[0], self.data[0]
    
    def get_next_state_reward(self, action, bought_price=None):
        # bought price only require for action = selling 

        self.index += 1 
        if self.index > self.last_data_index: # only checking 
            self.index = 0

        next_state = self.states[self.index +1]
        next_price_data = self.data[self.index + 1]

        price_data = self.data[self.index]
        reward = 0 
        # in case of selling - 2 // 0 holding // 1 buying 
        if action == 2 and bought_price is not None:
            reward = max(price_data - bought_price, 0) # just positive rewards 

        done = True if self.index == self.last_data_index - 1 else False 

        return next_state, next_price_data, reward, done 
    



In [18]:
### state == current batch of state (i.e. per day )

In [19]:
market_1 = Market(3, "GSPC")
n = 8

market_1.states
# current price plus two previous ones

#market_1.get_all_window_prices_diff(market_1.data, 2)

[array([[0., 0., 0.]]),
 array([[ 0.      ,  0.      , 64.290039]]),
 array([[  0.      ,  64.290039, -14.220093]]),
 array([[ 64.290039, -14.220093, -34.98999 ]]),
 array([[-14.220093, -34.98999 ,  -2.489991]]),
 array([[-34.98999 ,  -2.489991,   4.940064]]),
 array([[-2.489991,  4.940064, 12.469971]]),
 array([[ 4.940064, 12.469971, 13.549926]]),
 array([[12.469971, 13.549926, -8.269897]]),
 array([[13.549926, -8.269897,  8.099975]]),
 array([[-8.269897,  8.099975,  2.819947]]),
 array([[ 8.099975,  2.819947, 18.5     ]]),
 array([[ 2.819947, 18.5     , -5.429932]]),
 array([[18.5     , -5.429932,  0.359985]]),
 array([[-5.429932,  0.359985, 17.5     ]]),
 array([[ 0.359985, 17.5     ,  3.900025]]),
 array([[17.5     ,  3.900025, -6.790039]]),
 array([[ 3.900025, -6.790039, -2.560059]]),
 array([[-6.790039, -2.560059,  9.220093]]),
 array([[-2.560059,  9.220093,  9.559936]]),
 array([[ 9.220093,  9.559936, -7.71997 ]]),
 array([[ 9.559936, -7.71997 ,  7.459961]]),
 array([[ -7.71997 

In [40]:
from keras.models import Sequential 
from keras.models import load_model
from keras.layers import Dense
from keras.optimizers import Adam

import numpy as np
import random 
from collections import deque 


class Agent:
    def __init__(self, state_size, is_eval=False, model_name = ""):
        self.__inventory = []
        self.__total_profit = 0
        self.action_history = []

        self.state_size = state_size
        self.action_size = 3 # obvious 
        self.memory = deque(maxlen=1000)
        self.model_name = model_name
        self.is_eval = is_eval

        self.gamma = 0.95 
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995 # to geometrically decrease epislon until min value is reached
        

        self.model = load_model(os.getcwd() + "/models/" + model_name) if is_eval else self.create_model()

    
    def create_model(self):
        model = Sequential()
        model.add(Dense(units = 32, input_dim = self.state_size, activation = "relu" ))
        model.add(Dense(units = 8,  activation = "relu" ))
        ## output - mappping to 3 (action size)
        model.add(Dense(units = self.action_size,  activation = "linear" ))
        model.compile (loss = "mse", optimizer = Adam(lr = 0.001))

        return model 
    
    def reset(self):
        # to reset agent -> clearing inventory, clearing profit
         
        self.__inventory = []
        self.__total_profit = 0
        self.action_history = []
        

    # developing exploitation/exploration during training
    def act(self, state, price_data):
        if not self.is_eval and np.random.rand() <= self.epsilon:
            action = random.randrange(self.action_size)
        else:
            actions = self.model.predict(state)
            action = np.argmax(actions[0])
        
        bought_price = None
        if action == 0: # keep
            print(".", end = '', flush = True)
            self.action_history.append(action)
        elif action == 1: # buying 
            self.buy(price_data)
            self.action_history.append(action)
        elif action == 2 and len(self.__inventory) > 0:
            self.sell
            bought_price = self.sell(price_data)
            self.action_history.append(action)
        else: # if no stocks held, cant sell!
            self.action_history.append(0)
        
        return action, bought_price 
    
    def buy(self, price_data):
        self.__inventory.append(price_data)
        print(f"Buy: {str(self.format_price(price_data))}")

    def sell(self, price_data):
        bought_price = self.__inventory.pop(0)
        self.__total_profit += price_data - bought_price
        profit = price_data - bought_price
        print(f"Selling {str(self.format_price(price_data))} with profit {str(self.format_price(profit))}")
        return bought_price
    
    def has__invetory(self):
        return len(self.__inventory) > 0
    
    def format_price(self, price):
        output = str(f"{str(price)}$")
        return output

    def get_total_profit(self):
        return self.format_price(self.__total_profit)
    

    ## experience replay 

    def experience_replay(self, batch_size):
        mini_batch = []
        l = len(self.memory)
        
        for i in range(l-batch_size + 1, l):
            mini_batch.append(self.memory[i])

        for state, action, reward, next_state, done in mini_batch:
            if done:
                target = reward 
            else:
                # Q-value 
                next_q_values = self.model.predict(next_state)[0]
                target = reward + self.gamma * np.argmax(next_q_values)
            
            predicted_target = self.model.predict(state)
            predicted_target[0][action] = target
            self.model.fit(state, predicted_target, epochs = 1, verbose = 0)  ## called again ang again, hence only epochs = 1 

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay





In [31]:
### Training procedure 


import os 
import time 

def trainer():
    window_size = 5 # hence last 5 days as model input 
    episode_count = 10
    stock_name = "GSPC"
    batch_size = 32

    agent = Agent(window_size)
    market = Market(window_size = window_size, stock_name= stock_name)

    start_time = time.time()

    for e in range(episode_count+1):
        print(f"Current episode: {str(e)}")
        agent.reset()
        state, price_data = market.reset()

        for t in range(market.last_data_index):

            # letting the agent act // at times randomly 
            action, bought_price = agent.act(state, price_data)

            
            next_state, next_price_data, reward, done = market.get_next_state_reward(action, bought_price)
            agent.memory.append((state, action, reward, next_state, done))
        
            if len(agent.memory) > batch_size:
                agent.experience_replay(batch_size)
            
        
            state = next_state
            price_data = next_price_data
        
            if done:
                print("---------------------------------------")
                print(f"Total profit: {str(agent.get_total_profit())}")


        if e % 10 == 0:
            agent.model.save(f"{os.getcwd()}/models/model_episode{str(e)}") 


    end_time = time.time()
    training_time = end_time - start_time
    print(f"Training Time in Seconds: {str(training_time)}")




In [41]:
## calling trainer

trainer()



Current episode: 0
.Buy: 1333.339966$
Selling 1298.349976$ with profit -34.989990000000034$
.Buy: 1300.800049$
.Buy: 1326.819946$
.Selling 1326.650024$ with profit 25.849975000000086$
Selling 1329.469971$ with profit 2.650024999999914$
.Buy: 1342.540039$
Selling 1342.900024$ with profit 0.3599850000000515$
..Buy: 1354.949951$
Buy: 1364.170044$
Selling 1373.72998$ with profit 18.780029000000013$
..Selling 1349.469971$ with profit -14.700072999999975$
.....Buy: 1326.609985$
Selling 1278.939941$ with profit -47.67004399999996$
