In [1]:
import gym
from gym import spaces
import pandas as pd
import numpy as np
import random

In [4]:
# Load the BTC price/indicator table that the trading environment below is built on.
# The path is relative, so the CSV must sit in the notebook's working directory.
df = pd.read_csv("btc_6H_C.csv")
df.head(5)

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,MACD,MACD_Sig,MACD_status,3D_return,RSI,EMA_9,EMA_21,EMA_50,EMA_status,RSI_status,3D_return_norm,combiend_indicators
0,2015-01-14 00:00:00,227.01,230.89,213.32,213.42,6823.048857,1495162.0,219.366758,-17.876382,-12.636517,0.5,-0.229531,12.838542,240.5213,259.412635,302.708863,0.5,0.5,0.3,0.5
1,2015-01-14 06:00:00,213.32,215.0,152.4,190.65,55956.799331,10777880.0,196.250499,-21.027478,-14.314709,0.5,-0.315292,10.218676,230.54704,253.161486,300.467686,0.5,0.5,0.24,0.5
2,2015-01-14 12:00:00,190.62,208.11,175.71,176.35,32111.532855,6133616.0,191.489716,-24.397396,-16.331247,0.5,-0.349502,10.847085,219.707632,246.178624,297.985332,0.5,0.5,0.22,0.5
3,2015-01-14 18:00:00,176.0,186.58,161.1,171.41,29297.50404,5169611.0,177.395178,-27.153687,-18.495735,0.5,-0.356545,14.446886,210.048106,239.381476,295.453825,0.5,0.5,0.21,0.5
4,2015-01-15 00:00:00,172.0,205.56,168.5,193.08,17490.65337,3298064.0,190.755609,-27.275069,-20.251602,0.5,-0.288001,27.0672,206.654485,235.172251,293.406349,0.5,0.5,0.26,0.5


In [3]:
# Summary statistics of the 3D_return column; trading_env.action compares this
# column against fixed thresholds to choose how large a trade to make.
df["3D_return"].describe()

count    8713.000000
mean        0.008686
std         0.066787
min        -0.389930
25%        -0.020907
50%         0.006184
75%         0.038461
max         0.496291
Name: 3D_return, dtype: float64

In [9]:
# Scratch check of random.uniform, the call trading_env.action uses to draw a
# trade fraction. No seed is set, so the value changes on every run.
random.uniform(0.2,0.5)

0.45387441190995004

In [4]:
class trading_env(gym.Env):
    """Single-stock trading environment driven by a pre-computed indicator table.

    The environment walks through ``df`` one row per step. Each step the agent
    picks one of three discrete actions:

    * ``0`` - sell a fraction of the current holdings
    * ``1`` - hold
    * ``2`` - buy with a fraction of the available capital

    The fraction traded is not chosen by the agent; it is drawn at random from a
    band selected by the row's ``3D_return`` value (see ``action``).

    Required columns in ``df``: ``Open``, ``Close``, ``Volume_(BTC)``,
    ``3D_return``, ``3D_return_norm``, ``MACD_status``, ``RSI_status``,
    ``EMA_status`` and ``Timestamp`` (the last one only for ``render``).

    ``reset()`` must be called before ``step()``/``render()`` because the
    portfolio attributes are only placeholders until then.
    """

    # Discrete action identifiers understood by ``action``.
    SELL = 0
    HOLD = 1
    BUY = 2

    # Columns that make up the observation vector, in order.
    _OBS_COLUMNS = ("3D_return_norm", "MACD_status", "RSI_status", "EMA_status")

    def __init__(self, df, init_capital=10000):
        """Store the data and declare the action/observation spaces.

        Args:
            df: DataFrame with the columns listed in the class docstring.
            init_capital: Cash available at the start of every episode.

        Raises:
            ValueError: If ``df`` has no rows.
        """
        if len(df) == 0:
            raise ValueError("df must contain at least one row")

        # Rows are addressed by step number through label lookups, so force a
        # clean 0..n-1 index; a sliced frame (e.g. a train/test split) would
        # otherwise raise KeyError or read the wrong rows.
        self.df = df.reset_index(drop=True)
        self.initial_capital = init_capital
        self.current_step = None

        # Portfolio information (populated by reset()).
        self.no_stocks_bought = None
        self.no_stocks_sold = None
        self.portfolio_value = None
        self.current_stocks_held = None
        self.current_capital = None
        self.avg_cost = None
        self.buy_cost = None
        self.returns = None
        self.max_steps = None
        self.done = False
        self.amount = 0

        # Values for normalising data.
        self.max_stock_price = self.df["Close"].max()
        self.max_volume = self.df["Volume_(BTC)"].max()
        self.max_capital = 1000000
        self.max_no_shares = 10000

        # action() compares the action against 0/1/2, so the space is discrete.
        self.action_space = spaces.Discrete(3)
        # observation() returns one value per column in _OBS_COLUMNS.
        # NOTE(review): the [0, 1] bounds assume these columns are already
        # normalised, as the sample rows suggest - confirm for the full file.
        self.observation_space = spaces.Box(
            low=0.0, high=1.0, shape=(len(self._OBS_COLUMNS),), dtype=np.float64
        )

    def observation(self):
        """Return the indicator values of the current row as a 1-D float array."""
        row = self.df.loc[self.current_step, list(self._OBS_COLUMNS)]
        return row.to_numpy(dtype=np.float64)

    def step(self, a):
        """Apply action ``a``, advance one row and return the transition.

        Args:
            a: ``0`` (sell), ``1`` (hold) or ``2`` (buy).

        Returns:
            ``(obs, reward, done)``. This is a 3-tuple rather than gym's usual
            4-tuple; the shape is kept so existing callers keep working.
        """
        self.action(a)

        # The last valid row is len(df) - 1. Clamping keeps observation() inside
        # the table even if step() is called again after the episode has ended.
        last_step = len(self.df) - 1
        self.current_step = min(self.current_step + 1, last_step)

        # Rewards are scaled by episode progress, so later steps weigh more.
        delay = self.current_step / self.max_steps
        reward = self.returns * delay

        if self.current_step >= last_step or self.portfolio_value <= 0:
            self.done = True

        obs = self.observation()

        return obs, float(reward), self.done

    def action(self, a):
        """Execute a sell/hold/buy at a random price between Open and Close.

        Updates capital, holdings, average cost, ``returns`` (the raw reward
        signal used by ``step``) and ``portfolio_value``.
        """
        open_price = self.df.at[self.current_step, "Open"]
        close_price = self.df.at[self.current_step, "Close"]
        current_price = random.uniform(open_price, close_price)

        # Trade size depends on how extreme the 3-day return is; moves between
        # -0.02 and 0.1 (inclusive) leave the fraction at 0, i.e. no trade.
        # The bands are contiguous so the exact boundary values -0.19 and 0.3
        # fall into the milder band instead of slipping through.
        ret_3d = self.df.at[self.current_step, "3D_return"]
        if ret_3d < -0.19:
            self.amount = random.uniform(0.3, 0.5)
        elif ret_3d < -0.02:
            self.amount = random.uniform(0.1, 0.3)
        elif ret_3d > 0.3:
            self.amount = random.uniform(0.3, 0.5)
        elif ret_3d > 0.1:
            self.amount = random.uniform(0.1, 0.3)
        else:
            self.amount = 0

        action_taken = a

        if action_taken == self.BUY:
            total_possible = self.current_capital / current_price
            amount_stocks_bought = total_possible * self.amount
            current_cost = amount_stocks_bought * current_price
            self.buy_cost += current_cost
            self.no_stocks_bought += amount_stocks_bought
            self.current_stocks_held += amount_stocks_bought
            self.avg_cost = float(self.buy_cost) / float(self.current_stocks_held)
            self.current_capital -= current_cost
            # Reward buying below the running average cost. Both terms are
            # per-share prices; the previous version subtracted the total cash
            # spent, which mixed units and penalised every purchase.
            self.returns = self.avg_cost - current_price

        elif action_taken == self.SELL:
            # Selling is a no-op when nothing is held.
            if self.current_stocks_held > 0:
                shares_sell = self.current_stocks_held * self.amount
                profit = shares_sell * current_price
                self.no_stocks_sold += shares_sell
                self.current_stocks_held -= shares_sell
                self.current_capital += profit
                self.returns = profit - (shares_sell * self.avg_cost)
                self.buy_cost -= shares_sell * self.avg_cost

        elif action_taken == self.HOLD:
            # Holding only pays off while the position is worth more than it cost.
            self.returns = (current_price * self.current_stocks_held) - (
                self.current_stocks_held * self.avg_cost
            )

        if self.current_capital > self.max_capital:
            self.max_capital = self.current_capital
        if self.current_stocks_held <= 0:
            # Was a no-op comparison (==); the cost basis must actually be reset.
            self.avg_cost = 0

        # Keep the valuation current so step()'s bankruptcy check and
        # reward_output() do not depend on render() having been called.
        self.portfolio_value = self.current_capital + (
            self.current_stocks_held * current_price
        )

    def reset(self):
        """Start a new episode at row 0 and return the first observation."""
        # Tiny non-zero starting quantities avoid divide-by-zero in avg_cost.
        self.no_stocks_bought = 0.00000001
        self.no_stocks_sold = 0.0000001
        self.current_stocks_held = 0.000001
        self.portfolio_value = self.initial_capital
        self.current_capital = self.initial_capital
        self.avg_cost = 0
        self.returns = 0
        self.max_steps = len(self.df)
        self.current_step = 0
        self.buy_cost = 0
        self.done = False
        self.amount = 0

        return self.observation()

    def render(self, mode="human"):
        """Print a portfolio summary valued at a random Open..Close price.

        ``mode`` is accepted for compatibility with gym's ``render`` signature
        and is otherwise unused. The printed percentage is portfolio value over
        initial capital, so 100 means break-even.
        """
        current_price = random.uniform(
            self.df.at[self.current_step, "Open"],
            self.df.at[self.current_step, "Close"],
        )
        self.portfolio_value = self.current_capital + (self.current_stocks_held * current_price)
        return_perc = (self.portfolio_value / self.initial_capital) * 100

        print(f"Current Porfolio Value:{self.portfolio_value}; Available Capital: {self.current_capital}; Current Stocks Held: {self.current_stocks_held}")
        print(f"No. Stocks Bought:{self.no_stocks_bought}; No. Stocks Sold:{self.no_stocks_sold}; Average Cost:{self.avg_cost} ")
        print(f"Return:{return_perc}%; {self.portfolio_value-self.initial_capital}")
        print(f"Termination date: {self.df.loc[self.current_step,'Timestamp']}")

    def reward_output(self):
        """Return ``(value as % of initial capital, absolute gain, bought, sold)``."""
        return_value = self.portfolio_value - self.initial_capital
        return_perc = (self.portfolio_value / self.initial_capital) * 100
        return return_perc, return_value, self.no_stocks_bought, self.no_stocks_sold

In [7]:
# Smoke test: build the environment on the loaded frame, start an episode and
# print the opening portfolio. reset() has to come before render() because
# __init__ leaves the portfolio attributes as None.
env = trading_env(df)
env.reset()
env.render()


Current Porfolio Value:10000.0; Available Capital: 10000; Current Stocks Held: 0
No. Stocks Bought:0; No. Stocks Sold:0; Average Cost:0 
Returns:0
0


In [15]:
# Advance one step with action 1 (hold).
# NOTE(review): the recorded output below is a 4-tuple with a 2-element
# observation, but step() as defined above returns (obs, reward, done) with a
# 4-element observation - re-run this cell to refresh the output.
env.step(1)

(array([0.43, 0.5 ]), 0.0, False, {})