In [1]:
import gym
from gym import spaces
import pandas as pd
import numpy as np
import random

In [2]:
# Load the 6-hourly BTC candle data (with precomputed indicator columns)
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="btc_6H.csv")
df.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,MACD,MACD_Sig,MACD_status,3D_return,RSI,EMA_9,EMA_21,EMA_50,EMA_status,RSI_status,3D_return_norm
0,2015-01-14 00:00:00,227.01,230.89,213.32,213.42,6823.048857,1495162.0,219.366758,-17.876382,-12.636517,0.5,-0.229531,12.838542,240.5213,259.412635,302.708863,0.5,0.5,0.3
1,2015-01-14 06:00:00,213.32,215.0,152.4,190.65,55956.799331,10777880.0,196.250499,-21.027478,-14.314709,0.5,-0.315292,10.218676,230.54704,253.161486,300.467686,0.5,0.5,0.24
2,2015-01-14 12:00:00,190.62,208.11,175.71,176.35,32111.532855,6133616.0,191.489716,-24.397396,-16.331247,0.5,-0.349502,10.847085,219.707632,246.178624,297.985332,0.5,0.5,0.22
3,2015-01-14 18:00:00,176.0,186.58,161.1,171.41,29297.50404,5169611.0,177.395178,-27.153687,-18.495735,0.5,-0.356545,14.446886,210.048106,239.381476,295.453825,0.5,0.5,0.21
4,2015-01-15 00:00:00,172.0,205.56,168.5,193.08,17490.65337,3298064.0,190.755609,-27.275069,-20.251602,0.5,-0.288001,27.0672,206.654485,235.172251,293.406349,0.5,0.5,0.26


In [None]:
class trading_env(gym.Env):
    """Single Stock Trading Environment"""
    def __init__(self, df, init_capital=10000):
        """Store the market data, initialise portfolio fields and define the gym spaces.

        Args:
            df: DataFrame of market data. The "Close" and "Volume_(BTC)"
                columns are read here to derive normalisation constants.
            init_capital: Starting cash; ``reset`` copies it into
                ``current_capital`` at the start of every episode.
        """
        super().__init__()

        # Instance attributes
        self.df = df
        self.init_capital = init_capital
        self.current_step = None

        # Portfolio information; left as None until reset() assigns episode values
        self.no_stocks_bought = None
        self.no_stocks_sold = None
        self.portfolio_value = None
        self.current_capital = None
        self.avg_cost = None
        self.returns = None

        # Values for normalising data.
        # Series.max() skips NaN, whereas the built-in max() gives
        # order-dependent results when NaN values are present.
        self.max_stock_price = self.df["Close"].max()
        self.max_volume = self.df["Volume_(BTC)"].max()
        self.max_capital = 1000000
        self.max_no_shares = 10000

        # Action space: two continuous components bounded by [0, 3] and [0, 1].
        # NOTE(review): presumably (action type, amount fraction) - confirm
        # against the code that consumes the action.
        self.action_space = spaces.Box(low=np.array([0, 0]), high=np.array([3, 1]), dtype=np.float16)

        # Observation space: 7 rows x 6 columns, every entry in [0, 1].
        # Rows considered: Close, Volume, MACD, RSI, EMA, Return, plus one
        # portfolio row (current_capital, portfolio_value, returns,
        # no_stocks_bought, no_stocks_sold, avg_cost).
        self.observation_space = spaces.Box(low=0.0, high=1.0, shape=(7, 6))
    
    def reset(self):
        """Reset the portfolio and jump to a random starting row.

        Returns:
            Whatever ``self.observation()`` returns for the new state.

        Raises:
            ValueError: If the data has fewer rows than the lookback window.
        """
        lookback = 6  # number of consecutive rows one observation spans

        self.no_stocks_bought = 0
        self.no_stocks_sold = 0
        self.portfolio_value = 0
        self.current_capital = self.init_capital
        self.avg_cost = 0
        self.returns = 0

        # Use the row count directly: df.loc["Open"] is a *row label* lookup
        # and raises KeyError on a frame whose index holds no "Open" label.
        n_rows = len(self.df)
        if n_rows < lookback:
            raise ValueError(
                "df has %d rows but at least %d are required for one observation"
                % (n_rows, lookback)
            )

        # random.randint is inclusive on both ends, so the latest start row
        # still leaves `lookback` rows available.
        self.current_step = random.randint(0, n_rows - lookback)

        return self.observation()
    
    def observation(self):
        """Build the observation for the current step.

        The result stacks six market-feature rows (Close, Volume_(BTC),
        MACD_status, RSI_status, EMA_status, 3D_return_norm), each covering
        six consecutive rows of ``self.df`` starting at ``self.current_step``,
        on top of one row of six portfolio values.

        Returns:
            np.ndarray of dtype float32 with shape (7, 6) when six rows are
            available from ``self.current_step`` onwards.
        """
        window = 6  # must equal the number of portfolio values so the rows stack

        # Positional, end-exclusive slice: exactly `window` rows. A label
        # slice with .loc is end-inclusive and would yield window + 1 rows.
        start = self.current_step
        frame = self.df.iloc[start:start + window]

        env_observations = np.array([
            frame["Close"].values / self.max_stock_price,
            frame["Volume_(BTC)"].values / self.max_volume,
            frame["MACD_status"].values,
            frame["RSI_status"].values,
            frame["EMA_status"].values,
            frame["3D_return_norm"].values,
        ])

        portfolio_row = [[
            self.current_capital / self.max_capital,
            self.portfolio_value / self.max_capital,
            # TODO: returns can be negative, so this value may fall below 0;
            # a proper normalisation has not been decided yet.
            self.returns / self.max_capital,
            self.no_stocks_bought / self.max_no_shares,
            self.no_stocks_sold / self.max_no_shares,
            self.avg_cost / self.max_stock_price,
        ]]

        obs = np.append(env_observations, portfolio_row, axis=0)

        # float32 is the default dtype of gym's Box, which is how the
        # observation space is declared in __init__.
        return obs.astype(np.float32)
        
    def step(self):
        self.action(action)
        self.current_step += 1
        
        if self.current_step>