In [1]:
import random
import datetime
import numpy
import pandas as pd
import torch
import gym
from gym import error, spaces, utils
from gym.utils import seeding

# Default tensor dtype used by this notebook.
dtype = torch.float32

# Use the first CUDA device when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print("device:", device)

class FXEnvBase:
    def __init__(self, frame: str = "M5", isTraining: bool = True, data_path=None):
        """Load the rate CSV, derive the model inputs and initialise the episode.

        Args:
            frame: Time-frame label. It is not read by this constructor.
            isTraining: When True the episode range is ``[span, 70% of rows]``;
                otherwise it starts right after the 70% mark.
            data_path: Optional path of the rate CSV. When omitted, the USDJPY
                5-minute file this class was originally hard-wired to is used.
        """
        if data_path is None:
            data_path = '/mnt/landisk/data/fx/NextBoaderPossibility/fx_USDJPY_5_2020-08-03T23-05-00_to_2021-12-04T07-50-00.csv'
        rates = pd.read_csv(data_path, header=0, index_col=0, parse_dates=True)

        # Raw rates are kept for price look-ups; the model sees row-to-row diffs.
        self.rowdata = rates.copy()
        diff_array = rates.diff()
        self.data = pd.DataFrame(diff_array, columns=['time', 'open', 'high', 'low', 'close', 'tick_volume', 'spread', 'real_volume'])
        self.data = self.data.drop(columns=['time', 'real_volume'])

        # Volume and spread are replaced by normalised raw values instead of diffs.
        # NOTE(review): minmaxNormalization is not defined in the visible part of
        # this class; it is presumably provided by a subclass - confirm.
        tick_volume, _, _ = self.minmaxNormalization(rates.tick_volume)
        self.data["tick_volume"] = tick_volume
        spread, self.minSp, self.maxSp = self.minmaxNormalization(rates.spread)
        self.data["spread"] = spread

        # The first row of a diff is NaN, so drop it from both frames.
        self.data = self.data[1:]
        self.rowdata = self.rowdata[1:]
        length = len(self.data)
        print(length)

        # Account and instrument settings.
        self.budget_org = 100000
        self.leverage = 25
        self.volume_point = 10000
        self.point = 0.001

        self.dataRange = datetime.timedelta(days=2)
        self.dims = 5
        INTERVAL_DAYS = 5
        MINUTES_SPAN = 5

        # Number of rows in one observation window (INTERVAL_DAYS of bars, plus one).
        totalMinutes = INTERVAL_DAYS * 24 * 60
        self.span = int(totalMinutes/MINUTES_SPAN)+1

        ##select random indices.
        # A full random permutation of every index that has a complete window
        # before it and ``span`` rows after it.
        self.indices = random.sample(range(self.span, length - self.span -1), k=length - self.span*2 -1)
        if isTraining:
            self.fromIndex = self.span
            self.toIndex = int(length*0.7)
        else:
            self.fromIndex = int(length*0.7)+1
            # NOTE(review): this is one past len(self.data); confirm it is intended.
            self.toIndex = length+1

        # For reinforcement learning
        self.action_space = gym.spaces.Discrete(3+2)
        # reset() appends three history columns (budget, ask diff, bid diff) to
        # the data window, so the declared shape has to include them.
        self.observation_space = gym.spaces.Box(
            low=-1,
            high=1,
            shape=(self.span, self.data.shape[1] + 3)
        )
        self.reward_range = [-1, 1]
        self.INVALID_REWARD = -1
        self.reset()

    def __len__(self):
        return self.toIndex - self.fromIndex
    
    def reset(self):
        '''
        '''
        self.askPositions = []
        self.bidPositions = []
        self.budget = self.budget_org
        self.coin = 0
        #self.index = random.randint(self.fromIndex, self.toIndex)
        self.index = self.fromIndex
        self.stepCount = 0
        self.observation = None
        self.rewards = 0
        self.winCount = 0
        self.orderCount = 0
        self.pl = 0
        self.budget_history = [1 for i in range(0, self.span)]
        self.ask_diff_history = [0 for i in range(0, self.span)]
        self.bid_diff_history = [0 for i in range(0, self.span)]
        observations = self.__getitem__(self.index + self.stepCount)
        observations = numpy.hstack([observations, numpy.atleast_2d(self.budget_history).T])
        observations = numpy.hstack([observations, numpy.atleast_2d(self.ask_diff_history).T])
        observations = numpy.hstack([observations, numpy.atleast_2d(self.bid_diff_history).T])
        return observations
    
    def getSymbolInfo(self, symbol='USDJPY'):
        if symbol == 'USDJPY':
             return {
                 "point": 0.001,
                 "min":0.1,
                 "rate":10000
             }
            
    @property
    def ask(self):
        if self.index + self.stepCount < len(self.rowdata):
            value = self.rowdata.close.iloc[self.index + self.stepCount -1]  + self.rowdata["spread"].iloc[self.index + self.stepCount -1]*self.point
            #value = random.uniform(next_data["Open"].iloc[0], next_data["High"].iloc[0])
            #value = next_data["Open"].iloc[0] + next_data["spread"].iloc[0]*self.point
            return value
        else:
            raise IndexError(f"{self.index} + {self.stepCount} is out of range")
    
    @property
    def bid(self):
        if self.index + self.stepCount < len(self.rowdata):
            value = self.rowdata.close.iloc[self.index + self.stepCount -1]  - self.rowdata["spread"].iloc[self.index + self.stepCount -1]*self.point
            #value = random.uniform(next_data["Low"].iloc[0], next_data["Open"].iloc[0])
            #value = next_data["Open"].iloc[0] - next_data["spread"].iloc[0]*self.point
            return value
        else:
            raise IndexError(f"{self.index} + {self.stepCount} is out of range")
    
    def __getRowData__(self, ndx):
        inputs = []
        if type(ndx) == slice:
            for index in self.indices[ndx]:
                inputs.append(self.rowdata[index+1-self.span:index+1].values.tolist())
        else:
            index = ndx
            inputs = self.rowdata[index+1-self.span:index+1].values.tolist()

        return inputs
    
    def __getInputs__(self, ndx):
        inputs = []
        if type(ndx) == int:
            indicies = slice(ndx, ndx+1)
            for index in self.indices[indicies]:
                inputs.append(self.data[index+1-self.span:index+1].values.tolist())
            return inputs[0]
        elif type(ndx) == slice:
            indicies = ndx
            for index in self.indices[indicies]:
                inputs.append(self.data[index+1-self.span:index+1].values.tolist())
            return inputs
    
    def __getActialIndex__(self,ndx):
        inputs = []
        if type(ndx) == slice:
            for index in self.indices[ndx]:
                inputs.append(index)
        else:
            inputs = self.indices[ndx]

        return inputs
    
    def __getitem__(self, ndx):
        ins = numpy.array(self.__getInputs__(ndx), dtype=numpy.dtype('float32'))
        return ins
        #return ins, outputs
        
    def render(self, mode='human', close=False):
        '''
        '''
        ask_diff = 0
        sell_price = self.GET_CURRENT_BID()
        budget =0
        for position in self.askPositions:
            ask_diff += (sell_price - position['price'])/position['price']
            budget += position['volume']*sell_price*self.volume_point/self.leverage
        bid_diff = 0
        ask_price = self.GET_CURRENT_ASK()
        for position in self.bidPositions:
            bid_diff += (position['price'] - ask_price)/ask_price
            budget += position['volume']*ask_price*self.volume_point/self.leverage
        if self.orderCount > 0:
            winRate = self.winCount/self.orderCount
        else:
            winRate = -1
        print (f"budget:{self.budget} + {budget}, pl:{self.pl}, winRate:{winRate}")
    
    def close(self): # 環境を閉じて後処理をする
        '''
        '''
        pass
    
    def seed(self, seed=None): # ランダムシードを固定する
        '''
        '''
        if seed == None:
            random.seed(1017)
            torch.manual_seed(1017)
        else:
            torch.manual_seed(seed)
            random.seed(seed)

    def __settlement__(self, type, price=None):
        reward = 0
        # settlement bid position
        if type == "buy":
            if len(self.bidPositions) > 0:
                current_buy_rate = self.GET_CURRENT_ASK()
                for position in self.bidPositions:
                    reward += (position['price'] - current_buy_rate)/current_buy_rate
                    self.budget += (position['volume'] * self.volume_point * (position['price'] - current_buy_rate))/self.leverage
                    #print(f"BID SETTLEMENT: {position['price']} - {current_buy_rate} = {position['price'] - current_buy_rate}")
                    pl = position['price'] - current_buy_rate
                    self.pl += pl
                    if pl > 0:
                        self.winCount += 1
                self.bidPositions = [] 
            else:
                reward = self.INVALID_REWARD
        elif type == "sell":# settlement ask position
            if len(self.askPositions) > 0:
                current_sell_rate = self.GET_CURRENT_BID()
                for position in self.askPositions:
                    reward += (current_sell_rate - position['price'])/position['price']
                    self.budget += (position['volume'] * self.volume_point * (current_sell_rate - position['price']))/self.leverage
                    #print(f"ASK SETTLEMENT: {current_sell_rate} - {position['price']} = {current_sell_rate - position['price']}")
                    pl = current_sell_rate - position['price']
                    self.pl += pl
                    if pl >0:
                        self.winCount += 1
                self.askPositions = []
            else:
                reward = self.INVALID_REWARD
        return reward
    
    def __buy__(self, volume=0.1):
        current_buy_rate = self.GET_CURRENT_ASK()
        reward = 0
        required_budget = (volume * self.volume_point * current_buy_rate)/self.leverage
        if self.budget > required_budget:
            position = {'volume': volume, 'price': current_buy_rate}
            self.askPositions.append(position)
            self.budget = self.budget -  required_budget
        else:
            reward = self.INVALID_REWARD
        return reward
        
    def __sell__(self, volume=0.1):
        current_sell_rate = self.GET_CURRENT_BID()
        reward = 0
        required_budget = (volume * self.volume_point * current_sell_rate)/self.leverage
        if self.budget > required_budget:
            position = {'volume': volume, 'price': current_sell_rate}
            #if self.budget * self.leverage >= volume*self.volume_point * current_sell_rate:
            self.bidPositions.append(position)
            self.budget = self.budget -  required_budget
        else:
            reward = self.INVALID_REWARD
        return reward

device: cuda:0


In [2]:
env = FXEnv()

99935


In [9]:
obs = env.reset()

In [15]:
obs.shape

(1441, 9)