In [1]:
import datetime
import gym_trading_env
import numpy as np
import pandas as pd

from gym_trading_env.downloader import download

  from pandas.core import (


In [2]:
# Fetch hourly BTC/USDT candles from Binance, starting 2024-10-01, into ./data.
download(
    exchange_names=["binance"],
    symbols=["BTC/USDT"],
    timeframe="1h",
    dir="data",
    since=datetime.datetime(2024, 10, 1),
)

BTC/USDT downloaded from binance and stored at data/binance-BTCUSDT-1h.pkl


In [3]:
# Load the candles written by the download cell and preview the first rows.
df = pd.read_pickle(filepath_or_buffer="./data/binance-BTCUSDT-1h.pkl")
df.head(n=5)

Unnamed: 0_level_0,open,high,low,close,volume,date_close
date_open,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-09-30 22:00:00,63706.93,63771.11,63480.0,63481.98,406.67834,2024-09-30 23:00:00
2024-09-30 23:00:00,63481.99,63617.45,62856.3,63327.59,1548.15582,2024-10-01 00:00:00
2024-10-01 00:00:00,63327.6,63606.0,63006.7,63531.99,1336.93335,2024-10-01 01:00:00
2024-10-01 01:00:00,63532.0,63639.86,63370.01,63458.0,1004.08763,2024-10-01 02:00:00
2024-10-01 02:00:00,63458.0,63458.0,63180.0,63443.76,716.11822,2024-10-01 03:00:00


In [4]:
# Confirm the candles are in chronological order before stepping through them.
print(df.index.is_monotonic_increasing)
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# display() only appended a stray "None" to the cell output.
df.info()

True
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1119 entries, 2024-09-30 22:00:00 to 2024-11-16 12:00:00
Data columns (total 6 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   open        1119 non-null   float64       
 1   high        1119 non-null   float64       
 2   low         1119 non-null   float64       
 3   close       1119 non-null   float64       
 4   volume      1119 non-null   float64       
 5   date_close  1119 non-null   datetime64[ns]
dtypes: datetime64[ns](1), float64(5)
memory usage: 61.2 KB


None

In [5]:
class Trader:
    """Random-action trading simulator over a series of prices.

    The trader starts in the 'stop' state (no open position) with ``capital``
    on its bank account and, at every step, applies one randomly chosen action
    ('buy', 'hold' or 'sell') at the current price. Every step is recorded as a
    dict in ``self.caches``; ``make_steps()`` returns them as a DataFrame.

    Parameters
    ----------
    N : int
        Number of prices to use. Values larger than ``len(data)`` are clamped.
        ``make_steps()`` performs ``N - 1`` steps on top of the initial record.
    data : pandas.Series, numpy.ndarray or sequence
        Prices, in chronological order.
    capital : float
        Starting cash; also the baseline the reward is measured against.
    seed : int, optional
        When given, all randomness (actions and trade sizes) comes from a
        private ``numpy.random.Generator`` seeded with it, so the run is
        reproducible. When omitted, the global ``np.random`` stream is used,
        so an outer ``np.random.seed(...)`` still takes effect.

    Notes
    -----
    A 'short' state is never actually reached: selling with an empty portfolio
    yields quantity 0, and a zero-quantity step keeps the previous state.
    """

    ACTIONS = ('buy', 'hold', 'sell')

    def __init__(self, N, data, capital, seed=None):
        # np.asarray accepts a Series, an ndarray or a plain sequence alike.
        self.data = np.asarray(data)[:N]
        # Clamp N so make_steps() never indexes past the available prices.
        self.N = min(N, len(self.data))
        self.capital = capital

        self._rng = np.random.default_rng(seed) if seed is not None else None
        n_actions = max(self.N, 0)
        if self._rng is None:
            self.actions = [np.random.choice(list(self.ACTIONS)) for _ in range(n_actions)]
        else:
            self.actions = self._rng.choice(list(self.ACTIONS), size=n_actions).tolist()

        # Record 0 is the starting point every simulation run is built on.
        self.caches = [{'state': 'stop', 'action': 'hold', 'quant.': 0, 'cost': 0, 'portfolio': 0,
                    'bank_account': capital,  'portfolio_value': capital, 'rewards': 0,  'message': 'inizialization'}]

    @staticmethod
    def _draw_units(upper, rng=None):
        """Return a uniformly random whole number of units in ``[1, upper]``."""
        if rng is None:
            return int(np.random.randint(1, upper + 1))
        return int(rng.integers(1, upper + 1))

    @classmethod
    def buy(cls, cur_cost, last_bacc, last_portfolio, close_position=False, rng=None):
        """Execute a buy order and return ``(qty, bank_account, message)``.

        Parameters
        ----------
        cur_cost : float
            Current price of one unit.
        last_bacc : float
            Cash available before the order.
        last_portfolio : int
            Units held before the order (negative for a short position).
        close_position : bool
            If True, buy back exactly ``abs(last_portfolio)`` units (cover a
            short). Otherwise buy a random number of affordable units.
        rng : numpy.random.Generator, optional
            Source of randomness; defaults to the global ``np.random`` stream.

        Returns
        -------
        tuple
            ``qty`` is the number of units added to the portfolio (>= 0).
        """
        if close_position:
            msg = 'short: market stop'
            # Covering a short means buying |position| units; using the signed
            # value would credit cash and deepen the short instead.
            qty = abs(last_portfolio)
            cur_bacc = last_bacc - qty * cur_cost
            return qty, cur_bacc, msg

        affordable = last_bacc // cur_cost
        if affordable > 0:
            msg = 'buy: processed'
            qty = cls._draw_units(int(affordable), rng)
            cur_bacc = last_bacc - qty * cur_cost
        elif affordable == 0 and last_portfolio == 0:
            # Neither cash for a single unit nor anything left to sell.
            msg = 'fired'
            qty = 0
            cur_bacc = last_bacc
        else:
            msg = 'buy: no money to purchase, next step'
            qty = 0
            cur_bacc = last_bacc

        return qty, cur_bacc, msg

    @classmethod
    def sell(cls, cur_cost, last_bacc, last_portfolio, close_position=False, rng=None):
        """Execute a sell order and return ``(qty, bank_account, message)``.

        Parameters mirror :meth:`buy`. With ``close_position=True`` the whole
        holding is sold; otherwise a random number of held units is sold.

        Returns
        -------
        tuple
            ``qty`` is the signed change of the portfolio (<= 0 for a sale).
        """
        if close_position:
            msg = 'long: market stop'
            qty = last_portfolio
            cur_bacc = last_bacc + qty * cur_cost
        elif last_portfolio > 0:
            msg = 'sell: processed'
            qty = cls._draw_units(max(1, int(last_portfolio)), rng)
            cur_bacc = last_bacc + qty * cur_cost
        elif last_portfolio == 0 and last_bacc < cur_cost:
            # Nothing to sell and not enough cash to buy a single unit.
            msg = 'fired'
            qty = 0
            cur_bacc = last_bacc
        else:
            msg = 'sell: nothing to sell, next step'
            qty = 0
            cur_bacc = last_bacc

        return -1 * qty, cur_bacc, msg

    def make_steps(self):
        """Run the simulation and return the step log as a DataFrame.

        Step ``i`` (1 <= i < N) applies ``self.actions[i-1]`` at price
        ``self.data[i-1]``. The reward of every step, including 'hold', is
        ``portfolio_value - capital``.

        The log is rebuilt from the initial record on every call, so calling
        this method repeatedly does not accumulate rows.

        Raises
        ------
        ValueError
            If ``self.actions`` contains something other than
            'buy', 'hold' or 'sell'.
        """
        # Start over from the initial record so the method is re-entrant.
        self.caches = self.caches[:1]

        for i in range(1, self.N):
            previous = self.caches[i-1]
            cur_cost = self.data[i-1]
            cur_action = self.actions[i-1]
            last_state = previous['state']
            last_portfolio = previous['portfolio']
            last_bacc = previous['bank_account']

            if cur_action == 'hold':
                qty = 0
                cur_bacc = last_bacc
                cur_state = last_state
                msg = f'{last_state}: hold'

            elif cur_action == 'buy':
                # BUY: stop -> long, long -> long, short -> stop (cover).
                closing = last_state == 'short'
                qty, cur_bacc, msg = self.buy(cur_cost, last_bacc, last_portfolio,
                                              close_position=closing, rng=self._rng)
                cur_state = 'stop' if closing else 'long'

            elif cur_action == 'sell':
                # SELL: stop -> short, short -> short, long -> stop (close).
                closing = last_state == 'long'
                qty, cur_bacc, msg = self.sell(cur_cost, last_bacc, last_portfolio,
                                               close_position=closing, rng=self._rng)
                cur_state = 'stop' if closing else 'short'

            else:
                raise ValueError(f'unknown action: {cur_action!r}')

            # An order that traded nothing must not change the position state.
            if qty == 0:
                cur_state = last_state

            cur_portfolio = last_portfolio + qty
            cur_portfolio_value = cur_bacc + cur_cost * cur_portfolio
            cur_reward = cur_portfolio_value - self.capital

            self.caches.append({'state': cur_state, 'action': cur_action, 'quant.': qty, 'cost': cur_cost,
                                'portfolio': cur_portfolio, 'bank_account': cur_bacc,
                                'portfolio_value': cur_portfolio_value, 'rewards': cur_reward, 'message': msg})

        return pd.DataFrame(self.caches)

In [6]:
# Actions and trade sizes are random; fix the global NumPy seed so that a
# "Restart & Run All" reproduces the same log.
np.random.seed(42)

# Simulate the first 100 hourly closes with 5,000,000 of starting capital.
t = Trader(N=100, data=df['close'], capital=5*1e6)
log = t.make_steps()

# Raise the row limit so the whole log is rendered rather than truncated.
pd.set_option('display.max_rows', 300)
display(log)

Unnamed: 0,state,action,quant.,cost,portfolio,bank_account,portfolio_value,rewards,message
0,stop,hold,0.0,0.0,0.0,5000000.0,5000000.0,0.0,inizialization
1,long,buy,64.0,63481.98,64.0,937153.28,5000000.0,0.0,buy: processed
2,long,hold,0.0,63327.59,64.0,937153.28,4990119.04,-9880.96,long: hold
3,long,buy,5.0,63531.99,69.0,619493.33,5003200.64,3200.64,buy: processed
4,long,hold,0.0,63458.0,69.0,619493.33,4998095.33,-1904.67,long: hold
5,long,buy,7.0,63443.76,76.0,175387.01,4997112.77,-2887.23,buy: processed
6,long,buy,2.0,63723.48,78.0,47940.05,5018371.49,18371.49,buy: processed
7,stop,sell,-78.0,63868.94,0.0,5029717.37,5029717.37,29717.37,long: market stop
8,stop,sell,0.0,63749.99,0.0,5029717.37,5029717.37,29717.37,"sell: nothing to sell, next step"
9,long,buy,4.0,64033.98,4.0,4773581.45,5029717.37,29717.37,buy: processed


# Описание

Награда за действие ("reward") рассчитывается в методе make_steps() по формуле: "(стоимость портфеля + средства на счёте) − начальный капитал" (cur_reward = cur_portfolio_value - self.capital). При действии "HOLD" награда вычисляется по той же формуле и меняется вместе со стоимостью активов в портфеле. Для удобства, помимо позиций LONG и SHORT, дополнительно введена позиция MARKET STOP — состояние без открытой позиции (портфель пуст): с неё начинается симуляция, и в неё трейдер возвращается после закрытия позиции.

Логика работы метода make_steps():

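- При действии "HOLD":
  - из позиции LONG: не меняется;
  - из позиции SHORT: не меняется;
  - из позиции MARKET STOP: не меняется.
- При действии "BUY":
  - из позиции LONG: сохраняется LONG;
  - из позиции SHORT: переходит в MARKET STOP;
  - из позиции MARKET STOP: переходит в LONG.
- При действии "SELL":
  - из позиции LONG: переходит в MARKET STOP;
  - из позиции SHORT: сохраняется SHORT;
  - из позиции MARKET STOP: переходит в SHORT.

Если сделка не состоялась (количество равно 0 — например, не хватает средств на покупку или нечего продавать), позиция остаётся прежней. Поэтому при пустом портфеле действие "SELL" из MARKET STOP фактически оставляет трейдера в MARKET STOP.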
 
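Для симуляции использовались цены закрытия "BTC/USDT" с таймфреймом 1 час: данные загружены начиная с 1 октября 2024 года (в таблице — свечи с 30.09.2024 22:00 по 16.11.2024 12:00), а в самой симуляции участвуют только первые 100 значений (N=100), то есть начало октября 2024 года.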