In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
#%matplotlib inline
import datetime
from dateutil.relativedelta import relativedelta

from finrl import config
from finrl import config_tickers
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline,convert_daily_return_to_pyfolio_ts
from finrl.meta.data_processor import DataProcessor
from finrl.meta.data_processors.processor_yahoofinance import YahooFinanceProcessor
import sys
sys.path.append("../FinRL-Library")



In [2]:
from pyfolio import timeseries

In [3]:
import os
if not os.path.exists("./" + config.DATA_SAVE_DIR):
    os.makedirs("./" + config.DATA_SAVE_DIR)
if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
    os.makedirs("./" + config.TRAINED_MODEL_DIR)
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)

<b>Дано:</b> Есть данные по некоторым ликвидным акциям (OHLCV). Цены по акциям учитывают дивидендные доходности, поэтому могут не совпадать с котировками, которые мы видим в текущий момент на рынке (но динамика ровно та же самая). Есть данные по фонду денежного рынка (OHLCV) - безрисковый актив (аналог накопительного счета). В данной работе котировки синтетические - сгенерированы на основе текущих ставок рефинансирования, так как большинство имеющихся на данный момент фондов ведут свое начало от 22го года и наоборот, фонды, которые были до 22го года прекратили свое существование. Есть котировки по дополнительным рядам, назовем их вспомогательными: IMOEX (индекс мосбиржи), GD (золото), BZ (нефть), USD (доллар США). Рассматриваемые даты 06.16-05.24. 

<b>Найти:</b> С помощью модели RL предполагается научиться эффективно распределять денежные средства между рисковыми бумагами и безрисковой.

 В качестве состояния в среде планируется использовать индикаторы технического анализа, корреляцию активов со вспомогательными рядам. Действие планируется MultiDiscrete - держать i-ю бумагу в портфеле или не держать. Распределение между бумагами отобранными в портфель равномерное (куплены на равные денежные суммы). Обязательно должны быть учтены комиссии за совершение сделок.

In [4]:
from DataLoader import DataLoader
from FeaturesAdder import FeaturesAdder
from PortfolioEnvBox import PortfolioEnv
%load_ext autoreload
%autoreload 2

In [5]:
loader = DataLoader()
df = loader.LoadData()

  df = pd.concat([df, dft[self.columns]], ignore_index=True)


In [6]:
df.tail(13)

Unnamed: 0,date,open,high,low,close,volume,tic
1995,2024-06-03,78.15,78.15,78.15,78.15,62488,BZ
1995,2024-06-03,2366.0,2366.0,2366.0,2366.0,49519,GD
1995,2024-06-03,10956.62,10958.36,10949.48,10954.24,18285800,GMKN
1995,2024-06-03,3217.19,3217.36,3087.99,3141.42,30769,IMOEX
1995,2024-06-03,10730.0,10814.5,10496.0,10587.0,1661232,LKOH
1995,2024-06-03,81.978,83.258,80.453,82.898,35974230,MAGN
1995,2024-06-03,168.532877,168.532877,168.532877,168.532877,1,MM
1995,2024-06-03,538.06,543.61,535.11,541.51,5116050,MTSS
1995,2024-06-03,1419.72,1434.32,1379.52,1407.92,2076053,NVTK
1995,2024-06-03,731.69,739.19,716.99,737.44,7536920,ROSN


In [7]:
cnt = df.groupby('date').count()
cnt[cnt["open"] > 25]

Unnamed: 0_level_0,open,high,low,close,volume,tic
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [8]:
df.shape

(25949, 7)

In [9]:
df.index = range(len(df))
#df = df.drop(df[df['tic'] == 'IMOEX'].index)
df = df.drop(df[df['tic'] == 'BZ'].index)
#df = df.drop(df[df['tic'] == 'GD'].index)
df = df.drop(df[df['tic'] == 'USD'].index)

In [10]:
df['volume'] = pd.to_numeric(df['volume'])

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 21957 entries, 1 to 25947
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    21957 non-null  datetime64[ns]
 1   open    21957 non-null  float64       
 2   high    21957 non-null  float64       
 3   low     21957 non-null  float64       
 4   close   21957 non-null  float64       
 5   volume  21957 non-null  int64         
 6   tic     21957 non-null  object        
dtypes: datetime64[ns](1), float64(4), int64(1), object(1)
memory usage: 1.3+ MB


In [12]:
fa = FeaturesAdder()
df = fa.Process(df)

Length of values (1999) does not match length of index (1997)
Length of values (1999) does not match length of index (1997)
Successfully added technical indicators


100%|██████████| 1974/1974 [00:17<00:00, 113.52it/s]


In [13]:
df.tail()

Unnamed: 0,date,open,high,low,close,volume,tic,rsi_12,cci_12,dx_12,cov_list,cov_xtra,return_list
17761,2024-06-03,538.06,543.61,535.11,541.51,5116050,MTSS,24.02614,-156.839237,92.666893,[],"[0.8621921517140547, 0.7537682741242231, 0.190...",tic GD GMKN IMOEX LKOH...
17762,2024-06-03,1419.72,1434.32,1379.52,1407.92,2076053,NVTK,17.001599,-113.120166,97.394474,[],"[0.8621921517140547, 0.7537682741242231, 0.190...",tic GD GMKN IMOEX LKOH...
17763,2024-06-03,731.69,739.19,716.99,737.44,7536920,ROSN,34.850714,-138.073283,88.485347,[],"[0.8621921517140547, 0.7537682741242231, 0.190...",tic GD GMKN IMOEX LKOH...
17764,2024-06-03,411.87,413.87,403.37,409.32,61548130,SBER,40.349783,-205.86022,60.821344,[],"[0.8621921517140547, 0.7537682741242231, 0.190...",tic GD GMKN IMOEX LKOH...
17765,2024-06-03,34.23,34.685,32.15,33.88,153852300,SNGS,23.582352,-132.27459,90.427869,[],"[0.8621921517140547, 0.7537682741242231, 0.190...",tic GD GMKN IMOEX LKOH...


Проверяем корректность рассчитанных FinRL индикаторов

In [14]:
import talib as ta
sberp_high  = df[df['tic']=='SBER']['high'].to_numpy()
sberp_low   = df[df['tic']=='SBER']['low'].to_numpy()
sberp_close = df[df['tic']=='SBER']['close'].to_numpy()

In [18]:
rsi12 = ta.RSI(sberp_close, timeperiod=12)
cci12 = ta.CCI(sberp_high,sberp_low,sberp_close, timeperiod=12)

In [19]:
print(df.iloc[17764][['rsi_12','cci_12']])
print(rsi12[-1], cci12[-1])

rsi_12    40.349783
cci_12   -205.86022
Name: 17764, dtype: object
40.349782550662574 -205.86022042419992


In [20]:
df.iloc[-1]['cov_xtra']

array([ 0.86219215,  0.75376827,  0.19098976, -0.72698068,  0.94901461,
        0.88841615,  0.83966724,  0.14126898,  0.82608292, -0.18563108,
       -0.28781645,  0.62203695,  0.45002973,  0.04749333, -0.12110855,
        0.35105434,  0.69348466, -0.24044705])

In [21]:
df.index

Index([    0,     1,     2,     3,     4,     5,     6,     7,     8,     9,
       ...
       17756, 17757, 17758, 17759, 17760, 17761, 17762, 17763, 17764, 17765],
      dtype='int64', length=17766)

In [22]:
df.index = df.date.factorize()[0]

In [23]:
import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import DummyVecEnv

In [24]:
stock_dimension = len(df.tic.unique())
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 9, State Space: 9


In [25]:
env_kwargs = {
    "hmax": 20,
    "initial_amount": 1000000, 
    "transaction_cost_pct": 0.001, 
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": fa.indicators, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4,
    "cov_xtra_names": fa.cov_xtra_names
}

In [61]:
e_train_gym = PortfolioEnv(df = df, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()

Ручная проверка действия в среде

In [62]:
e_train_gym.day, e_train_gym.portfolio_value, e_train_gym.cash, e_train_gym.current_state

(1409, 1000000, 1000000, array([0, 0, 0, 0, 0, 0, 0, 0, 0]))

In [63]:
act = e_train_gym.action_space.sample()
print(act)

[0.39291012 0.02095526 0.14783607 0.85654515 0.21874408 0.0918261
 0.6262958  0.58529353 0.24756992]


In [64]:
close_prices = df.loc[e_train_gym.day]['close'].to_numpy()
state, reward, term, _ = e_train_gym.step(act)
e_train_gym.portfolio_value, e_train_gym.cash, e_train_gym.current_state

(993304.832970959,
 14574.831601098063,
 array([  13,    0,  500, 1890,  144,   15,  284,  520, 1922]))

In [71]:
sum(close_prices * e_train_gym.current_state) + e_train_gym.cash
reward, e_train_gym.cash

(-0.006695167029040982, 14574.831601098063)

In [72]:
e_train_gym.date_memory, e_train_gym.asset_memory

([Timestamp('2022-02-09 00:00:00'), Timestamp('2022-02-10 00:00:00')],
 [1000000, 993304.832970959])

In [73]:
act2 = e_train_gym.action_space.sample()
print(act2)

[0.47467503 0.0613731  0.37635505 0.202459   0.44452304 0.51550364
 0.9187314  0.54571766 0.00973097]


In [74]:
state, reward, term, _ = e_train_gym.step(act2)
e_train_gym.portfolio_value, e_train_gym.cash, e_train_gym.current_state

(967301.3219926303,
 17763.54565016491,
 array([  14,    1, 1157,  398,  259,   80,  377,  438,   67]))

In [75]:
reward

-0.026178782298433678

In [36]:
def get_result(df_daily_return):
    DRL_strat = convert_daily_return_to_pyfolio_ts(df_daily_return)
    perf_func = timeseries.perf_stats 
    return perf_func( returns=DRL_strat, 
                                factor_returns=DRL_strat, 
                                positions=None, transactions=None, turnover_denom="AGB")

In [37]:
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise

In [97]:
def train_trade(alg, train, trade, optuna_params = None, log_name = None, size = None):
    #env_kwargs['lookback'] = lookback
    e_train_gym = PortfolioEnv(df = train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    
    policy_kwargs = None
    if size != None:
        policy_kwargs = {
            "net_arch": dict(pi=size, vf=size, qf=size),
        }

    agent = DRLAgent(env = env_train)

    model_params = None
    if optuna_params != None:
        model_params = optuna_params

    if log_name is None:
        log_name = alg

    model = agent.get_model(model_name = alg, model_kwargs = optuna_params, policy_kwargs = policy_kwargs, tensorboard_log = 'logs')

    trained_a2c = agent.train_model(model = model, 
                                tb_log_name = log_name,
                                total_timesteps = 100_000)

    e_trade_gym2 = PortfolioEnv(df = trade, reset_to_zero=True, **env_kwargs)

    df_daily_return, df_actions = DRLAgent.DRL_prediction(model=trained_a2c,
                        environment = e_trade_gym2)
    
    return trained_a2c, df_daily_return, df_actions

In [98]:
algs = ['a2c','ppo','ddpg','td3']
res = dict()
ret = dict()
act = dict()
for alg in algs:
  res[alg] = []
  ret[alg] = []
  act[alg] = []

for i in range(4):
  date0 = datetime.datetime(2016, 7, 1) #date_start + relativedelta(months = -12 * i)
  date1 = date_start + relativedelta(months = i)
  date2 = date_start + relativedelta(months = i + 1)

  train = data_split(df, date0, date1) #'2016-05-10', date1)
  trade = data_split(df, date1, date2)

  for alg in algs:
    print(date1, date2)
    print(alg)
    log_name = alg + '_' + str(7+i)
    model, df_daily_return, df_actions = train_trade(alg, train, trade, None, log_name=log_name, size=[192,128])
    res11 = get_result(df_daily_return)
    res[alg].append(res11)
    ret[alg].append(df_daily_return)
    act[alg].append(df_actions)
    print(alg, res11['Cumulative returns'], res11['Max drawdown'], res11['Sharpe ratio'])

2023-06-01 00:00:00 2023-07-01 00:00:00
a2c
{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cuda device
Logging to logs\a2c_7_1




begin_total_asset:1000000
end_total_asset:719524.5368556973
Sharpe:  -0.687220376953859
begin_total_asset:1000000
end_total_asset:1043864.6023834564
Sharpe:  1.7178230280318036
----------------------------------------
| time/                 |              |
|    fps                | 111          |
|    iterations         | 100          |
|    time_elapsed       | 4            |
|    total_timesteps    | 500          |
| train/                |              |
|    entropy_loss       | -12.9        |
|    explained_variance | -670         |
|    learning_rate      | 0.0007       |
|    n_updates          | 99           |
|    policy_loss        | 0.113        |
|    reward             | -0.003320432 |
|    std                | 1.02         |
|    value_loss         | 0.0204       |
----------------------------------------
-----------------------------------------
| time/                 |               |
|    fps                | 114           |
|    iterations         | 200           |



begin_total_asset:1000000
end_total_asset:495424.53082368866
Sharpe:  -1.5551896847008695
begin_total_asset:1000000
end_total_asset:653728.530948714
Sharpe:  -0.24163201816542934
-------------------------------------
| time/              |              |
|    fps             | 158          |
|    iterations      | 1            |
|    time_elapsed    | 12           |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | 0.0010504258 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:498915.56751732074
Sharpe:  -1.023337273440418
begin_total_asset:1000000
end_total_asset:851602.0871183821
Sharpe:  -0.9205809778519459
begin_total_asset:1000000
end_total_asset:728894.2635986041
Sharpe:  -0.790205972145257
begin_total_asset:1000000
end_total_asset:487364.5052485421
Sharpe:  -1.2536980066358783
-------------------------------------------
| time/                   |               |
|    fps                  | 137     



begin_total_asset:1000000
end_total_asset:1232438.075748562
Sharpe:  3.670904849170572
begin_total_asset:1000000
end_total_asset:1203580.8893079485
Sharpe:  1.1537467846731924
begin_total_asset:1000000
end_total_asset:2026107.8338831004
Sharpe:  0.8799426044873786
begin_total_asset:1000000
end_total_asset:2154168.5209610392
Sharpe:  0.7860219293028676
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 50          |
|    time_elapsed    | 65          |
|    total_timesteps | 3286        |
| train/             |             |
|    actor_loss      | -1.33       |
|    critic_loss     | 0.022       |
|    learning_rate   | 0.001       |
|    n_updates       | 3185        |
|    reward          | 0.008946231 |
------------------------------------
begin_total_asset:1000000
end_total_asset:1055574.3745324702
Sharpe:  0.20811011212483133
begin_total_asset:1000000
end_total_asset:1170940.4813693187
Sharpe:  0.444



begin_total_asset:1000000
end_total_asset:1337484.6154504768
Sharpe:  0.5604965628896144
begin_total_asset:1000000
end_total_asset:2525078.6713488144
Sharpe:  0.9562940883786597
begin_total_asset:1000000
end_total_asset:2206178.7396145365
Sharpe:  1.004216772559439
begin_total_asset:1000000
end_total_asset:3515395.791989664
Sharpe:  1.1797216593260784
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 46           |
|    time_elapsed    | 114          |
|    total_timesteps | 5347         |
| train/             |              |
|    actor_loss      | 1.04         |
|    critic_loss     | 0.0692       |
|    learning_rate   | 0.001        |
|    n_updates       | 5246         |
|    reward          | 0.0016252438 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1229966.808454318
Sharpe:  2.0461963676850323
begin_total_asset:1000000
end_total_asset:2278318.9578142464
Sha



begin_total_asset:1000000
end_total_asset:1109162.2109252962
Sharpe:  1.4408296185268907
-----------------------------------------
| time/                 |               |
|    fps                | 118           |
|    iterations         | 100           |
|    time_elapsed       | 4             |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -12.9         |
|    explained_variance | -167          |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | -1.79         |
|    reward             | -0.0053911977 |
|    std                | 1.01          |
|    value_loss         | 0.0623        |
-----------------------------------------
----------------------------------------
| time/                 |              |
|    fps                | 120          |
|    iterations         | 200          |
|    time_elapsed       | 8            |
|    total_timesteps    | 1000    



begin_total_asset:1000000
end_total_asset:487630.7236384629
Sharpe:  -0.8139852434864696
begin_total_asset:1000000
end_total_asset:1044283.2870445832
Sharpe:  0.2719022922326578
begin_total_asset:1000000
end_total_asset:635258.4946172474
Sharpe:  -0.6411587219282238
begin_total_asset:1000000
end_total_asset:1049210.623584813
Sharpe:  1.7256049311338182
begin_total_asset:1000000
end_total_asset:1106986.1866676144
Sharpe:  1.4624734805146686
-------------------------------------
| time/              |              |
|    fps             | 149          |
|    iterations      | 1            |
|    time_elapsed    | 13           |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | 0.0063706846 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:785852.6787248191
Sharpe:  -0.35025639123891955
begin_total_asset:1000000
end_total_asset:408755.02094489196
Sharpe:  -0.7497122425003304
----------------------------------



begin_total_asset:1000000
end_total_asset:2538185.401459045
Sharpe:  1.2709005292964999
begin_total_asset:1000000
end_total_asset:2238793.219904697
Sharpe:  0.9572283160090512
begin_total_asset:1000000
end_total_asset:1238919.053290769
Sharpe:  3.538275763837333
begin_total_asset:1000000
end_total_asset:1117237.569203175
Sharpe:  0.32364690109593297
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 49           |
|    time_elapsed    | 67           |
|    total_timesteps | 3339         |
| train/             |              |
|    actor_loss      | -6.65        |
|    critic_loss     | 0.0207       |
|    learning_rate   | 0.001        |
|    n_updates       | 3238         |
|    reward          | 0.0015391117 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1339092.6276681516
Sharpe:  0.5424635040584369
begin_total_asset:1000000
end_total_asset:1451987.9607434913
Shar



begin_total_asset:1000000
end_total_asset:1211531.037561851
Sharpe:  0.4822689231492254
begin_total_asset:1000000
end_total_asset:1751278.9574590188
Sharpe:  0.5847554995322674
begin_total_asset:1000000
end_total_asset:1404223.4465979324
Sharpe:  0.8498558320099519
begin_total_asset:1000000
end_total_asset:1072486.0404752947
Sharpe:  0.2996490875050017
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 52           |
|    time_elapsed    | 60           |
|    total_timesteps | 3200         |
| train/             |              |
|    actor_loss      | 9.71         |
|    critic_loss     | 0.0738       |
|    learning_rate   | 0.001        |
|    n_updates       | 3099         |
|    reward          | 0.0010309007 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1826798.548497802
Sharpe:  1.070419228454852
begin_total_asset:1000000
end_total_asset:1132198.892826279
Shar



--------------------------------------
| time/                 |            |
|    fps                | 98         |
|    iterations         | 100        |
|    time_elapsed       | 5          |
|    total_timesteps    | 500        |
| train/                |            |
|    entropy_loss       | -12.8      |
|    explained_variance | -766       |
|    learning_rate      | 0.0007     |
|    n_updates          | 99         |
|    policy_loss        | -1.15      |
|    reward             | 0.00569981 |
|    std                | 1          |
|    value_loss         | 0.0481     |
--------------------------------------
begin_total_asset:1000000
end_total_asset:778895.764966996
Sharpe:  -0.1603604256390883
----------------------------------------
| time/                 |              |
|    fps                | 101          |
|    iterations         | 200          |
|    time_elapsed       | 9            |
|    total_timesteps    | 1000         |
| train/                |              |
|

KeyboardInterrupt: 

In [99]:
returns = dict()
max_dd = dict()
sharp = dict()
for alg in algs:
    returns[alg] = []
    max_dd[alg]  = []
    sharp[alg] = []
    for result in res[alg]:
        returns[alg].append(result['Cumulative returns'])
        max_dd[alg].append(result['Max drawdown'])
        sharp[alg].append(result['Sharpe ratio'])

In [100]:
returns

{'a2c': [0.0332920123153011, 0.06085893780299956],
 'ppo': [0.03953620404201441, 0.024666200455438547],
 'ddpg': [0.026988995661685555, 0.035048912451205894],
 'td3': [-0.024519644558903098, 0.052019653794041165]}

In [101]:
max_dd

{'a2c': [-0.01912261916057049, -0.012374844113516019],
 'ppo': [-0.02651635797445672, -0.012991885032946042],
 'ddpg': [-0.011239425238825804, -0.009172270663675305],
 'td3': [-0.04096272540703771, -0.009193375016478525]}

In [102]:
sharp

{'a2c': [3.833168087493906, 8.97123635701111],
 'ppo': [3.2310244627572664, 3.5105472193801033],
 'ddpg': [3.4959572702783923, 4.9218857994790115],
 'td3': [-1.9974839389017551, 8.541669149776345]}

In [84]:
res = dict()
ret = dict()
act = dict()

date_start = datetime.datetime(2023, 6, 1)
date0 = datetime.datetime(2016, 8, 1) #date_start + relativedelta(months = -12 * i)
date1 = date_start + relativedelta(months = 0)
date2 = date_start + relativedelta(months = 0 + 1)

print(date1, date2)
train = data_split(df, date0, date1) #'2016-05-10', date1)
trade = data_split(df, date1, date2)

2023-06-01 00:00:00 2023-07-01 00:00:00


In [85]:
alg = ''
optuna_params = None
#optuna_params = {"learning_rate": 1e-2, "buffer_size": 100_000, "action_noise": "ornstein_uhlenbeck", "target_policy_noise" : 0.5}
#optuna_params = {"learning_rate": 2e-5, "ent_coef": 0.005}
#optuna_params = {"ent_coef": 1, "learning_rate": 0.001}
#optuna_params = {"batch_size": 128, "buffer_size": 100000, "learning_rate": 0.0003, "learning_starts": 100, "ent_coef": "auto_0.1"}
log_name = alg + '_final_td3_' + str(9)
model, df_daily_return, df_actions = train_trade(alg, train, trade, optuna_params, log_name=log_name, size=[192,128])

{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cuda device
Logging to logs\td3_final_td3_9_5




begin_total_asset:1000000
end_total_asset:1997314.546450376
Sharpe:  0.7122777451925169
begin_total_asset:1000000
end_total_asset:1117325.570132
Sharpe:  0.334220476387253
begin_total_asset:1000000
end_total_asset:2127900.881390989
Sharpe:  0.7154155888863315
begin_total_asset:1000000
end_total_asset:1277461.7440616605
Sharpe:  0.43157322086451216
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 49           |
|    time_elapsed    | 93           |
|    total_timesteps | 4680         |
| train/             |              |
|    actor_loss      | 2.29         |
|    critic_loss     | 0.0414       |
|    learning_rate   | 0.001        |
|    n_updates       | 4579         |
|    reward          | 0.0069790105 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:867188.1873660703
Sharpe:  -0.2722223633270397
begin_total_asset:1000000
end_total_asset:1207534.0232728783
Sharpe

In [86]:
e_trade_gym2 = PortfolioEnv(df = trade, reset_to_zero=True, **env_kwargs)
state = e_trade_gym2.state

In [87]:
pred = model.predict(state)

In [88]:
pred

(array([1., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32), None)

In [89]:
state, rew, term, _ = e_trade_gym2.step(pred)

In [90]:
rt = df_daily_return['daily_return'].to_numpy()

In [91]:
rt

array([ 0.        , -0.00079063, -0.00983513,  0.00648143, -0.00202162,
       -0.00188254,  0.00154436,  0.0064209 , -0.00249567,  0.00350078,
       -0.00142935,  0.00618597, -0.00177391, -0.00159547, -0.00301549,
       -0.01714915, -0.00099922,  0.02241922, -0.00190196,  0.02050364,
       -0.00149597])

In [92]:
np.prod(rt+1)

1.020130803280495

In [93]:
get_result(df_daily_return)

Annual return           0.270195
Cumulative returns      0.020131
Annual volatility       0.135994
Sharpe ratio            1.823920
Calmar ratio           11.079189
Stability               0.149051
Max drawdown           -0.024388
Omega ratio             1.445612
Sortino ratio           3.457097
Skew                    0.837484
Kurtosis                1.693009
Tail ratio              2.084736
Daily value at risk    -0.016149
Alpha                   0.000000
Beta                    1.000000
dtype: float64

In [134]:
trade.loc[0]['tic']

0    GMKN
0    LKOH
0    MAGN
0      MM
0    MTSS
0    NVTK
0    ROSN
0    SBER
0    SNGS
Name: tic, dtype: object

In [135]:
trade

Unnamed: 0,date,open,high,low,close,volume,tic,rsi_12,cci_12,dx_12,cov_list,cov_xtra,return_list
0,2023-08-01,10068.330000,10071.630000,10067.150000,10070.390000,25106100,GMKN,76.424845,215.142370,84.088373,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
0,2023-08-01,8830.000000,8937.000000,8771.000000,8896.000000,1324998,LKOH,83.042272,158.752567,82.638239,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
0,2023-08-01,80.048000,80.078000,79.478000,79.818000,17212120,MAGN,75.466668,106.126368,65.791820,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
0,2023-08-01,156.172603,156.172603,156.172603,156.172603,1,MM,100.000000,129.477912,100.000000,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
0,2023-08-01,545.110000,546.910000,543.110000,545.010000,4802040,MTSS,45.977731,-40.534283,5.922008,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22,2023-08-31,541.060000,542.110000,537.610000,538.710000,5301270,MTSS,41.667802,-66.093117,33.558085,[],"[0.5870239002247921, 0.8765191268071332, 0.732...",tic GD GMKN IMOEX LKOH ...
22,2023-08-31,1987.820000,2004.020000,1970.620000,1984.620000,832522,NVTK,67.993667,72.748710,53.094191,[],"[0.5870239002247921, 0.8765191268071332, 0.732...",tic GD GMKN IMOEX LKOH ...
22,2023-08-31,731.690000,732.940000,728.040000,730.640000,2565110,ROSN,66.340793,120.269856,51.537526,[],"[0.5870239002247921, 0.8765191268071332, 0.732...",tic GD GMKN IMOEX LKOH ...
22,2023-08-31,363.420000,364.620000,362.670000,363.220000,21957550,SBER,58.652072,94.074527,13.929651,[],"[0.5870239002247921, 0.8765191268071332, 0.732...",tic GD GMKN IMOEX LKOH ...


In [106]:
e_train_gym = PortfolioEnv(df = train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()
agent = DRLAgent(env = env_train)
model = agent.get_model(model_name = alg, model_kwargs = optuna_params, policy_kwargs = None, tensorboard_log = 'logs')

trained_a2c = agent.train_model(model = model, 
                                tb_log_name = log_name,
                                total_timesteps = 30_000)


{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cuda device
Logging to logs\a2c_final_a2c_9_4
-----------------------------------------
| time/                 |               |
|    fps                | 29            |
|    iterations         | 100           |
|    time_elapsed       | 16            |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -12.7         |
|    explained_variance | -193          |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | -4.47         |
|    reward             | -0.0020968118 |
|    value_loss         | 0.143         |
-----------------------------------------
begin_total_asset:1000000
end_total_asset:677590.5788263523
Sharpe:  -0.7154921932669398
----------------------------------------
| time/                 |              |
|    fps                | 30           |
|    iterations         | 200          |
|    ti

In [107]:
e_trade_gym2 = PortfolioEnv(df = trade, reset_to_zero=True, **env_kwargs)

state = e_trade_gym2.state


In [126]:
pred = trained_a2c.predict(state)

In [127]:
pred

(array([0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1],
       dtype=int64),
 None)

In [128]:
state, rew, term, _ = e_trade_gym2.step(pred)

In [129]:
rew

0.007766173104209378

In [158]:
res[alg] = []
ret[alg] = []
act[alg] = []
res[alg].append(get_result(df_daily_return))
ret[alg].append(df_daily_return)
act[alg].append(df_actions)

In [159]:
res

{'td3': [Annual return         -0.165986
  Cumulative returns    -0.015012
  Annual volatility      0.150527
  Sharpe ratio          -1.133765
  Calmar ratio          -2.711280
  Stability              0.427691
  Max drawdown          -0.061220
  Omega ratio            0.842712
  Sortino ratio         -1.616779
  Skew                   0.319968
  Kurtosis              -0.931438
  Tail ratio             1.156678
  Daily value at risk   -0.019642
  Alpha                  0.000000
  Beta                   1.000000
  dtype: float64]}

In [41]:
import optuna

In [45]:
loader2 = DataLoader()
df2 = loader2.LoadData()

df2.index = range(len(df2))
#df = df.drop(df[df['tic'] == 'IMOEX'].index)
df2 = df2.drop(df2[df2['tic'] == 'BZ'].index)
#df = df.drop(df[df['tic'] == 'GD'].index)
df2 = df2.drop(df2[df2['tic'] == 'USD'].index)

fa2 = FeaturesAdder(22)
df2 = fa2.Process(df2)

df2.index = df2.date.factorize()[0]

  df = pd.concat([df, dft[self.columns]], ignore_index=True)


Length of values (1999) does not match length of index (1997)
Length of values (1999) does not match length of index (1997)
Successfully added technical indicators


  df = df.ffill().bfill()
100%|██████████| 1974/1974 [00:17<00:00, 114.48it/s]


In [59]:
def objective(trial):

    loader2 = DataLoader()
    df2 = loader2.LoadData()

    df2.index = range(len(df2))
    #df = df.drop(df[df['tic'] == 'IMOEX'].index)
    df2 = df2.drop(df2[df2['tic'] == 'BZ'].index)
    #df = df.drop(df[df['tic'] == 'GD'].index)
    df2 = df2.drop(df2[df2['tic'] == 'USD'].index)
    lookback = 22#trial.suggest_categorical('lookback', [252])
    fa2 = FeaturesAdder(lookback)
    df2 = fa2.Process(df2)
    #tau = trial.suggest_categorical('tau', [0.001, 0.005, 0.01])

    #sigma = trial.suggest_float('sigma', 0, 1)
    bsize = trial.suggest_categorical('bsize', [10000, 50000, 100000])
    noise = trial.suggest_categorical('noise', ["ornstein_uhlenbeck", "normal"])

    #n_actions = stock_dimension
    #action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=sigma * np.ones(n_actions), theta=theta)
    # size1 = trial.suggest_categorical('size1', [32, 64, 128])
    # size2 = size1 * 2
    size = [128, 64]
    #ent_coef = trial.suggest_float('ent_coef', 0, 0.05)
    learning_rate = trial.suggest_float("learning_rate", 5e-6, 5e-3, log=True)
    #lookback = trial.suggest_categorical('lookback', [5, 10, 15, 22, 44, 66])

    sum_reward = 0
    sum_maxdd  = 0

    for i in range(4):
        date1 = date_start + relativedelta(months = i)
        date2 = date_start + relativedelta(months = i + 1)
        train = data_split(df2, '2016-05-10', date1)
        trade = data_split(df2, date1, date2)

        optuna_params = {"learning_rate": learning_rate, "buffer_size": bsize, "action_noise": "ornstein_uhlenbeck"}
        #optuna_params = {"learning_rate": learning_rate}#, "ent_coef": ent_coef}# "action_noise": "ornstein_uhlenbeck"}
        log_name = str(size) + '_' + str(lookback) + '_' + str(i)
        model, df_daily_return, df_actions = train_trade('td3', train, trade, None, log_name, size)
        
        result = get_result(df_daily_return)
        reward = result['Cumulative returns']
        max_dd = result['Max drawdown']
        sharp  = result['Sharpe ratio']

        print("Rew, DD, Sh", reward, max_dd, sharp)
        sum_reward += reward
        sum_maxdd += max_dd

    print("Total", sum_reward, sum_maxdd, sharp)
    return sum_reward

study = optuna.create_study(directions=["maximize"])
study.optimize(objective, n_trials=20)

[I 2024-06-23 14:26:08,936] A new study created in memory with name: no-name-eda3bef4-e5cb-4542-9e78-966bed97e7fc
  df = pd.concat([df, dft[self.columns]], ignore_index=True)


Length of values (1999) does not match length of index (1997)
Length of values (1999) does not match length of index (1997)


  df = df.ffill().bfill()


Successfully added technical indicators


100%|██████████| 1974/1974 [00:36<00:00, 53.46it/s]


{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cuda device
Logging to logs\[128, 64]_22_0_1




begin_total_asset:1000000
end_total_asset:1328804.5761858234
Sharpe:  0.5611658994842432
begin_total_asset:1000000
end_total_asset:1103836.133373412
Sharpe:  4.10179896083965
begin_total_asset:1000000
end_total_asset:1820169.431765897
Sharpe:  0.7969255424660227
begin_total_asset:1000000
end_total_asset:1333812.7718168045
Sharpe:  0.5996255303696812
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 44            |
|    time_elapsed    | 67            |
|    total_timesteps | 2982          |
| train/             |               |
|    actor_loss      | 1.47          |
|    critic_loss     | 0.114         |
|    learning_rate   | 0.001         |
|    n_updates       | 2881          |
|    reward          | 0.00090358907 |
--------------------------------------
begin_total_asset:1000000
end_total_asset:1107334.4305786227
Sharpe:  0.4024363943306529
begin_total_asset:1000000
end_total_asset:1018132.60



begin_total_asset:1000000
end_total_asset:1318650.4135951765
Sharpe:  0.5284407138969732
begin_total_asset:1000000
end_total_asset:892724.9130789972
Sharpe:  -0.10934792520271971
begin_total_asset:1000000
end_total_asset:1552342.927234988
Sharpe:  0.5287890287253493
begin_total_asset:1000000
end_total_asset:989830.0674499976
Sharpe:  0.11941069750924747
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 50           |
|    time_elapsed    | 56           |
|    total_timesteps | 2869         |
| train/             |              |
|    actor_loss      | -4.04        |
|    critic_loss     | 0.108        |
|    learning_rate   | 0.001        |
|    n_updates       | 2768         |
|    reward          | -0.001113208 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:942752.0679029975
Sharpe:  0.011518549135084648
begin_total_asset:1000000
end_total_asset:1727219.4953959861



begin_total_asset:1000000
end_total_asset:1980415.4168697067
Sharpe:  0.8011031745757455
begin_total_asset:1000000
end_total_asset:1717016.8685665885
Sharpe:  0.9291492766166308
begin_total_asset:1000000
end_total_asset:1469591.0161674125
Sharpe:  0.6434573503934055
begin_total_asset:1000000
end_total_asset:1150816.2652700073
Sharpe:  0.4576295083108517
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 47          |
|    time_elapsed    | 74          |
|    total_timesteps | 3523        |
| train/             |             |
|    actor_loss      | 2.08        |
|    critic_loss     | 0.049       |
|    learning_rate   | 0.001       |
|    n_updates       | 3422        |
|    reward          | 0.017000806 |
------------------------------------
begin_total_asset:1000000
end_total_asset:1826971.4826673293
Sharpe:  0.7841686187813658
begin_total_asset:1000000
end_total_asset:1277429.8361853443
Sharpe:  4.96

[W 2024-06-23 15:22:09,944] Trial 0 failed with parameters: {'bsize': 100000, 'noise': 'normal', 'learning_rate': 7.899454013765834e-05} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\Alex\AppData\Local\Temp\ipykernel_45272\2432288324.py", line 41, in objective
    model, df_daily_return, df_actions = train_trade('td3', train, trade, None, log_name, size)
  File "C:\Users\Alex\AppData\Local\Temp\ipykernel_45272\1449868205.py", line 23, in train_trade
    trained_a2c = agent.train_model(model = model,
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\finrl\agents\stablebaselines3\models.py", line 117, in train_model
    model = model.learn(
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\stable_baselines3\td3\td3.py", line 222, in learn
    return super().learn(

KeyboardInterrupt: 