In [1]:
import pandas as pd
import numpy as np
import random
import matplotlib
import matplotlib.pyplot as plt
#%matplotlib inline
import datetime
from dateutil.relativedelta import relativedelta

from finrl import config
from finrl import config_tickers
from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl.meta.env_portfolio_allocation.env_portfolio import StockPortfolioEnv
from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.plot import backtest_stats, backtest_plot, get_daily_return, get_baseline,convert_daily_return_to_pyfolio_ts
from finrl.meta.data_processor import DataProcessor
from finrl.meta.data_processors.processor_yahoofinance import YahooFinanceProcessor
import sys
sys.path.append("../FinRL-Library")



In [2]:
from pyfolio import timeseries

In [3]:
import os
if not os.path.exists("./" + config.DATA_SAVE_DIR):
    os.makedirs("./" + config.DATA_SAVE_DIR)
if not os.path.exists("./" + config.TRAINED_MODEL_DIR):
    os.makedirs("./" + config.TRAINED_MODEL_DIR)
if not os.path.exists("./" + config.TENSORBOARD_LOG_DIR):
    os.makedirs("./" + config.TENSORBOARD_LOG_DIR)
if not os.path.exists("./" + config.RESULTS_DIR):
    os.makedirs("./" + config.RESULTS_DIR)

<b>Дано:</b> Есть данные по некоторым ликвидным акциям (OHLCV). Цены по акциям учитывают дивидендные доходности, поэтому могут не совпадать с котировками, которые мы видим в текущий момент на рынке (но динамика ровно та же самая, но без гэпов). Есть данные по фонду денежного рынка (OHLCV) - безрисковый актив (аналог накопительного счета). В данной работе котировки синтетические - сгенерированы на основе текущих ставок рефинансирования, так как большинство имеющихся на данный момент фондов ведут свое начало от 22го года и наоборот, фонды, которые были до 22го года прекратили свое существование. Есть котировки по дополнительным рядам, назовем их вспомогательными: IMOEX (индекс мосбиржи), GD (золото), BZ (нефть). Рассматриваемые даты 06.16-05.24. 

Фонды денежного рынка это те, график которых вот такой:

![alt text](mm_fund.JPG "Title2")

<b>Найти:</b> С помощью модели RL предполагается научиться эффективно распределять денежные средства между имеющимися активами (рисковыми и безрисковым).

 В качестве состояния в среде планируется использовать индикаторы технического анализа, корреляцию активов со вспомогательными рядам, текущее распределение портфеля. 
 
 Обязательно должны быть учтены комиссии за совершение сделок.

In [4]:
from DataLoader import DataLoader
from FeaturesAdder import FeaturesAdder
from PortfolioEnvBox import PortfolioEnv
%load_ext autoreload
%autoreload 2

In [5]:
loader = DataLoader()
df = loader.LoadData()

  df = pd.concat([df, dft[self.columns]], ignore_index=True)


In [6]:
cnt = df.groupby('date').count()
cnt[cnt["open"] > 25]

Unnamed: 0_level_0,open,high,low,close,volume,tic
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1


In [7]:
df.shape

(25949, 7)

In [8]:
df.index = range(len(df))
#df = df.drop(df[df['tic'] == 'IMOEX'].index)
#df = df.drop(df[df['tic'] == 'BZ'].index)
#df = df.drop(df[df['tic'] == 'GD'].index)
df = df.drop(df[df['tic'] == 'USD'].index)

In [9]:
df['volume'] = pd.to_numeric(df['volume'])
df = df.astype({'volume' : 'double'})

In [10]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 23953 entries, 0 to 25947
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   date    23953 non-null  datetime64[ns]
 1   open    23953 non-null  float64       
 2   high    23953 non-null  float64       
 3   low     23953 non-null  float64       
 4   close   23953 non-null  float64       
 5   volume  23953 non-null  float64       
 6   tic     23953 non-null  object        
dtypes: datetime64[ns](1), float64(5), object(1)
memory usage: 1.5+ MB


In [11]:
fa = FeaturesAdder()
df0 = df.copy()
df = fa.Process(df)

100%|██████████| 1974/1974 [00:19<00:00, 101.36it/s]


In [12]:
df.tail()

Unnamed: 0,date,open,high,low,close,volume,tic,HL_rsi12_up,HL_rsi12_down,rsi12,obv,cov_list,cov_xtra,return_list
17761,2024-06-03,538.06,543.61,535.11,541.51,5116050.0,MTSS,31.029741,13.551594,24.02614,4.91933,[],"[0.7013046477435794, 0.6123415407147443, -0.19...",tic BZ GD GMKN IMOEX ...
17762,2024-06-03,1419.72,1434.32,1379.52,1407.92,2076053.0,NVTK,22.612618,17.001599,17.001599,1.026104,[],"[0.7013046477435794, 0.6123415407147443, -0.19...",tic BZ GD GMKN IMOEX ...
17763,2024-06-03,731.69,739.19,716.99,737.44,7536920.0,ROSN,40.545188,26.954882,34.850714,52.135294,[],"[0.7013046477435794, 0.6123415407147443, -0.19...",tic BZ GD GMKN IMOEX ...
17764,2024-06-03,411.87,413.87,403.37,409.32,61548130.0,SBER,60.360697,40.349783,40.349783,445.295899,[],"[0.7013046477435794, 0.6123415407147443, -0.19...",tic BZ GD GMKN IMOEX ...
17765,2024-06-03,34.23,34.685,32.15,33.88,153852300.0,SNGS,32.28291,23.582352,23.582352,207.700822,[],"[0.7013046477435794, 0.6123415407147443, -0.19...",tic BZ GD GMKN IMOEX ...


Проверяем корректность рассчитанных FinRL индикаторов

In [13]:
import talib as ta
tic_data = df0[df0['tic']=='SBER'].sort_values(by=["date"])
_high   = tic_data['high'].to_numpy()
_low    = tic_data['low'].to_numpy()
_close  = tic_data['close'].to_numpy()
_volume = tic_data['volume'].to_numpy()

In [14]:
rsi12 = ta.RSI(_close, timeperiod=fa.rsi_period)
obv = ta.OBV(_close, _volume) / 1e7

In [16]:
print(df.iloc[17764][['rsi12', 'obv']])
print(rsi12[-1], obv[-1])

rsi12     40.349783
obv      445.295899
Name: 17764, dtype: object
40.349782550662574 445.2958986


In [17]:
df['tic'].unique()

array(['GMKN', 'LKOH', 'MAGN', 'MM', 'MTSS', 'NVTK', 'ROSN', 'SBER',
       'SNGS'], dtype=object)

In [19]:
len(df.iloc[17368]['cov_xtra'])

27

In [20]:
df.index

Index([    0,     1,     2,     3,     4,     5,     6,     7,     8,     9,
       ...
       17756, 17757, 17758, 17759, 17760, 17761, 17762, 17763, 17764, 17765],
      dtype='int64', length=17766)

In [21]:
df.index = df.date.factorize()[0]

In [22]:
import numpy as np
import pandas as pd
from gym.utils import seeding
import gym
from gym import spaces
import matplotlib
import matplotlib.pyplot as plt
from stable_baselines3.common.vec_env import DummyVecEnv

In [23]:
stock_dimension = len(df.tic.unique())
state_space = stock_dimension
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 9, State Space: 9


In [24]:
env_kwargs = {
    "hmax": 20,
    "initial_amount": 1000000, 
    "transaction_cost_pct": 0.001, 
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": fa.indicators, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4,
    "cov_xtra_names": fa.cov_xtra_names
}

In [25]:
e_train_gym = PortfolioEnv(df = df, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()



Ручная проверка действия в среде

In [26]:
e_train_gym.day, e_train_gym.portfolio_value, e_train_gym.cash, e_train_gym.current_state

(230, 1000000, 1000000, array([0, 0, 0, 0, 0, 0, 0, 0, 0]))

In [27]:
act = e_train_gym.action_space.sample()
print(act)

[0.2652563  0.5192913  0.329495   0.45773244 0.87658024 0.60700244
 0.6961824  0.9369841  0.74715316]


In [28]:
close_prices = df.loc[e_train_gym.day]['close'].to_numpy()
state, reward, term, _ = e_train_gym.step(act)
reward, e_train_gym.portfolio_value, e_train_gym.cash, e_train_gym.current_state, e_train_gym.commission

(-0.008909245716712321,
 991090.7542832877,
 3838.157707945644,
 array([  64,   31, 1823,  762,  600,  160,  402, 1037, 4590]),
 995.1666756164379)

In [29]:
state, len(state)

(array([ 4.39192086e-03,  8.59804749e-03,  5.45553923e-03,  7.57880151e-03,
         1.45137787e-02,  1.00503051e-02,  1.15268826e-02,  1.55139017e-02,
         1.23708189e-02, -4.53852885e-03,  7.14469737e-04, -7.50700802e-03,
         3.99695102e-03, -1.21562021e-03, -3.96430438e-03, -1.25936130e-03,
        -7.84043258e-04, -4.55744715e-03,  3.97695251e-05,  1.70556947e-03,
        -2.70189398e-03, -2.51313657e-03,  4.14914079e-03, -2.70748867e-03,
         3.14910576e-03, -6.70200320e-03, -4.45712664e-03,  7.76940514e-03,
         4.94296767e-03,  7.07607217e-03, -5.46838854e-03,  5.51563443e-03,
         7.34434484e-03,  6.12380888e-03,  4.17888907e-03,  2.59416615e-03,
         4.01325354e-01,  4.56871878e-01,  3.86980332e-01,  1.00000000e+00,
         3.76505008e-01,  4.71669637e-01,  4.71738515e-01,  5.79428626e-01,
         6.21448394e-01,  3.28133225e-01,  4.10806447e-01,  2.77913435e-01,
         1.00000000e+00,  3.13880017e-01,  3.34885228e-01,  3.82728164e-01,
         3.8

In [30]:
sum(close_prices * e_train_gym.current_state) + e_train_gym.cash
reward, e_train_gym.cash

(-0.008909245716712321, 3838.157707945644)

In [31]:
e_train_gym.date_memory, e_train_gym.asset_memory

([Timestamp('2017-05-25 00:00:00'), Timestamp('2017-05-26 00:00:00')],
 [1000000, 991090.7542832877])

In [32]:
act2 = e_train_gym.action_space.sample()
print(act2)

[0.18991771 0.39101064 0.3148726  0.5051955  0.0128755  0.8839377
 0.18799774 0.10866006 0.9698709 ]


In [33]:
state, reward, term, _ = e_train_gym.step(act2)
reward, e_train_gym.portfolio_value, e_train_gym.cash, e_train_gym.current_state, e_train_gym.commission

(0.008506378286643839,
 999521.3471556166,
 2695.4551693157932,
 array([  69,   36, 2699, 1271,   13,  365,  165,  185, 8920]),
 743.1329495890409)

In [34]:
e_train_gym.total_commission

1738.2996252054788

Экспресс проверка что комиссия считается не криво. Поставим процент комиссии 10%. Первый день - покупаем равные доли. Второй день всё продаем. Ожидаем cash около 800_000. (Цены считаем примерно равными - всё ушло на комиссии)

In [35]:
e_train_gym_perc = PortfolioEnv(df = df, hmax = 20, initial_amount = 1000000, transaction_cost_pct = 0.1, 
    state_space = state_space, 
    stock_dim = stock_dimension, 
    tech_indicator_list = fa.indicators, 
    action_space = stock_dimension, 
    reward_scaling = 1e-4,
    cov_xtra_names = fa.cov_xtra_names)

In [36]:
e_train_gym_perc.cash

1000000

In [37]:
e_train_gym_perc.step(np.array([1] * stock_dimension))
e_train_gym_perc.cash, e_train_gym_perc.commission

(13121.121767123259, 89716.26165753425)

In [39]:
e_train_gym_perc.step(np.array([0] * stock_dimension))
e_train_gym_perc.cash, e_train_gym_perc.total_commission

(829831.4092054793, 180461.84915068495)

In [40]:
def get_result(df_daily_return):
    DRL_strat = convert_daily_return_to_pyfolio_ts(df_daily_return)
    perf_func = timeseries.perf_stats 
    return perf_func( returns=DRL_strat, 
                                factor_returns=DRL_strat, 
                                positions=None, transactions=None, turnover_denom="AGB")

In [41]:
from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise

In [42]:
def train_trade(alg, train, trade, optuna_params = None, log_name = None, size = None, trained_model = None, steps = 50_000):
    #env_kwargs['lookback'] = lookback
    e_train_gym = PortfolioEnv(df = train, **env_kwargs)
    env_train, _ = e_train_gym.get_sb_env()
    
    policy_kwargs = None
    if size != None:
        policy_kwargs = {
            "net_arch": dict(pi=size, vf=size, qf=size),
        }

    agent = DRLAgent(env = env_train)

    model_params = None
    if optuna_params != None:
        model_params = optuna_params

    if log_name is None:
        log_name = alg

    timesteps = steps
    if trained_model is None:
        model = agent.get_model(model_name = alg, model_kwargs = optuna_params, policy_kwargs = policy_kwargs, tensorboard_log = 'logs')
    else:
        model = trained_model
        timesteps = 25_000


    trained_model = agent.train_model(model = model, 
                                tb_log_name = log_name,
                                total_timesteps = timesteps)

    e_trade_gym2 = PortfolioEnv(df = trade, reset_to_zero=True, **env_kwargs)

    df_daily_return, df_actions = DRLAgent.DRL_prediction(model=trained_model,
                        environment = e_trade_gym2)
    
    return trained_model, e_trade_gym2.total_commission,  df_daily_return, df_actions

Поисследуем как показывают себя алгоритмы на истории. Будем брать период 12.2016-3.2023, сначала обучаем на всем промежутке алгоритм, затем дообучаем в течение 9ти месяцев (с 6.2023 по 3.2024. В данный период только первые 2 месяца рост, затем коррекции) - добавляем месяц в конце и убираем месяц в начале. Фиксируем результаты на тестовой выборке - следующий месяц за месяцем который добавили. Основная задача понять, как покажет себя каждый из алгоритмов в сравнении с индексом и безрисковым активом.

![alt text](imoex.jpg "Title2")

In [44]:
algs = ['a2c','ppo','ddpg','td3']

date_start = datetime.datetime(2023, 6, 1)

res_df = pd.DataFrame(columns=["alg", "month", "res", "dd", "sharp", "day_ret", "actions", "total_commission"])
for alg in algs:
  model = None
  for i in range(9):
    date0 = datetime.datetime(2016, 12, 1) + relativedelta(months = i)
    date1 = date_start + relativedelta(months = i)
    date2 = date_start + relativedelta(months = i + 1)

    train = data_split(df, date0, date1) #'2016-05-10', date1)
    trade = data_split(df, date1, date2)

    print(date1, date2)
    print(alg)
    log_name = alg + '_' + str(6+i)
    model, total_comm, df_daily_return, df_actions = train_trade(alg, train, trade, None, log_name=log_name, 
                                                                 size=[192,128], trained_model=None, steps=50_000)
    res11 = get_result(df_daily_return)

    print(alg, res11['Cumulative returns'], res11['Max drawdown'], res11['Sharpe ratio'], total_comm)
    res_df.loc[-1] = [alg, str(i), res11['Cumulative returns'], res11['Max drawdown'], res11['Sharpe ratio'], df_daily_return, df_actions, total_comm]
    res_df.index += 1

2023-06-01 00:00:00 2023-07-01 00:00:00
a2c
{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cuda device
Logging to logs\a2c_6_1




-----------------------------------------
| time/                 |               |
|    fps                | 198           |
|    iterations         | 100           |
|    time_elapsed       | 2             |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -13.3         |
|    explained_variance | 0.788         |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | -0.0984       |
|    reward             | -0.0035701783 |
|    std                | 1.07          |
|    value_loss         | 7.53e-05      |
-----------------------------------------
----------------------------------------
| time/                 |              |
|    fps                | 208          |
|    iterations         | 200          |
|    time_elapsed       | 4            |
|    total_timesteps    | 1000         |
| train/                |              |
|    entropy_loss       | -13.8        |




----------------------------------------
| time/                 |              |
|    fps                | 202          |
|    iterations         | 100          |
|    time_elapsed       | 2            |
|    total_timesteps    | 500          |
| train/                |              |
|    entropy_loss       | -13.4        |
|    explained_variance | -3.62        |
|    learning_rate      | 0.0007       |
|    n_updates          | 99           |
|    policy_loss        | -0.000842    |
|    reward             | 0.0073433686 |
|    std                | 1.07         |
|    value_loss         | 0.000542     |
----------------------------------------
begin_total_asset:1000000
end_total_asset:798866.6785616687
Sharpe:  -0.5256463481118399
---------------------------------------
| time/                 |             |
|    fps                | 205         |
|    iterations         | 200         |
|    time_elapsed       | 4           |
|    total_timesteps    | 1000        |
| train/       



----------------------------------------
| time/                 |              |
|    fps                | 197          |
|    iterations         | 100          |
|    time_elapsed       | 2            |
|    total_timesteps    | 500          |
| train/                |              |
|    entropy_loss       | -13.3        |
|    explained_variance | 0.169        |
|    learning_rate      | 0.0007       |
|    n_updates          | 99           |
|    policy_loss        | -0.148       |
|    reward             | -0.012526904 |
|    std                | 1.07         |
|    value_loss         | 0.000228     |
----------------------------------------
begin_total_asset:1000000
end_total_asset:796303.9360437705
Sharpe:  -0.3613103212819038
----------------------------------------
| time/                 |              |
|    fps                | 201          |
|    iterations         | 200          |
|    time_elapsed       | 4            |
|    total_timesteps    | 1000         |
| train/ 



begin_total_asset:1000000
end_total_asset:776771.9270014504
Sharpe:  -0.8209771206859762
------------------------------------------
| time/                 |                |
|    fps                | 189            |
|    iterations         | 100            |
|    time_elapsed       | 2              |
|    total_timesteps    | 500            |
| train/                |                |
|    entropy_loss       | -13.3          |
|    explained_variance | -3.19          |
|    learning_rate      | 0.0007         |
|    n_updates          | 99             |
|    policy_loss        | -0.229         |
|    reward             | -0.00084947306 |
|    std                | 1.06           |
|    value_loss         | 0.000526       |
------------------------------------------
---------------------------------------
| time/                 |             |
|    fps                | 201         |
|    iterations         | 200         |
|    time_elapsed       | 4           |
|    total_timesteps   



----------------------------------------
| time/                 |              |
|    fps                | 194          |
|    iterations         | 100          |
|    time_elapsed       | 2            |
|    total_timesteps    | 500          |
| train/                |              |
|    entropy_loss       | -13.3        |
|    explained_variance | 0.902        |
|    learning_rate      | 0.0007       |
|    n_updates          | 99           |
|    policy_loss        | -0.0946      |
|    reward             | 0.0044543575 |
|    std                | 1.06         |
|    value_loss         | 5.62e-05     |
----------------------------------------
begin_total_asset:1000000
end_total_asset:779195.2769937407
Sharpe:  -0.5180330392100552
begin_total_asset:1000000
end_total_asset:797291.2804219719
Sharpe:  -0.7908668377339146
----------------------------------------
| time/                 |              |
|    fps                | 197          |
|    iterations         | 200          |
| 



begin_total_asset:1000000
end_total_asset:741948.1506842737
Sharpe:  -4.514154841012512
begin_total_asset:1000000
end_total_asset:795560.7727912177
Sharpe:  -1.478704580756424
-----------------------------------------
| time/                 |               |
|    fps                | 192           |
|    iterations         | 100           |
|    time_elapsed       | 2             |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -13.3         |
|    explained_variance | -17.9         |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | 0.539         |
|    reward             | -0.0023018157 |
|    std                | 1.06          |
|    value_loss         | 0.00156       |
-----------------------------------------
begin_total_asset:1000000
end_total_asset:799250.6059894258
Sharpe:  -1.0801480013733815
begin_total_asset:1000000
end_total_asset:794787.1551866315
Sha



begin_total_asset:1000000
end_total_asset:777226.1856380822
Sharpe:  -0.40950666351458764
----------------------------------------
| time/                 |              |
|    fps                | 184          |
|    iterations         | 100          |
|    time_elapsed       | 2            |
|    total_timesteps    | 500          |
| train/                |              |
|    entropy_loss       | -13.4        |
|    explained_variance | -0.744       |
|    learning_rate      | 0.0007       |
|    n_updates          | 99           |
|    policy_loss        | -0.116       |
|    reward             | 0.0065000495 |
|    std                | 1.08         |
|    value_loss         | 9.52e-05     |
----------------------------------------
begin_total_asset:1000000
end_total_asset:797042.917356588
Sharpe:  -1.0063476233371842
-----------------------------------------
| time/                 |               |
|    fps                | 190           |
|    iterations         | 200           



begin_total_asset:1000000
end_total_asset:738394.2802152997
Sharpe:  -0.7001660823285369
---------------------------------------
| time/                 |             |
|    fps                | 196         |
|    iterations         | 100         |
|    time_elapsed       | 2           |
|    total_timesteps    | 500         |
| train/                |             |
|    entropy_loss       | -13.4       |
|    explained_variance | 0.476       |
|    learning_rate      | 0.0007      |
|    n_updates          | 99          |
|    policy_loss        | -0.0195     |
|    reward             | 0.005700258 |
|    std                | 1.08        |
|    value_loss         | 1.56e-05    |
---------------------------------------
begin_total_asset:1000000
end_total_asset:794805.1921220138
Sharpe:  -0.37836149357560683
----------------------------------------
| time/                 |              |
|    fps                | 199          |
|    iterations         | 200          |
|    time_elapsed



begin_total_asset:1000000
end_total_asset:796963.5207500691
Sharpe:  -2.3171790013519518
---------------------------------------
| time/                 |             |
|    fps                | 185         |
|    iterations         | 100         |
|    time_elapsed       | 2           |
|    total_timesteps    | 500         |
| train/                |             |
|    entropy_loss       | -13.4       |
|    explained_variance | 0.358       |
|    learning_rate      | 0.0007      |
|    n_updates          | 99          |
|    policy_loss        | 0.0957      |
|    reward             | 0.010208851 |
|    std                | 1.08        |
|    value_loss         | 6.41e-05    |
---------------------------------------
begin_total_asset:1000000
end_total_asset:773989.3958072999
Sharpe:  -0.9910311130100811
begin_total_asset:1000000
end_total_asset:788665.5934836046
Sharpe:  -1.390334171384877
---------------------------------------
| time/                 |             |
|    fps      



begin_total_asset:1000000
end_total_asset:789308.2397583023
Sharpe:  -0.64966156814319
begin_total_asset:1000000
end_total_asset:795241.2252154935
Sharpe:  -2.7519839069796483
begin_total_asset:1000000
end_total_asset:787322.8442804803
Sharpe:  -3.3018040012297196
begin_total_asset:1000000
end_total_asset:783720.9490600824
Sharpe:  -2.932015411975722
begin_total_asset:1000000
end_total_asset:794789.8740951102
Sharpe:  -2.4749954358215787
begin_total_asset:1000000
end_total_asset:795922.008565042
Sharpe:  -0.466225481621444
begin_total_asset:1000000
end_total_asset:764984.7819601644
Sharpe:  -7.796985461135016
begin_total_asset:1000000
end_total_asset:791438.129868315
Sharpe:  -8.524339976112975
begin_total_asset:1000000
end_total_asset:798029.2637009858
Sharpe:  -6.000909887231259
begin_total_asset:1000000
end_total_asset:796018.9810427403
Sharpe:  -8.901934782991452
begin_total_asset:1000000
end_total_asset:747942.1752486569
Sharpe:  -4.813520682899748
--------------------------------



begin_total_asset:1000000
end_total_asset:787875.6968949193
Sharpe:  -3.2420782750162465
begin_total_asset:1000000
end_total_asset:788001.393845683
Sharpe:  -5.255690578774139
begin_total_asset:1000000
end_total_asset:797862.6925724247
Sharpe:  -2.871212673105675
begin_total_asset:1000000
end_total_asset:788836.0882705785
Sharpe:  -0.4226452178952288
begin_total_asset:1000000
end_total_asset:795909.2801421244
Sharpe:  -2.46794939960331
begin_total_asset:1000000
end_total_asset:795250.3977415758
Sharpe:  -7.417747355498231
begin_total_asset:1000000
end_total_asset:798956.167429316
Sharpe:  -6.520466800006239
begin_total_asset:1000000
end_total_asset:723783.1910876043
Sharpe:  -2.870364941345783
begin_total_asset:1000000
end_total_asset:797755.3214318492
Sharpe:  -2.908294026131419
begin_total_asset:1000000
end_total_asset:780721.6221303828
Sharpe:  -1.5359730613444502
begin_total_asset:1000000
end_total_asset:774585.5210213969
Sharpe:  -1.884560246871847
--------------------------------



begin_total_asset:1000000
end_total_asset:787245.7160863467
Sharpe:  -0.4180577839854577
begin_total_asset:1000000
end_total_asset:798479.2447680259
Sharpe:  -2.6680484656343237
begin_total_asset:1000000
end_total_asset:796849.7470672479
Sharpe:  -3.0620129930994233
begin_total_asset:1000000
end_total_asset:789046.3847836278
Sharpe:  -1.6309018716937405
begin_total_asset:1000000
end_total_asset:775643.9757711228
Sharpe:  -4.866698190162445
begin_total_asset:1000000
end_total_asset:781019.2758875898
Sharpe:  -2.035123808960977
begin_total_asset:1000000
end_total_asset:790574.6385795074
Sharpe:  -1.3199628578777096
begin_total_asset:1000000
end_total_asset:784900.7755111234
Sharpe:  -1.833421345014585
-------------------------------------
| time/              |              |
|    fps             | 286          |
|    iterations      | 1            |
|    time_elapsed    | 7            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | -0



begin_total_asset:1000000
end_total_asset:781511.2582744929
Sharpe:  -1.5351716633504755
begin_total_asset:1000000
end_total_asset:795459.8120893707
Sharpe:  -4.644448736606585
begin_total_asset:1000000
end_total_asset:780544.3542070965
Sharpe:  -2.762394022238843
begin_total_asset:1000000
end_total_asset:793642.0632149288
Sharpe:  -0.6672842907097866
begin_total_asset:1000000
end_total_asset:788513.4139022343
Sharpe:  -4.063736232069709
begin_total_asset:1000000
end_total_asset:799627.5767683291
Sharpe:  -1.5209106669546135
begin_total_asset:1000000
end_total_asset:798738.3256620294
Sharpe:  -1.478144901412074
-------------------------------------
| time/              |              |
|    fps             | 271          |
|    iterations      | 1            |
|    time_elapsed    | 7            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | -0.024132436 |
-------------------------------------
begin_total_asset:1000000
end_total_ass



begin_total_asset:1000000
end_total_asset:781323.4191949735
Sharpe:  -2.160874889059791
begin_total_asset:1000000
end_total_asset:783585.6128900292
Sharpe:  -1.1766155076595117
begin_total_asset:1000000
end_total_asset:798945.4431241945
Sharpe:  -0.920738100274049
begin_total_asset:1000000
end_total_asset:759560.8151536955
Sharpe:  -0.5863769646917951
begin_total_asset:1000000
end_total_asset:798349.883803754
Sharpe:  -1.8798944376782218
begin_total_asset:1000000
end_total_asset:792285.8473770961
Sharpe:  -0.9651170074170559
begin_total_asset:1000000
end_total_asset:794455.5183055068
Sharpe:  -7.228964953807152
--------------------------------------
| time/              |               |
|    fps             | 279           |
|    iterations      | 1             |
|    time_elapsed    | 7             |
|    total_timesteps | 2048          |
| train/             |               |
|    reward          | -0.0010832635 |
--------------------------------------
begin_total_asset:1000000
end_



begin_total_asset:1000000
end_total_asset:795208.8920754938
Sharpe:  -3.383961460016732
begin_total_asset:1000000
end_total_asset:798992.7534087265
Sharpe:  -0.9134464535297381
begin_total_asset:1000000
end_total_asset:799142.0284568636
Sharpe:  -1.7285116745391476
begin_total_asset:1000000
end_total_asset:797139.9042121781
Sharpe:  -1.524292886166936
begin_total_asset:1000000
end_total_asset:776017.2998384259
Sharpe:  -1.48956856935942
begin_total_asset:1000000
end_total_asset:792325.9504376983
Sharpe:  -0.5498703210228194
begin_total_asset:1000000
end_total_asset:766139.0974174397
Sharpe:  -0.6682717968178269
--------------------------------------
| time/              |               |
|    fps             | 280           |
|    iterations      | 1             |
|    time_elapsed    | 7             |
|    total_timesteps | 2048          |
| train/             |               |
|    reward          | -0.0141173825 |
--------------------------------------
begin_total_asset:1000000
end_



begin_total_asset:1000000
end_total_asset:793494.8737477969
Sharpe:  -1.3120208321812619
begin_total_asset:1000000
end_total_asset:757621.5976433589
Sharpe:  -1.1322391468837856
begin_total_asset:1000000
end_total_asset:797954.0907369846
Sharpe:  -2.1337379563658274
begin_total_asset:1000000
end_total_asset:796691.578146479
Sharpe:  -7.8103040663074355
begin_total_asset:1000000
end_total_asset:797576.7516577684
Sharpe:  -1.9665069903641506
begin_total_asset:1000000
end_total_asset:799155.3648966732
Sharpe:  -0.4886391107688825
begin_total_asset:1000000
end_total_asset:798526.8883099327
Sharpe:  -4.1226621259682465
begin_total_asset:1000000
end_total_asset:783191.5032773409
Sharpe:  -1.6464844151394855
-------------------------------------
| time/              |              |
|    fps             | 276          |
|    iterations      | 1            |
|    time_elapsed    | 7            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | 



begin_total_asset:1000000
end_total_asset:796156.4860485063
Sharpe:  -1.6295113191780854
begin_total_asset:1000000
end_total_asset:756830.2687113257
Sharpe:  -1.1389891209792102
begin_total_asset:1000000
end_total_asset:797245.1177328757
Sharpe:  -1.0696562501063847
begin_total_asset:1000000
end_total_asset:799943.7452095194
Sharpe:  -0.9149209890222899
begin_total_asset:1000000
end_total_asset:770572.2840569166
Sharpe:  -1.443425794524224
begin_total_asset:1000000
end_total_asset:791686.3184990692
Sharpe:  -2.2402589498919863
begin_total_asset:1000000
end_total_asset:785921.3159704531
Sharpe:  -1.5044679222952624
begin_total_asset:1000000
end_total_asset:794488.4091262211
Sharpe:  -1.3844679725726758
-------------------------------------
| time/              |              |
|    fps             | 291          |
|    iterations      | 1            |
|    time_elapsed    | 7            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | 



begin_total_asset:1000000
end_total_asset:794917.4135145908
Sharpe:  -1.9446456013807565
begin_total_asset:1000000
end_total_asset:780184.4720655621
Sharpe:  -1.3663692068430202
begin_total_asset:1000000
end_total_asset:787197.2163183722
Sharpe:  -0.324048658212019
begin_total_asset:1000000
end_total_asset:768872.9743715362
Sharpe:  -0.8524096534982865
begin_total_asset:1000000
end_total_asset:797026.0190218077
Sharpe:  -2.0479742690867986
begin_total_asset:1000000
end_total_asset:799809.3684216171
Sharpe:  -2.5830482690978602
begin_total_asset:1000000
end_total_asset:755712.8395284384
Sharpe:  -2.8353124221927315
-------------------------------------
| time/              |              |
|    fps             | 284          |
|    iterations      | 1            |
|    time_elapsed    | 7            |
|    total_timesteps | 2048         |
| train/             |              |
|    reward          | -0.010672796 |
-------------------------------------
begin_total_asset:1000000
end_total_



begin_total_asset:1000000
end_total_asset:1216778.4445977947
Sharpe:  0.3517772330916938
begin_total_asset:1000000
end_total_asset:1404675.2819047482
Sharpe:  0.5313780272411285
begin_total_asset:1000000
end_total_asset:1987206.9636800983
Sharpe:  0.7064491893788779
begin_total_asset:1000000
end_total_asset:2071809.9003579917
Sharpe:  0.7126967220380616
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 82           |
|    time_elapsed    | 58           |
|    total_timesteps | 4808         |
| train/             |              |
|    actor_loss      | -0.134       |
|    critic_loss     | 3.29e-05     |
|    learning_rate   | 0.001        |
|    n_updates       | 4707         |
|    reward          | 0.0052700937 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1604148.265077006
Sharpe:  0.6470347521105924
begin_total_asset:1000000
end_total_asset:1810285.3506180018
S



begin_total_asset:1000000
end_total_asset:2026473.3507608464
Sharpe:  0.8625248336216041
begin_total_asset:1000000
end_total_asset:1584334.6064742075
Sharpe:  0.88034834822125
begin_total_asset:1000000
end_total_asset:2578525.111507023
Sharpe:  0.973504174245351
begin_total_asset:1000000
end_total_asset:1703519.5564590148
Sharpe:  0.6371333637933039
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 80           |
|    time_elapsed    | 62           |
|    total_timesteps | 5030         |
| train/             |              |
|    actor_loss      | -0.0346      |
|    critic_loss     | 6.9e-05      |
|    learning_rate   | 0.001        |
|    n_updates       | 4929         |
|    reward          | 0.0006312185 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:2070776.9759000165
Sharpe:  0.7359431133951472
begin_total_asset:1000000
end_total_asset:2164991.390751985
Sharp



begin_total_asset:1000000
end_total_asset:1683913.1104084584
Sharpe:  0.6603840423881485
begin_total_asset:1000000
end_total_asset:1805188.0679997099
Sharpe:  0.7151688792530829
begin_total_asset:1000000
end_total_asset:2833253.8884228696
Sharpe:  1.0828097620828372
begin_total_asset:1000000
end_total_asset:2729119.992099379
Sharpe:  0.9788433677982792
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 84          |
|    time_elapsed    | 67          |
|    total_timesteps | 5691        |
| train/             |             |
|    actor_loss      | -0.157      |
|    critic_loss     | 6.16e-05    |
|    learning_rate   | 0.001       |
|    n_updates       | 5590        |
|    reward          | 0.014579635 |
------------------------------------
begin_total_asset:1000000
end_total_asset:1954373.5565234225
Sharpe:  0.785261867045452
begin_total_asset:1000000
end_total_asset:3605545.934204879
Sharpe:  1.21905



begin_total_asset:1000000
end_total_asset:1439279.057528724
Sharpe:  0.6826035408679674
begin_total_asset:1000000
end_total_asset:2119673.0534929796
Sharpe:  0.6871677437673037
begin_total_asset:1000000
end_total_asset:2157595.492051969
Sharpe:  0.6989383540827286
begin_total_asset:1000000
end_total_asset:1912795.973229015
Sharpe:  0.6777249778052865
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 80           |
|    time_elapsed    | 66           |
|    total_timesteps | 5350         |
| train/             |              |
|    actor_loss      | -0.155       |
|    critic_loss     | 6.29e-05     |
|    learning_rate   | 0.001        |
|    n_updates       | 5249         |
|    reward          | 0.0015876141 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1558549.1105479845
Sharpe:  0.7015053844734702
begin_total_asset:1000000
end_total_asset:758471.6991540001
Shar



begin_total_asset:1000000
end_total_asset:1795713.3339357076
Sharpe:  1.0192449947279638
begin_total_asset:1000000
end_total_asset:2045100.4549830158
Sharpe:  1.0449444379274073
begin_total_asset:1000000
end_total_asset:2021303.866851598
Sharpe:  1.1250886020593591
begin_total_asset:1000000
end_total_asset:2367277.8039864874
Sharpe:  1.2198447768967378
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 81            |
|    time_elapsed    | 62            |
|    total_timesteps | 5096          |
| train/             |               |
|    actor_loss      | 0.0438        |
|    critic_loss     | 3.15e-05      |
|    learning_rate   | 0.001         |
|    n_updates       | 4995          |
|    reward          | 0.00031314313 |
--------------------------------------
begin_total_asset:1000000
end_total_asset:1829946.7187683275
Sharpe:  1.1425213943690595
begin_total_asset:1000000
end_total_asset:1700728



begin_total_asset:1000000
end_total_asset:2264534.273378567
Sharpe:  0.9360172365932521
begin_total_asset:1000000
end_total_asset:2002402.2274850095
Sharpe:  0.9255048394530698
begin_total_asset:1000000
end_total_asset:3359660.2396830274
Sharpe:  1.240585763259209
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 82           |
|    time_elapsed    | 51           |
|    total_timesteps | 4231         |
| train/             |              |
|    actor_loss      | -0.0396      |
|    critic_loss     | 3.1e-05      |
|    learning_rate   | 0.001        |
|    n_updates       | 4130         |
|    reward          | -0.008036797 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1639640.2523000147
Sharpe:  0.7897418398131174
begin_total_asset:1000000
end_total_asset:785788.7948589998
Sharpe:  -6.449239574087921
begin_total_asset:1000000
end_total_asset:1978362.883040016
Shar



begin_total_asset:1000000
end_total_asset:1886312.7388768692
Sharpe:  0.6727160414146316
begin_total_asset:1000000
end_total_asset:1791053.4935837528
Sharpe:  0.5887785014398688
begin_total_asset:1000000
end_total_asset:1405813.007408091
Sharpe:  0.7022088247709946
begin_total_asset:1000000
end_total_asset:2177374.121318988
Sharpe:  0.9869986135048808
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 81           |
|    time_elapsed    | 63           |
|    total_timesteps | 5211         |
| train/             |              |
|    actor_loss      | -0.0668      |
|    critic_loss     | 2.96e-05     |
|    learning_rate   | 0.001        |
|    n_updates       | 5110         |
|    reward          | -0.003402118 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:2650512.3845375143
Sharpe:  1.078110439161789
begin_total_asset:1000000
end_total_asset:1828663.9177416426
Sha



begin_total_asset:1000000
end_total_asset:2171704.74727181
Sharpe:  0.837681799256763
begin_total_asset:1000000
end_total_asset:1600407.2063310128
Sharpe:  0.6700577864169354
begin_total_asset:1000000
end_total_asset:750101.9914330001
Sharpe:  -6.154671219582431
begin_total_asset:1000000
end_total_asset:779873.5886939995
Sharpe:  -4.570989263255152
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 84          |
|    time_elapsed    | 30          |
|    total_timesteps | 2600        |
| train/             |             |
|    actor_loss      | -0.0072     |
|    critic_loss     | 3.19e-05    |
|    learning_rate   | 0.001       |
|    n_updates       | 2499        |
|    reward          | -0.06627889 |
------------------------------------
begin_total_asset:1000000
end_total_asset:2209553.997010013
Sharpe:  0.7535200784307792
begin_total_asset:1000000
end_total_asset:2087928.01324
Sharpe:  0.7067649114033



begin_total_asset:1000000
end_total_asset:784997.2279132885
Sharpe:  -0.48296198906735593
begin_total_asset:1000000
end_total_asset:1953830.4066347792
Sharpe:  0.8672588340674169
begin_total_asset:1000000
end_total_asset:1481425.568240796
Sharpe:  0.7543385978860152
begin_total_asset:1000000
end_total_asset:1881527.6478669886
Sharpe:  0.6789589256798512
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 83           |
|    time_elapsed    | 49           |
|    total_timesteps | 4154         |
| train/             |              |
|    actor_loss      | -0.099       |
|    critic_loss     | 4.51e-05     |
|    learning_rate   | 0.001        |
|    n_updates       | 4053         |
|    reward          | 0.0021680298 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:794739.3007409998
Sharpe:  -2.8460343770713767
begin_total_asset:1000000
end_total_asset:1710816.569599096
S



begin_total_asset:1000000
end_total_asset:1592044.79575092
Sharpe:  0.6002158674460654
begin_total_asset:1000000
end_total_asset:2068884.5680423747
Sharpe:  0.7390542315002172
begin_total_asset:1000000
end_total_asset:2362844.4651385443
Sharpe:  0.8896735899485757
begin_total_asset:1000000
end_total_asset:2004054.4010450605
Sharpe:  0.8733321776279999
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 83          |
|    time_elapsed    | 63          |
|    total_timesteps | 5323        |
| train/             |             |
|    actor_loss      | 0.0382      |
|    critic_loss     | 0.000128    |
|    learning_rate   | 0.001       |
|    n_updates       | 5222        |
|    reward          | 0.004941263 |
------------------------------------
begin_total_asset:1000000
end_total_asset:2255934.9647177174
Sharpe:  0.8863714364335049
begin_total_asset:1000000
end_total_asset:2472982.4910275517
Sharpe:  0.9358



begin_total_asset:1000000
end_total_asset:2069642.2858984137
Sharpe:  0.5919604963227217
begin_total_asset:1000000
end_total_asset:1728001.7460450141
Sharpe:  0.5399062937747321
begin_total_asset:1000000
end_total_asset:748538.1174750003
Sharpe:  -3.9491122431097003
begin_total_asset:1000000
end_total_asset:1993221.203074987
Sharpe:  0.6026676374890025
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 89            |
|    time_elapsed    | 48            |
|    total_timesteps | 4332          |
| train/             |               |
|    actor_loss      | -0.00671      |
|    critic_loss     | 0.000382      |
|    learning_rate   | 0.001         |
|    n_updates       | 4231          |
|    reward          | -0.0011992551 |
--------------------------------------
begin_total_asset:1000000
end_total_asset:739133.7795949996
Sharpe:  -4.398226651383125
begin_total_asset:1000000
end_total_asset:1547003.



begin_total_asset:1000000
end_total_asset:1955768.695117788
Sharpe:  0.8811259096013511
begin_total_asset:1000000
end_total_asset:1895933.1658006727
Sharpe:  0.7535858122602772
begin_total_asset:1000000
end_total_asset:1867251.573949986
Sharpe:  0.7618063126705312
begin_total_asset:1000000
end_total_asset:797018.712405
Sharpe:  -1.6446706266269258
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 96           |
|    time_elapsed    | 34           |
|    total_timesteps | 3330         |
| train/             |              |
|    actor_loss      | 0.0411       |
|    critic_loss     | 0.000274     |
|    learning_rate   | 0.001        |
|    n_updates       | 3229         |
|    reward          | -0.050753627 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:3086961.404379973
Sharpe:  0.9598120123650307
begin_total_asset:1000000
end_total_asset:1951653.0742849843
Sharpe:



begin_total_asset:1000000
end_total_asset:1468630.144779627
Sharpe:  0.6336571890186659
begin_total_asset:1000000
end_total_asset:1699640.3690716345
Sharpe:  0.7751252966495287
begin_total_asset:1000000
end_total_asset:1725909.749622684
Sharpe:  0.821575614229879
begin_total_asset:1000000
end_total_asset:1559754.5097911633
Sharpe:  0.7987896381271132
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 95            |
|    time_elapsed    | 46            |
|    total_timesteps | 4426          |
| train/             |               |
|    actor_loss      | 0.0797        |
|    critic_loss     | 0.000285      |
|    learning_rate   | 0.001         |
|    n_updates       | 4325          |
|    reward          | -0.0010558164 |
--------------------------------------
begin_total_asset:1000000
end_total_asset:1616406.1412340656
Sharpe:  0.6011938193009639
begin_total_asset:1000000
end_total_asset:1911343.5



begin_total_asset:1000000
end_total_asset:1249834.6373251325
Sharpe:  0.4614243372439227
begin_total_asset:1000000
end_total_asset:1977577.3754313525
Sharpe:  0.8908946977966632
begin_total_asset:1000000
end_total_asset:1669122.6648784801
Sharpe:  0.7426016899585163
begin_total_asset:1000000
end_total_asset:2433196.449305669
Sharpe:  1.023954000975163
-----------------------------------
| time/              |            |
|    episodes        | 4          |
|    fps             | 92         |
|    time_elapsed    | 47         |
|    total_timesteps | 4397       |
| train/             |            |
|    actor_loss      | -0.0366    |
|    critic_loss     | 7.25e-05   |
|    learning_rate   | 0.001      |
|    n_updates       | 4296       |
|    reward          | 0.02282304 |
-----------------------------------
begin_total_asset:1000000
end_total_asset:1945250.6212059043
Sharpe:  0.6528991654996096
begin_total_asset:1000000
end_total_asset:1589376.575466647
Sharpe:  1.0075771758129524
b



begin_total_asset:1000000
end_total_asset:1746715.7453543022
Sharpe:  0.685888912121198
begin_total_asset:1000000
end_total_asset:1746491.486893864
Sharpe:  0.6510940948457865
begin_total_asset:1000000
end_total_asset:2374283.9151999857
Sharpe:  0.7707131515248403
begin_total_asset:1000000
end_total_asset:1787059.970980018
Sharpe:  0.6400099770072681
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 93           |
|    time_elapsed    | 55           |
|    total_timesteps | 5218         |
| train/             |              |
|    actor_loss      | 0.0596       |
|    critic_loss     | 0.00047      |
|    learning_rate   | 0.001        |
|    n_updates       | 5117         |
|    reward          | -0.008524538 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:740029.5242949999
Sharpe:  -11.340640105907415
begin_total_asset:1000000
end_total_asset:2287318.07714998
Sharp



begin_total_asset:1000000
end_total_asset:1649404.2888708697
Sharpe:  0.6839187872498541
begin_total_asset:1000000
end_total_asset:2183240.7708093286
Sharpe:  0.827549021825734
begin_total_asset:1000000
end_total_asset:2151537.726329974
Sharpe:  0.8411706664339936
begin_total_asset:1000000
end_total_asset:2590666.5531089283
Sharpe:  0.9632669598524877
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 88           |
|    time_elapsed    | 56           |
|    total_timesteps | 5003         |
| train/             |              |
|    actor_loss      | 0.0148       |
|    critic_loss     | 0.000251     |
|    learning_rate   | 0.001        |
|    n_updates       | 4902         |
|    reward          | 0.0004258195 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:2452256.172128155
Sharpe:  1.035909729572098
begin_total_asset:1000000
end_total_asset:1701328.3151275564
Shar



begin_total_asset:1000000
end_total_asset:2334608.102262916
Sharpe:  0.8808533589003149
begin_total_asset:1000000
end_total_asset:2531180.9155492354
Sharpe:  0.8920819379848423
begin_total_asset:1000000
end_total_asset:1320479.407445552
Sharpe:  0.4111889669171875
begin_total_asset:1000000
end_total_asset:1774137.9043643323
Sharpe:  0.7540546379021815
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 93            |
|    time_elapsed    | 55            |
|    total_timesteps | 5144          |
| train/             |               |
|    actor_loss      | 0.0608        |
|    critic_loss     | 0.00011       |
|    learning_rate   | 0.001         |
|    n_updates       | 5043          |
|    reward          | 0.00046048503 |
--------------------------------------
begin_total_asset:1000000
end_total_asset:1831607.8163739168
Sharpe:  0.7596486500182937
begin_total_asset:1000000
end_total_asset:2020534.



begin_total_asset:1000000
end_total_asset:2222269.845666938
Sharpe:  0.9851444854122938
begin_total_asset:1000000
end_total_asset:794824.2472055347
Sharpe:  -5.527437936025392
begin_total_asset:1000000
end_total_asset:1756088.2066552213
Sharpe:  0.6979771787808554
begin_total_asset:1000000
end_total_asset:2766614.825092627
Sharpe:  1.0635177606711863
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 84           |
|    time_elapsed    | 52           |
|    total_timesteps | 4424         |
| train/             |              |
|    actor_loss      | -0.0511      |
|    critic_loss     | 0.000174     |
|    learning_rate   | 0.001        |
|    n_updates       | 4323         |
|    reward          | 0.0024522818 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:1619054.2246902063
Sharpe:  0.7162910330427218
begin_total_asset:1000000
end_total_asset:1765682.340396706
Shar

In [45]:
res_df.to_csv('50_9month_res_commiss.csv', index=False)

In [46]:
res_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 36 entries, 35 to 0
Data columns (total 8 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   alg               36 non-null     object 
 1   month             36 non-null     object 
 2   res               36 non-null     float64
 3   dd                36 non-null     float64
 4   sharp             36 non-null     float64
 5   day_ret           36 non-null     object 
 6   actions           36 non-null     object 
 7   total_commission  36 non-null     int64  
dtypes: float64(3), int64(1), object(4)
memory usage: 2.5+ KB


In [47]:
res_df['month'] = pd.to_numeric(res_df['month'])

Посчитаем, какую доходность за эти же промежутки времени имели индекс и фонд

In [48]:
df2 = loader.LoadData()
imoex = df2[df2['tic'] == 'IMOEX']
imoex.index = imoex['date']
grouped = imoex.groupby(pd.Grouper(freq='ME')).first()[['date','close']]
grouped.index = range(len(grouped))
grouped['change'] = grouped['close'].pct_change().shift(-1)

mm = df2[df2['tic'] == 'MM']
mm.index = mm['date']
grouped2 = mm.groupby(pd.Grouper(freq='ME')).first()[['date','close']]
grouped2.index = range(len(grouped2))
grouped['change_MM'] = grouped2['close'].pct_change().shift(-1)

  df = pd.concat([df, dft[self.columns]], ignore_index=True)


In [49]:
gr = imoex.groupby(pd.Grouper(freq='ME'))
max_dd = []
for name, group in gr:
    Roll_Max = group['close'].rolling(22, min_periods=1).max()
    Daily_Drawdown = group['close']/Roll_Max - 1.0
    Max_Daily_Drawdown = Daily_Drawdown.rolling(22, min_periods=1).min()    
    max_dd.append(Max_Daily_Drawdown.min())
grouped['max_dd'] = max_dd
grouped['max_dd_MM'] = 0

In [50]:
tested_period = grouped.tail(13).head(9) #c 3.2023 и дальше
tested_period.index = range(9)

In [51]:
tested_period

Unnamed: 0,date,close,change,change_MM,max_dd,max_dd_MM
0,2023-06-01,2721.73,0.026527,0.004245,-0.022052,0
1,2023-07-03,2793.93,0.107272,0.004025,-0.012537,0
2,2023-08-01,3093.64,0.044514,0.005728,-0.033608,0
3,2023-09-01,3231.35,-0.030644,0.006785,-0.0764,0
4,2023-10-02,3132.33,0.023685,0.00693,-0.020892,0
5,2023-11-01,3206.52,-0.020031,0.007743,-0.025392,0
6,2023-12-01,3142.29,-0.003838,0.008776,-0.042469,0
7,2024-01-03,3130.23,0.031838,0.007853,-0.00863,0
8,2024-02-01,3229.89,0.011384,0.007792,-0.038623,0


Сравним доходности индекса, фонда и каждого из алгоритмов

In [52]:
all_returns = {'imoex' : 0}
all_returns['mm'] = tested_period['change_MM'].values - tested_period['change'].values
for alg in algs:
    all_returns[alg] = res_df[res_df['alg'] == alg].sort_values(by=['month'])['res'].values - tested_period['change'].values

In [53]:
all_returns_df = pd.DataFrame(all_returns)

In [55]:
%matplotlib qt

In [56]:
fig, ax = plt.subplots()
bplot = ax.boxplot(all_returns_df.values)  # will be used to label x-ticks
ax.set_xticklabels(all_returns_df.columns)
plt.show()

![alt text](returns5.png "Title2")

Видно, что доходности по фонду и алгоритмам в среднем ниже доходности по индексу. Более-менее сопостовима с индексом доходность TD3 (и всё стабильно - без выбросов)

In [57]:
all_drawdown = {'imoex' : tested_period['max_dd']}
all_drawdown['mm'] = 0
for alg in algs:
    all_drawdown[alg] = res_df[res_df['alg'] == alg].sort_values(by=['month'])['dd'].values

In [58]:
all_drawdown_df = pd.DataFrame(all_drawdown)

In [59]:
fig, ax = plt.subplots()
bplot = ax.boxplot(all_drawdown_df.values)  # will be used to label x-ticks
ax.set_xticklabels(all_drawdown_df.columns)
plt.show()

Посмотрим максимальные просадки на месяцах, на которых проводили тестирование

![alt text](max_dd5.png "Title2")

Доходность за 9 месяцев и средняя просадка по индексу

In [89]:
np.round(np.prod(1 + tested_period['change'].to_numpy()), 2), np.round(np.average(tested_period['max_dd'].to_numpy()), 3)

(1.2, -0.031)

Доходность за 9 месяцев и средняя просадка по фонду

In [90]:
np.round(np.prod(1 + tested_period['change_MM'].to_numpy()), 2), np.round(np.average(tested_period['max_dd_MM'].to_numpy()), 3)

(1.06, 0.0)

In [85]:
for name, group in res_df.groupby(by=['alg']):
    print(name[0], np.round(np.prod(group['res']+1),2), np.round(np.average(group['dd']), 3), np.round(np.average(group['sharp']), 2))

a2c 1.11 -0.023 1.84
ddpg 1.09 -0.026 1.15
ppo 1.07 -0.02 1.04
td3 1.21 -0.024 2.45


При примерно той же доходности для TD3 видим, что средняя просадка меньше на 0.5%. Выбираю для дальнейшей работы TD3

In [61]:
res_df[res_df['alg'] == 'td3']

Unnamed: 0,alg,month,res,dd,sharp,day_ret,actions,total_commission
8,td3,0,0.050837,-0.018555,5.587266,date daily_return 0 2023-06-01 ...,date ...,0
7,td3,1,0.101314,-0.016303,11.326173,date daily_return 0 2023-07-03 ...,date ...,0
6,td3,2,0.041977,-0.015613,4.696443,date daily_return 0 2023-08-01 ...,date ...,0
5,td3,3,-0.013364,-0.053834,-1.407984,date daily_return 0 2023-09-01 ...,date ...,0
4,td3,4,0.032729,-0.015848,4.047729,date daily_return 0 2023-10-02 ...,date ...,0
3,td3,5,-0.018197,-0.023854,-2.589071,date daily_return 0 2023-11-01 ...,date ...,0
2,td3,6,0.013922,-0.033613,1.299756,date daily_return 0 2023-12-01 ...,date ...,0
1,td3,7,0.005074,-0.00514,2.240755,date daily_return 0 2024-01-03 ...,date ...,0
0,td3,8,-0.015104,-0.03017,-3.15494,date daily_return 0 2024-02-01 ...,date ...,0


In [75]:
loader2 = DataLoader()
df2 = loader2.LoadData()

df2.index = range(len(df2))
#df = df.drop(df[df['tic'] == 'IMOEX'].index)
#df2 = df2.drop(df2[df2['tic'] == 'BZ'].index)
#df = df.drop(df[df['tic'] == 'GD'].index)
df2 = df2.drop(df2[df2['tic'] == 'USD'].index)

df2 = df2.astype({'volume' : 'double'})

fa2 = FeaturesAdder(22)
df2 = fa2.Process(df2)
df2.index = df2.date.factorize()[0]

i = 0
date02 = datetime.datetime(2016, 12, 1) + relativedelta(months = i)
date12 = date_start + relativedelta(months = i)
date22 = date_start + relativedelta(months = i + 1)

train2 = data_split(df2, date02, date12) #'2016-05-10', date1)
trade2 = data_split(df2, date12, date22)

  df = pd.concat([df, dft[self.columns]], ignore_index=True)
100%|██████████| 1974/1974 [00:20<00:00, 97.26it/s] 


Попробуем поварьировать параметры. Будем осуществлять подбор на месяце, который показал худший результат по просадке (09.23 - month=3)

Чтобы в случайный процесс Орнштейна-Уленбека (в контексте FinRL) можно было передать сигму - немного подправил код библиотеки. Зашитый 0.1 исправил на значение параметра

![alt text](action_noise0.JPG "Title2")
![alt text](action_noise20.JPG "Title2")

In [63]:
import optuna

In [78]:
def objective_noise(trial):
    #size = trial.suggest_categorical('size', [[128, 64], [192,128], [256, 128]])

    sigma = trial.suggest_categorical('sigma',[0.1, 0.3, 0.5, 0.7, 1])
    noise = trial.suggest_categorical("noise", ["ornstein_uhlenbeck", "normal"])
    #gamma = trial.suggest_categorical('gamma',[0.99, 0.95])

    #learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-3, log=True)

    sum_reward = 0
    sum_maxdd  = 0
    #optuna_params = {"learning_rate": learning_rate, "target_policy_noise": sigma, "action_noise": noise, "gamma": gamma}
    optuna_params = {"target_policy_noise": sigma, "action_noise": noise}
    log_name = "objective_noise" + '_' + noise + str(sigma) + '2_' + str(i)
    model, commission, df_daily_return, df_actions = train_trade('td3', train2, trade2, optuna_params, log_name, [192,128])
    
    result = get_result(df_daily_return)
    reward = result['Cumulative returns']
    max_dd = result['Max drawdown']
    sharp  = result['Sharpe ratio']

    print("Rew, DD, Sh, commiss", reward, max_dd, sharp, commission)
    sum_reward += reward
    sum_maxdd += max_dd

    print("Total", sum_reward, sum_maxdd, sharp)
    return sum_reward, max_dd

study = optuna.create_study(directions=["maximize", "maximize"])
study.optimize(objective_noise, n_trials=10)

[I 2024-06-28 14:55:59,077] A new study created in memory with name: no-name-b11c89c1-c1da-4adf-a639-3dd2998eab92


{'target_policy_noise': 0.5, 'action_noise': NormalActionNoise(mu=[0. 0. 0. 0. 0. 0. 0. 0. 0.], sigma=[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5])}
Using cuda device
Logging to logs\objective_noise_normal0.52_0_1




begin_total_asset:1000000
end_total_asset:1148657.143302988
Sharpe:  0.2739799154024772
begin_total_asset:1000000
end_total_asset:1539434.9767265355
Sharpe:  0.5431110923985152
begin_total_asset:1000000
end_total_asset:797105.1235808517
Sharpe:  -0.25327214324873915
begin_total_asset:1000000
end_total_asset:781437.2479220722
Sharpe:  -0.27041547355497797
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 90          |
|    time_elapsed    | 43          |
|    total_timesteps | 3930        |
| train/             |             |
|    actor_loss      | -0.0877     |
|    critic_loss     | 0.000192    |
|    learning_rate   | 0.001       |
|    n_updates       | 3829        |
|    reward          | -0.05744761 |
------------------------------------
begin_total_asset:1000000
end_total_asset:1632533.6259408067
Sharpe:  0.5469170019051651
begin_total_asset:1000000
end_total_asset:1515137.204682235
Sharpe:  0.61

[W 2024-06-28 14:59:21,171] Trial 0 failed with parameters: {'sigma': 0.5, 'noise': 'normal'} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\Alex\AppData\Local\Temp\ipykernel_13540\1517043441.py", line 15, in objective_noise
    model, commission, df_daily_return, df_actions = train_trade('td3', train2, trade2, optuna_params, log_name, [192,128])
  File "C:\Users\Alex\AppData\Local\Temp\ipykernel_13540\2215292903.py", line 29, in train_trade
    trained_model = agent.train_model(model = model,
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\finrl\agents\stablebaselines3\models.py", line 117, in train_model
    model = model.learn(
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\stable_baselines3\td3\td3.py", line 222, in learn
    return super().learn(
  File

KeyboardInterrupt: 

In [77]:
res_df[res_df['month'] == 0]

Unnamed: 0,alg,month,res,dd,sharp,day_ret,actions,total_commission
35,a2c,0,0.022154,-0.017458,3.097764,date daily_return 0 2023-06-01 ...,date ...,0
26,ppo,0,0.027995,-0.012072,5.165934,date daily_return 0 2023-06-01 ...,date ...,0
17,ddpg,0,0.029573,-0.012886,4.412521,date daily_return 0 2023-06-01 ...,date ...,0
8,td3,0,0.050837,-0.018555,5.587266,date daily_return 0 2023-06-01 ...,date ...,0


При подборе параметров лучший результат показала итерация с параметрами
lr=0.00024 sigma=0.5 (target_policy_noise) size=192,128 gamma=0.95

Результат R=0.1% (положительный!), maxDD=-4.47% (при просадке индекса на -7.1% и лучше чем по всем алгоритмам)

In [66]:
res = dict()
ret = dict()
act = dict()

date_start = datetime.datetime(2023, 6, 1)
date0 = datetime.datetime(2016, 12, 1) #date_start + relativedelta(months = -12 * i)
date1 = date_start + relativedelta(months = 0)
date2 = date_start + relativedelta(months = 0 + 1)

print(date1, date2)
train = data_split(df, date0, date1) #'2016-05-10', date1)
trade = data_split(df, date1, date2)

2023-06-01 00:00:00 2023-07-01 00:00:00


In [None]:
from stable_baselines3.common.callbacks import StopTrainingOnNoModelImprovement
stop_train = StopTrainingOnNoModelImprovement(max_no_improvement_evals=20, min_evals=10000)

In [72]:
alg = 'td3'
optuna_params = None
optuna_params = {"learning_rate": 6e-4, "action_noise": "ornstein_uhlenbeck", "target_policy_noise" : 0.5, "gamma" : 0.95}
#{"use_sde" : True}#

log_name = alg + '_final_' + alg + '_256x128_' + str(6)
model, commiss, df_daily_return, df_actions = train_trade(alg, train, trade, optuna_params, log_name=log_name, size=[256,128], 
                                                          trained_model=None, steps = 50_000)

{'learning_rate': 0.0006, 'action_noise': OrnsteinUhlenbeckActionNoise(mu=[0. 0. 0. 0. 0. 0. 0. 0. 0.], sigma=[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]), 'target_policy_noise': 0.5, 'gamma': 0.95}
Using cuda device
Logging to logs\td3_final_td3_256x128_6_2




begin_total_asset:1000000
end_total_asset:1366100.04215632
Sharpe:  0.47435886270614985
begin_total_asset:1000000
end_total_asset:3071316.3037507255
Sharpe:  1.134251402974538
begin_total_asset:1000000
end_total_asset:798175.0345518225
Sharpe:  -3.577449779779459
begin_total_asset:1000000
end_total_asset:2977409.0279262145
Sharpe:  1.180278963723153
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 81           |
|    time_elapsed    | 50           |
|    total_timesteps | 4123         |
| train/             |              |
|    actor_loss      | -0.00843     |
|    critic_loss     | 0.000192     |
|    learning_rate   | 0.0006       |
|    n_updates       | 4022         |
|    reward          | 0.0021446028 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:2172083.5761398096
Sharpe:  0.8910268795162445
begin_total_asset:1000000
end_total_asset:3103816.308803918
Sharp

In [73]:
get_result(df_daily_return)

Annual return          0.122385
Cumulative returns     0.009668
Annual volatility      0.077190
Sharpe ratio           1.532774
Calmar ratio           7.337376
Stability              0.000638
Max drawdown          -0.016680
Omega ratio            1.301376
Sortino ratio          2.618270
Skew                   0.468810
Kurtosis               0.269767
Tail ratio             1.204741
Daily value at risk   -0.009255
Alpha                  0.000000
Beta                   1.000000
dtype: float64

In [86]:
res_df[res_df['month'] == 0]

Unnamed: 0,alg,month,res,dd,sharp,day_ret,actions,total_commission
35,a2c,0,0.03705,-0.023224,3.911956,date daily_return 0 2023-06-01 ...,date ...,0
26,ppo,0,0.034117,-0.017872,4.491486,date daily_return 0 2023-06-01 ...,date ...,0
17,ddpg,0,0.039359,-0.022248,3.798652,date daily_return 0 2023-06-01 ...,date ...,0
8,td3,0,0.031701,-0.014878,3.633125,date daily_return 0 2023-06-01 ...,date ...,0


In [46]:
np.set_printoptions(suppress=True)

In [71]:
np.round(df_actions['actions'].iloc[1], 3), np.round(df_actions['actions'].iloc[2], 3), np.round(df_actions['actions'].iloc[3], 3)

(array([1., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32),
 array([1., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32))

С новыми параметрами не так хорошо всё на первом интервале. Видно что очень резко изменяется состав портфеля от шага к шагу. Попробуем провести несколько обучений с гораздо бОльшими комиссиями, посмотрим уменьшит ли разброс в бумагах

In [118]:
#for i in range(20,19,-1):
i = 20
commiss = 0.01 * i
env_kwargs = {
    "hmax": 20,
    "initial_amount": 1000000, 
    "transaction_cost_pct": commiss, 
    "state_space": state_space, 
    "stock_dim": stock_dimension, 
    "tech_indicator_list": fa.indicators, 
    "action_space": stock_dimension, 
    "reward_scaling": 1e-4,
    "cov_xtra_names": fa.cov_xtra_names
}

optuna_params = {"learning_rate": 2.4e-4, "action_noise": "ornstein_uhlenbeck", "target_policy_noise" : 0.5, "gamma" : 0.95}
    
log_name = alg + '_final_' + alg + '_192x128_' + str(commiss) + str(6)
model, commiss, df_daily_return, df_actions = train_trade(alg, train, trade, optuna_params, log_name=log_name, size=[192,128], trained_model=None, steps = 50_000)

{'learning_rate': 0.00024, 'action_noise': OrnsteinUhlenbeckActionNoise(mu=[0. 0. 0. 0. 0. 0. 0. 0. 0.], sigma=[0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5]), 'target_policy_noise': 0.5, 'gamma': 0.95}
Using cuda device
Logging to logs\td3_final_td3_192x128_0.26_1


  target_quantity = np.int32(money_per_security/close_prices * securities_to_be) #сколько должно быть бумаг


begin_total_asset:1000000
end_total_asset:-1785674142673.3774
Sharpe:  2.7079200229848075
begin_total_asset:1000000
end_total_asset:196136030934.97473
Sharpe:  5.6585726834241346
begin_total_asset:1000000
end_total_asset:162568388.6135661
Sharpe:  6.596815042354246
begin_total_asset:1000000
end_total_asset:186967273.1087626
Sharpe:  6.558743477259751
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 85          |
|    time_elapsed    | 49          |
|    total_timesteps | 4260        |
| train/             |             |
|    actor_loss      | -0.465      |
|    critic_loss     | 0.0049      |
|    learning_rate   | 0.00024     |
|    n_updates       | 4159        |
|    reward          | 0.004004511 |
------------------------------------
begin_total_asset:1000000
end_total_asset:487005105752.3441
Sharpe:  3.6776098033052964
begin_total_asset:1000000
end_total_asset:330189326.91397107
Sharpe:  8.648047

In [117]:
#model.save('start_rev_6_5.mdl')

In [74]:
np.round(df_actions['actions'].iloc[1], 3), np.round(df_actions['actions'].iloc[2], 3), np.round(df_actions['actions'].iloc[3], 3)

(array([0.001, 0.   , 0.   , 0.204, 0.   , 0.204, 0.204, 0.204, 0.184],
       dtype=float32),
 array([0.162, 0.   , 0.   , 0.203, 0.   , 0.203, 0.187, 0.203, 0.042],
       dtype=float32),
 array([0.136, 0.   , 0.   , 0.196, 0.   , 0.196, 0.19 , 0.196, 0.085],
       dtype=float32))

In [119]:
get_result(df_daily_return)

  np.divide(average_annual_return, annualized_downside_risk, out=out)


Annual return          4704.133221
Cumulative returns        1.023241
Annual volatility         0.701154
Sharpe ratio             12.567191
Calmar ratio                   NaN
Stability                 0.959702
Max drawdown              0.000000
Omega ratio                    NaN
Sortino ratio                  inf
Skew                      2.577366
Kurtosis                  6.861365
Tail ratio               68.689227
Daily value at risk      -0.053371
Alpha                     0.000000
Beta                      1.000000
dtype: float64

In [None]:
for commission in range(20,0,-1):
    env_kwargs = {
        "hmax": 20,
        "initial_amount": 1000000, 
        "transaction_cost_pct": 0.001, 
        "state_space": state_space, 
        "stock_dim": stock_dimension, 
        "tech_indicator_list": fa.indicators, 
        "action_space": stock_dimension, 
        "reward_scaling": 1e-4,
        "cov_xtra_names": fa.cov_xtra_names
    }

In [143]:
trade['tic'].unique()

array(['GMKN', 'LKOH', 'MAGN', 'MM', 'MTSS', 'NVTK', 'ROSN', 'SBER',
       'SNGS'], dtype=object)

In [131]:
e_trade_gym2 = PortfolioEnv(df = trade, reset_to_zero=True, **env_kwargs)
state = e_trade_gym2.state

In [148]:
pred = model.predict(state)

In [149]:
pred

(array([0., 1., 0., 0., 0., 1., 1., 0., 1.], dtype=float32), None)

In [147]:
state, rew, term, _ = e_trade_gym2.step(pred)

In [125]:
rew, state

(0.003297320236000116,
 array([0.00000000e+00, 2.25000000e-02, 2.24999976e-02, 0.00000000e+00,
        0.00000000e+00, 2.25000000e-02, 0.00000000e+00, 2.25000000e-02,
        6.70552254e-10, 5.26898889e-01, 8.95671276e-01, 6.71976567e-01,
        1.00000000e+00, 8.37575875e-01, 6.85314562e-01, 7.99657240e-01,
        7.93949291e-01, 5.97809204e-01, 4.17596976e-01, 6.30545238e-01,
        5.28558044e-01, 1.00000000e+00, 7.43010357e-01, 5.75401456e-01,
        7.00817979e-01, 6.25582041e-01, 4.74691856e-01, 5.26898889e-01,
        6.30545238e-01, 6.71976567e-01, 1.00000000e+00, 8.37575875e-01,
        6.85314562e-01, 7.99657240e-01, 6.45095332e-01, 5.97809204e-01]))

In [175]:
rt = df_daily_return['daily_return'].to_numpy()

In [176]:
rt

array([ 0.        ,  0.00604526, -0.00158031,  0.00666416, -0.00342287,
       -0.00092551,  0.00389716,  0.01388395, -0.00193682,  0.00575052,
       -0.00026622, -0.00018848, -0.00224985, -0.0003685 , -0.0041405 ,
       -0.01351069,  0.0027398 ,  0.00485066, -0.01434165,  0.0142861 ,
        0.00073639])

In [177]:
np.prod(rt+1)

1.0155489635145343

In [53]:
get_result(df_daily_return)

Annual return           0.238035
Cumulative returns      0.017953
Annual volatility       0.092022
Sharpe ratio            2.365222
Calmar ratio           11.942949
Stability               0.306204
Max drawdown           -0.019931
Omega ratio             1.556182
Sortino ratio           3.663851
Skew                   -0.432479
Kurtosis                1.209552
Tail ratio              1.492475
Daily value at risk    -0.010730
Alpha                   0.000000
Beta                    1.000000
dtype: float64

In [134]:
trade.loc[0]['tic']

0    GMKN
0    LKOH
0    MAGN
0      MM
0    MTSS
0    NVTK
0    ROSN
0    SBER
0    SNGS
Name: tic, dtype: object

In [135]:
trade

Unnamed: 0,date,open,high,low,close,volume,tic,rsi_12,cci_12,dx_12,cov_list,cov_xtra,return_list
0,2023-08-01,10068.330000,10071.630000,10067.150000,10070.390000,25106100,GMKN,76.424845,215.142370,84.088373,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
0,2023-08-01,8830.000000,8937.000000,8771.000000,8896.000000,1324998,LKOH,83.042272,158.752567,82.638239,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
0,2023-08-01,80.048000,80.078000,79.478000,79.818000,17212120,MAGN,75.466668,106.126368,65.791820,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
0,2023-08-01,156.172603,156.172603,156.172603,156.172603,1,MM,100.000000,129.477912,100.000000,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
0,2023-08-01,545.110000,546.910000,543.110000,545.010000,4802040,MTSS,45.977731,-40.534283,5.922008,[],"[0.9573373323070703, 0.9443874258900921, 0.946...",tic GD GMKN IMOEX LKOH ...
...,...,...,...,...,...,...,...,...,...,...,...,...,...
22,2023-08-31,541.060000,542.110000,537.610000,538.710000,5301270,MTSS,41.667802,-66.093117,33.558085,[],"[0.5870239002247921, 0.8765191268071332, 0.732...",tic GD GMKN IMOEX LKOH ...
22,2023-08-31,1987.820000,2004.020000,1970.620000,1984.620000,832522,NVTK,67.993667,72.748710,53.094191,[],"[0.5870239002247921, 0.8765191268071332, 0.732...",tic GD GMKN IMOEX LKOH ...
22,2023-08-31,731.690000,732.940000,728.040000,730.640000,2565110,ROSN,66.340793,120.269856,51.537526,[],"[0.5870239002247921, 0.8765191268071332, 0.732...",tic GD GMKN IMOEX LKOH ...
22,2023-08-31,363.420000,364.620000,362.670000,363.220000,21957550,SBER,58.652072,94.074527,13.929651,[],"[0.5870239002247921, 0.8765191268071332, 0.732...",tic GD GMKN IMOEX LKOH ...


In [106]:
e_train_gym = PortfolioEnv(df = train, **env_kwargs)
env_train, _ = e_train_gym.get_sb_env()
agent = DRLAgent(env = env_train)
model = agent.get_model(model_name = alg, model_kwargs = optuna_params, policy_kwargs = None, tensorboard_log = 'logs')

trained_a2c = agent.train_model(model = model, 
                                tb_log_name = log_name,
                                total_timesteps = 30_000)


{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cuda device
Logging to logs\a2c_final_a2c_9_4
-----------------------------------------
| time/                 |               |
|    fps                | 29            |
|    iterations         | 100           |
|    time_elapsed       | 16            |
|    total_timesteps    | 500           |
| train/                |               |
|    entropy_loss       | -12.7         |
|    explained_variance | -193          |
|    learning_rate      | 0.0007        |
|    n_updates          | 99            |
|    policy_loss        | -4.47         |
|    reward             | -0.0020968118 |
|    value_loss         | 0.143         |
-----------------------------------------
begin_total_asset:1000000
end_total_asset:677590.5788263523
Sharpe:  -0.7154921932669398
----------------------------------------
| time/                 |              |
|    fps                | 30           |
|    iterations         | 200          |
|    ti

In [107]:
e_trade_gym2 = PortfolioEnv(df = trade, reset_to_zero=True, **env_kwargs)

state = e_trade_gym2.state


In [126]:
pred = trained_a2c.predict(state)

In [127]:
pred

(array([0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1],
       dtype=int64),
 None)

In [128]:
state, rew, term, _ = e_trade_gym2.step(pred)

In [129]:
rew

0.007766173104209378

In [158]:
res[alg] = []
ret[alg] = []
act[alg] = []
res[alg].append(get_result(df_daily_return))
ret[alg].append(df_daily_return)
act[alg].append(df_actions)

In [159]:
res

{'td3': [Annual return         -0.165986
  Cumulative returns    -0.015012
  Annual volatility      0.150527
  Sharpe ratio          -1.133765
  Calmar ratio          -2.711280
  Stability              0.427691
  Max drawdown          -0.061220
  Omega ratio            0.842712
  Sortino ratio         -1.616779
  Skew                   0.319968
  Kurtosis              -0.931438
  Tail ratio             1.156678
  Daily value at risk   -0.019642
  Alpha                  0.000000
  Beta                   1.000000
  dtype: float64]}

In [41]:
import optuna

In [45]:
loader2 = DataLoader()
df2 = loader2.LoadData()

df2.index = range(len(df2))
#df = df.drop(df[df['tic'] == 'IMOEX'].index)
df2 = df2.drop(df2[df2['tic'] == 'BZ'].index)
#df = df.drop(df[df['tic'] == 'GD'].index)
df2 = df2.drop(df2[df2['tic'] == 'USD'].index)

fa2 = FeaturesAdder(22)
df2 = fa2.Process(df2)

df2.index = df2.date.factorize()[0]

  df = pd.concat([df, dft[self.columns]], ignore_index=True)


Length of values (1999) does not match length of index (1997)
Length of values (1999) does not match length of index (1997)
Successfully added technical indicators


  df = df.ffill().bfill()
100%|██████████| 1974/1974 [00:17<00:00, 114.48it/s]


In [59]:
def objective(trial):

    loader2 = DataLoader()
    df2 = loader2.LoadData()

    df2.index = range(len(df2))
    #df = df.drop(df[df['tic'] == 'IMOEX'].index)
    df2 = df2.drop(df2[df2['tic'] == 'BZ'].index)
    #df = df.drop(df[df['tic'] == 'GD'].index)
    df2 = df2.drop(df2[df2['tic'] == 'USD'].index)
    lookback = 22#trial.suggest_categorical('lookback', [252])
    fa2 = FeaturesAdder(lookback)
    df2 = fa2.Process(df2)
    #tau = trial.suggest_categorical('tau', [0.001, 0.005, 0.01])

    #sigma = trial.suggest_float('sigma', 0, 1)
    bsize = trial.suggest_categorical('bsize', [10000, 50000, 100000])
    noise = trial.suggest_categorical('noise', ["ornstein_uhlenbeck", "normal"])

    #n_actions = stock_dimension
    #action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), sigma=sigma * np.ones(n_actions), theta=theta)
    # size1 = trial.suggest_categorical('size1', [32, 64, 128])
    # size2 = size1 * 2
    size = [128, 64]
    #ent_coef = trial.suggest_float('ent_coef', 0, 0.05)
    learning_rate = trial.suggest_float("learning_rate", 5e-6, 5e-3, log=True)
    #lookback = trial.suggest_categorical('lookback', [5, 10, 15, 22, 44, 66])

    sum_reward = 0
    sum_maxdd  = 0

    for i in range(4):
        date1 = date_start + relativedelta(months = i)
        date2 = date_start + relativedelta(months = i + 1)
        train = data_split(df2, '2016-05-10', date1)
        trade = data_split(df2, date1, date2)

        optuna_params = {"learning_rate": learning_rate, "buffer_size": bsize, "action_noise": "ornstein_uhlenbeck"}
        #optuna_params = {"learning_rate": learning_rate}#, "ent_coef": ent_coef}# "action_noise": "ornstein_uhlenbeck"}
        log_name = str(size) + '_' + str(lookback) + '_' + str(i)
        model, df_daily_return, df_actions = train_trade('td3', train, trade, None, log_name, size)
        
        result = get_result(df_daily_return)
        reward = result['Cumulative returns']
        max_dd = result['Max drawdown']
        sharp  = result['Sharpe ratio']

        print("Rew, DD, Sh", reward, max_dd, sharp)
        sum_reward += reward
        sum_maxdd += max_dd

    print("Total", sum_reward, sum_maxdd, sharp)
    return sum_reward

study = optuna.create_study(directions=["maximize"])
study.optimize(objective, n_trials=20)

[I 2024-06-23 14:26:08,936] A new study created in memory with name: no-name-eda3bef4-e5cb-4542-9e78-966bed97e7fc
  df = pd.concat([df, dft[self.columns]], ignore_index=True)


Length of values (1999) does not match length of index (1997)
Length of values (1999) does not match length of index (1997)


  df = df.ffill().bfill()


Successfully added technical indicators


100%|██████████| 1974/1974 [00:36<00:00, 53.46it/s]


{'batch_size': 100, 'buffer_size': 1000000, 'learning_rate': 0.001}
Using cuda device
Logging to logs\[128, 64]_22_0_1




begin_total_asset:1000000
end_total_asset:1328804.5761858234
Sharpe:  0.5611658994842432
begin_total_asset:1000000
end_total_asset:1103836.133373412
Sharpe:  4.10179896083965
begin_total_asset:1000000
end_total_asset:1820169.431765897
Sharpe:  0.7969255424660227
begin_total_asset:1000000
end_total_asset:1333812.7718168045
Sharpe:  0.5996255303696812
--------------------------------------
| time/              |               |
|    episodes        | 4             |
|    fps             | 44            |
|    time_elapsed    | 67            |
|    total_timesteps | 2982          |
| train/             |               |
|    actor_loss      | 1.47          |
|    critic_loss     | 0.114         |
|    learning_rate   | 0.001         |
|    n_updates       | 2881          |
|    reward          | 0.00090358907 |
--------------------------------------
begin_total_asset:1000000
end_total_asset:1107334.4305786227
Sharpe:  0.4024363943306529
begin_total_asset:1000000
end_total_asset:1018132.60



begin_total_asset:1000000
end_total_asset:1318650.4135951765
Sharpe:  0.5284407138969732
begin_total_asset:1000000
end_total_asset:892724.9130789972
Sharpe:  -0.10934792520271971
begin_total_asset:1000000
end_total_asset:1552342.927234988
Sharpe:  0.5287890287253493
begin_total_asset:1000000
end_total_asset:989830.0674499976
Sharpe:  0.11941069750924747
-------------------------------------
| time/              |              |
|    episodes        | 4            |
|    fps             | 50           |
|    time_elapsed    | 56           |
|    total_timesteps | 2869         |
| train/             |              |
|    actor_loss      | -4.04        |
|    critic_loss     | 0.108        |
|    learning_rate   | 0.001        |
|    n_updates       | 2768         |
|    reward          | -0.001113208 |
-------------------------------------
begin_total_asset:1000000
end_total_asset:942752.0679029975
Sharpe:  0.011518549135084648
begin_total_asset:1000000
end_total_asset:1727219.4953959861



begin_total_asset:1000000
end_total_asset:1980415.4168697067
Sharpe:  0.8011031745757455
begin_total_asset:1000000
end_total_asset:1717016.8685665885
Sharpe:  0.9291492766166308
begin_total_asset:1000000
end_total_asset:1469591.0161674125
Sharpe:  0.6434573503934055
begin_total_asset:1000000
end_total_asset:1150816.2652700073
Sharpe:  0.4576295083108517
------------------------------------
| time/              |             |
|    episodes        | 4           |
|    fps             | 47          |
|    time_elapsed    | 74          |
|    total_timesteps | 3523        |
| train/             |             |
|    actor_loss      | 2.08        |
|    critic_loss     | 0.049       |
|    learning_rate   | 0.001       |
|    n_updates       | 3422        |
|    reward          | 0.017000806 |
------------------------------------
begin_total_asset:1000000
end_total_asset:1826971.4826673293
Sharpe:  0.7841686187813658
begin_total_asset:1000000
end_total_asset:1277429.8361853443
Sharpe:  4.96

[W 2024-06-23 15:22:09,944] Trial 0 failed with parameters: {'bsize': 100000, 'noise': 'normal', 'learning_rate': 7.899454013765834e-05} because of the following error: KeyboardInterrupt().
Traceback (most recent call last):
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\optuna\study\_optimize.py", line 196, in _run_trial
    value_or_values = func(trial)
  File "C:\Users\Alex\AppData\Local\Temp\ipykernel_45272\2432288324.py", line 41, in objective
    model, df_daily_return, df_actions = train_trade('td3', train, trade, None, log_name, size)
  File "C:\Users\Alex\AppData\Local\Temp\ipykernel_45272\1449868205.py", line 23, in train_trade
    trained_a2c = agent.train_model(model = model,
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\finrl\agents\stablebaselines3\models.py", line 117, in train_model
    model = model.learn(
  File "c:\Users\Alex\anaconda3\envs\FinRL\lib\site-packages\stable_baselines3\td3\td3.py", line 222, in learn
    return super().learn(

KeyboardInterrupt: 