In [6]:
import numpy as np
import torch
import os
import pandas as pd
from matplotlib import pyplot as plt
from tqdm.auto import tqdm
from src.model import CNN2d, DDPG
from src.train_utils import fit_agent, agent_trading, rewards_plot
from src.stock_env import StockTradingMultipleEnv
from IPython.core.display_functions import clear_output

# Подготовка данных
data = pd.read_csv("data/df_rus_processed.csv", index_col=0)
data["date"] = pd.to_datetime(data.date)
data = data[data["date"] < pd.to_datetime("2023-10-15")]
stock_list = list(set(data.tic))

device = "cuda" if torch.cuda.is_available() else "cpu"

# Разделение данных
train_data = data.pivot(index="date", columns="tic", values="close").reset_index()

In [7]:
train_data

tic,date,AGRO,AKRN,ALRS,BANE,CBOM,CHMF,ENPG,FEES,FESH,...,SBER,SIBN,SNGSP,TATN,TCSG,TRNFP,UNAC,VSMO,VTBR,YNDX
0,2023-09-15 10:01:00,1197.2,19068.0,78.08,1899.5,6.761,1297.2,502.8,0.12414,100.06,...,259.88,658.65,48.345,602.8,3474.0,147300.0,2.1055,51260.0,0.026625,2500.6
1,2023-09-15 10:02:00,1201.2,19198.0,77.90,1898.0,6.767,1296.4,503.6,0.12426,101.60,...,259.57,659.15,48.260,602.3,3470.0,146900.0,2.0580,51300.0,0.026625,2501.6
2,2023-09-15 10:03:00,1203.6,19198.0,77.74,1899.0,6.765,1293.0,503.0,0.12314,101.69,...,259.52,659.70,48.250,602.8,3473.0,146050.0,1.9240,51320.0,0.026545,2502.8
3,2023-09-15 10:04:00,1207.4,19190.0,77.82,1917.0,6.766,1294.2,503.6,0.12340,101.75,...,259.80,659.50,48.250,602.9,3482.5,146200.0,1.9755,51120.0,0.026550,2507.2
4,2023-09-15 10:05:00,1204.2,19190.0,77.80,1910.5,6.765,1292.2,503.6,0.12340,100.92,...,259.94,657.80,48.210,603.0,3485.5,146450.0,2.0385,50820.0,0.026555,2502.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10915,2023-10-13 18:36:00,1350.0,18996.0,76.62,2018.0,7.015,1374.0,516.6,0.11544,95.35,...,263.59,744.40,53.080,626.0,3478.5,141750.0,1.8855,41800.0,0.025450,2518.6
10916,2023-10-13 18:37:00,1349.6,18998.0,76.63,2019.5,7.015,1374.0,516.2,0.11548,95.39,...,263.59,744.95,53.100,626.0,3479.0,141800.0,1.8895,41800.0,0.025470,2521.2
10917,2023-10-13 18:38:00,1349.8,18996.0,76.64,2019.5,7.015,1374.4,516.2,0.11546,95.43,...,263.60,744.85,53.100,626.3,3476.5,141800.0,1.8905,41800.0,0.025470,2520.6
10918,2023-10-13 18:39:00,1349.8,19000.0,76.68,2017.5,7.015,1374.4,516.4,0.11544,95.42,...,263.60,744.75,53.080,626.3,3479.5,141800.0,1.8895,41800.0,0.025480,2519.6


In [8]:
# Создание среды
env = StockTradingMultipleEnv(
    data=train_data,
    stock_list=stock_list,
    initial_amount=1e6,
    window_size=10,
    reward_scaling=1e-6,
)

In [11]:
logging = True
episode_n = 300
w_size = 50
study_name = f'cnn_large_{len(stock_list)}_stocks_w_size_{w_size}_epochs_{episode_n}'

if logging:
    for i in ['results']:
        if os.path.exists(f'{i}/{study_name}'):
            #raise ValueError(f"folder {i}/{study_name} already exists!")
            print(f'warning: path {i}/{study_name} exists!')
        else:
            os.makedirs(f'{i}/{study_name}')

In [12]:
#CNN
policy_kwargs = {'dim_list': [512, 256], 'output_dim': len(stock_list)}
policy_type = 'cnn'
policy = CNN2d
state_dim = w_size

ddpg_agent = DDPG(policy=policy, policy_type=policy_type, kwargs=policy_kwargs, multiple_stocks=True,
                  state_dim=state_dim, device=device, action_dim=1, action_max=1, batch_size=512, noise_decrease=1e-3)

total_train_rewards, total_val_rewards = [], []
best_val_reward = None
agent_actions = {}

In [13]:
for episode in tqdm(range(100)):
    total_reward_train = fit_agent(ddpg_agent, env, stock_list=stock_list, w_size=w_size)
    total_train_rewards.append(total_reward_train)

    total_reward_val = agent_trading(ddpg_agent, env, stock_list=stock_list, w_size=w_size)
    total_val_rewards.append(total_reward_val)

    if best_val_reward is None:
        best_val_reward = total_reward_val
    else:
        if (total_reward_val >= best_val_reward) & (episode >= 20):
            best_val_reward = total_reward_val
            if logging:
                ddpg_agent.save(dir=f'DDPG_agent/{study_name}')

    clear_output(True)
    rewards_plot(total_train_rewards, total_val_rewards,
                 save=episode == (episode_n - 1), path=f'results/{study_name}/rewards_plot.png')

total_reward_test = agent_trading(ddpg_agent, env, stock_list=stock_list, w_size=w_size)
print("total test reward for last model: {:.3f}%".format(total_reward_test * 100))

  0%|          | 0/100 [00:00<?, ?it/s]

state shape: (50, 49)


RuntimeError: Given groups=1, weight of size [64, 128, 3], expected input[128, 27, 10] to have 128 channels, but got 27 channels instead

In [None]:
ddpg_agent.save(dir='DDPG_agent/cnn_30_stocks_train_2000_w_size_50_epochs_300')

In [None]:
plt.figure(figsize=(20, 5))
plt.title(f'actions by each stock distribution: \nDDPG, policy type: {policy_type}:')
plt.hist(np.array(env.actions_buffer))
plt.show()

In [None]:
print(env.state_story[-1])

In [None]:
plt.plot(env.portfolio_size_story)
plt.show()