In [1]:
import torch
import pandas as pd

from rl_portfolio.environment import PortfolioOptimizationEnv
from rl_portfolio.algorithm import PolicyGradient
from rl_portfolio.architecture import EI3

import logging
# Silence the 'matplotlib.font_manager' logger so its messages do not clutter cell output.
logging.getLogger('matplotlib.font_manager').disabled = True

# Seed torch's global RNG so torch-driven randomness repeats across
# "Restart Kernel -> Run All" executions of this notebook.
# NOTE(review): randomness that rl_portfolio may draw from other generators
# (e.g. numpy / random) is not covered by this seed -- confirm and seed those too if needed.
SEED = 42
torch.manual_seed(SEED)

# Use the first CUDA GPU when torch reports one is available; otherwise run on the CPU.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

In [2]:
# Symbols kept for this experiment; rows for any other "tic" value are dropped.
selected_tickers = [
    "AAPL", "CMCSA", "CSCO", "FB", "HBAN", "INTC",
    "MSFT", "MU", "NVDA", "QQQ", "XIV",
]

# Read the NASDAQ temporal CSV and keep only the rows belonging to the selected tickers.
nasdaq_temporal = (
    pd.read_csv("../../Temporal_Relational_Stock_Ranking_FinRL/temporal_data/NASDAQ_temporal_data.csv")
    .loc[lambda frame: frame["tic"].isin(selected_tickers)]
)

# Bare last expression: display the filtered frame as the cell output.
nasdaq_temporal

Unnamed: 0,day,open,high,low,close,volume,tic
2,0,0.424556,0.424768,0.429769,0.442628,0.445018,AAPL
185,0,0.444677,0.447059,0.444616,0.441948,0.458681,CMCSA
215,0,0.524828,0.529260,0.524642,0.515638,0.539094,CSCO
310,0,0.145431,0.146840,0.148497,0.145465,0.152980,FB
395,0,0.429832,0.429663,0.423030,0.420449,0.442424,HBAN
...,...,...,...,...,...,...,...
1276940,1244,0.967030,0.976910,0.977162,0.979823,0.987446,MSFT
1276947,1244,0.841787,0.871981,0.908756,0.897054,0.869767,MU
1276981,1244,0.873101,0.906527,0.945962,0.948746,0.882605,NVDA
1277112,1244,0.980573,0.986027,0.985772,0.982449,0.987930,QQQ


In [3]:
# Keep only the time column, the ticker column and the three price features
# that are handed to the environments below.
portfolio_columns = ["day", "tic", "close", "high", "low"]
df_portfolio = nasdaq_temporal[portfolio_columns]

# Split on the "day" column: rows with day < 979 go to the training frame,
# rows with day >= 979 go to the test frame.
split_day = 979
df_portfolio_train = df_portfolio[df_portfolio["day"] < split_day]
df_portfolio_test = df_portfolio[df_portfolio["day"] >= split_day]


def _build_environment(frame, **extra_options):
    """Create a PortfolioOptimizationEnv over ``frame`` with the settings shared by both splits.

    Every call passes freshly built argument values, so the two environments
    do not share any argument objects. ``extra_options`` are forwarded
    unchanged as additional keyword arguments to the constructor.
    """
    return PortfolioOptimizationEnv(
        frame,
        initial_amount=100000,
        state_normalization="by_last_value",
        comission_fee_pct=0.0025,
        time_window=50,
        features=["close", "high", "low"],
        time_column="day",
        print_metrics=False,
        plot_graphs=False,
        **extra_options,
    )


# NOTE(review): only the training environment sets comission_fee_model; the
# test environment relies on the constructor's default. Confirm that this
# difference between the two environments is intentional.
environment_train = _build_environment(df_portfolio_train, comission_fee_model="trf_approx")
environment_test = _build_environment(df_portfolio_test)

In [4]:
# Keyword options for the PolicyGradient instance. The test environment is
# supplied as the validation environment, and `device` is the value chosen in
# the first cell.
policy_gradient_options = {
    "policy": EI3,
    "validation_env": environment_test,
    "lr": 1e-2,
    # "parameter_noise": 0.01,  # left disabled, as in the original call
    "use_tensorboard": True,
    "device": device,
}

# The training environment is the only positional argument.
algo = PolicyGradient(environment_train, **policy_gradient_options)

In [5]:
# Train for a fixed number of episodes. In the recorded output of this cell,
# 30 episodes took about six minutes (roughly 12 s per episode).
n_episodes = 30
algo.train(episodes=n_episodes)

100%|██████████| 30/30 [06:01<00:00, 12.07s/it]
