## Import

In [1]:
import pickle
import matplotlib.pyplot as plt
import warnings

from config import CONFIG
from easydict import EasyDict

from env.env_f import *
from state.state import *
from agent.PPOAgent_ms import *
from models.CTTS import *
from trainer.nonEpisodic import *
from trainer.Episodic import *
from utils.setDevice import *
from utils.timestepRelated import *
from visualization.methods import *

In [2]:
warnings.filterwarnings("ignore", category=FutureWarning)

## Load

In [3]:
# Load the preprocessed KOSPI200 frame from its pickle file.
# NOTE: unpickling can execute arbitrary code — only load files you produced
# yourself or otherwise trust.
data_path = '../data/processed/kospi200_ffill_clean_version.pkl'
with open(data_path, mode='rb') as pkl_file:
    df = pickle.load(pkl_file)

# Preview the first five rows.
df.head(5)

Unnamed: 0,date,time,open,high,low,close,prevClose,vol
2010-02-16 09:01:00,20100216,901,207.55,207.65,207.5,207.6,207.5,3985.0
2010-02-16 09:02:00,20100216,902,207.6,207.65,207.25,207.55,207.5,5095.0
2010-02-16 09:03:00,20100216,903,207.55,207.8,207.5,207.6,207.5,2175.0
2010-02-16 09:04:00,20100216,904,207.55,207.85,207.55,207.8,207.5,1301.0
2010-02-16 09:05:00,20100216,905,207.8,208.15,207.8,208.05,207.5,3870.0


## Set state info and scaler

In [4]:
# Column names handed to State(...) as the model's input features.
# The grouping comments are only a reading aid; the list contents and order
# are what the code actually uses.
target_values = [
    # price bars and volume
    'open', 'high', 'low', 'close', 'vol',
    # short-horizon returns and volume change
    'return_5', 'return_10', 'volume_change',
    # exponential moving averages
    'ema_5', 'ema_20', 'ema_cross',
    # technical indicators
    'cci', 'sar', '%K',
    # currently disabled: '%D', 'roc', 'rsi', 'obv', 'ad_line'
    # band / range / gap features
    'bb_upper', 'bb_lower', 'bb_width', 'atr', 'gap_size',
]

# Scaler instance; it is stored on CONFIG.SCALER and passed to the trainer.
scaler = RobustScaler()

## Set Env info 

In [5]:
# Only the first 50,000 index entries of df are used to build the splits.
head_index = df.index[:50000]
train_valid_timestep = split_date_ranges_by_group(head_index)

# Show the resulting pairs of date ranges (presumably (train, valid) —
# the trainer log labels the first range of each pair as "Train").
train_valid_timestep

[(('2010-02-16', '2010-03-22'), ('2010-03-23', '2010-03-25')),
 (('2010-03-26', '2010-04-28'), ('2010-04-29', '2010-05-03')),
 (('2010-05-04', '2010-06-09'), ('2010-06-10', '2010-06-14')),
 (('2010-06-15', '2010-07-19'), ('2010-07-20', '2010-07-22')),
 (('2010-07-23', '2010-08-27'), ('2010-08-30', '2010-09-01'))]

In [6]:
# 
# Integer action grid from -MAX_CONTRACT to +MAX_CONTRACT, inclusive.
execution_strength = CONFIG.MAX_CONTRACT
action_space = [*range(-execution_strength, execution_strength + 1)]
n_actions = len(action_space)
# NOTE(review): the model/agent cells read CONFIG.ACTION_SPACE and
# CONFIG.N_ACTIONS rather than the two locals above — confirm they agree.

device = get_device()  # alternative: torch.device("cpu")

# Run-specific settings attached to the shared CONFIG object.
CONFIG.SCALER = scaler
CONFIG.TRAIN_VALID_TIMESTEP = train_valid_timestep
CONFIG.DONE_FTN = is_day_changed
CONFIG.REWARD_FTN = reward_unrealized_pnl_diff_log
CONFIG.INPUT_DIM = len(target_values)  # one model input per feature column

In [7]:
# State wrapper built from the selected feature columns.
state = State(target_values)

# Policy/value network: every hyperparameter is read from CONFIG, the device
# comes from get_device().
model_kwargs = dict(
    input_dim=CONFIG.INPUT_DIM,
    agent_input_dim=CONFIG.AGENT_INPUT_DIM,
    embed_dim=CONFIG.EMBED_DIM,
    kernel_size=CONFIG.KERNEL_SIZE,
    stride=CONFIG.STRIDE,
    action_size=CONFIG.N_ACTIONS,
    device=device,
    agent_hidden_dim=CONFIG.AGENT_HIDDEN_DIM,
    agent_out_dim=CONFIG.AGENT_OUT_DIM,
    fusion_hidden_dim=CONFIG.FUSION_HIDDEN_DIM,
    num_layers=CONFIG.NUM_LAYERS,
    num_heads=CONFIG.NUM_HEADS,
    d_ff=CONFIG.D_FF,
    dropout=CONFIG.DROPOUT,
)
model = MultiStatePV(**model_kwargs)

# PPO agent wrapping the model; it runs on the same device as the model.
agent_kwargs = dict(
    action_space=CONFIG.ACTION_SPACE,
    n_actions=CONFIG.N_ACTIONS,
    model=model,
    value_coeff=CONFIG.VALUE_COEFF,
    entropy_coeff=CONFIG.ENTROPY_COEFF,
    clip_eps=CONFIG.CLIP_EPS,
    gamma=CONFIG.GAMMA,
    lr=CONFIG.LR,
    batch_size=CONFIG.BATCH_SIZE,
    epoch=CONFIG.EPOCH,
    device=device,
)
agent = PPOAgent(**agent_kwargs)

In [None]:
# Build the trainer from the data, environment class, agent/model and the
# settings stored on CONFIG. (The original cell notes NonEpisodicTrainer as
# the alternative trainer class.)
trainer = EpisodicTrainer( # NonEpisodicTrainer
    df=df,
    env=FuturesEnvironment,
    train_valid_timestep=CONFIG.TRAIN_VALID_TIMESTEP,
    window_size=CONFIG.WINDOW_SIZE,
    state=state,
    reward_ftn=CONFIG.REWARD_FTN,
    done_ftn=CONFIG.DONE_FTN,
    start_budget=CONFIG.START_BUDGET,
    scaler=CONFIG.SCALER,
    position_cap=CONFIG.POSITION_CAP,
    agent=agent,
    model=model,
    optimizer=optim.Adam,
    # Pass the same `device` object that the model and agent were built with.
    # Reading CONFIG.DEVICE here could disagree with get_device() and put the
    # trainer's tensors on a different device than the model's parameters.
    device=device,
    n_steps=CONFIG.N_STEPS,
    ma_interval=CONFIG.MA_INTERVAL,
    save_interval=CONFIG.SAVE_INTERVAL,
    path=CONFIG.PATH
)

# Run training; per-episode progress is printed to the cell output.
trainer()

Robust Scaling Completed.
Robust Scaling Completed.
>>>> Train : ('2010-02-16', '2010-03-22')
[0|Train] Ep 001 | info:  | Maintained for: 100 | Reward:  -11 | Loss:  0.254 | Pos(short/hold/long): 48% / 9% / 43% | Strength: 2.02 |
[0|Train] Ep 002 | info:  | Maintained for: 200 | Reward:  -48 | Loss:  1.662 | Pos(short/hold/long): 48% / 4% / 48% | Strength: 2.24 |
[0|Train] Ep 003 | info:  | Maintained for: 300 | Reward:  -19 | Loss:  0.304 | Pos(short/hold/long): 43% / 7% / 50% | Strength: 1.91 |
[0|Train] Ep 005 | info:  | Maintained for: 416 | Reward:  -19 | Loss:  0.360 | Pos(short/hold/long): 49% / 8% / 43% | Strength: 2.25 |
[0|Train] Ep 006 | info:  | Maintained for: 516 | Reward:  -50 | Loss:  0.893 | Pos(short/hold/long): 48% / 11% / 41% | Strength: 2.15 |
[0|Train] Ep 007 | info:  | Maintained for: 616 | Reward:  -24 | Loss:  0.428 | Pos(short/hold/long): 49% / 13% / 38% | Strength: 1.85 |
[0|Train] Ep 009 | info:  | Maintained for: 732 | Reward:  -20 | Loss:  1.226 | Pos(shor