## Import

In [9]:
import pickle
import matplotlib.pyplot as plt
import warnings

from config import CONFIG
from easydict import EasyDict

from env.env import *
from state.state import *
from agent.PPOAgent_ms import *
from models.CTTS import *
from trainer.nonEpisodic import *
from utils.setDevice import *
from utils.timestepRelated import *
from visualization.methods import *

In [10]:
warnings.filterwarnings("ignore", category=FutureWarning)

## Load data

In [11]:
# Read the preprocessed price frame from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so this must only ever be pointed at a file produced by a trusted pipeline.
with open('../data/processed/kospi200_ffill_clean_version.pkl', 'rb') as pkl_file:
    df = pickle.load(pkl_file)

# Peek at the first five rows (head() defaults to 5) as a quick sanity check.
df.head()

Unnamed: 0,date,time,open,high,low,close,prevClose,vol
2010-02-16 09:01:00,20100216,901,207.55,207.65,207.5,207.6,207.5,3985.0
2010-02-16 09:02:00,20100216,902,207.6,207.65,207.25,207.55,207.5,5095.0
2010-02-16 09:03:00,20100216,903,207.55,207.8,207.5,207.6,207.5,2175.0
2010-02-16 09:04:00,20100216,904,207.55,207.85,207.55,207.8,207.5,1301.0
2010-02-16 09:05:00,20100216,905,207.8,208.15,207.8,208.05,207.5,3870.0


## Set state info and scaler

In [12]:
# Feature columns fed to the model. The ORDER is significant: it is kept exactly
# as originally declared, and len(target_values) is later used as the input width.
target_values = [
    # raw bar values
    'open', 'high', 'low', 'close', 'vol',
    # return / volume-change features
    'return_5', 'return_10', 'volume_change',
    # moving-average features
    'ema_5', 'ema_20', 'ema_cross',
    # oscillator / trend indicators
    'cci', 'sar', '%K',
    # currently disabled: '%D', 'roc', 'rsi', 'obv', 'ad_line'
    # band / range / gap features
    'bb_upper', 'bb_lower', 'bb_width', 'atr', 'gap_size',
]

# Scaler instance handed to the trainer through CONFIG.SCALER in a later cell.
# NOTE(review): RobustScaler is not imported by name in this notebook; it is
# presumably re-exported by one of the star imports - confirm.
scaler = RobustScaler()

## Set environment info

In [13]:
# Build the list of date-range splits from the frame's index using the project
# helper. Per the displayed output, each entry is a pair of (start, end) date
# tuples; the trainer log labels the first tuple of an entry as the "Train"
# range, so the second is presumably the matching validation range - confirm.
train_valid_timestep = split_date_ranges_by_group(df.index)
# Bare expression so the notebook displays the resulting splits.
train_valid_timestep 

[(('2010-02-16', '2011-12-05'), ('2011-12-06', '2012-02-16')),
 (('2012-02-17', '2013-12-10'), ('2013-12-11', '2014-02-25')),
 (('2014-02-26', '2015-12-21'), ('2015-12-22', '2016-03-09')),
 (('2016-03-10', '2018-01-08'), ('2018-01-09', '2018-03-22')),
 (('2018-03-23', '2020-01-22'), ('2020-01-23', '2020-04-03'))]

In [14]:
# Discrete action set: every integer from -MAX_CONTRACT to +MAX_CONTRACT inclusive.
# NOTE(review): `action_space` and `n_actions` are not read by the cells visible
# below, which use CONFIG.ACTION_SPACE / CONFIG.N_ACTIONS instead - confirm the
# two agree.
execution_strength = CONFIG.MAX_CONTRACT
action_space = [*range(-execution_strength, execution_strength + 1)]
n_actions = len(action_space)

# Device picked by the project helper; swap in torch.device("cpu") to force CPU.
device = get_device()

# Publish the notebook-level choices on the shared CONFIG object so the later
# cells can read them from one place.
CONFIG.SCALER = scaler
CONFIG.TRAIN_VALID_TIMESTEP = train_valid_timestep
CONFIG.INPUT_DIM = len(target_values)
CONFIG.DONE_FTN = is_day_changed
CONFIG.REWARD_FTN = reward_sharpe_ratio

In [15]:
# State object built over the selected feature columns; the agent-side input
# width is taken from CONFIG.AGENT_INPUT_DIM.
state = State(target_values, fixed_agent_dim=CONFIG.AGENT_INPUT_DIM)

# Network shared by the agent and the trainer. All hyper-parameters come from
# CONFIG; only the device is the one resolved in the previous cell.
model = MultiStatePV(
    # input / output sizes
    input_dim=CONFIG.INPUT_DIM,
    agent_input_dim=CONFIG.AGENT_INPUT_DIM,
    action_size=CONFIG.N_ACTIONS,
    # embedding and attention hyper-parameters
    embed_dim=CONFIG.EMBED_DIM,
    kernel_size=CONFIG.KERNEL_SIZE,
    stride=CONFIG.STRIDE,
    num_layers=CONFIG.NUM_LAYERS,
    num_heads=CONFIG.NUM_HEADS,
    d_ff=CONFIG.D_FF,
    dropout=CONFIG.DROPOUT,
    # agent branch and fusion sizes
    agent_hidden_dim=CONFIG.AGENT_HIDDEN_DIM,
    agent_out_dim=CONFIG.AGENT_OUT_DIM,
    fusion_hidden_dim=CONFIG.FUSION_HIDDEN_DIM,
    # placement
    device=device,
)

# PPO agent wrapping the model above.
agent = PPOAgent(
    model=model,
    device=device,
    # action set
    action_space=CONFIG.ACTION_SPACE,
    n_actions=CONFIG.N_ACTIONS,
    # loss weights and clipping
    clip_eps=CONFIG.CLIP_EPS,
    value_coeff=CONFIG.VALUE_COEFF,
    entropy_coeff=CONFIG.ENTROPY_COEFF,
    gamma=CONFIG.GAMMA,
    # optimisation settings
    lr=CONFIG.LR,
    batch_size=CONFIG.BATCH_SIZE,
    epoch=CONFIG.EPOCH,
)

In [16]:
# Wire the data, environment class, state builder, agent and model into the
# non-episodic trainer. Every tunable comes from CONFIG, except the device.
trainer = NonEpisodicTrainer(
    # data and split definition
    df=df,
    train_valid_timestep=CONFIG.TRAIN_VALID_TIMESTEP,
    window_size=CONFIG.WINDOW_SIZE,
    scaler=CONFIG.SCALER,
    # environment: the class itself is passed, not an instance
    env=FuturesEnvironment,
    state=state,
    reward_ftn=CONFIG.REWARD_FTN,
    done_ftn=CONFIG.DONE_FTN,
    start_budget=CONFIG.START_BUDGET,
    position_cap=CONFIG.POSITION_CAP,
    # learner
    agent=agent,
    model=model,
    # NOTE(review): `optim` is not imported by name in this notebook; it is
    # presumably torch.optim re-exported by one of the star imports - confirm.
    optimizer=optim.Adam,
    # Use the same device object the model and agent were built with, rather
    # than CONFIG.DEVICE, so all three components cannot disagree on placement.
    device=device,
    # rollout / logging / checkpoint cadence
    n_steps=CONFIG.N_STEPS,
    ma_interval=CONFIG.MA_INTERVAL,
    save_interval=CONFIG.SAVE_INTERVAL,
    path=CONFIG.PATH
)

# Start training; this is long-running and prints one log line per episode.
trainer()

Robust Scaling Completed.
Robust Scaling Completed.
>>>> Train : ('2010-02-16', '2011-12-05')
[ 0 |Train] Episode 0 | Loss:  1.3481 | (short : 44 %, hold : 12%, long: 44%) | (Ave) Strength: 220.00 |Reward: -96 | Avg(50): -95.87 | Maintained Len: 100
[ 0 |Train] Episode 1 | Loss:  1.0947 | (short : 48 %, hold : 13%, long: 39%) | (Ave) Strength: 215.50 |Reward: -70 | Avg(50): -83.16 | Maintained Len: 200
[ 0 |Train] Episode 2 | Loss:  1.2103 | (short : 45 %, hold : 14%, long: 41%) | (Ave) Strength: 215.00 |Reward: -74 | Avg(50): -80.12 | Maintained Len: 300
[ 0 |Train] Episode 3 | Loss:  0.0057 | (short : 62 %, hold : 0%, long: 38%) | (Ave) Strength: 172.00 |Reward:  -2 | Avg(50): -60.47 | Maintained Len: 316
[ 0 |Train] Episode 4 | Loss: -0.4156 | (short : 44 %, hold : 18%, long: 38%) | (Ave) Strength: 180.60 |Reward:  24 | Avg(50): -43.63 | Maintained Len: 416
[ 0 |Train] Episode 5 | Loss:  1.2412 | (short : 34 %, hold : 13%, long: 53%) | (Ave) Strength: 188.67 |Reward: -70 | Avg(50): 

KeyboardInterrupt: 