## Import

In [1]:
import pickle
import matplotlib.pyplot as plt
import warnings

from config import CONFIG
from easydict import EasyDict

from env.env import *
from state.state import *
from agent.PPOAgent_ms import *
from models.CTTS import *
from trainer.nonEpisodic import *
from utils.setDevice import *
from utils.timestepRelated import *
from visualization.methods import *

In [2]:
warnings.filterwarnings("ignore", category=FutureWarning)

## Load data

In [3]:
# Pickled price frame to train on (forward-filled / cleaned, judging by the file name).
# NOTE: pickle.load can execute arbitrary code -- only point this at files you produced yourself.
processed_pickle_path = '../data/processed/kospi200_ffill_clean_version.pkl'

with open(processed_pickle_path, mode='rb') as pickle_file:
    df = pickle.load(pickle_file)

# Preview the first five rows.
df.head(5)

Unnamed: 0,date,time,open,high,low,close,prevClose,vol
2010-02-16 09:01:00,20100216,901,207.55,207.65,207.5,207.6,207.5,3985.0
2010-02-16 09:02:00,20100216,902,207.6,207.65,207.25,207.55,207.5,5095.0
2010-02-16 09:03:00,20100216,903,207.55,207.8,207.5,207.6,207.5,2175.0
2010-02-16 09:04:00,20100216,904,207.55,207.85,207.55,207.8,207.5,1301.0
2010-02-16 09:05:00,20100216,905,207.8,208.15,207.8,208.05,207.5,3870.0


## Set state features and scaler

In [4]:
# Names of the feature columns handed to State; the order below is unchanged,
# only the layout groups related names (grouping is by name, not verified here).
target_values = [
    # raw bar fields
    'open', 'high', 'low', 'close', 'vol',
    # return / change features
    'return_5', 'return_10', 'volume_change',
    # moving-average and indicator features
    'ema_5', 'ema_20', 'ema_cross', 'cci', 'sar', '%K',
    # currently disabled: '%D', 'roc', 'rsi', 'obv', 'ad_line'
    # band / range features
    'bb_upper', 'bb_lower', 'bb_width', 'atr', 'gap_size',
]

# Scaler instance stored on CONFIG.SCALER in a later cell.
scaler = RobustScaler()

## Set environment info

In [5]:
# Build the list of ((train_start, train_end), (valid_start, valid_end)) date
# windows from the first 50,000 index entries only.
split_index = df.index[:50000]
train_valid_timestep = split_date_ranges_by_group(split_index)

# Display the resulting windows.
train_valid_timestep

[(('2010-02-16', '2010-03-22'), ('2010-03-23', '2010-03-25')),
 (('2010-03-26', '2010-04-28'), ('2010-04-29', '2010-05-03')),
 (('2010-05-04', '2010-06-09'), ('2010-06-10', '2010-06-14')),
 (('2010-06-15', '2010-07-19'), ('2010-07-20', '2010-07-22')),
 (('2010-07-23', '2010-08-27'), ('2010-08-30', '2010-09-01'))]

In [6]:
# Discrete action grid: every integer from -MAX_CONTRACT to +MAX_CONTRACT, zero included
# (presumably negative = short, 0 = hold, positive = long -- confirm against the agent).
execution_strength = CONFIG.MAX_CONTRACT
action_space = [*range(-execution_strength, execution_strength + 1)]
n_actions = len(action_space)
# NOTE(review): the model/agent cell reads CONFIG.ACTION_SPACE and CONFIG.N_ACTIONS,
# not the two locals above -- confirm that the values agree.

# Device used to build the model and agent; swap in torch.device("cpu") to force CPU.
device = get_device()

# Publish the run-specific settings on the shared CONFIG object.
CONFIG.INPUT_DIM = len(target_values)  # one input channel per feature column
CONFIG.REWARD_FTN = reward_unrealized_pnl_diff_log
CONFIG.DONE_FTN = is_day_changed
CONFIG.TRAIN_VALID_TIMESTEP = train_valid_timestep
CONFIG.SCALER = scaler

In [7]:
# State object configured with the selected feature columns.
state = State(target_values)

# Network hyper-parameters: everything comes from CONFIG except the runtime device.
model_kwargs = dict(
    input_dim=CONFIG.INPUT_DIM,
    agent_input_dim=CONFIG.AGENT_INPUT_DIM,
    embed_dim=CONFIG.EMBED_DIM,
    kernel_size=CONFIG.KERNEL_SIZE,
    stride=CONFIG.STRIDE,
    action_size=CONFIG.N_ACTIONS,
    device=device,
    agent_hidden_dim=CONFIG.AGENT_HIDDEN_DIM,
    agent_out_dim=CONFIG.AGENT_OUT_DIM,
    fusion_hidden_dim=CONFIG.FUSION_HIDDEN_DIM,
    num_layers=CONFIG.NUM_LAYERS,
    num_heads=CONFIG.NUM_HEADS,
    d_ff=CONFIG.D_FF,
    dropout=CONFIG.DROPOUT,
)
model = MultiStatePV(**model_kwargs)

# PPO agent wrapping the model; loss coefficients and optimisation settings come from CONFIG.
agent_kwargs = dict(
    action_space=CONFIG.ACTION_SPACE,
    n_actions=CONFIG.N_ACTIONS,
    model=model,
    value_coeff=CONFIG.VALUE_COEFF,
    entropy_coeff=CONFIG.ENTROPY_COEFF,
    clip_eps=CONFIG.CLIP_EPS,
    gamma=CONFIG.GAMMA,
    lr=CONFIG.LR,
    batch_size=CONFIG.BATCH_SIZE,
    epoch=CONFIG.EPOCH,
    device=device,
)
agent = PPOAgent(**agent_kwargs)

In [None]:
# Assemble the trainer from the data frame, the environment (passed without being
# called, i.e. not instantiated here), the state/agent/model built above and the
# CONFIG settings.
trainer = NonEpisodicTrainer(
    df=df,
    env=FuturesEnvironment,
    train_valid_timestep=CONFIG.TRAIN_VALID_TIMESTEP,
    window_size=CONFIG.WINDOW_SIZE,
    state=state,
    reward_ftn=CONFIG.REWARD_FTN,
    done_ftn=CONFIG.DONE_FTN,
    start_budget=CONFIG.START_BUDGET,
    scaler=CONFIG.SCALER,
    position_cap=CONFIG.POSITION_CAP,
    agent=agent,
    model=model,
    optimizer=optim.Adam,
    # Use the same device object the model and agent were constructed with, so the
    # trainer cannot end up on a different device than the network it trains
    # (previously CONFIG.DEVICE, which is set independently of get_device()).
    device=device,
    n_steps=CONFIG.N_STEPS,
    ma_interval=CONFIG.MA_INTERVAL,
    save_interval=CONFIG.SAVE_INTERVAL,
    path=CONFIG.PATH
)

# Calling the trainer starts the run; the captured log shows a Train phase followed
# by Valid runs for each (train, valid) date window.
trainer()

Robust Scaling Completed.
Robust Scaling Completed.
>>>> Train : ('2010-02-16', '2010-03-22')
[0|Train] Episode 0 | Loss:  0.3058 | (short : 45 %, hold : 9%, long: 46%) | (Ave) Strength: 239.00 |Reward: -22 | Avg(50): -21.94 | Maintained Len: 100
[0|Train] Episode 1 | Loss:  0.1285 | (short : 42 %, hold : 18%, long: 40%) | (Ave) Strength: 228.50 |Reward:  -9 | Avg(50): -15.30 | Maintained Len: 200
[0|Train] Episode 2 | Loss:  0.2542 | (short : 42 %, hold : 16%, long: 42%) | (Ave) Strength: 218.33 |Reward: -16 | Avg(50): -15.40 | Maintained Len: 300
[0|Train] Episode 3 | Loss: -0.0348 | (short : 35 %, hold : 18%, long: 47%) | (Ave) Strength: 213.00 |Reward:  -4 | Avg(50): -12.60 | Maintained Len: 400
[0|Train] Episode 4 | Loss:  0.2549 | (short : 34 %, hold : 30%, long: 36%) | (Ave) Strength: 207.00 |Reward: -13 | Avg(50): -12.73 | Maintained Len: 500
[0|Train] Episode 5 | Loss:  0.2116 | (short : 35 %, hold : 24%, long: 41%) | (Ave) Strength: 200.00 |Reward: -24 | Avg(50): -14.68 | Mai

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


True
end_of_data
[0: latest model |Valid] Episode 0 |(short : 27 %, hold : 11%, long: 62%) | (Ave) Strength: 1309.00 |Reward: -110 | Avg(50): -110.15 | Maintained Steps: 631

  - 총 에피소드 수: 1
  - 평균 보상: -110.15
  - 평균 유지 시간: nan step
  - 마지막 수익: -10585237.61

True
end_of_data
[0: highest reward |Valid] Episode 0 |(short : 31 %, hold : 10%, long: 59%) | (Ave) Strength: 1366.00 |Reward: -99 | Avg(50): -98.51 | Maintained Steps: 631

  - 총 에피소드 수: 1
  - 평균 보상: -98.51
  - 평균 유지 시간: nan step
  - 마지막 수익: -74591453.00

True
end_of_data
[0: per steps |Valid] Episode 0 |(short : 30 %, hold : 11%, long: 59%) | (Ave) Strength: 1336.00 |Reward: -56 | Avg(50): -56.17 | Maintained Steps: 631

  - 총 에피소드 수: 1
  - 평균 보상: -56.17
  - 평균 유지 시간: nan step
  - 마지막 수익: -32491281.69

✅ 시각화 저장 완료: /Users/ijimin/Documents/GitHub/YOLO-Futures/logs/I0V
[Saved] latest_model
[Saved] reward_king_model
[Saved] 8 recent models
Robust Scaling Completed.
Robust Scaling Completed.
>>>> Train : ('2010-03-26', '2010-04-28')