## Import

In [1]:
import pickle
import matplotlib.pyplot as plt
import warnings

from config import CONFIG
from easydict import EasyDict

from env.env_f import *
from state.state import *
from agent.PPOAgent_ms import *
from models.CTTS import *
from trainer.nonEpisodic import *
from trainer.Episodic import *
from datahandler.scaler import *
from utils.setDevice import *
from utils.timestepRelated import *
from visualization.methods import *

In [2]:
warnings.filterwarnings("ignore", category=FutureWarning)

## Load

In [3]:
# Load the pre-processed KOSPI200 frame from its pickle.
# NOTE(security): pickle.load can execute arbitrary code; only open files from a trusted source.
with open('../data/processed/kospi200_ffill_clean_version.pkl', 'rb') as pkl_file:
    df = pickle.load(pkl_file)

# The shape goes to stdout; the trailing expression renders the first five rows.
print(df.shape)
df.head(5)

(941402, 8)


Unnamed: 0,date,time,open,high,low,close,prevClose,vol
2010-02-16 09:01:00,20100216,901,207.55,207.65,207.5,207.6,207.5,3985.0
2010-02-16 09:02:00,20100216,902,207.6,207.65,207.25,207.55,207.5,5095.0
2010-02-16 09:03:00,20100216,903,207.55,207.8,207.5,207.6,207.5,2175.0
2010-02-16 09:04:00,20100216,904,207.55,207.85,207.55,207.8,207.5,1301.0
2010-02-16 09:05:00,20100216,905,207.8,208.15,207.8,208.05,207.5,3870.0


## Set State info and scaler

In [4]:
# Feature columns handed to State() and used to size CONFIG.INPUT_DIM.
#
# Wider candidate set kept for reference (not active):
#   open, high, low, close, vol, return_5, return_10, volume_change,
#   ema_5, ema_20, ema_cross, cci, sar, %K, %D, roc, rsi, obv, ad_line,
#   bb_upper, bb_lower, bb_width, atr, gap_size
#
# One row per group, following the original layout; the group meaning is
# inferred from the column names only.
target_values = [
    'close', 'high', 'low',
    'ema_5', 'ema_20', 'ema_cross',
    'rsi', '%K', '%D', 'cci',
    'atr', 'bb_width',
    'obv', 'volume_change',
]

# Scaler instance; it is stored as CONFIG.SCALER and later passed to the trainer.
# NOTE(review): RobustScaler arrives via a wildcard import — presumably
# datahandler.scaler rather than scikit-learn; confirm.
scaler = RobustScaler()

## Set Env info 

In [5]:
# Split the index into 10 groups with train_ratio=0.9; each entry of the result
# pairs a (start, end) train range with a (start, end) validation range.
# The author left a `[:50000]` note here — presumably a truncation for short
# debug runs; confirm before re-enabling.
train_valid_timestep = split_date_ranges_by_group(
    df.index,
    n_group=10,
    train_ratio=0.9,
)
train_valid_timestep

[(('2010-02-16', '2011-01-07'), ('2011-01-10', '2011-02-16')),
 (('2011-02-17', '2012-01-10'), ('2012-01-11', '2012-02-16')),
 (('2012-02-17', '2013-01-14'), ('2013-01-15', '2013-02-19')),
 (('2013-02-20', '2014-01-17'), ('2014-01-20', '2014-02-25')),
 (('2014-02-26', '2015-01-26'), ('2015-01-27', '2015-03-05')),
 (('2015-03-06', '2016-01-28'), ('2016-01-29', '2016-03-09')),
 (('2016-03-10', '2017-02-03'), ('2017-02-06', '2017-03-13')),
 (('2017-03-14', '2018-02-12'), ('2018-02-13', '2018-03-22')),
 (('2018-03-23', '2019-02-25'), ('2019-02-26', '2019-04-02')),
 (('2019-04-03', '2020-02-28'), ('2020-03-02', '2020-04-03'))]

In [6]:
# Device used to build the model and the agent below.
# To force CPU instead, swap in torch.device("cpu").
device = get_device()

# Runtime overrides layered on top of the imported CONFIG.
# CONFIG.DEVICE is set from the same `device` object because the trainer reads
# its device from CONFIG.DEVICE while the model and agent receive `device`
# directly; keeping a single source of truth avoids a model/trainer device
# mismatch and makes the saved settings reflect the device actually used.
CONFIG.DEVICE = device
CONFIG.INPUT_DIM = len(target_values)          # one input channel per selected feature
CONFIG.REWARD_FTN = risk_adjusted_pnl_reward   # reward function passed to the trainer
CONFIG.DONE_FTN = is_day_changed               # done function passed to the trainer
CONFIG.TRAIN_VALID_TIMESTEP = train_valid_timestep
CONFIG.SCALER = scaler

In [7]:
# State object built from the selected feature columns.
state = State(target_values)

# Model shared by the PPO agent and the trainer; all hyper-parameters come from
# CONFIG. Keyword arguments are grouped by their name prefix.
model = MultiStatePV(
    device=device,
    action_size=CONFIG.N_ACTIONS,
    # input / embedding
    input_dim=CONFIG.INPUT_DIM,
    embed_dim=CONFIG.EMBED_DIM,
    kernel_size=CONFIG.KERNEL_SIZE,
    stride=CONFIG.STRIDE,
    # layer stack
    num_layers=CONFIG.NUM_LAYERS,
    num_heads=CONFIG.NUM_HEADS,
    d_ff=CONFIG.D_FF,
    dropout=CONFIG.DROPOUT,
    # agent_* and fusion_* sizes
    agent_input_dim=CONFIG.AGENT_INPUT_DIM,
    agent_hidden_dim=CONFIG.AGENT_HIDDEN_DIM,
    agent_out_dim=CONFIG.AGENT_OUT_DIM,
    fusion_hidden_dim=CONFIG.FUSION_HIDDEN_DIM,
)

# PPO agent wrapping the model above, placed on the same device.
agent = PPOAgent(
    model=model,
    device=device,
    # action definition
    action_space=CONFIG.ACTION_SPACE,
    n_actions=CONFIG.N_ACTIONS,
    # loss coefficients
    value_coeff=CONFIG.VALUE_COEFF,
    entropy_coeff=CONFIG.ENTROPY_COEFF,
    clip_eps=CONFIG.CLIP_EPS,
    gamma=CONFIG.GAMMA,
    # optimisation schedule
    lr=CONFIG.LR,
    batch_size=CONFIG.BATCH_SIZE,
    epoch=CONFIG.EPOCH,
)

In [None]:
# Build the trainer. NonEpisodicTrainer is the alternative class the author
# noted for this slot.
trainer = EpisodicTrainer(
    # data and environment (the environment class itself is passed, not an instance)
    df=df,
    env=FuturesEnvironment,
    state=state,
    scaler=CONFIG.SCALER,
    train_valid_timestep=CONFIG.TRAIN_VALID_TIMESTEP,
    window_size=CONFIG.WINDOW_SIZE,
    # episode rules
    reward_ftn=CONFIG.REWARD_FTN,
    done_ftn=CONFIG.DONE_FTN,
    start_budget=CONFIG.START_BUDGET,
    position_cap=CONFIG.POSITION_CAP,
    # learner
    agent=agent,
    model=model,
    optimizer=optim.Adam,
    device=CONFIG.DEVICE,
    # loop, logging and checkpoint settings
    n_steps=CONFIG.N_STEPS,
    ma_interval=CONFIG.MA_INTERVAL,
    save_interval=CONFIG.SAVE_INTERVAL,
    path=CONFIG.PATH,
)

# Write the configuration out first, then invoke the trainer; the recorded
# output shows the settings file being saved followed by the training log.
trainer.save(CONFIG)
trainer()

✅ 설정 저장 완료: /Users/ijimin/Documents/GitHub/YOLO-Futures/logs/test5/setting.txt
Robust Scaling Completed.
Robust Scaling Completed.
>>>> Train : ('2010-02-16', '2011-01-07')
[0|Train] Ep 001 | info:  | Maintained for: 100 | Reward:  -47 | Loss:  0.712 | Pos(short/hold/long): 39% / 6% / 55% | Strength: 5.05 |
[0|Train] Ep 002 | info:  | Maintained for: 200 | Reward:  -49 | Loss:  0.814 | Pos(short/hold/long): 48% / 5% / 47% | Strength: 4.82 |
[0|Train] Ep 003 | info: done | Maintained for: 266 | Reward:  -21 | Loss:  0.329 | Pos(short/hold/long): 33% / 9% / 58% | Strength: 4.82 |
[0|Train] Ep 004 | info:  | Maintained for: 366 | Reward:  -46 | Loss:  0.639 | Pos(short/hold/long): 43% / 6% / 51% | Strength: 5.50 |
[0|Train] Ep 005 | info:  | Maintained for: 466 | Reward:  -52 | Loss:  0.781 | Pos(short/hold/long): 42% / 2% / 56% | Strength: 5.71 |
[0|Train] Ep 006 | info: done | Maintained for: 532 | Reward:  -34 | Loss:  0.611 | Pos(short/hold/long): 41% / 11% / 48% | Strength: 4.70 |
[0

  _, ax = plt.subplots(figsize=(12,6))


✅ 시각화 저장 완료: /Users/ijimin/Documents/GitHub/YOLO-Futures/logs/test5/visualization/I21T
>>>>> reset the env : bankrupt occured. Go Back To Start.
[0|Train] Ep 212 | info:  | Maintained for: 100 | Reward:   -1 | Loss: -0.141 | Pos(short/hold/long): 21% / 43% / 36% | Strength: 1.99 |
[0|Train] Ep 213 | info:  | Maintained for: 200 | Reward:  -23 | Loss:  0.167 | Pos(short/hold/long): 27% / 30% / 43% | Strength: 2.45 |
[0|Train] Ep 214 | info: done | Maintained for: 266 | Reward:  -21 | Loss:  0.088 | Pos(short/hold/long): 26% / 30% / 44% | Strength: 2.55 |
[0|Train] Ep 215 | info:  | Maintained for: 366 | Reward:  -12 | Loss:  0.069 | Pos(short/hold/long): 23% / 41% / 36% | Strength: 1.84 |
[0|Train] Ep 216 | info:  | Maintained for: 466 | Reward:   -9 | Loss: -0.033 | Pos(short/hold/long): 19% / 48% / 33% | Strength: 1.72 |
[0|Train] Ep 217 | info: done | Maintained for: 532 | Reward:  -17 | Loss:  0.023 | Pos(short/hold/long): 18% / 41% / 41% | Strength: 2.26 |
[0|Train] Ep 218 | info: 

  plt.savefig(path)


✅ 시각화 저장 완료: /Users/ijimin/Documents/GitHub/YOLO-Futures/logs/test5/visualization/I28T
>>>>> reset the env : bankrupt occured. Go Back To Start.
[0|Train] Ep 650 | info:  | Maintained for: 100 | Reward:    9 | Loss: -0.189 | Pos(short/hold/long): 2% / 95% / 3% | Strength: 0.20 |
📁 1. Account Status (계좌 상태)
⏱️  Current Timestep   : 2010-02-16 12:19:00
💰  Available Balance  : 23,515,699 KRW
💼  Margin Deposit     : 6,623,138 KRW
💸  Transaction Costs  : 126,164 KRW
📉  Unrealized PnL     : 132,500 KRW
💵  Realized PnL       : 220,762 KRW
💰  Total Equity       : 23,648,199 KRW
⚖️  Avg Entry Price    : 210.26
💼  Current Position   : long (1)
📊  Execution Strength : 6/10
🔢  Total Trades       : 5
📁 2. Performance Metrics (성과 지표)
💵  Total Return       : -21.17%
🏆  Episode Win Rate   : 13.0% (30/230)
🎯  Trade Win Rate     : 75.0% (3/4)
📊  Sharpe Ratio       : 0.227
📉  Max Drawdown       : -37.1%
📁 3. Trade History (거래 기록)
✅  Completed Trades   : 4
💰  Episode PnL        : 220,762 KRW
💹  Last Trade