In [1]:
# ==== Extra knobs to mirror efom_runner_v4 ====
# These are read as module-level globals by _override_end_date_now and
# _sanitize_comp_for_srto.

# When falsy, _override_end_date_now returns without touching cfg.end_date.
OVERRIDE_END_WITH_NOW = True
# Minutes subtracted from the current time before it is used as the end date.
SAFETY_LAG_MINUTES    = 2
# When truthy (and cfg.recorded is falsy) the end time is floored to a
# multiple of cfg.interval instead of to the minute.
ALIGN_END_TO_INTERVAL = True
# `limit=` passed to ffill/bfill in _sanitize_comp_for_srto.
# NOTE(review): at two stamps per day 60 rows would be ~30 days, not ~2 weeks —
# confirm which one is intended. A later cell rebinds this name to 2.
SHORT_FILL_LIMIT      = 60  # ~2 weeks of 07/19 stamps


In [2]:
import re
import pandas as pd
import numpy as np
from typing import Sequence, Optional

def _parse_interval(s: Optional[str]) -> pd.Timedelta:
    if not s: return pd.Timedelta(0)
    try:
        return pd.to_timedelta(s)
    except Exception:
        if s.endswith('m') and s[:-1].isdigit():
            return pd.Timedelta(minutes=int(s[:-1]))
        raise

def _override_end_date_now(cfg):
    """Overwrite ``cfg.end_date`` with the current time minus a safety lag.

    Does nothing when the module-level ``OVERRIDE_END_WITH_NOW`` is falsy.
    Otherwise "now" is taken in ``cfg.tz``, reduced by ``SAFETY_LAG_MINUTES``
    and then floored:

    * to a multiple of ``cfg.interval`` when ``cfg.recorded`` is falsy,
      ``ALIGN_END_TO_INTERVAL`` is truthy and the interval parses to a
      positive timedelta;
    * to the whole minute in every other case.

    The result is stored on ``cfg.end_date`` as a ``'%Y-%m-%d %H:%M'`` string
    and a ``[CONFIG]`` line is printed.

    Args:
        cfg: object exposing ``tz``, ``recorded``, ``interval`` and a
            writable ``end_date`` attribute.
    """
    if not OVERRIDE_END_WITH_NOW:
        return
    now_ts = pd.Timestamp.now(tz=cfg.tz) - pd.Timedelta(minutes=SAFETY_LAG_MINUTES)

    # The interval is only parsed when alignment is requested for this config.
    align = (not cfg.recorded) and ALIGN_END_TO_INTERVAL
    step = _parse_interval(cfg.interval) if align else pd.Timedelta(0)

    if step > pd.Timedelta(0):
        # Integer floor on the nanosecond epoch value, re-expressed in cfg.tz.
        now_ts = pd.Timestamp((now_ts.value // step.value) * step.value, tz=cfg.tz)
    else:
        # 'min' replaces the deprecated 'T' alias for minute frequency.
        now_ts = now_ts.floor('min')

    cfg.end_date = now_ts.strftime('%Y-%m-%d %H:%M')
    print(f"[CONFIG] end_date overridden → {cfg.end_date} ({cfg.tz})")

def last_9_or_21(now_local: pd.Timestamp) -> pd.Timestamp:
    """Return the latest 09:00 or 21:00 Asia/Seoul stamp that is <= ``now_local``.

    Args:
        now_local: reference time. A tz-naive value is interpreted as
            Asia/Seoul wall time; a tz-aware value is converted to Asia/Seoul.

    Returns:
        A tz-naive ``pd.Timestamp`` holding the Asia/Seoul wall-clock time.
    """
    ts = pd.Timestamp(now_local)
    if ts.tzinfo is None:
        ts = ts.tz_localize('Asia/Seoul')
    else:
        ts = ts.tz_convert('Asia/Seoul')
    # Shift so the 09:00/21:00 grid lands on 00:00/12:00, floor, shift back.
    # Lowercase '12h' avoids the deprecated uppercase 'H' frequency alias.
    shift = pd.Timedelta(hours=9)
    last = (ts - shift).floor('12h') + shift
    return last.tz_localize(None)

def _ampm_targets_from_X_range(X_12h: pd.DataFrame,
                               times: Sequence[str] = ("07:00","19:00")) -> pd.DataFrame:
    idx = X_12h.index
    d0 = pd.to_datetime(idx.min()).normalize()
    d1 = pd.to_datetime(idx.max()).normalize()
    days = pd.date_range(d0 - pd.Timedelta(days=1), d1 + pd.Timedelta(days=1), freq="D")
    targets = []
    for d in days:
        for t in times:
            hh, mm = map(int, t.split(":"))
            targets.append(d + pd.Timedelta(hours=hh, minutes=mm))
    return pd.DataFrame({"date": pd.to_datetime(targets)}).sort_values("date").reset_index(drop=True)

def _ensure_date_col(df: pd.DataFrame) -> pd.DataFrame:
    if 'date' in df.columns:
        d = pd.to_datetime(df['date'], errors='coerce')
    elif isinstance(df.index, pd.DatetimeIndex):
        d = pd.to_datetime(df.index)
        df = df.reset_index(drop=True)
    else:
        raise ValueError("Need a datetime index or 'date' column")
    df = df.copy()
    # keep local-naive wall time (if tz-aware)
    if getattr(d, 'dt', None) is not None and d.dt.tz is not None:
        d = d.dt.tz_convert('Asia/Seoul').dt.tz_localize(None)
    df['date'] = d
    return df

def _build_gas_from_X(X_12h: pd.DataFrame,
                      targets: pd.DataFrame,
                      tol: pd.Timedelta = pd.Timedelta(hours=3)) -> pd.DataFrame:
    gas_cols_x = ['Ethylene_gas','Ethane_gas','Propylene_gas','Propane_gas','n-Butane_gas','i-Butane_gas']
    have = [c for c in gas_cols_x if c in X_12h.columns]
    if not have:
        have = [c for c in ['Ethylene','Ethane','Propylene','Propane','n-Butane','i-Butane'] if c in X_12h.columns]
        rename = {}
    else:
        rename = {c: c.replace('_gas','') for c in have}

    g = (X_12h[have].sort_index()
                    .reset_index()
                    .rename(columns={X_12h.index.name or X_12h.columns[0]: 'ts'}))
    g['ts'] = pd.to_datetime(g['ts'], errors='coerce')

    out = pd.merge_asof(
        left=targets.sort_values('date'),
        right=g.sort_values('ts'),
        left_on='date', right_on='ts',
        direction='nearest', tolerance=tol
    ).drop(columns=['ts'])

    if rename:
        out = out.rename(columns=rename)
    canon = ['Ethylene','Ethane','Propylene','Propane','n-Butane','i-Butane']
    keep = [c for c in canon if c in out.columns]
    if keep:
        out[keep] = out[keep].ffill()
    return out

def _alias_C11_plus(df: pd.DataFrame) -> pd.DataFrame:
    out = df.copy()
    for fam in ['n-Paraffin','i-Paraffin','Olefin','Naphthene','Aromatic']:
        src = f"C11 {fam}"
        tgt = f"C11+ {fam}"
        if src in out.columns and tgt not in out.columns:
            out[tgt] = pd.to_numeric(out[src], errors='coerce')
    return out

def _unify_gas_columns_for_srto(df: pd.DataFrame, keep_canon_only: bool = True) -> pd.DataFrame:
    out = df.copy()
    canon = ['Ethylene','Ethane','Propylene','Propane','n-Butane','i-Butane']
    for c in canon:
        cg = f"{c}_gas"
        if (c not in out.columns) and (cg in out.columns):
            out[c] = pd.to_numeric(out[cg], errors='coerce')
        elif (c in out.columns) and (cg in out.columns):
            a = pd.to_numeric(out[c], errors='coerce')
            b = pd.to_numeric(out[cg], errors='coerce')
            out[c] = b.where(b.notna(), a)
    if keep_canon_only:
        drop = [f"{c}_gas" for c in canon if f"{c}_gas" in out.columns]
        if drop:
            out = out.drop(columns=drop)
    return out

def _sanitize_comp_for_srto(merged_lims: pd.DataFrame) -> pd.DataFrame:
    """Return a cleaned copy of the composition table, ordered by ``date``.

    When a ``date`` column exists it is parsed (``errors='coerce'``) and the
    rows are sorted by it. PONA family columns, canonical gas columns and
    per-carbon columns (``C4`` .. ``C11``/``C11+`` by family) are coerced to
    numeric, forward- then backward-filled for at most ``SHORT_FILL_LIMIT``
    rows each way, and any remaining gaps are set to 0.0.

    Returns:
        Frame with a fresh 0..n-1 index and a ``date`` column taken from the
        index used during filling.
    """
    frame = merged_lims.copy()
    if 'date' in frame.columns:
        frame['date'] = pd.to_datetime(frame['date'], errors='coerce')
        frame = frame.sort_values('date').set_index('date')

    families = ['Paraffins','Olefins','Naphthenes','Aromatics','n-Paraffin','i-Paraffin']
    gases = ['Ethylene','Ethane','Propylene','Propane','n-Butane','i-Butane']
    per_carbon_re = re.compile(r'^C(4|5|6|7|8|9|10|11\+?)\s+(n-?Paraffin|i-?Paraffin|Olefin|Naphthene|Aromatic)$', re.I)
    per_carbon = [name for name in frame.columns if per_carbon_re.match(str(name))]

    wanted = [name for name in families + gases + per_carbon if name in frame.columns]
    if wanted:
        numeric = frame[wanted].apply(pd.to_numeric, errors='coerce')
        numeric = numeric.ffill(limit=SHORT_FILL_LIMIT).bfill(limit=SHORT_FILL_LIMIT)
        frame[wanted] = numeric.fillna(0.0)

    frame['date'] = frame.index
    return frame.reset_index(drop=True)


In [5]:
from src.pipeline import DownloadConfig, AuthenticationMode


In [4]:
# JUPYTER EFOM RUNNER — CONFIG

from pathlib import Path
import pandas as pd
import numpy as np

# Core orchestrator + helpers
from src import main
from src.main import ensure_pi_download

# Data loading primitives (prioritize downloaded_csv)
from src.data_loading import DataPaths, ResampleConfig, DataPipeline

# SRTO / SPYRO plumbing
from src.srto_pipeline import SRTOConfig, RCOTSweepConfig, FeedConfig, SRTOPipeline
from src.srto_components import component_index, MW

# PI downloader types
from src.pipeline import DownloadConfig, AuthenticationMode

# ==== User knobs ====
# MODE selects the branch taken by the run cell and is passed to
# main.run_production as `mode`.
MODE           = "online"            # 'historical' | 'closed_loop' | 'online'
DOWNLOAD_PI    = True                # pull from PI first (incremental)
OUT_DIR        = Path("prod_out/jupyter")
INPUT_DIR      = Path("input")
INTER_DIR      = Path("intermediate")
DOWNLOADED_CSV = "pi_firstrow.csv"   # relative to INTER_DIR

# if you want historical window for non-online tests:
# (START_STR/END_STR also seed start_date/end_date of the PI DownloadConfig)
START_STR      = "2024-09-01"
END_STR        = "2025-09-16"        # None for open end

# SRTO bits (adjust paths if needed)
# Relative SPY7S entries are resolved against SRTO_DLL by the SRTO setup cell.
SRTO_DLL = Path(r"C:\Program Files\Pyrotec\SRTO")
SPY7S = [
    r"01. GF_HYBRID MODE_SRTO7_NAPH.SPY7",
    r"04. LF_NAPH MODE_SRTO7.SPY7",
    r"07. GF_GAS MODE_SRTO7.SPY7",
]

# Prices cleanup rule
# When True, zero prices are treated as missing and forward-filled after load.
REPLACE_PRICE_ZEROS = True

# Set output base for artifacts
main.set_out_dir_base(OUT_DIR)


In [5]:

# Target CSV for the PI download; its parent directory is created on demand.
pi_csv = INTER_DIR / DOWNLOADED_CSV
pi_csv.parent.mkdir(parents=True, exist_ok=True)

# Download when explicitly requested, or when no CSV exists yet.
if DOWNLOAD_PI or not pi_csv.exists():
    cfg = DownloadConfig(
        pi_server="172.17.21.117",
        auth_mode=AuthenticationMode.WINDOWS_AUTHENTICATION,
        pi_username="", pi_password="", pi_domain=None,
        tz="Asia/Seoul",
        start_date=(START_STR or "2024-09-01 00:00"),
        end_date=(END_STR),                  # will be overridden below
        interval="1m",
        chunk_days=7,
        recorded=False,
        sheet_name="python_import",
        column_name="tags",
        input_dir=str(INPUT_DIR),
        tags_excel=str(INPUT_DIR / "EFOM_input_data_tag_list.xlsx"),
        out_csv=str(pi_csv),
        out_parquet="",
        incremental=True,
        # Take the end-date behaviour from the notebook-level knobs so the
        # config and _override_end_date_now cannot drift apart.
        override_end_with_now=OVERRIDE_END_WITH_NOW,
        safety_lag_minutes=SAFETY_LAG_MINUTES,
        align_end_to_interval=ALIGN_END_TO_INTERVAL,
    )
    # NOTE(review): the captured run output prints the "end_date overridden"
    # line twice, which suggests ensure_pi_download applies the same override
    # again — confirm whether this explicit call is still needed.
    _override_end_date_now(cfg)
    ensure_pi_download(cfg)


[CONFIG] end_date overridden → 2025-09-22 14:52 (Asia/Seoul)
[CONFIG] start_date=2024-09-01 00:00:00, end_date=NOW (override), interval=1m, recording=False, incremental=True
[CONFIG] end_date overridden → 2025-09-22 14:52 (Asia/Seoul)
2024-09-01 00:00:00+09:00 2025-09-22 14:52:00+09:00
DownloadConfig(pi_server='172.17.21.117', auth_mode=<AuthenticationMode.WINDOWS_AUTHENTICATION: 0>, pi_username='', pi_password='', pi_domain=None, tz='Asia/Seoul', start_date='2024-09-01 00:00:00', end_date='2025-09-22 14:52', interval='1m', chunk_days=7, recorded=False, sheet_name='python_import', column_name='tags', sheet_name_config='pipeline_config', column_name_tag_config='tags', column_name_value_config='values', input_dir='input', tags_excel='input\\EFOM_input_data_tag_list.xlsx', out_csv='intermediate\\pi_firstrow.csv', out_parquet='', incremental=True, override_end_with_now=True, safety_lag_minutes=2, align_end_to_interval=True)
[TAGS] 172 tags read from first row of input\EFOM_input_data_tag_l



[SAVE] CSV  → intermediate\pi_firstrow.csv | shape=(556732, 172)


In [6]:

from pathlib import Path
import pandas as pd
import numpy as np

# Core orchestrator + helpers
from src import main
from src.main import ensure_pi_download

# Data loading primitives (prioritize downloaded_csv)
from src.data_loading import DataPaths, ResampleConfig, DataPipeline

# SRTO / SPYRO plumbing
from src.srto_pipeline import SRTOConfig, RCOTSweepConfig, FeedConfig, SRTOPipeline
from src.srto_components import component_index, MW

# PI downloader types
from src.pipeline import DownloadConfig, AuthenticationMode


In [7]:

# Prices cleanup rule
# Same name and value as the REPLACE_PRICE_ZEROS knob in the config cell;
# it is read by the data-loading cell to decide whether zero prices are
# replaced and forward-filled.
REPLACE_PRICE_ZEROS = True


In [11]:
# File locations handed to the data pipeline: the downloaded PI CSV, the
# source workbooks, the pickle file names and per-workbook header rows.
paths = DataPaths(
    input_dir=INPUT_DIR,
    inter_dir=INTER_DIR,
    downloaded_csv=DOWNLOADED_CSV,
    input_excel="EFOM_input_data_tag_list.xlsx",
    prod_excel="1. 생산량 Data_'23.07~'25.05_R1_송부용.xlsx",
    furn_excel="2. Furnace Data_'23.07~'25.05_R0.xlsx",
    nap_excel="Nap Feed 조성분석값.xlsx",
    gas_excel="Gas Feed 조성분석값.xlsx",
    recycle_excel="6. 에탄 및 프로판 데이터.xlsx",
    price_csv="price.csv",
    util_excel="#1ECU 유틸리티사용량일별데이터.xlsx",
    fresh_excel="7. Gas Furnace Feed Data_'23.07~'25.05_r2.xlsx",
    prod_pkl="df_production_v4.pkl",
    furn_pkl="furnace.pkl",
    nap_pkl="df_feed_naptha.pkl",
    gas_pkl="df_feed_gas.pkl",
    fresh_pkl="df_feed_fresh_v3.pkl",
    rec_pkl="df_recycle.pkl",
    prod_header=2, furn_header=2, nap_header=1, gas_header=1, rec_header=4, fresh_header=3,
)
# NOTE: this rebinds `cfg`, which the PI download cell used for its
# DownloadConfig; from here on `cfg` is the resampling configuration.
cfg = ResampleConfig(hour_freq='h', win12_freq='12h', win12_offset='9h')

# Raw furnace column label -> per-chamber feature name. The '.1', '.2', ...
# suffixes look like pandas' de-duplication of repeated header labels —
# TODO confirm against the furnace workbook.
feature_rename = {
    'Naph': 'Naphtha_chamber1', 'T-DAO': 'T-DAO_chamber1', 'DS': 'DS_chamber1',
    'RCOT Ave.': 'RCOT_chamber1', 'Excess O2': "Excess O2_chamber1",
    'Naph.1': 'Naphtha_chamber2', 'T-DAO.1': 'T-DAO_chamber2','DS.1': 'DS_chamber2',
    'RCOT Ave..1': 'RCOT_chamber2', 'Excess O2.1': "Excess O2_chamber2",
    'Naph.2': 'Naphtha_chamber3', 'T-DAO.2': 'T-DAO_chamber3','DS.2': 'DS_chamber3',
    'RCOT Ave..2': 'RCOT_chamber3', 'Excess O2.2': "Excess O2_chamber3",
    'Naph.3': 'Naphtha_chamber4', 'GAS': 'Gas Feed_chamber4','DS.3': 'DS_chamber4',
    'RCOT Ave..3': 'RCOT_chamber4', 'Excess O2.3': "Excess O2_chamber4",
    'Naph.4': 'Naphtha_chamber5', 'GAS.1': 'Gas Feed_chamber5','DS.4': 'DS_chamber5',
    'RCOT Ave..4': 'RCOT_chamber5', 'Excess O2.4': "Excess O2_chamber5",
    'Naph.5': 'Naphtha_chamber6', 'GAS.2': 'Gas Feed_chamber6','DS.5': 'DS_chamber6',
    'RCOT Ave..5': 'RCOT_chamber6', 'Excess O2.5': "Excess O2_chamber6",
}
# Raw utility column label -> target name.
target_rename = { 'Unnamed: 36':'steam','ECU F/G':'fuel_gas','ECU Elec..1':'electricity' }

# Run the pipeline and pull the 12-hourly feature/target frames plus prices.
dp = DataPipeline(paths, cfg).run(feature_rename, target_rename)
art = dp.artifacts()
X_12h, Y_12h, prices_df = art['X_12h'], art['Y_12h'], art['price_df']

if REPLACE_PRICE_ZEROS:
    # Treat zero prices as missing and carry the last known price forward.
    # np.nan is used as the missing marker instead of pd.NA: the pd.NA form
    # is the line flagged by the warning in the captured run output
    # (presumably the object-dtype downcasting deprecation — confirm).
    # Leading zeros with no earlier price stay NaN.
    prices_df = prices_df.replace(0, np.nan).ffill()
# Keep only the first row for any repeated index stamp.
prices_df = prices_df.loc[~prices_df.index.duplicated(keep='first')]

print("Loaded:", X_12h.shape, Y_12h.shape, prices_df.shape)


  prices_df = prices_df.replace(0, pd.NA).ffill()


Loaded: (774, 98) (774, 9) (387, 13)


In [12]:
X_12h

Unnamed: 0_level_0,Ethylene_prod,Propylene_prod,MixedC4_prod,RPG_prod,PFO_prod,C2Recycle,C3Recycle,Hydrogen_prod,Tail_Gas_prod,Ethane_prod,...,i-Butane_gas,RCOT_naphtha_chamber4,RCOT_gas_chamber4,RCOT_naphtha_chamber5,RCOT_gas_chamber5,RCOT_naphtha_chamber6,RCOT_gas_chamber6,FreshFeed_C3 LPG,FreshFeed_MX Offgas,feed_qty
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-09-01 09:00:00,66.961969,31.875999,18.915362,33.082066,5.525592,17.748620,10.451746,1.932102,33.937773,17.748620,...,1.921667,880.027119,880.020830,0.0,28.965482,0.0,880.026279,45.032904,15.960949,60.993853
2024-09-01 21:00:00,67.015402,31.322504,19.002719,32.443917,5.491209,17.645969,10.903391,1.930902,34.434181,17.645969,...,1.921667,879.992959,879.987157,0.0,28.254550,0.0,879.991018,45.212228,15.932669,61.144897
2024-09-02 09:00:00,69.300842,32.614699,19.870455,34.842525,5.197984,18.398275,10.941583,1.971435,35.374823,18.398275,...,1.973750,879.442822,879.372944,0.0,27.293488,0.0,879.849443,43.909099,15.738533,59.647631
2024-09-02 21:00:00,74.605789,35.845847,21.142220,37.968441,4.312466,19.974149,10.643060,2.053634,38.222157,19.974149,...,1.973750,880.012074,879.971904,0.0,95.850268,0.0,879.981582,41.988303,15.891017,57.879320
2024-09-03 09:00:00,74.808790,35.416598,21.110860,36.018180,6.923870,20.133305,9.999946,2.052681,38.259773,20.133305,...,1.915000,879.985571,879.980632,0.0,669.419350,0.0,879.978009,42.274970,15.691889,57.966860
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-09-20 21:00:00,80.189162,41.233319,22.422373,40.293263,9.210038,19.499534,15.651687,2.305761,39.389699,19.499534,...,1.589583,0.000000,885.016104,0.0,884.983174,0.0,0.000000,60.941494,17.859549,78.801043
2025-09-21 09:00:00,79.254790,41.400782,22.642964,44.251763,8.512621,18.254823,16.376972,2.242943,39.861093,18.254823,...,1.582500,0.000000,884.968799,0.0,884.957212,0.0,0.000000,64.448326,15.381159,79.829485
2025-09-21 21:00:00,80.346128,40.415717,22.585680,39.392188,8.033801,18.368733,16.985584,2.178664,39.696486,18.368733,...,1.582500,0.000000,884.933871,0.0,884.897974,0.0,0.000000,63.201942,15.068631,78.270573
2025-09-22 09:00:00,79.669902,38.986834,22.491383,41.006816,8.843282,18.093634,16.541116,2.194195,39.153700,18.093634,...,1.573323,0.000000,885.020712,0.0,885.015224,0.0,0.000000,62.000936,15.967714,77.968650


In [9]:
X_12h.columns

Index(['Ethylene_prod', 'Propylene_prod', 'MixedC4_prod', 'RPG_prod',
       'PFO_prod', 'C2Recycle', 'C3Recycle', 'Hydrogen_prod', 'Tail_Gas_prod',
       'Ethane_prod', 'Propane_prod', 'T-DAO_chamber1', 'Naphtha_chamber1',
       'DS_chamber1', 'Conv' O2_chamber1', 'Stack 온도_chamber1',
       'RCOT_chamber1', 'SPS 생산_chamber1', 'Fuel Duty_chamber1',
       'T-DAO_chamber2', 'Naphtha_chamber2', 'DS_chamber2',
       'Conv' O2_chamber2', 'Stack 온도_chamber2', 'RCOT_chamber2',
       'SPS 생산_chamber2', 'Fuel Duty_chamber2', 'T-DAO_chamber3',
       'Naphtha_chamber3', 'DS_chamber3', 'Conv' O2_chamber3',
       'Stack 온도_chamber3', 'RCOT_chamber3', 'SPS 생산_chamber3',
       'Fuel Duty_chamber3', 'Gas Feed_chamber4', 'Naphtha_chamber4',
       'DS_chamber4', 'Conv' O2_chamber4', 'Stack 온도_chamber4',
       'RCOT #1_naphtha_chamber4', 'RCOT #3_naphtha_chamber4',
       'RCOT #5_naphtha_chamber4', 'RCOT #7_naphtha_chamber4',
       'RCOT #2_gas_chamber4', 'RCOT #4_gas_chamber4', 'RCOT #6_gas

In [9]:
pi_csv = INTER_DIR / DOWNLOADED_CSV

In [17]:
X_12h

Unnamed: 0_level_0,Ethylene_prod,Propylene_prod,MixedC4_prod,RPG_prod,PFO_prod,C2Recycle,C3Recycle,Hydrogen_prod,Tail_Gas_prod,Ethane_prod,...,i-Butane_gas,RCOT_naphtha_chamber4,RCOT_gas_chamber4,RCOT_naphtha_chamber5,RCOT_gas_chamber5,RCOT_naphtha_chamber6,RCOT_gas_chamber6,FreshFeed_C3 LPG,FreshFeed_MX Offgas,feed_qty
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-09-01 09:00:00,66.961969,31.875999,18.915362,33.082066,5.525592,17.748620,10.451746,1.932102,33.937773,17.748620,...,1.921667,880.027119,880.020830,0.0,28.965482,0.0,880.026279,45.032904,15.960949,60.993853
2024-09-01 21:00:00,67.015402,31.322504,19.002719,32.443917,5.491209,17.645969,10.903391,1.930902,34.434181,17.645969,...,1.921667,879.992959,879.987157,0.0,28.254550,0.0,879.991018,45.212228,15.932669,61.144897
2024-09-02 09:00:00,69.300842,32.614699,19.870455,34.842525,5.197984,18.398275,10.941583,1.971435,35.374823,18.398275,...,1.973750,879.442822,879.372944,0.0,27.293488,0.0,879.849443,43.909099,15.738533,59.647631
2024-09-02 21:00:00,74.605789,35.845847,21.142220,37.968441,4.312466,19.974149,10.643060,2.053634,38.222157,19.974149,...,1.973750,880.012074,879.971904,0.0,95.850268,0.0,879.981582,41.988303,15.891017,57.879320
2024-09-03 09:00:00,74.808790,35.416598,21.110860,36.018180,6.923870,20.133305,9.999946,2.052681,38.259773,20.133305,...,1.915000,879.985571,879.980632,0.0,669.419350,0.0,879.978009,42.274970,15.691889,57.966860
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-09-20 21:00:00,80.189162,41.233319,22.422373,40.293263,9.210038,19.499534,15.651687,2.305761,39.389699,19.499534,...,1.589583,0.000000,885.016104,0.0,884.983174,0.0,0.000000,60.941494,17.859549,78.801043
2025-09-21 09:00:00,79.254790,41.400782,22.642964,44.251763,8.512621,18.254823,16.376972,2.242943,39.861093,18.254823,...,1.582500,0.000000,884.968799,0.0,884.957212,0.0,0.000000,64.448326,15.381159,79.829485
2025-09-21 21:00:00,80.346128,40.415717,22.585680,39.392188,8.033801,18.368733,16.985584,2.178664,39.696486,18.368733,...,1.582500,0.000000,884.933871,0.0,884.897974,0.0,0.000000,63.201942,15.068631,78.270573
2025-09-22 09:00:00,79.669902,38.986834,22.491383,41.006816,8.843282,18.093634,16.541116,2.194195,39.153700,18.093634,...,1.578834,0.000000,885.020712,0.0,885.015224,0.0,0.000000,62.000936,15.967714,77.968650


In [None]:


EPS = 1e-9          # treat absolute values ≤ EPS as zero
# Row limit for the short ffill/bfill calls in this cell.
# NOTE(review): this rebinds the module-level SHORT_FILL_LIMIT (set to 60 in
# the first cell), so _sanitize_comp_for_srto also uses 2 once this cell has
# run — confirm that is intended.
SHORT_FILL_LIMIT = 2  # or whatever you use

# Canonical gas component column names used throughout this cell.
canon = ['Ethylene','Ethane','Propylene','Propane','n-Butane','i-Butane']

# 1) Merge PI → lims_full (as you already do)

# 2) Derive `lims` from lims_full: drop/rename columns, then make sure there
#    is a 'date' column and order the rows by it.
lims = lims_full.drop(columns=drops).rename(columns=renames).copy()
lims = _ensure_date_col(lims)
lims = lims.sort_values('date').reset_index(drop=True)

# 3) Left-join the gas map on 'date' and re-sort. Gas rows whose absolute
#    values sum to at most EPS count as missing and are forward-filled.
lims = lims.merge(gas_map, on='date', how='left')
lims = lims.sort_values('date').reset_index(drop=True)
present = [c for c in canon if c in lims.columns]
for c in present:
    lims[c] = pd.to_numeric(lims[c], errors='coerce')
if present:
    zr = lims[present].abs().sum(axis=1).le(EPS)  # all-zero gas vector → missing
    lims.loc[zr, present] = np.nan
    lims[present] = lims[present].ffill()

# 4) Excel backfill source: load the naphtha and gas feed workbooks, keep only
#    rows with a parseable date, and treat all-zero gas rows as missing.
nap_xlsx = paths.input_dir / "복사본 (2024-25) ECU 투입 납사 세부성상-wt%.xlsx"
gas_xlsx = paths.input_dir / "Gas Feed 조성분석값.xlsx"
merged_lims2 = load_feed_data(nap_path=nap_xlsx, gas_path=gas_xlsx, header=1)

merged_lims2['date'] = pd.to_datetime(merged_lims2['date'], errors='coerce')
merged_lims2 = merged_lims2.dropna(subset=['date'])
merged_lims2 = merged_lims2.sort_values('date')

# Canonical gas columns that the Excel source actually provides.
gcan = [c for c in canon if c in merged_lims2.columns]
if gcan:
    merged_lims2[gcan] = merged_lims2[gcan].apply(pd.to_numeric, errors='coerce')
    zr2 = merged_lims2[gcan].abs().sum(axis=1).le(EPS)
    merged_lims2.loc[zr2, gcan] = np.nan
    # Unlimited fill in both directions: every row ends up with a gas vector
    # as long as at least one valid row exists.
    merged_lims2[gcan] = merged_lims2[gcan].ffill().bfill()

merged_lims2 = _alias_C11_plus(merged_lims2)

# 5) As-of join selected columns from merged_lims2 (your code skeleton)
# Columns taken from the Excel source: PONA family totals, the canonical gas
# columns it provides (gcan) and the per-carbon columns matched by `pat`.
pona_fam = ['Paraffins','n-Paraffin','i-Paraffin','Olefins','Naphthenes','Aromatics']
# NOTE(review): this pattern also accepts "C12+", while the pattern inside
# _sanitize_comp_for_srto stops at C11/C11+ — confirm which one is intended.
pat = re.compile(r'^C(4|5|6|7|8|9|10|11\+|12\+)\s+(n-?Paraffin|i-?Paraffin|Olefin|Naphthene|Aromatic)$', re.I)
per_carbon_cols = [c for c in merged_lims2.columns if pat.match(str(c))]
src_cols = [c for c in (pona_fam + gcan + per_carbon_cols) if c in merged_lims2.columns]

if src_cols:
    # For every lims date, take the latest Excel row at or before it, but no
    # older than one day.
    # NOTE(review): the coalescing below pairs ml2_map rows with lims rows by
    # index label; this relies on lims carrying a 0..n-1 index in date order
    # (set by the reset_index in step 3) and presumably on merge_asof
    # returning a fresh 0..n-1 index — confirm.
    ml2_map = pd.merge_asof(
        left=lims[['date']].sort_values('date'),
        right=merged_lims2[['date'] + src_cols].sort_values('date'),
        left_on='date', right_on='date',
        direction='backward', tolerance=pd.Timedelta(days=1)
    )

    # 6) Coalesce PONA and per-carbon directly (keep your fill behavior)
    # Existing lims values win; Excel values only fill NaN cells.
    fill_cols = [c for c in (pona_fam + per_carbon_cols) if c in ml2_map.columns]
    for c in fill_cols:
        if c not in lims.columns:
            lims[c] = np.nan
        lims[c] = pd.to_numeric(lims[c], errors='coerce')
        lims[c] = lims[c].where(lims[c].notna(), pd.to_numeric(ml2_map[c], errors='coerce'))
    if fill_cols:
        lims[fill_cols] = lims[fill_cols].ffill(limit=SHORT_FILL_LIMIT).bfill(limit=SHORT_FILL_LIMIT)

    # 7) ***Coalesce GAS: prefer non-zero Excel over zero/NaN canonical***
    for c in canon:
        # make sure both sides exist numerically
        if c not in lims.columns:
            lims[c] = np.nan
        lims[c] = pd.to_numeric(lims[c], errors='coerce')

        src_excel = pd.to_numeric(ml2_map.get(c), errors='coerce') if c in ml2_map.columns else None
        if src_excel is not None:
            # treat canonical near-zero as missing so Excel can fill
            need = lims[c].isna() | (lims[c].abs() <= EPS)
            # only take Excel values that are non-zero / finite
            valid_excel = src_excel.where(src_excel.abs() > EPS)
            lims.loc[need, c] = valid_excel.loc[need]

    # After coalescing, still smooth a little (short fills only)
    # NOTE(review): only the gas columns present in the Excel source (gcan)
    # are smoothed here, although step 7 loops over every name in `canon` —
    # confirm whether `canon` was intended.
    lims[gcan] = lims[gcan].ffill(limit=SHORT_FILL_LIMIT).bfill(limit=SHORT_FILL_LIMIT)

# 8) If *_x / *_y leaked in from earlier merges, unify and drop them
# Each leaked variant (<gas>_y, <gas>_x, <gas>_gas) fills the canonical column
# where that column is NaN or near-zero, using only the variant's non-zero
# values; the variant column is then removed.
for c in canon:
    for alt in (f'{c}_y', f'{c}_x', f'{c}_gas'):
        if alt not in lims.columns:
            continue
        altv = pd.to_numeric(lims[alt], errors='coerce')
        if c not in lims.columns:
            # The canonical column is only guaranteed to exist when step 7
            # ran; create it here so the lookup below cannot raise KeyError.
            lims[c] = np.nan
        need = lims[c].isna() | (lims[c].abs() <= EPS)
        lims.loc[need, c] = altv.where(altv.abs() > EPS).loc[need]
        lims.drop(columns=[alt], inplace=True, errors='ignore')

# 9) Re-normalise the 'date' column and row order before the final clean-up.
lims = _ensure_date_col(lims)
lims = lims.sort_values('date').reset_index(drop=True)
# Optional sanity pass: when every canonical gas column is present, rows whose
# gas values sum (in absolute terms) to at most EPS are blanked to NaN.
if all(name in lims.columns for name in canon):
    zr_final = lims[canon].abs().sum(axis=1).le(EPS)
    lims.loc[zr_final, canon] = np.nan

# 10) Final composition table used by the rest of the flow.
merged_lims = _sanitize_comp_for_srto(lims)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  feed_gas['date'] = pd.to_datetime(feed_gas['date'], errors='coerce')


In [11]:
# Resolve SPY7 file names: absolute paths are kept, relative ones are looked
# up under SRTO_DLL; an empty/None SPY7S yields an empty list.
sel_spy7 = [(p if Path(p).is_absolute() else (SRTO_DLL / p)) for p in (SPY7S or [])]
srto_config  = SRTOConfig(SRTO_DLL, sel_spy7, component_index, MW)
# RCOT sweep grid 790..900 in steps of 2 (units not shown here; presumably °C
# — confirm), processed in chunks with 6 jobs and checkpointing enabled.
sweep_config = RCOTSweepConfig(rcot_min=790.0, rcot_max=900.0, rcot_step=2.0,
                               chunk_size=10, n_jobs=6, save_checkpoints=True)

canonical_gas = ['Ethylene','Ethane','Propylene','Propane','n-Butane','i-Butane']
# Use the canonical gas columns found in X_12h, or the full canonical list
# when X_12h has none of them under the bare names.
gas_cols = [c for c in canonical_gas if c in X_12h.columns] or canonical_gas
feed_config  = FeedConfig(gas_components=gas_cols)
pipeline = SRTOPipeline(srto_config, sweep_config, feed_config)

# Short product key (several spellings accepted) -> key used to read
# spot['totals_tph'] in _make_spyro_fn.
_SHORT_TO_SRTO = {
    'Ethylene':'Ethylene','Propylene':'Propylene','MixedC4':'MixedC4','RPG':'RPG',
    'Ethane':'Ethane','Propane':'Propane',
    'Fuel_Gas':'Fuel_Gas','Fuel Gas':'Fuel_Gas','FG':'Fuel_Gas','FuelGas':'Fuel_Gas',
    'Tail Gas':'Tail_Gas', 'Tail_Gas':'Tail_Gas'
}

class _SpyroMemo:
    def __init__(self, fn, key_cols=None, decimals=4, maxsize=200000):
        self.fn = fn; self.key_cols = tuple(key_cols) if key_cols is not None else None
        self.dec = decimals; self.cache = {}; self.maxsize = maxsize
    def _select_cols(self, row):
        if self.key_cols is None:
            return tuple(c for c in row.index if c.startswith('RCOT') or c.startswith('Naphtha_chamber') or c.startswith('Gas Feed_chamber'))
        return self.key_cols
    def _to_num(self, x):
        try: v = float(x)
        except Exception: v = 0.0
        if v != v: v = 0.0
        return round(v, self.dec)
    def _sig(self, row, short_key):
        cols = self._select_cols(row)
        vals = tuple(self._to_num(row.get(c, 0.0)) for c in cols)
        return (short_key, cols, vals)
    def __call__(self, row, short_key, ctx=None):
        k = self._sig(row, short_key); v = self.cache.get(k)
        if v is not None: return v
        out = self.fn(row, short_key, ctx)
        if len(self.cache) < self.maxsize: self.cache[k] = out
        return out

def _make_spyro_fn(pipeline, merged_lims):
    """Build a memoised ``(row, short_key, ctx) -> float`` spot-yield function.

    For each call the row's ``name`` is used as the timestamp: the latest
    ``merged_lims`` row with ``date`` at or before it supplies the
    composition (the first row is used if that lookup fails for any reason).
    ``pipeline.predict_spot_plant`` is then evaluated, and the entry of
    ``totals_tph`` for the mapped product key is returned as a float.
    0.0 is returned when the row has no name, when the prediction status is
    not ``'ok'``, or when the product key is absent.
    """
    def _spyro_row(row_like: pd.Series, short_key: str, ctx=None) -> float:
        stamp = getattr(row_like, 'name', None)
        if stamp is None:
            return 0.0
        try:
            history = merged_lims.loc[merged_lims['date'] <= stamp]
            comp_row = history.iloc[-1]
        except Exception:
            # No earlier composition (or an incomparable stamp): use the first row.
            comp_row = merged_lims.iloc[0]
        spot = pipeline.predict_spot_plant(row_like, comp_row, feed_thr=0.1)
        if spot.get('status') == 'ok':
            srto_key = _SHORT_TO_SRTO.get(short_key, short_key)
            return float(spot['totals_tph'].get(srto_key, 0.0))
        return 0.0

    return _SpyroMemo(_spyro_row)

# Memoised spot-yield callable; passed to main.run_production as
# `total_spyro_yield_for_now`.
spyro_fn = _make_spyro_fn(pipeline, merged_lims)
print("SRTO + Spyro ready. Gas cols for SRTO:", gas_cols)


SRTO + Spyro ready. Gas cols for SRTO: ['Ethylene', 'Ethane', 'Propylene', 'Propane', 'n-Butane', 'i-Butane']


In [13]:
if MODE == 'online':
    # Online: run only the newest X_12h stamp at or before the last 09:00/21:00.
    now_local = pd.Timestamp.now(tz='Asia/Seoul')
    target = last_9_or_21(now_local)
    idx = X_12h.index[X_12h.index <= target]
    if len(idx) == 0:
        raise RuntimeError(f"No X_12h stamps <= {target} to run online.")
    latest = idx.max()
    # Explicit raise instead of `assert`: asserts are stripped under -O, and
    # this is a data check like the one above.
    if latest.hour not in (9, 21):
        raise RuntimeError(f"Got {latest}, expected 09:00 or 21:00")
    # NOTE(review): start and end are both midnight of the latest stamp's day;
    # presumably `online_latest_only` makes run_production pick the latest
    # stamp itself — confirm.
    start = latest.normalize()
    end   = start
    online_opts = dict(online_latest_only=True)
else:
    # Historical / closed-loop: use the configured window (END_STR may be None).
    start = pd.to_datetime(START_STR)
    end   = (pd.to_datetime(END_STR) if END_STR else None)
    online_opts = {}

# An actuation logger is only attached in online mode.
act_hook = (main.default_actuation_logger_factory(OUT_DIR) if MODE == 'online' else None)

main.run_production(
    X_12h=X_12h, Y_12h=Y_12h, merged_lims=merged_lims, pipeline=pipeline,
    prices_df=prices_df, total_spyro_yield_for_now=spyro_fn,
    start=start, end=end, mode=MODE,
    closed_loop_opts=dict(
        apply_timing='next_day',
        hold_policy='hold_until_next',
        ml_train_mode='historical',
        gp_train_mode='historical',
        cache_tag=('' if MODE=='historical' else '_sim'),
        **online_opts,
    ),
    act_hook=act_hook,
)


  last = ((now_local - pd.Timedelta(hours=9)).floor('12H') + pd.Timedelta(hours=9))



🕒 2025-09-22 21:00 — processing stamp
🔧 ML baseline yields (t+1): {'Ethylene_prod_t+1': 79.703, 'Propylene_prod_t+1': 39.816, 'MixedC4_prod_t+1': 21.904, 'RPG_prod_t+1': 41.469, 'Ethane_prod_t+1': 19.942, 'Propane_prod_t+1': 16.025, 'Hydrogen_prod_t+1': 2.349, 'Tail_Gas_prod_t+1': 39.211}
✅ Fidelity alpha search — done
✅ Fidelity checks — passed
🚀 Optimization — started
✅ Optimization — done

=== MULTI-KNOB RESULT ===
Status: ok
ΔMargin $/h: 291.13
RCOT* (per chamber):
  RCOT_chamber1              844.13 →  849.13  (Δ +5.00)
  RCOT_chamber3              844.14 →  849.14  (Δ +5.00)
  RCOT_gas_chamber4          885.12 →  885.12  (Δ +0.00)
  RCOT_gas_chamber5          885.13 →  885.13  (Δ +0.00)

=== PRICE SNAPSHOT @ 2025-09-22 21:00:00 ===
{'Ethylene': 783.9918212890625, 'Propylene': 740.0, 'Mixed C4': 732.8759765625, 'RPG': 600.6470336914062, 'Hydrogen': 1735.5777587890625, 'Tail Gas': 628.0305786132812, 'Fuel Gas': 587.0800170898438, 'PN': 606.75, 'Gas Feed': 0.0, 'LPG': 574.259948730

In [14]:
X_12h

Unnamed: 0_level_0,Ethylene_prod,Propylene_prod,MixedC4_prod,RPG_prod,PFO_prod,C2Recycle,C3Recycle,Hydrogen_prod,Tail_Gas_prod,Ethane_prod,...,i-Butane_gas,RCOT_naphtha_chamber4,RCOT_gas_chamber4,RCOT_naphtha_chamber5,RCOT_gas_chamber5,RCOT_naphtha_chamber6,RCOT_gas_chamber6,FreshFeed_C3 LPG,FreshFeed_MX Offgas,feed_qty
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-09-01 09:00:00,66.961969,31.875999,18.915362,33.082066,5.525592,17.748620,10.451746,1.932102,33.937773,17.748620,...,1.921667,880.027119,880.020830,0.0,28.965482,0.0,880.026279,45.032904,15.960949,60.993853
2024-09-01 21:00:00,67.015402,31.322504,19.002719,32.443917,5.491209,17.645969,10.903391,1.930902,34.434181,17.645969,...,1.921667,879.992959,879.987157,0.0,28.254550,0.0,879.991018,45.212228,15.932669,61.144897
2024-09-02 09:00:00,69.300842,32.614699,19.870455,34.842525,5.197984,18.398275,10.941583,1.971435,35.374823,18.398275,...,1.973750,879.442822,879.372944,0.0,27.293488,0.0,879.849443,43.909099,15.738533,59.647631
2024-09-02 21:00:00,74.605789,35.845847,21.142220,37.968441,4.312466,19.974149,10.643060,2.053634,38.222157,19.974149,...,1.973750,880.012074,879.971904,0.0,95.850268,0.0,879.981582,41.988303,15.891017,57.879320
2024-09-03 09:00:00,74.808790,35.416598,21.110860,36.018180,6.923870,20.133305,9.999946,2.052681,38.259773,20.133305,...,1.915000,879.985571,879.980632,0.0,669.419350,0.0,879.978009,42.274970,15.691889,57.966860
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-09-20 21:00:00,80.189162,41.233319,22.422373,40.293263,9.210038,19.499534,15.651687,2.305761,39.389699,19.499534,...,1.589583,0.000000,885.016104,0.0,884.983174,0.0,0.000000,60.941494,17.859549,78.801043
2025-09-21 09:00:00,79.254790,41.400782,22.642964,44.251763,8.512621,18.254823,16.376972,2.242943,39.861093,18.254823,...,1.582500,0.000000,884.968799,0.0,884.957212,0.0,0.000000,64.448326,15.381159,79.829485
2025-09-21 21:00:00,80.346128,40.415717,22.585680,39.392188,8.033801,18.368733,16.985584,2.178664,39.696486,18.368733,...,1.582500,0.000000,884.933871,0.0,884.897974,0.0,0.000000,63.201942,15.068631,78.270573
2025-09-22 09:00:00,79.669902,38.986834,22.491383,41.006816,8.843282,18.093634,16.541116,2.194195,39.153700,18.093634,...,1.578834,0.000000,885.020712,0.0,885.015224,0.0,0.000000,62.000936,15.967714,77.968650


In [None]:
# === ML prediction backtest (90D ending at the chosen online stamp) ===
# Restrict the orchestrator's target list to the columns present in Y_12h.
TARGET_COLS = [c for c in main.TARGET_COLS if c in Y_12h.columns]
# Taken unchanged from the orchestrator module.
MIN_TR_ROWS = main.MIN_TR_ROWS

def ml_prediction_check(
    *,
    X_12h: pd.DataFrame,
    Y_12h: pd.DataFrame,
    start: pd.Timestamp | None,
    end: pd.Timestamp | None,
    lookback: pd.Timedelta,
    target_cols: list[str],
    min_tr_rows: int,
    cache_tag: str = "_eval"
) -> tuple[pd.DataFrame, pd.DataFrame]:
    idx = pd.DatetimeIndex(X_12h.index)
    if idx.tz is not None:
        idx = idx.tz_convert('Asia/Seoul').tz_localize(None)
        X_12h = X_12h.copy(); X_12h.index = idx
        if Y_12h.index.tz is not None:
            Y_12h = Y_12h.copy()
            Y_12h.index = Y_12h.index.tz_convert('Asia/Seoul').tz_localize(None)

    end = pd.Timestamp(end) if end is not None else X_12h.index.max()
    start = pd.Timestamp(start) if start is not None else (end - pd.Timedelta(days=90))
    if start > end:
        start, end = end - pd.Timedelta(days=90), end

    stamps = X_12h.index[(X_12h.index >= start) & (X_12h.index <= end)].sort_values()
    if len(stamps) == 0:
        return pd.DataFrame(), pd.DataFrame()
    tcols = [c for c in target_cols if c in Y_12h.columns]
    if not tcols:
        return pd.DataFrame(), pd.DataFrame()

    preds_all = main.ensure_ml_preds_for(
        stamps=stamps,
        Xsrc=X_12h,
        Ysrc=Y_12h,
        lookback=lookback,
        target_cols=tcols,
        mode="historical",
        train_mode="historical",
        cache_tag=cache_tag,
        Y_sim_state=None,
    )
    y_pred = preds_all.reindex(stamps)[tcols]
    y_true = Y_12h.reindex(stamps)[tcols]

    rows = []
    eps = 1e-9
    for c in tcols:
        yt = pd.to_numeric(y_true[c], errors="coerce")
        yp = pd.to_numeric(y_pred[c], errors="coerce")
        mask = yt.notna() & yp.notna()
        n = int(mask.sum())
        if n == 0:
            rows.append(dict(target=c, n=0, rmse=np.nan, mae=np.nan, mape_pct=np.nan, r2=np.nan, bias=np.nan, corr=np.nan))
            continue
        e = (yp[mask] - yt[mask]).to_numpy(float)
        ae = np.abs(e)
        rmse = float(np.sqrt(np.mean(e**2)))
        mae  = float(np.mean(ae))
        m_mask = np.abs(yt[mask].to_numpy(float)) > eps
        mape = float(np.mean(ae[m_mask] / np.abs(yt[mask].to_numpy(float)[m_mask]))*100.0) if m_mask.any() else np.nan
        bias = float(np.mean(e))
        yt_arr = yt[mask].to_numpy(float)
        ss_res = float(np.sum(e**2))
        ss_tot = float(np.sum((yt_arr - yt_arr.mean())**2))
        r2 = float(1.0 - ss_res/ss_tot) if ss_tot > eps else np.nan
        corr = float(np.corrcoef(yt_arr, yp[mask].to_numpy(float))[0,1]) if n > 1 else np.nan
        rows.append(dict(target=c, n=n, rmse=rmse, mae=mae, mape_pct=mape, r2=r2, bias=bias, corr=corr))

    metrics = pd.DataFrame(rows).set_index("target").sort_index()
    return y_pred, metrics

# Run the backtest over the 90 days that end at `latest`.
# `latest` must remain the anchor here: END_EVAL_STR is not defined, so do not
# replace it with that name.
start_eval, end_eval = latest - pd.Timedelta(days=90), latest

preds, metrics = ml_prediction_check(
    X_12h=X_12h, Y_12h=Y_12h,
    start=start_eval, end=end_eval,
    lookback=pd.Timedelta("180D"),
    target_cols=TARGET_COLS, min_tr_rows=MIN_TR_ROWS,
    cache_tag="_eval",
)

print("\n=== ML metrics (90D ending", end_eval, ") ===")
print(metrics)

# Persist both artefacts under OUT_DIR, creating the directory if needed.
OUT_DIR.mkdir(parents=True, exist_ok=True)
metrics.to_excel(OUT_DIR.joinpath("metrics.xlsx"))
preds.to_csv(OUT_DIR.joinpath("preds_eval.csv"))

def _extract_mape_for_push(metrics: pd.DataFrame) -> dict:
    """Collect the per-product MAPE values from *metrics* under their push keys.

    Returns an empty dict when *metrics* is None or empty. Otherwise returns a
    dict with the four keys 'mape_ethy', 'mape_prop', 'mape_mc4', 'mape_rpg';
    each value is the float in the 'mape_pct' column for the matching target
    row, or None when that value is missing, NaN, or cannot be read.
    """
    if metrics is None or metrics.empty:
        return {}

    # push key -> row label in the metrics index
    target_by_key = {
        'mape_ethy': 'Ethylene_prod_t+1',
        'mape_prop': 'Propylene_prod_t+1',
        'mape_mc4': 'MixedC4_prod_t+1',
        'mape_rpg': 'RPG_prod_t+1',
    }

    def _mape_or_none(target):
        # Best-effort lookup: any failure (absent row/column, non-scalar or
        # non-numeric cell) is reported as None rather than raised.
        try:
            value = metrics.loc[target, 'mape_pct']
            if pd.notna(value):
                return float(value)
        except Exception:
            pass
        return None

    return {key: _mape_or_none(target) for key, target in target_by_key.items()}

# Maps short internal field names to their "M10_EFOM_*" tag names.
# NOTE(review): presumably these are the destination tags used when pushing
# results to an external system — confirm against the code that consumes TAG_MAP.
TAG_MAP = {
    # RCOT entries (rcot1-3, and separate *_nap / *_gas entries for rcot4-6)
    "rcot1": "M10_EFOM_RCOT1",
    "rcot2": "M10_EFOM_RCOT2",
    "rcot3": "M10_EFOM_RCOT3",
    "rcot4_nap": "M10_EFOM_RCOT4_NAP",
    "rcot4_gas": "M10_EFOM_RCOT4_GAS",
    "rcot5_nap": "M10_EFOM_RCOT5_NAP",
    "rcot5_gas": "M10_EFOM_RCOT5_GAS",
    "rcot6_nap": "M10_EFOM_RCOT6_NAP",
    "rcot6_gas": "M10_EFOM_RCOT6_GAS",
    # Product entries
    "eth_prod": "M10_EFOM_ETH_PROD",
    "prop_prod": "M10_EFOM_PROP_PROD",
    "mc4_prod": "M10_EFOM_MC4_PROD",
    "rpg_prod": "M10_EFOM_RPG_PROD",
    # Summary entries; note "timestamp_str" maps to a tag without the "_STR" suffix
    "margin_hourly": "M10_EFOM_MARGIN_HOURLY",
    "performance": "M10_EFOM_PERFORMANCE",
    "timestamp_str": "M10_EFOM_TIMESTAMP",
    # MAPE entries; keys match those returned by _extract_mape_for_push
    "mape_ethy": "M10_EFOM_MAPE_ETHY",
    "mape_prop": "M10_EFOM_MAPE_PROP",
    "mape_mc4": "M10_EFOM_MAPE_MC4",
    "mape_rpg": "M10_EFOM_MAPE_RPG",
}

def _latest_recs_from_outdir(out_dir: Path):
    cand = [out_dir / "rcot_recommendations_sim.csv", out_dir / "rcot_recommendations.csv"]
    if not any(p.exists() for p in cand):
        wild = sorted(out_dir.glob("rcot_recommendations*.csv"))
        if wild:
            cand.append(wild[-1])
    rec_path = next((p for p in cand if p.exists()), None)
    if not rec_path:
        return None, {}, {}, {}
    recs = pd.read_csv(rec_path, parse_dates=["timestamp"]).sort_values("timestamp")
    last = recs.iloc[-1]
    ts = pd.Timestamp(last["timestamp"])

    rcots = {}
    for c in recs.columns:
        if c.startswith("rcot_opt_") and pd.notna(last[c]):
            rcots[c.replace("rcot_opt_", "")] = float(last[c])

    prods = {}
    for p in ["Ethylene","Propylene","MixedC4","RPG"]:
        col = f"{p}_opt_tph"
        if col in recs.columns and pd.notna(last[col]):
            prods[p] = float(last[col])

    extras = dict(
        margin_hourly=float(last.get("margin_current_per_h", 0.0)) if pd.notna(last.get("margin_current_per_h", np.nan)) else 0.0,
        performance=0.0,
    )
    return ts, rco


NameError: name 'main' is not defined