### Core 9 — Data Contract & Target Definition (core9_01)

본 노트북은 예측 모델을 만들지 않는다.

본 노트북의 목적은:
- Core 9에서 사용할 **예측 대상(사건 정의)** 을
- 데이터 계약(Data Contract) 형태로 **고정**하는 것이다.

즉,
- “무엇을 예측하는가”
- “어떤 시계열 구간을 사건으로 정의하는가”
- “어떤 feature는 쓰고 / 쓰지 않는가”

를 코드 + 산출물(JSON/CSV)로 잠근다.

Core 9의 예측은:
- fallback을 즉시 실행하지 않는다.
- fallback을 **예약(reservation)** 하기 위한 사전 신호만 제공한다.

In [26]:
import pandas as pd
import numpy as np
from pathlib import Path
import json

FALLBACK_PARAMS_PATH = Path("../artifact/core8/core8_06_fallback_params.json")
FALLBACK_DECISIONS_PATH = Path("../artifact/core8/core8_06_fallback_decisions.csv")
CORE8_TRACE_PATH = Path("../artifact/core8/core8_03_refusal_state_trace_counterfactual.csv")
EXPORT_DIR = Path("../artifact/core9")
EXPORT_DIR.mkdir(exist_ok=True)

assert CORE8_TRACE_PATH.exists()
assert FALLBACK_PARAMS_PATH.exists()
assert FALLBACK_DECISIONS_PATH.exists()

In [27]:
state_df = pd.read_csv(CORE8_TRACE_PATH)
fallback_df = pd.read_csv(FALLBACK_DECISIONS_PATH)
with open(FALLBACK_PARAMS_PATH, "r") as f:
    fallback_params = json.load(f)

state_df.head()

Unnamed: 0,run_id,case_id,antibody_id,step,refusal_stage,refusal_mode,blocked_rate_window,veto_streak,action_toggle_rate,SoMS_cumsum_window,refusal_triggered,refusal_reason_code
0,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,0,0,NORMAL,0.0,0.0,0.0,0.0,False,REASON_MIN_STEPS_NOT_REACHED
1,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,1,0,NORMAL,0.0,0.0,0.0,0.0,False,REASON_MIN_STEPS_NOT_REACHED
2,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,2,0,NORMAL,0.0,0.0,0.0,0.0,False,REASON_MIN_STEPS_NOT_REACHED
3,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,3,0,NORMAL,0.0,0.0,0.0,0.0,False,REASON_MIN_STEPS_NOT_REACHED
4,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,4,0,NORMAL,0.0,0.0,0.0,0.6,False,REASON_MIN_STEPS_NOT_REACHED


#### Feature Contract

##### 사용 가능 (Core 8에서 관측된 상태 변수)
- step
- blocked_rate_window
- veto_streak
- action_toggle_rate
- SoMS_cumsum_window

###### 파생 허용 (결정론적)
- ΔSoMS
- rolling mean / max (고정 윈도우)
- conflict_density_proxy (선형 결합)

###### 금지
- fallback_stage
- fallback_entered
- refusal_triggered
- reason_code 계열

❗️ 예측은 거버넌스를 *대체하지 않는다*  
→ 결과/판정 컬럼은 feature에서 **완전 배제**

### Prediction Horizon (고정)

- H = 5 steps (forward-looking window)
- 모든 타깃은 **현재 step t 기준 → [t+1, t+H]** 구간에서 정의된다.

In [28]:
H = 5

SOMS_TH = fallback_params["thresholds"]["soms_cumsum"]
TOGGLE_TH = fallback_params["thresholds"]["toggle_rate"]

ALPHA = 0.4
BETA = 0.4
GAMMA = 0.2

work = state_df.copy()

work["delta_soms"] = (
    work.groupby(["run_id", "case_id"])["SoMS_cumsum_window"]
    .diff()
    .fillna(0)
)

work["conflict_density_proxy"] = (
    ALPHA * work["blocked_rate_window"].fillna(0)
    + BETA  * work["action_toggle_rate"].fillna(0)
    + GAMMA * work["veto_streak"].fillna(0) / 10.0
)

work.head() # Derived Proxy Columns


Unnamed: 0,run_id,case_id,antibody_id,step,refusal_stage,refusal_mode,blocked_rate_window,veto_streak,action_toggle_rate,SoMS_cumsum_window,refusal_triggered,refusal_reason_code,delta_soms,conflict_density_proxy
0,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,0,0,NORMAL,0.0,0.0,0.0,0.0,False,REASON_MIN_STEPS_NOT_REACHED,0.0,0.0
1,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,1,0,NORMAL,0.0,0.0,0.0,0.0,False,REASON_MIN_STEPS_NOT_REACHED,0.0,0.0
2,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,2,0,NORMAL,0.0,0.0,0.0,0.0,False,REASON_MIN_STEPS_NOT_REACHED,0.0,0.0
3,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,3,0,NORMAL,0.0,0.0,0.0,0.0,False,REASON_MIN_STEPS_NOT_REACHED,0.0,0.0
4,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,4,0,NORMAL,0.0,0.0,0.0,0.6,False,REASON_MIN_STEPS_NOT_REACHED,0.6,0.0


In [29]:
def compute_y_soms_runaway(df, h=H, th=SOMS_TH):
    y = np.zeros(len(df), dtype=int)
    for i in range(len(df)):
        future = df.iloc[i+1:i+1+h]
        if (future["SoMS_cumsum_window"] >= th).any():
            y[i] = 1
    return pd.Series(y, index=df.index)

work["y_soms_runaway"] = (
    work
    .groupby(["run_id", "case_id"], group_keys=False)
    .apply(lambda g: compute_y_soms_runaway(g))
)

  .apply(lambda g: compute_y_soms_runaway(g))


In [30]:
def compute_y_osc_nonrecover(df, h=H, th=TOGGLE_TH):
    y = np.zeros(len(df), dtype=int)
    for i in range(len(df)):
        future = df.iloc[i+1:i+1+h]
        if len(future) < h:
            y[i] = 0
        else:
            y[i] = int((future["action_toggle_rate"] >= th).all())
    return pd.Series(y, index=df.index)

work["y_osc_nonrecover"] = (
    work
    .groupby(["run_id", "case_id"], group_keys=False)
    .apply(lambda g: compute_y_osc_nonrecover(g))
)

def compute_y_conflict_selfamp(df, h=H, th=0.6):
    y = np.zeros(len(df), dtype=int)
    for i in range(len(df)):
        future = df.iloc[i+1:i+1+h]
        if len(future) < h:
            y[i] = 0
            continue

        above = (future["conflict_density_proxy"] >= th).all()
        increasing = (future["conflict_density_proxy"]
                      .diff().fillna(0) > 0).all()

        y[i] = int(above and increasing)

    return pd.Series(y, index=df.index)

work["y_conflict_selfamp"] = (
    work
    .groupby(["run_id", "case_id"], group_keys=False)
    .apply(lambda g: compute_y_conflict_selfamp(g))
)

target_cols = [
    "run_id","case_id","antibody_id","step",
    "SoMS_cumsum_window","action_toggle_rate",
    "blocked_rate_window","veto_streak",
    "conflict_density_proxy",
    "y_soms_runaway","y_osc_nonrecover","y_conflict_selfamp"
]

preview = work[target_cols].copy()
preview.to_csv(EXPORT_DIR / "core9_01_targets_preview.csv", index=False)

preview.head()

  .apply(lambda g: compute_y_osc_nonrecover(g))
  .apply(lambda g: compute_y_conflict_selfamp(g))


Unnamed: 0,run_id,case_id,antibody_id,step,SoMS_cumsum_window,action_toggle_rate,blocked_rate_window,veto_streak,conflict_density_proxy,y_soms_runaway,y_osc_nonrecover,y_conflict_selfamp
0,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,0,0.0,0.0,0.0,0.0,0.0,0,0,0
1,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,1,0.0,0.0,0.0,0.0,0.0,0,0,0
2,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,2,0.0,0.0,0.0,0.0,0.0,0,0,0
3,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,3,0.0,0.0,0.0,0.0,0.0,0,0,0
4,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,4,0.6,0.0,0.0,0.0,0.0,0,0,0


In [31]:
contract = {
    "core": "core9",
    "notebook": "core9_01_data_contract_and_targets",
    "prediction_horizon": H,
    "features_allowed": [
        "step","blocked_rate_window","veto_streak",
        "action_toggle_rate","SoMS_cumsum_window",
        "delta_soms","conflict_density_proxy"
    ],
    "features_forbidden": [
        "fallback_stage","fallback_entered",
        "refusal_triggered","reason_code"
    ],
    "targets": {
        "y_soms_runaway": {
            "definition": "SoMS exceeds threshold within H steps",
            "threshold": SOMS_TH
        },
        "y_osc_nonrecover": {
            "definition": "action_toggle_rate stays above threshold for H consecutive steps",
            "threshold": TOGGLE_TH
        },
        "y_conflict_selfamp": {
            "definition": "conflict_density_proxy stays above threshold and strictly increases",
            "proxy_weights": {"blocked":ALPHA,"toggle":BETA,"veto":GAMMA}
        }
    }
}

with open(EXPORT_DIR / "core9_01_contract.json", "w") as f:
    json.dump(contract, f, indent=2)