# Core 9 — Evidence Report (Core8 vs Core9) (core9_05)

본 노트북은 성능 평가가 아니라 **증거(operational evidence)** 제출을 위한 노트북이다.

목적:
- Core 8 (실제 fallback 판정)과
- Core 9 (예약 신호/스케줄러)가
서로 모순되지 않으며 운영적으로 타당함을
**표 2~3개로** 증명한다.

출력:
1) 케이스별 요약 테이블 (A_ALWAYS_ALLOW vs B_GOVERNED)
2) lead time 테이블 (reservation vs core8 stage)
3) MIN_STEPS/HOLD 로그 증명

In [1]:
from pathlib import Path
import numpy as np
import pandas as pd

# Input CSV artifacts read by this notebook (paths are relative to the working directory).
CORE8_DECISIONS_PATH = Path("../artifact/core8/core8_06_fallback_decisions.csv")
CORE9_RES_LOG_PATH   = Path("../artifact/core9/core9_04_reservation_log.csv")
CORE8_TRACE_PATH     = Path("../artifact/core8/core8_03_refusal_state_trace_counterfactual.csv")

# Fail fast with an explicit exception: a bare `assert` is removed when Python
# runs with -O, which would let a missing input slip through to read_csv.
for _required_path in (CORE8_DECISIONS_PATH, CORE9_RES_LOG_PATH, CORE8_TRACE_PATH):
    if not _required_path.exists():
        raise FileNotFoundError(f"{_required_path.name} not found")

# Directory that receives the tables exported at the end of the notebook.
EXPORT_DIR = Path("../artifact/core9")
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

core8 = pd.read_csv(CORE8_DECISIONS_PATH)
reslog = pd.read_csv(CORE9_RES_LOG_PATH)
trace = pd.read_csv(CORE8_TRACE_PATH)

# Last expression of the cell: the notebook displays a preview of each frame.
(core8.head(), reslog.head(), trace.head())

(                run_id         case_id antibody_id  step  blocked_rate_window  \
 0  core7_04_1767776352  A_ALWAYS_ALLOW  antibody_A     0                  0.0   
 1  core7_04_1767776352  A_ALWAYS_ALLOW  antibody_A     1                  0.0   
 2  core7_04_1767776352  A_ALWAYS_ALLOW  antibody_A     2                  0.0   
 3  core7_04_1767776352  A_ALWAYS_ALLOW  antibody_A     3                  0.0   
 4  core7_04_1767776352  A_ALWAYS_ALLOW  antibody_A     4                  0.0   
 
    veto_streak  action_toggle_rate  SoMS_cumsum_window  refusal_triggered  \
 0            0                 0.0                 0.0              False   
 1            0                 0.0                 0.0              False   
 2            0                 0.0                 0.0              False   
 3            0                 0.0                 0.0              False   
 4            0                 0.0                 0.6              False   
 
             refusal_reason_code  ..

In [2]:
# Minimal schema cleanup.
#
# Drop duplicated column labels from every frame. `trace` is handled the same
# way as `core8` and `reslog`: the step coercion below reads `df["step"]`,
# which is a DataFrame (not a Series) when that label is duplicated, and
# pd.to_numeric does not accept a DataFrame.
core8 = core8.loc[:, ~core8.columns.duplicated()].copy()
reslog = reslog.loc[:, ~reslog.columns.duplicated()].copy()
trace = trace.loc[:, ~trace.columns.duplicated()].copy()

# Normalise dtypes: `step` becomes a nullable integer so that unparsable
# values turn into <NA> instead of forcing the whole column to float/object.
for df in [core8, reslog, trace]:
    if "step" in df.columns:
        df["step"] = pd.to_numeric(df["step"], errors="coerce").astype("Int64")

# Join key shared by all three frames; each frame must provide every column.
key = ["run_id","case_id","antibody_id","step"]
for k in key:
    assert k in core8.columns, f"missing {k} in core8"
    assert k in reslog.columns, f"missing {k} in reslog"
    assert k in trace.columns, f"missing {k} in trace"

In [3]:
# Join: trace + Core8 decisions + Core9 reservation log.


def _existing(frame, wanted):
    """Return the names from `wanted` that are columns of `frame`, in `wanted` order."""
    return [name for name in wanted if name in frame.columns]


# Core8: join key plus the fallback decision columns.
core8_keep = _existing(core8, [
    "run_id", "case_id", "antibody_id", "step",
    "fallback_stage", "fallback_reason_code", "fallback_score",
])
core8_slim = core8[core8_keep].copy()

# Core9 reservation log: join key plus the reservation / risk columns.
res_keep = _existing(reslog, [
    "run_id", "case_id", "antibody_id", "step",
    "reservation_status", "reservation_reason_code", "reservation_rule_id",
    "risk_score_total", "forecast_hazard_level",
])
res_slim = reslog[res_keep].copy()

# Trace: join key plus the operational metrics used as evidence.
trace_keep = _existing(trace, [
    "run_id", "case_id", "antibody_id", "step",
    "SoMS_cumsum_window", "action_toggle_rate", "blocked_rate_window", "veto_streak",
])
trace_slim = trace[trace_keep].copy()

# Trace rows are the left side of both joins, so every trace row is kept.
merged = trace_slim.merge(core8_slim, on=key, how="left")
merged = merged.merge(res_slim, on=key, how="left", suffixes=("", "_core9"))

merged.head()

Unnamed: 0,run_id,case_id,antibody_id,step,SoMS_cumsum_window,action_toggle_rate,blocked_rate_window,veto_streak,fallback_stage,fallback_reason_code,fallback_score,reservation_status,reservation_reason_code,reservation_rule_id,risk_score_total,forecast_hazard_level
0,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,0,0.0,0.0,0.0,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,core9_04_v1,,NONE
1,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,1,0.0,0.0,0.0,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,core9_04_v1,,NONE
2,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,2,0.0,0.0,0.0,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,core9_04_v1,,NONE
3,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,3,0.0,0.0,0.0,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,core9_04_v1,,NONE
4,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,4,0.6,0.0,0.0,0.0,HOLD,REASON_MIN_STEPS_NOT_REACHED,0.006,HOLD,REASON_MIN_STEPS_NOT_REACHED,core9_04_v1,,NONE


In [4]:
def bucket_case(case_id: str) -> str:
    """Map a case id to its comparison bucket by prefix.

    The value is converted with ``str()`` first. Ids starting with ``"A_"``
    map to ``"A_ALWAYS_ALLOW"``, ids starting with ``"B_"`` map to
    ``"B_GOVERNED"``, and anything else maps to ``"OTHER"``.
    """
    label = str(case_id)
    for prefix, bucket in (("A_", "A_ALWAYS_ALLOW"), ("B_", "B_GOVERNED")):
        if label.startswith(prefix):
            return bucket
    return "OTHER"

# Table 1 — Case-level Summary
tmp = merged.copy()
tmp["bucket"] = tmp["case_id"].map(bucket_case)

# Row-level flags, one per reservation status of interest.
for _flag, _status in (
    ("has_hold", "HOLD"),
    ("has_candidate", "CANDIDATE"),
    ("has_confirmed", "CONFIRMED"),
    ("has_expired", "EXPIRED"),
):
    tmp[_flag] = tmp["reservation_status"].eq(_status)

# Row-level flags, one per Core8 fallback stage of interest.
for _flag, _stage in (
    ("has_partial_seal", "PARTIAL_SEAL"),
    ("has_refusal", "REFUSAL"),
    ("has_fallback_enter", "FALLBACK_ENTER"),
):
    tmp[_flag] = tmp["fallback_stage"].eq(_stage)


def _numeric_max(group, column):
    """Return the max of `column` coerced to numeric, or NaN if the column is absent."""
    if column not in group:
        return np.nan
    return float(pd.to_numeric(group[column], errors="coerce").max())


def _summarise_case(run_id, case_id, group):
    """Collapse all rows of one (run_id, case_id) group into a single summary record."""
    return {
        "run_id": run_id,
        "case_id": case_id,
        "bucket": bucket_case(case_id),

        "reservation_any_hold": bool(group["has_hold"].any()),
        "reservation_any_candidate": bool(group["has_candidate"].any()),
        "reservation_any_confirmed": bool(group["has_confirmed"].any()),
        "reservation_any_expired": bool(group["has_expired"].any()),

        "core8_any_partial_seal": bool(group["has_partial_seal"].any()),
        "core8_any_refusal": bool(group["has_refusal"].any()),
        "core8_any_fallback_enter": bool(group["has_fallback_enter"].any()),

        "max_risk_score_total": _numeric_max(group, "risk_score_total"),
        "max_fallback_score": _numeric_max(group, "fallback_score"),
        "max_soms": _numeric_max(group, "SoMS_cumsum_window"),
    }


rows = [
    _summarise_case(run_id, case_id, g)
    for (run_id, case_id), g in tmp.groupby(["run_id", "case_id"])
]

case_summary = pd.DataFrame(rows).sort_values(["run_id", "bucket", "case_id"])
case_summary

Unnamed: 0,run_id,case_id,bucket,reservation_any_hold,reservation_any_candidate,reservation_any_confirmed,reservation_any_expired,core8_any_partial_seal,core8_any_refusal,core8_any_fallback_enter,max_risk_score_total,max_fallback_score,max_soms
0,core7_04_1767776352,A_ALWAYS_ALLOW,A_ALWAYS_ALLOW,True,False,False,False,False,False,False,0.12075,0.2415,24.15
1,core7_04_1767776352,B_GOVERNED,B_GOVERNED,True,False,False,False,True,False,False,0.413241,0.521271,24.2


In [5]:
# lead time 정의:
# - core9 confirmed 최초 step
# - core8 stage(PartialSeal/Refusal/FallbackEnter) 최초 step
# - lead = stage_step - confirm_step (양수면 사전 예약 성공)

def first_step(g, col, value):
    """Return the earliest ``step`` among rows of ``g`` where ``g[col] == value``.

    Parameters:
        g: DataFrame with a ``step`` column and the column named by ``col``.
        col: name of the column to compare against ``value``.
        value: value a row must hold in ``col`` to be considered.

    Returns:
        The minimum step as an ``int``, or ``np.nan`` when no matching row
        carries a usable (non-missing) step.
    """
    # Missing steps are dropped before taking the minimum: when every matching
    # row has a missing step, min() yields <NA>/NaN and int() would raise.
    steps = g.loc[g[col].eq(value), "step"].dropna()
    if steps.empty:
        return np.nan
    return int(steps.min())

# Table 2 — Lead Time (Reservation vs Core8 Stage)
lead_rows = []
for _group_key, g in tmp.groupby(["run_id", "case_id", "antibody_id"]):
    run_id, case_id, antibody_id = _group_key

    # First step at which Core9 reports a CONFIRMED reservation.
    conf_step = first_step(g, "reservation_status", "CONFIRMED")

    # First step at which each Core8 fallback stage appears.
    ps_step, rf_step, fb_step = (
        first_step(g, "fallback_stage", stage_name)
        for stage_name in ("PARTIAL_SEAL", "REFUSAL", "FALLBACK_ENTER")
    )

    # Representative stage step: the earliest stage that occurred at all.
    stage_steps = [step for step in (ps_step, rf_step, fb_step) if pd.notna(step)]
    if stage_steps:
        first_stage = int(min(stage_steps))
    else:
        first_stage = np.nan

    # Lead time needs both endpoints; it is positive when CONFIRMED came first.
    if pd.isna(first_stage) or pd.isna(conf_step):
        lead = np.nan
    else:
        lead = first_stage - conf_step

    record = {
        "run_id": run_id,
        "case_id": case_id,
        "bucket": bucket_case(case_id),
        "antibody_id": antibody_id,
    }
    record["core9_first_confirmed_step"] = conf_step
    record["core8_first_stage_step"] = first_stage
    record["lead_time_steps"] = lead
    record["core8_first_partial_seal_step"] = ps_step
    record["core8_first_refusal_step"] = rf_step
    record["core8_first_fallback_enter_step"] = fb_step
    lead_rows.append(record)

lead_table = pd.DataFrame(lead_rows).sort_values(
    ["run_id", "bucket", "case_id", "antibody_id"]
)
lead_table

Unnamed: 0,run_id,case_id,bucket,antibody_id,core9_first_confirmed_step,core8_first_stage_step,lead_time_steps,core8_first_partial_seal_step,core8_first_refusal_step,core8_first_fallback_enter_step
0,core7_04_1767776352,A_ALWAYS_ALLOW,A_ALWAYS_ALLOW,antibody_A,,,,,,
1,core7_04_1767776352,A_ALWAYS_ALLOW,A_ALWAYS_ALLOW,antibody_B,,,,,,
2,core7_04_1767776352,A_ALWAYS_ALLOW,A_ALWAYS_ALLOW,antibody_C,,,,,,
3,core7_04_1767776352,B_GOVERNED,B_GOVERNED,antibody_A,,,,,,
4,core7_04_1767776352,B_GOVERNED,B_GOVERNED,antibody_B,,,,,,
5,core7_04_1767776352,B_GOVERNED,B_GOVERNED,antibody_C,,21.0,,21.0,,


In [6]:
# Evidence for the "no immediate activation" policy:
# rows with reservation_status == HOLD and
# reservation_reason_code == REASON_MIN_STEPS_NOT_REACHED must exist.

hold_proof = (
    tmp.loc[
        (tmp["reservation_status"] == "HOLD") &
        (tmp["reservation_reason_code"] == "REASON_MIN_STEPS_NOT_REACHED"),
        ["run_id","case_id","antibody_id","step","reservation_status","reservation_reason_code"]
    ]
    .sort_values(["run_id","case_id","antibody_id","step"])
)

# Printed explicitly: this is not the last expression of the cell, so a bare
# expression here would be evaluated and silently discarded by the notebook.
print(hold_proof.head(30))
print(hold_proof.shape)

# A vs B comparison summary: one row per (run_id, bucket).
# Each entry maps an output column to (source column in case_summary, aggregation).
# Summing the boolean "any" columns counts the cases in which the event occurred.
_ab_aggregations = {
    "n_cases": ("case_id", "count"),
    "any_candidate": ("reservation_any_candidate", "sum"),
    "any_confirmed": ("reservation_any_confirmed", "sum"),
    "any_partial_seal": ("core8_any_partial_seal", "sum"),
    "any_refusal": ("core8_any_refusal", "sum"),
    "max_risk": ("max_risk_score_total", "max"),
    "max_fallback": ("max_fallback_score", "max"),
    "max_soms": ("max_soms", "max"),
}

_by_run_and_bucket = case_summary.groupby(["run_id", "bucket"], as_index=False)
ab_summary = _by_run_and_bucket.agg(**_ab_aggregations)

ab_summary

Unnamed: 0,run_id,bucket,n_cases,any_candidate,any_confirmed,any_partial_seal,any_refusal,max_risk,max_fallback,max_soms
0,core7_04_1767776352,A_ALWAYS_ALLOW,1,0,0,0,0,0.12075,0.2415,24.15
1,core7_04_1767776352,B_GOVERNED,1,0,0,1,0,0.413241,0.521271,24.2


In [7]:
# Destination files for the four evidence tables, all under EXPORT_DIR.
case_path = EXPORT_DIR / "core9_05_case_summary_table.csv"
lead_path = EXPORT_DIR / "core9_05_lead_time_table.csv"
hold_path = EXPORT_DIR / "core9_05_hold_proof_rows.csv"
ab_path   = EXPORT_DIR / "core9_05_ab_summary.csv"

# (table, destination) pairs, in the order they are written and reported.
_exports = (
    (case_summary, case_path),
    (lead_table, lead_path),
    (hold_proof, hold_path),
    (ab_summary, ab_path),
)

# Write every table first (without the index column), then report the paths.
for _table, _destination in _exports:
    _table.to_csv(_destination, index=False)

print("Exported:")
for _table, _destination in _exports:
    print("-", _destination)

Exported:
- ../artifact/core9/core9_05_case_summary_table.csv
- ../artifact/core9/core9_05_lead_time_table.csv
- ../artifact/core9/core9_05_hold_proof_rows.csv
- ../artifact/core9/core9_05_ab_summary.csv


## Core 9 Evidence Conclusions (Core8 vs Core9)

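1) Case-level summary(Table 1)에서 이번 run은 A_ALWAYS_ALLOW와 B_GOVERNED 모두 HOLD가 관측되었고,
   CANDIDATE/CONFIRMED/EXPIRED는 어느 bucket에서도 관측되지 않았습니다.
   다만 B_GOVERNED는 max_risk_score_total이 더 높고(0.413 vs 0.121) Core8 PARTIAL_SEAL이 관측되었습니다.
   이는 B_GOVERNED에서 CANDIDATE/CONFIRMED가 더 쉽게 관측될 것이라는 기대와 방향은 일치하지만,
   이번 run만으로는 그 기대를 확정할 수 없습니다.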

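2) Lead time table에서 CONFIRMED가 존재할 경우,
   Core8 stage(PartialSeal/Refusal/FallbackEnter) 이전에 기록되는지 확인 가능합니다.
   (즉, 예측은 반응이 아니라 “예약”으로 작동합니다.)
   단, 이번 run에서는 CONFIRMED가 한 건도 없어 lead_time_steps가 모두 비어 있으며,
   Core8 stage는 B_GOVERNED / antibody_C의 PARTIAL_SEAL(step 21)만 관측되었습니다.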

3) HOLD proof rows는 reservation_status == HOLD 및
   REASON_MIN_STEPS_NOT_REACHED가 실제 row로 존재함을 보여주며,
   “즉시 발동 금지” 정책이 로그로 증명됩니다.