In [14]:
from pathlib import Path
import pandas as pd
import numpy as np
import json

# Project root is the parent of the current working directory, made absolute.
PROJECT_ROOT = Path("..").resolve()

# Artifact directories for the Core7 and Core8 stages.
CORE7_DIR = PROJECT_ROOT.joinpath("artifact", "core7")
CORE8_DIR = PROJECT_ROOT.joinpath("artifact", "core8")

# The Core8 directory receives this notebook's outputs, so create it up front
# (no error if it is already there).
CORE8_DIR.mkdir(parents=True, exist_ok=True)

# Files read by this notebook.
CORE7_EVENT_LOG = CORE7_DIR.joinpath("core7_04_counterfactual_soft_event_log.csv")
POLICY_PATH = CORE8_DIR.joinpath("core8_policy_snapshot.json")

# Files written by this notebook.
CORE8_STATE_TRACE = CORE8_DIR.joinpath("core8_03_refusal_state_trace_counterfactual.csv")
CORE8_EVENT_LOG = CORE8_DIR.joinpath("core8_03_refusal_event_log_counterfactual.csv")

In [15]:
# Load the Core7 event log and check that every column this notebook reads is present.
REQUIRED_COLS = [
    "run_id", "case_id", "antibody_id", "step",
    "attempted", "action", "reason_code", "SoMS"
]

event_df = pd.read_csv(CORE7_EVENT_LOG)

missing = [col for col in REQUIRED_COLS if col not in event_df.columns]
assert not missing, f"Missing required columns: {missing}"

# Work on a new frame: add boolean flags derived from `action`, then order the
# rows so that each (run, antibody, case) sequence is contiguous and sorted by step.
df = (
    event_df
    .assign(
        blocked=lambda frame: frame["action"].isin(["VETO", "FREEZE"]),
        veto=lambda frame: frame["action"] == "VETO",
        freeze=lambda frame: frame["action"] == "FREEZE",
    )
    .sort_values(["run_id", "antibody_id", "case_id", "step"])
    .reset_index(drop=True)
)

df.head()

Unnamed: 0,run_id,case_id,antibody_id,step,attempted,action,reason_code,SoD,SoMS,conflict_flag,mutation_desc,intended_axis,blocked,veto,freeze
0,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,0,True,ALLOW,,0.3,0.0,False,CDR_mut_2,IMMUNO,False,False,False
1,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,1,True,ALLOW,,0.75,0.0,False,CDR_mut_8,IMMUNO,False,False,False
2,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,2,True,ALLOW,,1.75,0.0,False,CDR_mut_13,VISCOSITY,False,False,False
3,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,3,True,ALLOW,,2.9,0.0,False,CDR_mut_24,VISCOSITY,False,False,False
4,core7_04_1767776352,A_ALWAYS_ALLOW,antibody_A,4,True,ALLOW,,4.2,0.6,True,CDR_mut_19,PROCESS,False,False,False


In [16]:
# Load the Core8 policy snapshot.
# The encoding is pinned to UTF-8 so the file is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open(POLICY_PATH, "r", encoding="utf-8") as f:
    POLICY = json.load(f)

# Guard conditions. MIN_STEPS is the step number below which refusal_transition
# never changes stage. MIN_ATTEMPTS is loaded here but is not referenced by the
# other cells visible in this notebook.
MIN_STEPS = POLICY["min_guard_conditions"]["MIN_STEPS_BEFORE_FALLBACK"]
MIN_ATTEMPTS = POLICY["min_guard_conditions"]["MIN_ATTEMPTS_WINDOW"]

ACC = POLICY["accumulative_thresholds"]

# Number of trailing rows used by compute_window_metrics.
WINDOW_SIZE = ACC["WINDOW_SIZE"]

# Thresholds compared with `>=` in refusal_transition:
#   stage 0 -> 1 : T1 (blocked rate) or C1 (veto streak)
#   stage 1 -> 2 : T2 (blocked rate), S2 (windowed SoMS sum) or O1 (action toggle rate)
T1 = ACC["BLOCKED_RATE_THRESHOLD_STAGE1"]
T2 = ACC["BLOCKED_RATE_THRESHOLD_STAGE2"]
C1 = ACC["VETO_STREAK_THRESHOLD"]
O1 = ACC["OSCILLATION_THRESHOLD"]
S2 = ACC["SOMS_CUMSUM_THRESHOLD"]

In [17]:
def compute_window_metrics(sub_df: pd.DataFrame, window_size=None) -> pd.DataFrame:
    """Compute trailing-window metrics for every row of one event sequence.

    For row ``i`` the window is the last ``window_size`` rows ending at ``i``
    (fewer at the start of the sequence). Rows are taken by position, so the
    caller is responsible for ordering ``sub_df``.

    Parameters
    ----------
    sub_df : pd.DataFrame
        Must provide the columns ``blocked``, ``action`` and ``SoMS``.
    window_size : int, optional
        Window length in rows. Defaults to the module-level ``WINDOW_SIZE``.

    Returns
    -------
    pd.DataFrame
        One row per input row (default 0..n-1 index) with the columns
        ``blocked_rate_window`` (mean of ``blocked``), ``veto_streak`` (number
        of consecutive ``"VETO"`` actions at the end of the window, so it can
        never exceed the window length), ``action_toggle_rate`` (fraction of
        adjacent action pairs in the window that differ) and
        ``SoMS_cumsum_window`` (sum of ``SoMS``). An empty input yields an
        empty frame that still carries these four columns.

    Raises
    ------
    ValueError
        If the effective window size is smaller than 1.
    """
    if window_size is None:
        window_size = WINDOW_SIZE
    if window_size < 1:
        # A non-positive size would make every window empty and silently
        # produce NaN rates; fail loudly instead.
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    columns = [
        "blocked_rate_window",
        "veto_streak",
        "action_toggle_rate",
        "SoMS_cumsum_window",
    ]

    if len(sub_df) == 0:
        return pd.DataFrame(columns=columns)

    # Materialise the action column once instead of once per row.
    all_actions = sub_df["action"].tolist()

    rows = []
    for i in range(len(sub_df)):
        start = max(0, i - window_size + 1)
        window = sub_df.iloc[start:i + 1]
        actions = all_actions[start:i + 1]

        blocked_rate = window["blocked"].mean()

        # Veto streak: consecutive VETO actions counted back from the current row.
        streak = 0
        for action in reversed(actions):
            if action != "VETO":
                break
            streak += 1

        # Oscillation: how often the action changes between neighbouring rows.
        toggles = sum(prev != cur for prev, cur in zip(actions, actions[1:]))
        toggle_rate = toggles / max(1, len(actions) - 1)

        soms_cumsum = window["SoMS"].sum()

        rows.append({
            "blocked_rate_window": blocked_rate,
            "veto_streak": streak,
            "action_toggle_rate": toggle_rate,
            "SoMS_cumsum_window": soms_cumsum,
        })

    return pd.DataFrame(rows, columns=columns)

In [18]:
def refusal_transition(
    prev_stage,
    step,
    blocked_rate,
    veto_streak,
    toggle_rate,
    soms_cumsum,
    freeze_flag,
):
    """Decide the refusal stage that follows ``prev_stage`` for one step.

    At most one stage is climbed per call and the stage is never lowered.
    Thresholds are read from the module-level policy constants.

    Returns
    -------
    tuple
        ``(stage, triggered, reason_code)``. ``triggered`` is True only when
        the stage was advanced. ``reason_code`` is an empty string when nothing
        happened after the guard, and ``"REASON_MIN_STEPS_NOT_REACHED"`` when
        the minimum-step guard held the stage.
    """
    # Guard: no transition may fire before the minimum step is reached.
    if step < MIN_STEPS:
        return prev_stage, False, "REASON_MIN_STEPS_NOT_REACHED"

    target = prev_stage
    reason = ""
    escalate = False

    if prev_stage == 0:
        # Stage 0 -> 1: blocked-rate accumulation or a run of consecutive vetoes.
        escalate = blocked_rate >= T1 or veto_streak >= C1
        target, reason = 1, "REASON_BLOCKED_RATE_ACCUMULATION"
    elif prev_stage == 1:
        # Stage 1 -> 2: higher blocked rate, windowed SoMS sum, or oscillation.
        escalate = blocked_rate >= T2 or soms_cumsum >= S2 or toggle_rate >= O1
        target, reason = 2, "REASON_STAGE_ESCALATION"
    elif prev_stage == 2:
        # Stage 2 -> 3: only a FREEZE on the current step terminates.
        escalate = freeze_flag
        target, reason = 3, "REASON_REFUSAL_TERMINATION"

    if escalate:
        return target, True, reason

    return prev_stage, False, ""

In [19]:
# Build the per-step refusal state trace and the stage-transition event log.
# Mode label for each refusal stage, indexed by stage number.
STAGE_MODES = ("NORMAL", "RATE_LIMIT", "PARTIAL_SEAL", "REFUSAL")

state_rows = []
event_rows = []

group_keys = ["run_id", "antibody_id", "case_id"]
for (run_id, ab, case), group in df.groupby(group_keys):
    group = group.reset_index(drop=True)
    window_metrics = compute_window_metrics(group)

    # Every (run, antibody, case) sequence starts in stage 0.
    current_stage = 0

    for pos in range(len(group)):
        event = group.iloc[pos]
        metric = window_metrics.iloc[pos]
        step = event["step"]

        next_stage, fired, reason_code = refusal_transition(
            current_stage,
            step,
            metric["blocked_rate_window"],
            metric["veto_streak"],
            metric["action_toggle_rate"],
            metric["SoMS_cumsum_window"],
            event["freeze"],
        )

        # Identifying fields shared by the trace row and the event row.
        identity = {
            "run_id": run_id,
            "case_id": case,
            "antibody_id": ab,
            "step": step,
        }

        # One trace row per input row, whether or not the stage changed.
        state_rows.append({
            **identity,
            "refusal_stage": next_stage,
            "refusal_mode": STAGE_MODES[next_stage],
            "blocked_rate_window": metric["blocked_rate_window"],
            "veto_streak": metric["veto_streak"],
            "action_toggle_rate": metric["action_toggle_rate"],
            "SoMS_cumsum_window": metric["SoMS_cumsum_window"],
            "refusal_triggered": fired,
            "refusal_reason_code": reason_code,
        })

        # An event is logged only for an actual stage change.
        if fired and next_stage != current_stage:
            event_rows.append({
                **identity,
                "from_stage": current_stage,
                "to_stage": next_stage,
                "reason_code": reason_code,
            })

        current_stage = next_stage

In [20]:
# Explicit output schemas. Without them an empty row list becomes a DataFrame
# with no columns, the CSV is written without a header line, and reading it
# back with pd.read_csv fails with EmptyDataError. The names and order match
# the dict keys appended in the state-trace / event-log loop.
STATE_TRACE_COLUMNS = [
    "run_id",
    "case_id",
    "antibody_id",
    "step",
    "refusal_stage",
    "refusal_mode",
    "blocked_rate_window",
    "veto_streak",
    "action_toggle_rate",
    "SoMS_cumsum_window",
    "refusal_triggered",
    "refusal_reason_code",
]
EVENT_LOG_COLUMNS = [
    "run_id",
    "case_id",
    "antibody_id",
    "step",
    "from_stage",
    "to_stage",
    "reason_code",
]

state_df = pd.DataFrame(state_rows, columns=STATE_TRACE_COLUMNS)
# NOTE: from here on `event_df` is the Core8 transition log; it replaces the
# Core7 event log that was loaded under the same name earlier in the notebook.
event_df = pd.DataFrame(event_rows, columns=EVENT_LOG_COLUMNS)

state_df.to_csv(CORE8_STATE_TRACE, index=False)
event_df.to_csv(CORE8_EVENT_LOG, index=False)

CORE8_STATE_TRACE, CORE8_EVENT_LOG

(PosixPath('/Users/mac/Desktop/De/Developability_Data/core/artifact/core8/core8_03_refusal_state_trace_counterfactual.csv'),
 PosixPath('/Users/mac/Desktop/De/Developability_Data/core/artifact/core8/core8_03_refusal_event_log_counterfactual.csv'))

In [21]:
# Diagnostics for the Core8_03 outputs: row counts, stage distribution and the
# largest value each window metric reached.
print("[Core8_03 Diagnostics]")
for label, frame in (("state_df rows:", state_df), ("event_df rows:", event_df)):
    print(label, len(frame))

# Check whether an empty event_df is plausible, i.e. the stage never changed.
stage_counts = state_df["refusal_stage"].value_counts().sort_index()
print("\n[refusal_stage counts]")
print(stage_counts)

print("\n[max window metrics]")
# label -> (state_df column, cast applied to the maximum before printing)
max_metric_specs = {
    "max blocked_rate_window:": ("blocked_rate_window", float),
    "max veto_streak:": ("veto_streak", int),
    "max action_toggle_rate:": ("action_toggle_rate", float),
    "max SoMS_cumsum_window:": ("SoMS_cumsum_window", float),
}
for label, (column, cast) in max_metric_specs.items():
    print(label, cast(state_df[column].max()))

if not len(event_df):
    print("\n[NOTE] event_log가 빈 이유: stage 전이가 한 번도 발생하지 않았습니다.")
    print("      -> Core7에서 VETO/FREEZE 발생 빈도(또는 연속 VETO)가 낮으면 정상입니다.")

[Core8_03 Diagnostics]
state_df rows: 180
event_df rows: 0

[refusal_stage counts]
refusal_stage
0    180
Name: count, dtype: int64

[max window metrics]
max blocked_rate_window: 0.25
max veto_streak: 1
max action_toggle_rate: 0.5
max SoMS_cumsum_window: 24.199999999999996

[NOTE] event_log가 빈 이유: stage 전이가 한 번도 발생하지 않았습니다.
      -> Core7에서 VETO/FREEZE 발생 빈도(또는 연속 VETO)가 낮으면 정상입니다.
