In [46]:
import pandas as pd
import numpy as np
from pathlib import Path

BASE_DIR = Path("../") 
CORE7_DIR = BASE_DIR / "artifact" / "core7"

EVENT_LOG_PATH = CORE7_DIR / "core7_governance_event_log.csv"

assert EVENT_LOG_PATH.exists(), f"missing file: {EVENT_LOG_PATH}"

print("Loaded:", EVENT_LOG_PATH)
event_log = pd.read_csv(EVENT_LOG_PATH)

event_log.head(10)

Loaded: ../artifact/core7/core7_governance_event_log.csv


Unnamed: 0,run_id,case_id,antibody_id,step,attempted,action,reason_code,SoD,SoMS,conflict_flag,mutation_desc,intended_axis,blocked
0,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,0,True,ALLOW,,0.3,0.0,False,CDR_mut_0,IMMUNO,False
1,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,1,True,ALLOW,,0.8,0.0,False,CDR_mut_5,VISCOSITY,False
2,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,2,True,ALLOW,,1.5,0.0,False,CDR_mut_13,VISCOSITY,False
3,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,3,True,ALLOW,,2.4,0.0,False,CDR_mut_10,IMMUNO,False
4,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,4,True,ALLOW,,4.4,0.0,True,CDR_mut_8,PROCESS,False
5,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,5,True,ALLOW,,6.1,0.0,True,CDR_mut_4,VISCOSITY,False
6,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,6,True,ALLOW,,7.6,0.0,False,CDR_mut_6,IMMUNO,False
7,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,7,True,ALLOW,,9.3,0.0,False,CDR_mut_10,IMMUNO,False
8,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,8,True,ALLOW,,11.6,0.0,True,CDR_mut_3,VISCOSITY,False
9,core7_1767774738,A_ALWAYS_ALLOW,antibody_A,9,True,ALLOW,,13.7,0.0,False,CDR_mut_2,CHARGE,False


In [47]:
print("columns:", list(event_log.columns))

print("\n[unique antibody_id]")
print(event_log["antibody_id"].unique())

print("\n[unique case_id]")
print(event_log["case_id"].unique())

print("\n[unique action]")
print(event_log["action"].unique())

print("\n[attempted counts]")
print(event_log["attempted"].value_counts(dropna=False))

columns: ['run_id', 'case_id', 'antibody_id', 'step', 'attempted', 'action', 'reason_code', 'SoD', 'SoMS', 'conflict_flag', 'mutation_desc', 'intended_axis', 'blocked']

[unique antibody_id]
['antibody_A' 'antibody_B' 'antibody_C']

[unique case_id]
['A_ALWAYS_ALLOW' 'B_GOVERNED']

[unique action]
['ALLOW' 'VETO']

[attempted counts]
attempted
True    180
Name: count, dtype: int64


In [48]:
# CSV에 실제 들어있는 값 기준으로 고정
TARGET_ANTIBODIES = ["antibody_A", "antibody_B", "antibody_C"]
TARGET_CASES = ["A_ALWAYS_ALLOW", "B_GOVERNED"]

df = event_log[
    event_log["antibody_id"].isin(TARGET_ANTIBODIES) &
    event_log["case_id"].isin(TARGET_CASES)
].copy()

df.reset_index(drop=True, inplace=True)

print("rows:", len(df))
df[["antibody_id", "case_id", "step", "attempted", "action"]].head(10) # 분석 대상 필터링 (Antibody A/B/C × Case A/B)

rows: 180


Unnamed: 0,antibody_id,case_id,step,attempted,action
0,antibody_A,A_ALWAYS_ALLOW,0,True,ALLOW
1,antibody_A,A_ALWAYS_ALLOW,1,True,ALLOW
2,antibody_A,A_ALWAYS_ALLOW,2,True,ALLOW
3,antibody_A,A_ALWAYS_ALLOW,3,True,ALLOW
4,antibody_A,A_ALWAYS_ALLOW,4,True,ALLOW
5,antibody_A,A_ALWAYS_ALLOW,5,True,ALLOW
6,antibody_A,A_ALWAYS_ALLOW,6,True,ALLOW
7,antibody_A,A_ALWAYS_ALLOW,7,True,ALLOW
8,antibody_A,A_ALWAYS_ALLOW,8,True,ALLOW
9,antibody_A,A_ALWAYS_ALLOW,9,True,ALLOW


In [49]:
def summarize_counts(sub_df: pd.DataFrame) -> pd.Series:
    attempt_count = int(sub_df["attempted"].sum())
    veto_count = int((sub_df["action"] == "VETO").sum())
    freeze_count = int((sub_df["action"] == "FREEZE").sum())
    blocked_rate = (veto_count + freeze_count) / attempt_count if attempt_count > 0 else 0.0
    
    return pd.Series({
        "attempt_count": attempt_count,
        "veto_count": veto_count,
        "freeze_count": freeze_count,
        "blocked_rate": blocked_rate,
    })

summary_counts = (
    df.groupby(["antibody_id", "case_id"])
      .apply(summarize_counts)
      .reset_index()
)

summary_counts

  .apply(summarize_counts)


Unnamed: 0,antibody_id,case_id,attempt_count,veto_count,freeze_count,blocked_rate
0,antibody_A,A_ALWAYS_ALLOW,30.0,0.0,0.0,0.0
1,antibody_A,B_GOVERNED,30.0,1.0,0.0,0.033333
2,antibody_B,A_ALWAYS_ALLOW,30.0,0.0,0.0,0.0
3,antibody_B,B_GOVERNED,30.0,1.0,0.0,0.033333
4,antibody_C,A_ALWAYS_ALLOW,30.0,0.0,0.0,0.0
5,antibody_C,B_GOVERNED,30.0,1.0,0.0,0.033333


In [50]:
def first_step(sub_df: pd.DataFrame, action: str):
    steps = sub_df.loc[sub_df["action"] == action, "step"]
    return steps.min() if len(steps) > 0 else np.nan

first_block_steps = []

for (ab, case), sub in df.groupby(["antibody_id", "case_id"]):
    first_block_steps.append({
        "antibody_id": ab,
        "case_id": case,
        "first_veto_step": first_step(sub, "VETO"),
        "first_freeze_step": first_step(sub, "FREEZE"),
    })

first_block_df = pd.DataFrame(first_block_steps)
first_block_df # 최초 차단 시점 계산 (first veto / freeze)

Unnamed: 0,antibody_id,case_id,first_veto_step,first_freeze_step
0,antibody_A,A_ALWAYS_ALLOW,,
1,antibody_A,B_GOVERNED,10.0,
2,antibody_B,A_ALWAYS_ALLOW,,
3,antibody_B,B_GOVERNED,10.0,
4,antibody_C,A_ALWAYS_ALLOW,,
5,antibody_C,B_GOVERNED,10.0,


In [51]:
summary_table = (
    summary_counts
    .merge(first_block_df, on=["antibody_id", "case_id"], how="left")
    .sort_values(["antibody_id", "case_id"])
    .reset_index(drop=True)
)

summary_table # 요약 테이블 통합 (제출용 핵심 표)

Unnamed: 0,antibody_id,case_id,attempt_count,veto_count,freeze_count,blocked_rate,first_veto_step,first_freeze_step
0,antibody_A,A_ALWAYS_ALLOW,30.0,0.0,0.0,0.0,,
1,antibody_A,B_GOVERNED,30.0,1.0,0.0,0.033333,10.0,
2,antibody_B,A_ALWAYS_ALLOW,30.0,0.0,0.0,0.0,,
3,antibody_B,B_GOVERNED,30.0,1.0,0.0,0.033333,10.0,
4,antibody_C,A_ALWAYS_ALLOW,30.0,0.0,0.0,0.0,,
5,antibody_C,B_GOVERNED,30.0,1.0,0.0,0.033333,10.0,


In [52]:
pivot_table = summary_table.pivot_table(
    index="antibody_id",
    columns="case_id",
    values=[
        "attempt_count",
        "veto_count",
        "freeze_count",
        "blocked_rate",
        "first_veto_step",
        "first_freeze_step",
    ]
)

pivot_table # Case A vs Case B 비교용 Pivot 테이블

Unnamed: 0_level_0,attempt_count,attempt_count,blocked_rate,blocked_rate,first_veto_step,freeze_count,freeze_count,veto_count,veto_count
case_id,A_ALWAYS_ALLOW,B_GOVERNED,A_ALWAYS_ALLOW,B_GOVERNED,B_GOVERNED,A_ALWAYS_ALLOW,B_GOVERNED,A_ALWAYS_ALLOW,B_GOVERNED
antibody_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2
antibody_A,30.0,30.0,0.0,0.033333,10.0,0.0,0.0,0.0,1.0
antibody_B,30.0,30.0,0.0,0.033333,10.0,0.0,0.0,0.0,1.0
antibody_C,30.0,30.0,0.0,0.033333,10.0,0.0,0.0,0.0,1.0


In [53]:
blocked_evidence = summary_table[
    summary_table["case_id"] == "B_GOVERNED"
][[
    "antibody_id",
    "veto_count",
    "freeze_count",
    "blocked_rate",
    "first_veto_step",
    "first_freeze_step",
]]

blocked_evidence # case B에서 실제로 개입이 차단 됨을 증명 

Unnamed: 0,antibody_id,veto_count,freeze_count,blocked_rate,first_veto_step,first_freeze_step
1,antibody_A,1.0,0.0,0.033333,10.0,
3,antibody_B,1.0,0.0,0.033333,10.0,
5,antibody_C,1.0,0.0,0.033333,10.0,


In [54]:
df.groupby(["case_id", "action"]).size().unstack(fill_value=0)

action,ALLOW,VETO
case_id,Unnamed: 1_level_1,Unnamed: 2_level_1
A_ALWAYS_ALLOW,90,0
B_GOVERNED,87,3


In [55]:
def summarize_sentence(row):
    if row.case_id == "A_ALWAYS_ALLOW":
        return (
            f"Antibody {row.antibody_id} in always-allow case "
            f"experienced no governance intervention "
            f"(blocked rate {row.blocked_rate:.2f})."
        )
    else:
        return (
            f"Antibody {row.antibody_id} in governed case experienced "
            f"{int(row.veto_count)} veto(s) and {int(row.freeze_count)} freeze(s), "
            f"with blocked rate {row.blocked_rate:.2f}."
        )