core5_03_ablation_failure_matrix.ipynb

목적:
- 사람들이 흔히 하는 "해결책"들을 ablation 축으로 하나씩 적용해보고
  왜 구조적으로 계속 불안정(toggles/bursts/flip-flops)이 터지는지 표로 고정한다.

중요:
- cutoff 없음
- 성능 평가 없음
- 오직 decision dynamics (불안정성)만 계산
- 기존 컬럼명 절대 변경 금지

In [1]:
import json
import itertools
import random

import numpy as np
import pandas as pd
from pathlib import Path

POLICY_PATH = Path("../artifact/core2/policy_summary.json")
TRACE_PATH = Path("../artifact/core2/prediction_trace.csv")
CANDIDATE_PATH = Path("../artifact/core2/mutation_candidate_set.csv")

print("policy exists:", POLICY_PATH.exists())
print("trace exists:", TRACE_PATH.exists())
print("candidate exists:", CANDIDATE_PATH.exists())

with open(POLICY_PATH, "r") as f:
    POLICY_BASE = json.load(f)

trace_base = pd.read_csv(TRACE_PATH)
cand_base = pd.read_csv(CANDIDATE_PATH)

print("\n=== prediction_trace base shape ===")
print(trace_base.shape)
print("trace columns:", trace_base.columns.tolist())

print("\n=== mutation_candidate_set base shape ===")
print(cand_base.shape)
print("candidate columns:", cand_base.columns.tolist())

trace_base.head()

policy exists: True
trace exists: True
candidate exists: True

=== prediction_trace base shape ===
(60, 8)
trace columns: ['antibody_key', 'step', 'sequence_current', 'pred_score', 'pred_score_delta', 'decision', 'mutation_id_applied', 'intervention_count_cum']

=== mutation_candidate_set base shape ===
(180, 10)
candidate columns: ['step', 'mutation_id', 'mutation_type', 'mutation_pos', 'from_aa', 'to_aa', 'sequence_before', 'sequence_after', 'antibody_key', 'candidate_group']


Unnamed: 0,antibody_key,step,sequence_current,pred_score,pred_score_delta,decision,mutation_id_applied,intervention_count_cum
0,GDPa1-001,1,EAKIIFEVDWQCADHITYAVHVQIRWKAGQMKFHMEDPENNYKCRV...,2.013943,0.013943,MUTATE,GDPa1-001_mut0_1,1
1,GDPa1-001,2,EAKIIFEVDWQCADHITYAVHVQIRWKAGQMKFHMEDPENNYKCRV...,1.966444,-0.047499,HOLD,,1
2,GDPa1-001,3,EAKIIFEVDWQCADHITYAVHVQIRWKAGQMKFHMEDPENNYKCRV...,1.943947,-0.022497,HOLD,,1
3,GDPa1-001,4,EAKIIFEVDWQCADHITYAVHVQIRWKAGQMKFHMEDPENNYKCRV...,1.916268,-0.027679,HOLD,,1
4,GDPa1-001,5,EAKIIFEVDWQCADHITYAVHVQIRWKAGQMKFHMEDPENNYKCRV...,1.939915,0.023647,MUTATE,GDPa1-001_mut1_5,2


In [2]:
TARGET_ANTIBODIES = [
    "GDPa1-001",  # A_boundary / abagovomab
    "GDPa1-045",  # B_conflict / cixutumumab
    "GDPa1-183",  # C_obvious_risk / prolgolimab
]

# base trace에서 3개 항체가 있는지 확인
present = set(trace_base["antibody_key"].unique().tolist())
missing = [x for x in TARGET_ANTIBODIES if x not in present]
print("missing in trace:", missing)

trace_base_3 = trace_base[trace_base["antibody_key"].isin(TARGET_ANTIBODIES)].copy()
trace_base_3 = trace_base_3.sort_values(["antibody_key", "step"]).reset_index(drop=True)

# 초기 상태: step 최소 row에서 sequence_current / pred_score를 그대로 사용
initial_states = {}
for ab in TARGET_ANTIBODIES:
    g = trace_base_3[trace_base_3["antibody_key"] == ab]
    if len(g) == 0:
        continue
    first_row = g.iloc[0]
    initial_states[ab] = {
        "sequence_current": first_row["sequence_current"],
        "pred_score": float(first_row["pred_score"])
    }

initial_states # 후보 항체 3개 고정 + base initial state 추출

missing in trace: []


{'GDPa1-001': {'sequence_current': 'EAKIIFEVDWQCADHITYAVHVQIRWKAGQMKFHMEDPENNYKCRVEPDVLYNWHDCILDIEKKRNGNNHKDYGVIGRPKVIMCICMPKDHWMHSPRFKFIVVKWQWPNIFTSDCEFGQY',
  'pred_score': 2.013942679845788},
 'GDPa1-045': {'sequence_current': 'DPPYRTKVAEVKMELQGRAKTGTELTYHFNGVTAYMSAENLICIWDDSDVFFSVGKTYQHVHLPNRTREIIDMAWVIWIADCIDCMDTIKSHVFWWSISQHEEQNQQRCECPMEIHHVWF',
  'pred_score': 4.030581925183281},
 'GDPa1-183': {'sequence_current': 'QGKRIDRVECVADIGQSSHPCGPAPKRLQVSFHLHCWVCMCCWSTTGCTDGDYDIPEWIWYCIDQWWTMKHMIKPFLRMDARYWEDVHTKFNDINLGRVLYTAVLEFKEEVFKLYHMHKT',
  'pred_score': 4.020457183621493}}

In [3]:
# A1 noise_scale(예측 흔들림) 변화
NOISE_SCALES = [0.01, 0.05, 0.10]

# A2 cooldown 변화
COOLDOWNS = [0, 2, 5]

# A3 lookback_window 변화
LOOKBACK_WINDOWS = [1, 3, 5]

# A4 risk_score 조합 방식 변화
# - "mean" : (base_score + noise) 형태 (기본)
# - "single" : single_metric 처럼 동작하는 대체 (base_score를 더 민감하게 흔들리는 방식)
RISK_MODES = ["mean", "single"]

# A5 mutation 후보 수 변화(후보 적음/많음)
# candidate_group이 있으면 group별 필터로 변화를 주고,
# 없으면 step별 샘플링 n으로 강제한다.
CANDIDATE_SAMPLE_N = [1, 5, 20]  # step마다 후보를 몇 개까지 보게 할지(상한)

grid = list(itertools.product(
    NOISE_SCALES,
    COOLDOWNS,
    LOOKBACK_WINDOWS,
    RISK_MODES,
    CANDIDATE_SAMPLE_N
))

print("grid size:", len(grid))
print("first 5:", grid[:5]) # Ablation grid 정의 (A1~A5)

grid size: 162
first 5: [(0.01, 0, 1, 'mean', 1), (0.01, 0, 1, 'mean', 5), (0.01, 0, 1, 'mean', 20), (0.01, 0, 1, 'single', 1), (0.01, 0, 1, 'single', 5)]


In [4]:
def simulate_pred_score(prev_score, noise_scale, rng):
    # 예측 모델 아님. 의도적으로 흔들리는 score 생성.
    noise = rng.uniform(-noise_scale, noise_scale)
    return prev_score + noise


def apply_risk_mode(base_score, mode, rng, noise_scale):
    # "risk_score 조합 방식 변화"를 예측 생성 방식으로 반영
    # - mean: 표준적인 흔들림
    # - single: 특정 지표만 보는 것처럼 더 들쑥날쑥한 흔들림(스케일을 더 키움)
    if mode == "mean":
        return base_score
    if mode == "single":
        # single metric만 본다고 가정한 형태의 민감도 증가(노이즈 증폭)
        return base_score + rng.uniform(-noise_scale*1.5, noise_scale*1.5)
    return base_score


def build_candidate_view(cand_df, antibody_key, step, sample_n, rng):
    # step+antibody 기준 후보들을 가져온 뒤, sample_n만큼만 보게 만든다.
    # candidate_group 컬럼은 유지하되, 여기서 이름 변경 없음.
    sub = cand_df[
        (cand_df["antibody_key"] == antibody_key) &
        (cand_df["step"] == step)
    ]
    if len(sub) == 0:
        return sub
    
    if len(sub) <= sample_n:
        return sub
    
    # sample without replacement
    idx = rng.choice(sub.index.values, size=sample_n, replace=False)
    return sub.loc[idx]


def decision_rule(delta, cooldown_left):
    # Core2 구조를 유지: 악화면 개입, 개선이면 유지
    # delta > 0이면 악화라고 정의(네 기존 코드 흐름 그대로)
    if cooldown_left > 0:
        return "HOLD"
    return "MUTATE" if delta > 0 else "HOLD" # 시뮬레이션 유틸 함수 (정책/예측/후보 필터/지표 계산)

In [5]:
def run_ablation_simulation_for_antibody(
    antibody_key,
    initial_sequence_current,
    initial_pred_score,
    cand_df,
    noise_scale,
    cooldown,
    lookback_window,
    risk_mode,
    candidate_sample_n,
    max_steps,
    seed
):
    rng = np.random.default_rng(seed)
    
    trace_rows = []
    mutation_rows = []
    
    sequence_current = initial_sequence_current
    pred_history = [initial_pred_score]  # lookback 계산용
    intervention_count = 0
    cooldown_left = 0
    
    prev_decision = None
    
    for step in range(1, max_steps + 1):
        base_score = pred_history[-1]
        base_score = apply_risk_mode(base_score, risk_mode, rng, noise_scale)
        
        pred_score = simulate_pred_score(base_score, noise_scale, rng)
        
        # lookback 적용: pred_score(t) - pred_score(t-lookback)
        if len(pred_history) >= lookback_window:
            ref_score = pred_history[-lookback_window]
        else:
            ref_score = pred_history[0]
        
        pred_score_delta = pred_score - ref_score
        
        # decision
        decision = decision_rule(pred_score_delta, cooldown_left)
        
        mutation_id_applied = None
        
        if decision == "MUTATE":
            candidates_view = build_candidate_view(
                cand_df=cand_df,
                antibody_key=antibody_key,
                step=step,
                sample_n=candidate_sample_n,
                rng=rng
            )
            
            if len(candidates_view) > 0:
                chosen = candidates_view.sample(1, random_state=seed + step).iloc[0]
                
                # 컬럼명 그대로 사용
                sequence_current = chosen["sequence_after"]
                mutation_id_applied = chosen["mutation_id"]
                
                intervention_count += 1
                cooldown_left = cooldown  # cooldown 발동
                
                mutation_rows.append({
                    "antibody_key": antibody_key,
                    "step": step,
                    "mutation_id": chosen["mutation_id"],
                    "mutation_type": chosen["mutation_type"],
                    "mutation_pos": chosen["mutation_pos"],
                    "from_aa": chosen["from_aa"],
                    "to_aa": chosen["to_aa"],
                    "sequence_before": chosen["sequence_before"],
                    "sequence_after": chosen["sequence_after"],
                    "candidate_group": chosen.get("candidate_group", None),
                })
        
        # cooldown 감소
        if cooldown_left > 0:
            cooldown_left -= 1
        
        trace_rows.append({
            "antibody_key": antibody_key,
            "step": step,
            "sequence_current": sequence_current,
            "pred_score": pred_score,
            "pred_score_delta": pred_score_delta,
            "decision": decision,
            "mutation_id_applied": mutation_id_applied,
            "intervention_count_cum": intervention_count,
            "cooldown_left": cooldown_left,
            "prev_decision": prev_decision
        })
        
        pred_history.append(pred_score)
        prev_decision = decision
    
    trace_df = pd.DataFrame(trace_rows)
    mut_df = pd.DataFrame(mutation_rows)
    return trace_df, mut_df # 단일 항체 시뮬레이터 (lookback/cooldown 반영 + 로그 생성)

In [6]:
def compute_toggle_rate(decisions):
    prev = decisions.shift(1)
    toggle = (decisions != prev).astype(int)
    toggle.iloc[0] = 0
    return float(toggle.sum() / len(toggle)), int(toggle.sum())


def compute_burst_mean(decisions):
    bursts = []
    current = 0
    for d in decisions.tolist():
        if d == "MUTATE":
            current += 1
        else:
            if current > 0:
                bursts.append(current)
                current = 0
    if current > 0:
        bursts.append(current)
    if len(bursts) == 0:
        return 0.0, 0, 0  # mean, max, count
    return float(np.mean(bursts)), int(np.max(bursts)), int(len(bursts))


def compute_flip_flop_count(decisions):
    # HOLD -> MUTATE -> HOLD  (짧게 반전)
    d = decisions.tolist()
    count = 0
    for i in range(2, len(d)):
        if d[i-2] == "HOLD" and d[i-1] == "MUTATE" and d[i] == "HOLD":
            count += 1
    return int(count) #불안정 지표 계산 함수 (toggle/burst/intervention/flip-flop)

In [7]:
MAX_STEPS = int(trace_base_3["step"].max()) if "step" in trace_base_3.columns else 20

records = []
exp_id = 0

for (noise_scale, cooldown, lookback_window, risk_mode, candidate_sample_n) in grid:
    exp_id += 1
    exp_key = f"EXP_{exp_id:03d}"
    
    for ab in TARGET_ANTIBODIES:
        if ab not in initial_states:
            continue
        
        trace_sim, mut_sim = run_ablation_simulation_for_antibody(
            antibody_key=ab,
            initial_sequence_current=initial_states[ab]["sequence_current"],
            initial_pred_score=initial_states[ab]["pred_score"],
            cand_df=cand_base,
            noise_scale=noise_scale,
            cooldown=cooldown,
            lookback_window=lookback_window,
            risk_mode=risk_mode,
            candidate_sample_n=candidate_sample_n,
            max_steps=MAX_STEPS,
            seed=42 + exp_id
        )
        
        decisions = trace_sim["decision"]
        
        toggle_rate, toggle_count = compute_toggle_rate(decisions)
        burst_mean, burst_max, burst_count = compute_burst_mean(decisions)
        flip_flop_count = compute_flip_flop_count(decisions)
        
        interventions = int(trace_sim["intervention_count_cum"].max())
        
        records.append({
            "exp_id": exp_key,
            "antibody_key": ab,
            "noise_scale": noise_scale,
            "cooldown": cooldown,
            "lookback_window": lookback_window,
            "risk_mode": risk_mode,
            "candidate_sample_n": candidate_sample_n,
            "toggle_rate": toggle_rate,
            "toggle_count": toggle_count,
            "burst_mean": burst_mean,
            "burst_max": burst_max,
            "burst_count": burst_count,
            "interventions": interventions,
            "flip_flop_count": flip_flop_count
        })

failure_matrix = pd.DataFrame(records)

print("failure_matrix shape:", failure_matrix.shape)
failure_matrix.head() # Ablation grid 실행 (failure_matrix 생성)

failure_matrix shape: (486, 14)


Unnamed: 0,exp_id,antibody_key,noise_scale,cooldown,lookback_window,risk_mode,candidate_sample_n,toggle_rate,toggle_count,burst_mean,burst_max,burst_count,interventions,flip_flop_count
0,EXP_001,GDPa1-001,0.01,0,1,mean,1,0.4,8,2.0,3,5,10,1
1,EXP_001,GDPa1-045,0.01,0,1,mean,1,0.4,8,2.0,3,5,10,1
2,EXP_001,GDPa1-183,0.01,0,1,mean,1,0.4,8,2.0,3,5,10,1
3,EXP_002,GDPa1-001,0.01,0,1,mean,5,0.5,10,2.2,3,5,11,2
4,EXP_002,GDPa1-045,0.01,0,1,mean,5,0.5,10,2.2,3,5,11,2


In [8]:
check = failure_matrix.groupby("antibody_key")["exp_id"].nunique().reset_index()
check # sanity check (항체별로 최소 1개씩 있는지)

Unnamed: 0,antibody_key,exp_id
0,GDPa1-001,162
1,GDPa1-045,162
2,GDPa1-183,162


In [9]:
OUTPUT_DIR = Path("../artifact/core5")
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

out_path = OUTPUT_DIR / "failure_matrix.csv"
failure_matrix.to_csv(out_path, index=False)

out_path

PosixPath('../artifact/core5/failure_matrix.csv')

In [10]:
summary = (
    failure_matrix
    .groupby(["antibody_key"])
    .agg(
        toggle_rate_mean=("toggle_rate", "mean"),
        toggle_rate_p90=("toggle_rate", lambda x: np.percentile(x, 90)),
        burst_mean_mean=("burst_mean", "mean"),
        interventions_mean=("interventions", "mean"),
        flip_flop_mean=("flip_flop_count", "mean"),
    )
    .reset_index()
)

summary

Unnamed: 0,antibody_key,toggle_rate_mean,toggle_rate_p90,burst_mean_mean,interventions_mean,flip_flop_mean
0,GDPa1-001,0.402778,0.7,1.855401,6.697531,3.080247
1,GDPa1-045,0.402778,0.7,1.855401,6.697531,3.080247
2,GDPa1-183,0.402778,0.7,1.855401,6.697531,3.080247
