"""
core2_03_prediction_trigger_simulation.ipynb

목적:
- 예측값을 설계 개입 트리거로 사용할 경우,
  시간이 지남에 따라 설계 개입이 어떻게 누적되는지를 관찰한다.
- '예측이 흔들릴 때 개입/유지가 교차되는 구조'를
  step 단위 시뮬레이션으로 명시적으로 만든다.

중요:
- cutoff 없음
- 성능 평가 없음
- 오직 decision dynamics만 기록한다.
"""

In [5]:
import json
import random
import pandas as pd
from pathlib import Path

# Seed Python's `random` module so the simulated score noise is repeatable.
random.seed(42)

# Load policy_summary.json.
policy_path = Path("../artifact/core2/policy_summary.json")

# Read the JSON as UTF-8 explicitly instead of relying on the platform's
# locale-dependent default encoding.
with open(policy_path, "r", encoding="utf-8") as f:
    POLICY = json.load(f)

POLICY

# Load the mutation candidate set.
mutation_candidates = pd.read_csv(
    "../artifact/core2/mutation_candidate_set.csv"
)

mutation_candidates.head()

# Load the Core 1 developability data.
develop_df = pd.read_csv(
    "../../data_csv/Antibody_Developability.csv"
)

develop_df.head()

Unnamed: 0,antibody_id,antibody_name,vh_protein_sequence,vl_protein_sequence,light_aligned_aho,heavy_aligned_aho,hc_subtype,lc_subtype,hierarchical_cluster_IgG_isotype_stratified_fold
0,GDPa1-001,abagovomab,QVKLQESGAELARPGASVKLSCKASGYTFTNYWMQWVKQRPGQGLD...,DIELTQSPASLSASVGETVTITCQASENIYSYLAWHQQKQGKSPQL...,DIELTQSPASLSASVGETVTITCQAS--ENIY------SYLAWHQQ...,QVKLQES-GAELARPGASVKLSCKASG-YTFTN-----YWMQWVKQ...,IgG1,Kappa,2
1,GDPa1-002,abituzumab,QVQLQQSGGELAKPGASVKVSCKASGYTFSSFWMHWVRQAPGQGLE...,DIQMTQSPSSLSASVGDRVTITCRASQDISNYLAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCRAS--QDIS------NYLAWYQQ...,QVQLQQS-GGELAKPGASVKVSCKASG-YTFSS-----FWMHWVRQ...,IgG2,Kappa,0
2,GDPa1-003,abrezekimab,QVTLKESGPVLVKPTETLTLTCTVSGFSLTNYHVQWIRQPPGKALE...,DIQMTQSPSSLSASVGDRVTITCLASEDISNYLAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCLAS--EDIS------NYLAWYQQ...,QVTLKES-GPVLVKPTETLTLTCTVSG-FSLTN-----YHVQWIRQ...,IgG4,Kappa,2
3,GDPa1-004,abrilumab,QVQLVQSGAEVKKPGASVKVSCKVSGYTLSDLSIHWVRQAPGKGLE...,DIQMTQSPSSVSASVGDRVTITCRASQGISSWLAWYQQKPGKAPKL...,DIQMTQSPSSVSASVGDRVTITCRAS--QGIS------SWLAWYQQ...,QVQLVQS-GAEVKKPGASVKVSCKVSG-YTLSD-----LSIHWVRQ...,IgG2,Kappa,0
4,GDPa1-005,adalimumab,EVQLVESGGGLVQPGRSLRLSCAASGFTFDDYAMHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQGIRNYLAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCRAS--QGIR------NYLAWYQQ...,EVQLVES-GGGLVQPGRSLRLSCAASG-FTFDD-----YAMHWVRQ...,IgG1,Kappa,0


In [None]:
# Antibody IDs followed through the simulation (names noted alongside).
TARGET_ANTIBODIES = [
    "GDPa1-001",  # abagovomab
    "GDPa1-045",  # cixutumumab
    "GDPa1-183",  # prolgolimab
]

# Restrict the developability table to the target antibodies and renumber
# the rows from zero.
is_target = develop_df["antibody_id"].isin(TARGET_ANTIBODIES)
develop_df = develop_df.loc[is_target].reset_index(drop=True)

develop_df[["antibody_id", "antibody_name"]].drop_duplicates()

Unnamed: 0,antibody_id
0,GDPa1-001
1,GDPa1-045
2,GDPa1-183


risk_score 정의 (연속값)

❗ cutoff 금지
❗ cluster index를 연속 예측 신호처럼 사용

In [None]:
# The fold/cluster index is cast to float and used as a continuous
# prediction-like signal; no cutoff is applied.
fold_column = "hierarchical_cluster_IgG_isotype_stratified_fold"
develop_df["risk_score"] = develop_df[fold_column].astype(float)

develop_df[["antibody_id", "risk_score"]]

KeyError: "None of [Index(['stability_score', 'aggregation_score', 'solubility_score'], dtype='object')] are in the [index]"

In [None]:
# Starting sequence and starting score for every target antibody.
initial_states = {}

for ab in TARGET_ANTIBODIES:
    # First developability row for this antibody supplies the starting score.
    in_develop = develop_df["antibody_id"] == ab
    base_row = develop_df.loc[in_develop].iloc[0]

    # First candidate row for this antibody supplies the pre-mutation sequence.
    in_candidates = mutation_candidates["antibody_key"] == ab
    seq_row = mutation_candidates.loc[in_candidates].iloc[0]

    initial_states[ab] = dict(
        sequence=seq_row["sequence_before"],
        risk_score=base_row["risk_score"],
    )

initial_states

예측 스코어 변화 시뮬레이션 함수

실제 예측 모델 ❌
흔들리는 예측값을 의도적으로 생성

In [None]:
def simulate_pred_score(prev_score, noise_scale=0.05):
    """Return ``prev_score`` perturbed by one draw of uniform noise.

    This is not a real prediction model: it deliberately produces a
    fluctuating score by adding a value drawn uniformly between
    ``-noise_scale`` and ``noise_scale`` from the module-level ``random``
    generator.

    Args:
        prev_score: Score from the previous step.
        noise_scale: Half-width of the uniform noise interval.

    Returns:
        The perturbed score.
    """
    lower, upper = -noise_scale, noise_scale
    return prev_score + random.uniform(lower, upper)

In [None]:
def run_simulation_for_antibody(
    antibody_key,
    base_sequence,
    base_score,
    mutation_candidates,
    policy,
    max_steps=20
):
    """Simulate step-wise design decisions for a single antibody.

    At every step a new score is produced by ``simulate_pred_score``. If the
    score rose relative to the previous step (``delta > 0``) the decision is
    ``"MUTATE"``, otherwise ``"HOLD"``. On ``"MUTATE"``, one candidate
    registered for this antibody and this step is picked uniformly at random
    and its ``sequence_after`` becomes the current sequence. When no
    candidate exists for the step, the decision is still recorded as
    ``"MUTATE"`` but the sequence and the intervention counter are unchanged.

    Args:
        antibody_key: Value matched against the ``antibody_key`` column of
            ``mutation_candidates``.
        base_sequence: Sequence in effect before any mutation is applied.
        base_score: Score used as the "previous" score of step 1.
        mutation_candidates: DataFrame read with the columns
            ``antibody_key``, ``step``, ``sequence_after`` and
            ``mutation_id``.
        policy: Accepted for interface compatibility; the current decision
            rule does not consult it.
        max_steps: Number of steps to run (steps are numbered from 1).

    Returns:
        A ``(trace, applied)`` tuple of DataFrames: ``trace`` has one row per
        step, ``applied`` has one row (the full candidate record) per
        mutation that was actually applied and is empty if none were.
    """
    trace = []
    applied_mutations = []

    # Loop-invariant: narrow the candidate table to this antibody once
    # instead of re-filtering the whole table on every step.
    own_candidates = mutation_candidates[
        mutation_candidates["antibody_key"] == antibody_key
    ]

    current_sequence = base_sequence
    prev_score = base_score
    intervention_count = 0

    for step in range(1, max_steps + 1):
        current_score = simulate_pred_score(prev_score)
        delta = current_score - prev_score

        # Decision rule: intervene only when the score went up.
        decision = "MUTATE" if delta > 0 else "HOLD"

        mutation_id_applied = None

        if decision == "MUTATE":
            candidates = own_candidates[own_candidates["step"] == step]

            if not candidates.empty:
                # Pick via the `random` module rather than DataFrame.sample:
                # sample() draws from NumPy's RNG, which `random.seed` does
                # not control, so the choice would differ between runs.
                chosen = candidates.iloc[random.randrange(len(candidates))]
                current_sequence = chosen["sequence_after"]
                mutation_id_applied = chosen["mutation_id"]
                intervention_count += 1

                applied_mutations.append(chosen.to_dict())

        trace.append({
            "antibody_key": antibody_key,
            "step": step,
            "sequence_current": current_sequence,
            "pred_score": current_score,
            "pred_score_delta": delta,
            "decision": decision,
            "mutation_id_applied": mutation_id_applied,
            "intervention_count_cum": intervention_count
        })

        prev_score = current_score

    return pd.DataFrame(trace), pd.DataFrame(applied_mutations)

In [None]:
# Run the simulation for every target antibody, in list order.
simulation_results = [
    run_simulation_for_antibody(
        antibody_key=target,
        base_sequence=initial_states[target]["sequence"],
        base_score=initial_states[target]["risk_score"],
        mutation_candidates=mutation_candidates,
        policy=POLICY,
        max_steps=20
    )
    for target in TARGET_ANTIBODIES
]

# Split the (trace, applied-mutations) pairs into two parallel lists.
all_traces = [trace for trace, _ in simulation_results]
all_mutations = [applied for _, applied in simulation_results]

In [None]:
# Stack the per-antibody step traces into one table.
prediction_trace = pd.concat(all_traces, ignore_index=True)

prediction_trace.head()

# Stack the per-antibody logs; only mutations actually applied appear here.
applied_mutation_log = pd.concat(all_mutations, ignore_index=True)

applied_mutation_log.head()

# Make sure the artifact directory exists before writing.
output_dir = Path("../artifact/core2")
output_dir.mkdir(parents=True, exist_ok=True)

prediction_trace_path = output_dir.joinpath("prediction_trace.csv")
mutation_applied_path = output_dir.joinpath("mutation_applied_log.csv")

# Write both tables as CSV without the index column.
for frame, destination in (
    (prediction_trace, prediction_trace_path),
    (applied_mutation_log, mutation_applied_path),
):
    frame.to_csv(destination, index=False)

prediction_trace_path, mutation_applied_path