# Core10_03d — Alternative Candidate Selection from Developability DB

목적:
- post-shutdown 이후 실제 운영 항체가 1개뿐인 문제를 해결
- Developability DB를 외부 대체 후보 풀(candidate pool)로 사용
- Core10 철학: "설계 없이 운영 가능한 항체를 선택"

중요:
- post-shutdown 항체는 developability DB에 존재하지 않음 (세계관 분리)
- 따라서 ref 항체와의 직접 비교는 수행하지 않음
- developability DB 내부 상대 안정성만으로 fallback 후보를 선택

In [2]:
from pathlib import Path
import pandas as pd
import numpy as np

DEV_PATH = Path("../../data_csv/Antibody_Developability.csv")
assert DEV_PATH.exists(), f"File not found: {DEV_PATH.resolve()}"

dev = pd.read_csv(DEV_PATH)

print("dev rows:", len(dev))
print("dev cols:", dev.columns.tolist())

dev rows: 246
dev cols: ['antibody_id', 'antibody_name', 'vh_protein_sequence', 'vl_protein_sequence', 'light_aligned_aho', 'heavy_aligned_aho', 'hc_subtype', 'lc_subtype', 'hierarchical_cluster_IgG_isotype_stratified_fold']


In [3]:
cluster_col = "hierarchical_cluster_IgG_isotype_stratified_fold"

dev["cluster_size"] = (
    dev.groupby(cluster_col)["antibody_id"]
       .transform("count")
)

# base risks
dev["risk_cluster"] = 1.0 / dev["cluster_size"]

ISOTYPE_RISK = {
    "IgG1": 0.0,
    "IgG4": 0.1,
    "IgG2": 0.2,
    "IgG3": 0.3
}
dev["risk_isotype"] = dev["hc_subtype"].map(ISOTYPE_RISK).fillna(0.25)

dev["base_developability_risk"] = (
    0.7 * dev["risk_cluster"] +
    0.3 * dev["risk_isotype"]
)

# normalize
dev["base_developability_risk"] /= dev["base_developability_risk"].max()

dev[[
    "antibody_id",
    "hc_subtype",
    "cluster_size",
    "base_developability_risk"
]].head()

Unnamed: 0,antibody_id,hc_subtype,cluster_size,base_developability_risk
0,GDPa1-001,IgG1,48,0.193882
1,GDPa1-002,IgG2,54,0.970028
2,GDPa1-003,IgG4,48,0.592726
3,GDPa1-004,IgG2,54,0.970028
4,GDPa1-005,IgG1,54,0.17234


In [4]:
# 운영 맥락 가중치 (의도적으로 거칠게)
# "설계 없이 오래 굴릴 수 있는가" 관점

CONTEXT_PENALTY = {
    "IgG1": 0.0,   # 운영 최우수
    "IgG4": 0.05,  # 약간의 리스크
    "IgG2": 0.15,
    "IgG3": 0.30
}

dev["context_penalty"] = dev["hc_subtype"].map(CONTEXT_PENALTY).fillna(0.2)

# Core10 composite risk
dev["core10_operational_risk"] = (
    0.6 * dev["base_developability_risk"] +
    0.4 * dev["context_penalty"]
)

dev["core10_operational_risk"] /= dev["core10_operational_risk"].max()

dev.sort_values("core10_operational_risk").head(10)

Unnamed: 0,antibody_id,antibody_name,vh_protein_sequence,vl_protein_sequence,light_aligned_aho,heavy_aligned_aho,hc_subtype,lc_subtype,hierarchical_cluster_IgG_isotype_stratified_fold,cluster_size,risk_cluster,risk_isotype,base_developability_risk,context_penalty,core10_operational_risk
59,GDPa1-060,domagrozumab,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKASQDVSTAVAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCKAS--QDVS------TAVAWYQQ...,EVQLLES-GGGLVQPGGSLRLSCAASG-FTFSS-----YAMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
41,GDPa1-042,certolizumab,EVQLVESGGGLVQPGGSLRLSCAASGYVFTDYGMNWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKASQNVGTNVAWYQQKPGKAPKA...,DIQMTQSPSSLSASVGDRVTITCKAS--QNVG------TNVAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YVFTD-----YGMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
196,GDPa1-197,rontalizumab,EVQLVESGGGLVQPGGSLRLSCATSGYTFTEYIIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQSVSTSSYSYMHWYQQKPGK...,DIQMTQSPSSLSASVGDRVTITCRAS--QSVSTS--SYSYMHWYQQ...,EVQLVES-GGGLVQPGGSLRLSCATSG-YTFTE-----YIIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
45,GDPa1-046,clazakizumab,EVQLVESGGGLVQPGGSLRLSCAASGFSLSNYYVTWVRQAPGKGLE...,AIQMTQSPSSLSASVGDRVTITCQASQSINNELSWYQQKPGKAPKL...,AIQMTQSPSSLSASVGDRVTITCQAS--QSIN------NELSWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FSLSN-----YYVTWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
187,GDPa1-188,ranibizumab,EVQLVESGGGLVQPGGSLRLSCAASGYDFTHYGMNWVRQAPGKGLE...,DIQLTQSPSSLSASVGDRVTITCSASQDISNYLNWYQQKPGKAPKV...,DIQLTQSPSSLSASVGDRVTITCSAS--QDIS------NYLNWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YDFTH-----YGMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
52,GDPa1-053,dacetuzumab,EVQLVESGGGLVQPGGSLRLSCAASGYSFTGYYIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRSSQSLVHSNGNTFLHWYQQKPG...,DIQMTQSPSSLSASVGDRVTITCRSS--QSLVHSN-GNTFLHWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YSFTG-----YYIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
186,GDPa1-187,ramucirumab,EVQLVQSGGGLVKPGGSLRLSCAASGFTFSSYSMNWVRQAPGKGLE...,DIQMTQSPSSVSASIGDRVTITCRASQGIDNWLGWYQQKPGKAPKL...,DIQMTQSPSSVSASIGDRVTITCRAS--QGID------NWLGWYQQ...,EVQLVQS-GGGLVKPGGSLRLSCAASG-FTFSS-----YSMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
181,GDPa1-182,prezalumab,EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYWMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQGISNWLAWYQQKPEKAPKS...,DIQMTQSPSSLSASVGDRVTITCRAS--QGIS------NWLAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTFSS-----YWMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
61,GDPa1-062,duligotuzumab,EVQLVESGGGLVQPGGSLRLSCAASGFTLSGDWIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQNIATDVAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCRAS--QNIA------TDVAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTLSG-----DWIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
180,GDPa1-181,prasinezumab,EVQLVESGGGLVQPGGSLRLSCAASGFTFSNYGMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKSIQTLLYSSNQKNYLAWFQQKP...,DIQMTQSPSSLSASVGDRVTITCKSI--QTLLYSSNQKNYLAWFQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTFSN-----YGMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673


In [27]:
ALPHA = 0.8  # 보수성 계수 (클수록 위험에 민감)

dev["proxy_survivability_score"] = (
    1.0 - ALPHA * dev["developability_risk"]
).clip(lower=0)

dev.sort_values(
    "proxy_survivability_score",
    ascending=False
)[[
    "antibody_id",
    "proxy_survivability_score",
    "developability_risk",
    "cluster_size",
    "hc_subtype"
]].head(10)

Unnamed: 0,antibody_id,proxy_survivability_score,developability_risk,cluster_size,hc_subtype
160,GDPa1-161,0.862128,0.17234,54,IgG1
174,GDPa1-175,0.862128,0.17234,54,IgG1
68,GDPa1-069,0.862128,0.17234,54,IgG1
25,GDPa1-026,0.862128,0.17234,54,IgG1
186,GDPa1-187,0.862128,0.17234,54,IgG1
181,GDPa1-182,0.862128,0.17234,54,IgG1
180,GDPa1-181,0.862128,0.17234,54,IgG1
178,GDPa1-179,0.862128,0.17234,54,IgG1
176,GDPa1-177,0.862128,0.17234,54,IgG1
65,GDPa1-066,0.862128,0.17234,54,IgG1


In [28]:
# reference survivability (현재 운영 항체)
# post-shutdown 항체는 developability DB에 없으므로
# "지금 쓰는 것보다 덜 위험한가?"만 본다
REF_SCORE = 0.0   # 현재 운영 상태 baseline
MARGIN = 0.05     # 보수적 개선 요구량

TOP_K = 10

candidates = (
    dev.sort_values("proxy_survivability_score", ascending=False)
       .head(TOP_K)
       .copy()
)

candidates["decision"] = np.where(
    candidates["proxy_survivability_score"] > REF_SCORE + MARGIN,
    "FALLBACK_CANDIDATE",
    "REJECT"
)

candidates[[
    "antibody_id",
    "proxy_survivability_score",
    "developability_risk",
    "cluster_size",
    "hc_subtype",
    "decision"
]]

Unnamed: 0,antibody_id,proxy_survivability_score,developability_risk,cluster_size,hc_subtype,decision
160,GDPa1-161,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
174,GDPa1-175,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
68,GDPa1-069,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
25,GDPa1-026,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
186,GDPa1-187,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
181,GDPa1-182,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
180,GDPa1-181,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
178,GDPa1-179,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
176,GDPa1-177,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE
65,GDPa1-066,0.862128,0.17234,54,IgG1,FALLBACK_CANDIDATE


In [29]:
# 1) REF 대비 개선된 후보만
improved = candidates[candidates["decision"] == "FALLBACK_CANDIDATE"]

# 2) 너무 위험한 subtype 컷 (운영 정책)
SAFE_ISOTYPES = ["IgG1", "IgG4"]

safe = improved[improved["hc_subtype"].isin(SAFE_ISOTYPES)]

# 3) 최종 선택 (가장 덜 위험한 것 1개)
selected = safe.head(1)

if selected.empty:
    print("⚠️ No safer alternative than current operating antibody")
else:
    print("✅ Selected fallback candidate")
    selected[[
        "antibody_id",
        "proxy_survivability_score",
        "developability_risk",
        "cluster_size",
        "hc_subtype"
    ]]

✅ Selected fallback candidate
