# Core10_03d — Alternative Candidate Selection from Developability DB

목적:
- post-shutdown 이후 실제 운영 항체가 1개뿐인 문제를 해결
- Developability DB를 외부 대체 후보 풀(candidate pool)로 사용
- Core10 철학: "설계 없이 운영 가능한 항체를 선택"

중요:
- post-shutdown 항체는 developability DB에 존재하지 않음 (세계관 분리)
- 따라서 ref 항체와의 직접 비교는 수행하지 않음
- developability DB 내부 상대 안정성만으로 fallback 후보를 선택

In [16]:
from pathlib import Path
import pandas as pd
import numpy as np
import re

DEV_PATH = Path("../../data_csv/Antibody_Developability.csv")
assert DEV_PATH.exists(), f"File not found: {DEV_PATH.resolve()}"

dev = pd.read_csv(DEV_PATH)

print("dev rows:", len(dev))
print("dev cols:", dev.columns.tolist())

dev.head()

dev rows: 246
dev cols: ['antibody_id', 'antibody_name', 'vh_protein_sequence', 'vl_protein_sequence', 'light_aligned_aho', 'heavy_aligned_aho', 'hc_subtype', 'lc_subtype', 'hierarchical_cluster_IgG_isotype_stratified_fold']


Unnamed: 0,antibody_id,antibody_name,vh_protein_sequence,vl_protein_sequence,light_aligned_aho,heavy_aligned_aho,hc_subtype,lc_subtype,hierarchical_cluster_IgG_isotype_stratified_fold
0,GDPa1-001,abagovomab,QVKLQESGAELARPGASVKLSCKASGYTFTNYWMQWVKQRPGQGLD...,DIELTQSPASLSASVGETVTITCQASENIYSYLAWHQQKQGKSPQL...,DIELTQSPASLSASVGETVTITCQAS--ENIY------SYLAWHQQ...,QVKLQES-GAELARPGASVKLSCKASG-YTFTN-----YWMQWVKQ...,IgG1,Kappa,2
1,GDPa1-002,abituzumab,QVQLQQSGGELAKPGASVKVSCKASGYTFSSFWMHWVRQAPGQGLE...,DIQMTQSPSSLSASVGDRVTITCRASQDISNYLAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCRAS--QDIS------NYLAWYQQ...,QVQLQQS-GGELAKPGASVKVSCKASG-YTFSS-----FWMHWVRQ...,IgG2,Kappa,0
2,GDPa1-003,abrezekimab,QVTLKESGPVLVKPTETLTLTCTVSGFSLTNYHVQWIRQPPGKALE...,DIQMTQSPSSLSASVGDRVTITCLASEDISNYLAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCLAS--EDIS------NYLAWYQQ...,QVTLKES-GPVLVKPTETLTLTCTVSG-FSLTN-----YHVQWIRQ...,IgG4,Kappa,2
3,GDPa1-004,abrilumab,QVQLVQSGAEVKKPGASVKVSCKVSGYTLSDLSIHWVRQAPGKGLE...,DIQMTQSPSSVSASVGDRVTITCRASQGISSWLAWYQQKPGKAPKL...,DIQMTQSPSSVSASVGDRVTITCRAS--QGIS------SWLAWYQQ...,QVQLVQS-GAEVKKPGASVKVSCKVSG-YTLSD-----LSIHWVRQ...,IgG2,Kappa,0
4,GDPa1-005,adalimumab,EVQLVESGGGLVQPGRSLRLSCAASGFTFDDYAMHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQGIRNYLAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCRAS--QGIR------NYLAWYQQ...,EVQLVES-GGGLVQPGRSLRLSCAASG-FTFDD-----YAMHWVRQ...,IgG1,Kappa,0


In [17]:
cluster_col = "hierarchical_cluster_IgG_isotype_stratified_fold"

# cluster size
dev["cluster_size"] = (
    dev.groupby(cluster_col)["antibody_id"]
       .transform("count")
)

# cluster rarity risk
dev["risk_cluster"] = 1.0 / dev["cluster_size"]

# isotype base risk (운영 경험 기반)
ISOTYPE_RISK = {
    "IgG1": 0.0,
    "IgG4": 0.1,
    "IgG2": 0.2,
    "IgG3": 0.3
}

dev["risk_isotype"] = dev["hc_subtype"].map(ISOTYPE_RISK).fillna(0.25)

# base developability risk
dev["base_developability_risk"] = (
    0.7 * dev["risk_cluster"] +
    0.3 * dev["risk_isotype"]
)

# normalize
dev["base_developability_risk"] /= dev["base_developability_risk"].max()

dev[[
    "antibody_id",
    "hc_subtype",
    "cluster_size",
    "base_developability_risk"
]].head(10)

Unnamed: 0,antibody_id,hc_subtype,cluster_size,base_developability_risk
0,GDPa1-001,IgG1,48,0.193882
1,GDPa1-002,IgG2,54,0.970028
2,GDPa1-003,IgG4,48,0.592726
3,GDPa1-004,IgG2,54,0.970028
4,GDPa1-005,IgG1,54,0.17234
5,GDPa1-006,IgG1,54,0.17234
6,GDPa1-007,IgG1,46,0.202312
7,GDPa1-008,IgG1,48,0.193882
8,GDPa1-009,IgG1,48,0.193882
9,GDPa1-010,IgG1,46,0.202312


In [18]:
# 운영 맥락 가중치
CONTEXT_PENALTY = {
    "IgG1": 0.0,
    "IgG4": 0.05,
    "IgG2": 0.15,
    "IgG3": 0.30
}

dev["context_penalty"] = dev["hc_subtype"].map(CONTEXT_PENALTY).fillna(0.2)

# Core10 composite operational risk
dev["core10_operational_risk"] = (
    0.6 * dev["base_developability_risk"] +
    0.4 * dev["context_penalty"]
)

dev["core10_operational_risk"] /= dev["core10_operational_risk"].max()

dev.sort_values("core10_operational_risk").head(10)

Unnamed: 0,antibody_id,antibody_name,vh_protein_sequence,vl_protein_sequence,light_aligned_aho,heavy_aligned_aho,hc_subtype,lc_subtype,hierarchical_cluster_IgG_isotype_stratified_fold,cluster_size,risk_cluster,risk_isotype,base_developability_risk,context_penalty,core10_operational_risk
59,GDPa1-060,domagrozumab,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKASQDVSTAVAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCKAS--QDVS------TAVAWYQQ...,EVQLLES-GGGLVQPGGSLRLSCAASG-FTFSS-----YAMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
41,GDPa1-042,certolizumab,EVQLVESGGGLVQPGGSLRLSCAASGYVFTDYGMNWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKASQNVGTNVAWYQQKPGKAPKA...,DIQMTQSPSSLSASVGDRVTITCKAS--QNVG------TNVAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YVFTD-----YGMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
196,GDPa1-197,rontalizumab,EVQLVESGGGLVQPGGSLRLSCATSGYTFTEYIIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQSVSTSSYSYMHWYQQKPGK...,DIQMTQSPSSLSASVGDRVTITCRAS--QSVSTS--SYSYMHWYQQ...,EVQLVES-GGGLVQPGGSLRLSCATSG-YTFTE-----YIIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
45,GDPa1-046,clazakizumab,EVQLVESGGGLVQPGGSLRLSCAASGFSLSNYYVTWVRQAPGKGLE...,AIQMTQSPSSLSASVGDRVTITCQASQSINNELSWYQQKPGKAPKL...,AIQMTQSPSSLSASVGDRVTITCQAS--QSIN------NELSWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FSLSN-----YYVTWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
187,GDPa1-188,ranibizumab,EVQLVESGGGLVQPGGSLRLSCAASGYDFTHYGMNWVRQAPGKGLE...,DIQLTQSPSSLSASVGDRVTITCSASQDISNYLNWYQQKPGKAPKV...,DIQLTQSPSSLSASVGDRVTITCSAS--QDIS------NYLNWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YDFTH-----YGMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
52,GDPa1-053,dacetuzumab,EVQLVESGGGLVQPGGSLRLSCAASGYSFTGYYIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRSSQSLVHSNGNTFLHWYQQKPG...,DIQMTQSPSSLSASVGDRVTITCRSS--QSLVHSN-GNTFLHWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YSFTG-----YYIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
186,GDPa1-187,ramucirumab,EVQLVQSGGGLVKPGGSLRLSCAASGFTFSSYSMNWVRQAPGKGLE...,DIQMTQSPSSVSASIGDRVTITCRASQGIDNWLGWYQQKPGKAPKL...,DIQMTQSPSSVSASIGDRVTITCRAS--QGID------NWLGWYQQ...,EVQLVQS-GGGLVKPGGSLRLSCAASG-FTFSS-----YSMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
181,GDPa1-182,prezalumab,EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYWMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQGISNWLAWYQQKPEKAPKS...,DIQMTQSPSSLSASVGDRVTITCRAS--QGIS------NWLAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTFSS-----YWMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
61,GDPa1-062,duligotuzumab,EVQLVESGGGLVQPGGSLRLSCAASGFTLSGDWIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQNIATDVAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCRAS--QNIA------TDVAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTLSG-----DWIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
180,GDPa1-181,prasinezumab,EVQLVESGGGLVQPGGSLRLSCAASGFTFSNYGMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKSIQTLLYSSNQKNYLAWFQQKP...,DIQMTQSPSSLSASVGDRVTITCKSI--QTLLYSSNQKNYLAWFQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTFSN-----YGMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673


In [13]:
# 운영 맥락 패널티 (설계 없이 오래 굴릴 수 있는가)
CONTEXT_PENALTY = {
    "IgG1": 0.0,
    "IgG4": 0.05,
    "IgG2": 0.15,
    "IgG3": 0.30
}

dev["context_penalty"] = dev["hc_subtype"].map(CONTEXT_PENALTY).fillna(0.2)

# Core10 composite operational risk
dev["core10_operational_risk"] = (
    0.6 * dev["base_developability_risk"] +
    0.4 * dev["context_penalty"]
)

dev["core10_operational_risk"] /= dev["core10_operational_risk"].max()

dev.sort_values("core10_operational_risk").head(10)

Unnamed: 0,antibody_id,antibody_name,vh_protein_sequence,vl_protein_sequence,light_aligned_aho,heavy_aligned_aho,hc_subtype,lc_subtype,hierarchical_cluster_IgG_isotype_stratified_fold,cluster_size,risk_cluster,risk_isotype,base_developability_risk,context_penalty,core10_operational_risk
59,GDPa1-060,domagrozumab,EVQLLESGGGLVQPGGSLRLSCAASGFTFSSYAMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKASQDVSTAVAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCKAS--QDVS------TAVAWYQQ...,EVQLLES-GGGLVQPGGSLRLSCAASG-FTFSS-----YAMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
41,GDPa1-042,certolizumab,EVQLVESGGGLVQPGGSLRLSCAASGYVFTDYGMNWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKASQNVGTNVAWYQQKPGKAPKA...,DIQMTQSPSSLSASVGDRVTITCKAS--QNVG------TNVAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YVFTD-----YGMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
196,GDPa1-197,rontalizumab,EVQLVESGGGLVQPGGSLRLSCATSGYTFTEYIIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQSVSTSSYSYMHWYQQKPGK...,DIQMTQSPSSLSASVGDRVTITCRAS--QSVSTS--SYSYMHWYQQ...,EVQLVES-GGGLVQPGGSLRLSCATSG-YTFTE-----YIIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
45,GDPa1-046,clazakizumab,EVQLVESGGGLVQPGGSLRLSCAASGFSLSNYYVTWVRQAPGKGLE...,AIQMTQSPSSLSASVGDRVTITCQASQSINNELSWYQQKPGKAPKL...,AIQMTQSPSSLSASVGDRVTITCQAS--QSIN------NELSWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FSLSN-----YYVTWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
187,GDPa1-188,ranibizumab,EVQLVESGGGLVQPGGSLRLSCAASGYDFTHYGMNWVRQAPGKGLE...,DIQLTQSPSSLSASVGDRVTITCSASQDISNYLNWYQQKPGKAPKV...,DIQLTQSPSSLSASVGDRVTITCSAS--QDIS------NYLNWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YDFTH-----YGMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
52,GDPa1-053,dacetuzumab,EVQLVESGGGLVQPGGSLRLSCAASGYSFTGYYIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRSSQSLVHSNGNTFLHWYQQKPG...,DIQMTQSPSSLSASVGDRVTITCRSS--QSLVHSN-GNTFLHWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-YSFTG-----YYIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
186,GDPa1-187,ramucirumab,EVQLVQSGGGLVKPGGSLRLSCAASGFTFSSYSMNWVRQAPGKGLE...,DIQMTQSPSSVSASIGDRVTITCRASQGIDNWLGWYQQKPGKAPKL...,DIQMTQSPSSVSASIGDRVTITCRAS--QGID------NWLGWYQQ...,EVQLVQS-GGGLVKPGGSLRLSCAASG-FTFSS-----YSMNWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
181,GDPa1-182,prezalumab,EVQLVESGGGLVQPGGSLRLSCAASGFTFSSYWMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQGISNWLAWYQQKPEKAPKS...,DIQMTQSPSSLSASVGDRVTITCRAS--QGIS------NWLAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTFSS-----YWMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
61,GDPa1-062,duligotuzumab,EVQLVESGGGLVQPGGSLRLSCAASGFTLSGDWIHWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCRASQNIATDVAWYQQKPGKAPKL...,DIQMTQSPSSLSASVGDRVTITCRAS--QNIA------TDVAWYQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTLSG-----DWIHWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673
180,GDPa1-181,prasinezumab,EVQLVESGGGLVQPGGSLRLSCAASGFTFSNYGMSWVRQAPGKGLE...,DIQMTQSPSSLSASVGDRVTITCKSIQTLLYSSNQKNYLAWFQQKP...,DIQMTQSPSSLSASVGDRVTITCKSI--QTLLYSSNQKNYLAWFQQ...,EVQLVES-GGGLVQPGGSLRLSCAASG-FTFSN-----YGMSWVRQ...,IgG1,Kappa,0,54,0.018519,0.0,0.17234,0.0,0.156673


In [19]:
ALPHA = 0.9  # 보수성 (운영 기준)

dev["proxy_survivability_score"] = (
    1.0 - ALPHA * dev["core10_operational_risk"]
).clip(lower=0)

dev.sort_values(
    "proxy_survivability_score",
    ascending=False
)[[
    "antibody_id",
    "hc_subtype",
    "proxy_survivability_score",
    "core10_operational_risk",
    "cluster_size"
]].head(10)

Unnamed: 0,antibody_id,hc_subtype,proxy_survivability_score,core10_operational_risk,cluster_size
160,GDPa1-161,IgG1,0.858995,0.156673,54
174,GDPa1-175,IgG1,0.858995,0.156673,54
68,GDPa1-069,IgG1,0.858995,0.156673,54
25,GDPa1-026,IgG1,0.858995,0.156673,54
186,GDPa1-187,IgG1,0.858995,0.156673,54
181,GDPa1-182,IgG1,0.858995,0.156673,54
180,GDPa1-181,IgG1,0.858995,0.156673,54
178,GDPa1-179,IgG1,0.858995,0.156673,54
176,GDPa1-177,IgG1,0.858995,0.156673,54
65,GDPa1-066,IgG1,0.858995,0.156673,54


In [20]:
GLYCO_PATTERN = re.compile(r"N[^P][ST]")

def glyco_count(seq):
    if pd.isna(seq):
        return 0
    return len(GLYCO_PATTERN.findall(seq))

def cys_count(seq):
    if pd.isna(seq):
        return 0
    return seq.count("C")

def seq_len(seq):
    if pd.isna(seq):
        return 0
    return len(seq)

# glycosylation
dev["glyco_vh"] = dev["vh_protein_sequence"].apply(glyco_count)
dev["glyco_vl"] = dev["vl_protein_sequence"].apply(glyco_count)
dev["glyco_total"] = dev["glyco_vh"] + dev["glyco_vl"]

# cysteine
dev["cys_vh"] = dev["vh_protein_sequence"].apply(cys_count)
dev["cys_vl"] = dev["vl_protein_sequence"].apply(cys_count)
dev["cys_total"] = dev["cys_vh"] + dev["cys_vl"]

# length
dev["vh_len"] = dev["vh_protein_sequence"].apply(seq_len)
dev["vl_len"] = dev["vl_protein_sequence"].apply(seq_len)
dev["seq_len_total"] = dev["vh_len"] + dev["vl_len"]

dev[[
    "antibody_id",
    "glyco_total",
    "cys_total",
    "seq_len_total"
]].head(10)

Unnamed: 0,antibody_id,glyco_total,cys_total,seq_len_total
0,GDPa1-001,0,4,226
1,GDPa1-002,0,4,225
2,GDPa1-003,0,4,227
3,GDPa1-004,0,4,225
4,GDPa1-005,0,4,228
5,GDPa1-006,0,4,231
6,GDPa1-007,0,4,228
7,GDPa1-008,0,4,231
8,GDPa1-009,0,4,225
9,GDPa1-010,0,4,222


In [21]:
# canonical antibody 기준: 대략 8~10
dev["cys_anomaly"] = (
    (dev["cys_total"] < 8) |
    (dev["cys_total"] > 10)
).astype(int)

# Core10: cys는 anomaly flag로만 사용
dev["risk_cys"] = dev["cys_anomaly"]

dev[[
    "antibody_id",
    "cys_total",
    "cys_anomaly"
]].head(10)

Unnamed: 0,antibody_id,cys_total,cys_anomaly
0,GDPa1-001,4,1
1,GDPa1-002,4,1
2,GDPa1-003,4,1
3,GDPa1-004,4,1
4,GDPa1-005,4,1
5,GDPa1-006,4,1
6,GDPa1-007,4,1
7,GDPa1-008,4,1
8,GDPa1-009,4,1
9,GDPa1-010,4,1


In [22]:
def norm(col):
    return col / col.max() if col.max() > 0 else col

dev["risk_glyco"] = norm(dev["glyco_total"])
dev["risk_len"]   = norm(dev["seq_len_total"])

# tie-break risk (설명 가능, 결정론)
dev["tie_break_risk"] = (
    0.5 * dev["risk_glyco"] +
    0.3 * dev["risk_len"] +
    0.2 * dev["context_penalty"] +
    1.0 * dev["risk_cys"]   # anomaly는 강한 패널티
)

dev[[
    "antibody_id",
    "tie_break_risk",
    "risk_glyco",
    "risk_len",
    "risk_cys"
]].head(10)

Unnamed: 0,antibody_id,tie_break_risk,risk_glyco,risk_len,risk_cys
0,GDPa1-001,1.2825,0.0,0.941667,1
1,GDPa1-002,1.31125,0.0,0.9375,1
2,GDPa1-003,1.29375,0.0,0.945833,1
3,GDPa1-004,1.31125,0.0,0.9375,1
4,GDPa1-005,1.285,0.0,0.95,1
5,GDPa1-006,1.28875,0.0,0.9625,1
6,GDPa1-007,1.285,0.0,0.95,1
7,GDPa1-008,1.28875,0.0,0.9625,1
8,GDPa1-009,1.28125,0.0,0.9375,1
9,GDPa1-010,1.2775,0.0,0.925,1


In [23]:
# REF 항체는 dev DB에 없으므로 baseline = 0
REF_SCORE = 0.0

# Step 1 — Gate 통과 후보 = Core10 Pool
POOL = dev[
    dev["proxy_survivability_score"] > REF_SCORE
].copy()

print("Core10 POOL size:", len(POOL))

Core10 POOL size: 246


In [24]:
POOL["signature"] = (
    POOL["hc_subtype"].astype(str) + "|" +
    POOL["lc_subtype"].astype(str) + "|" +
    POOL["hierarchical_cluster_IgG_isotype_stratified_fold"].astype(str)
)

POOL["signature"].value_counts().head(10)

signature
IgG1|Kappa|0     36
IgG1|Kappa|2     33
IgG1|Kappa|1     33
IgG1|Kappa|3     31
IgG1|Lambda|4    19
IgG1|Kappa|4     16
IgG4|Kappa|0     11
IgG4|Kappa|3      9
IgG4|Kappa|1      9
IgG4|Kappa|4      8
Name: count, dtype: int64

In [25]:
fallback_pool = (
    POOL
    .sort_values(
        [
            "proxy_survivability_score",   # 1차: Gate 점수
            "tie_break_risk",              # 2차: 운영 리스크
            "antibody_id"                  # 3차: 완전 결정론
        ],
        ascending=[False, True, True]
    )
    .groupby("signature", as_index=False)
    .head(1)   # 각 구조 그룹 대표 1개
)

print("Fallback pool size:", len(fallback_pool))

fallback_pool[[
    "antibody_id",
    "signature",
    "proxy_survivability_score",
    "tie_break_risk",
    "hc_subtype"
]].head(10)

Fallback pool size: 19


Unnamed: 0,antibody_id,signature,proxy_survivability_score,tie_break_risk,hc_subtype
59,GDPa1-060,IgG1|Kappa|0,0.858995,1.27875,IgG1
20,GDPa1-021,IgG1|Kappa|1,0.844606,1.27625,IgG1
84,GDPa1-085,IgG1|Kappa|4,0.844606,1.28125,IgG1
24,GDPa1-025,IgG1|Lambda|4,0.844606,1.2825,IgG1
164,GDPa1-165,IgG1|Lambda|1,0.844606,1.28625,IgG1
16,GDPa1-017,IgG1|Kappa|2,0.841369,1.27625,IgG1
9,GDPa1-010,IgG1|Kappa|3,0.834472,1.2775,IgG1
137,GDPa1-138,IgG4|Kappa|0,0.505395,1.2875,IgG4
38,GDPa1-039,IgG4|Kappa|1,0.491007,1.28875,IgG4
49,GDPa1-050,IgG4|Kappa|4,0.491007,1.29,IgG4


“Core10은 단일 최적 항체를 선택하지 않고,
운영 실패 시 동일 위험 등급 내에서
구조적으로 서로 다른 대체 후보 집합을 구성한다.”

In [27]:
from pathlib import Path
import json

EXPORT_DIR = Path("../artifact/core10")
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

OUT_POOL_CSV  = EXPORT_DIR / "core10_03d_fallback_pool.csv"
OUT_POOL_JSON = EXPORT_DIR / "core10_03d_fallback_pool.json"
OUT_META_JSON = EXPORT_DIR / "core10_03d_metadata.json"

print("Export dir:", EXPORT_DIR.resolve())

Export dir: /Users/mac/Desktop/De/Developability_Data/core/artifact/core10


In [28]:
# fallback_pool 은 이미 앞에서 계산된 상태라고 가정

EXPORT_COLS = [
    "antibody_id",
    "signature",
    "proxy_survivability_score",
    "tie_break_risk",
    "core10_operational_risk",
    "cluster_size",
    "hc_subtype",
    "lc_subtype",
    "hierarchical_cluster_IgG_isotype_stratified_fold"
]

fallback_pool_export = fallback_pool[EXPORT_COLS].copy()

fallback_pool_export.to_csv(OUT_POOL_CSV, index=False)

print("✅ Fallback pool CSV exported:", OUT_POOL_CSV)
print("rows:", len(fallback_pool_export))

✅ Fallback pool CSV exported: ../artifact/core10/core10_03d_fallback_pool.csv
rows: 19


In [29]:
fallback_pool_export.to_json(
    OUT_POOL_JSON,
    orient="records",
    indent=2
)

print("✅ Fallback pool JSON exported:", OUT_POOL_JSON)

✅ Fallback pool JSON exported: ../artifact/core10/core10_03d_fallback_pool.json


In [30]:
metadata = {
    "core_id": "core10_03d",
    "name": "Alternative Candidate Selection from Developability DB",
    "purpose": (
        "Construct a structurally diverse fallback candidate pool "
        "when only one post-shutdown operating antibody exists."
    ),
    "philosophy": (
        "Core10 does not select a single optimal antibody. "
        "It constructs a fallback pool of structurally distinct "
        "candidates within the same operational risk tier."
    ),
    "decision_principles": {
        "stage_1_gate": "proxy_survivability_score > REF_SCORE",
        "stage_2_grouping": "hc_subtype | lc_subtype | cluster_id",
        "stage_3_selection": [
            "proxy_survivability_score (desc)",
            "tie_break_risk (asc)",
            "antibody_id (lexicographic)"
        ]
    },
    "notes": [
        "No learning model involved",
        "Deterministic and explainable",
        "Scores may tie; diversity is the goal",
        "Failure-aware operational design"
    ],
    "pool_size": int(len(fallback_pool_export)),
    "generated_by": "core10_03d notebook"
}

OUT_META_JSON.write_text(
    json.dumps(metadata, indent=2),
    encoding="utf-8"
)

print("✅ Metadata JSON exported:", OUT_META_JSON)

✅ Metadata JSON exported: ../artifact/core10/core10_03d_metadata.json


In [31]:
print("=== Core10_03d Export Summary ===")
print("CSV :", OUT_POOL_CSV.name)
print("JSON:", OUT_POOL_JSON.name)
print("META:", OUT_META_JSON.name)
print("Pool size:", len(fallback_pool_export))

fallback_pool_export.head(5)

=== Core10_03d Export Summary ===
CSV : core10_03d_fallback_pool.csv
JSON: core10_03d_fallback_pool.json
META: core10_03d_metadata.json
Pool size: 19


Unnamed: 0,antibody_id,signature,proxy_survivability_score,tie_break_risk,core10_operational_risk,cluster_size,hc_subtype,lc_subtype,hierarchical_cluster_IgG_isotype_stratified_fold
59,GDPa1-060,IgG1|Kappa|0,0.858995,1.27875,0.156673,54,IgG1,Kappa,0
20,GDPa1-021,IgG1|Kappa|1,0.844606,1.27625,0.17266,49,IgG1,Kappa,1
84,GDPa1-085,IgG1|Kappa|4,0.844606,1.28125,0.17266,49,IgG1,Kappa,4
24,GDPa1-025,IgG1|Lambda|4,0.844606,1.2825,0.17266,49,IgG1,Lambda,4
164,GDPa1-165,IgG1|Lambda|1,0.844606,1.28625,0.17266,49,IgG1,Lambda,1
