# Baseline

#### diffab

In [None]:
import os
import subprocess


# Run DiffAb's design_pdb.py on every PDB in pdb_dir, once per test config.
pdb_dir = "/spinning1/sharedby/hz362/AbFlow/data/rabd/pdb/"
diffab_repo_dir = "/spinning1/sharedby/hz362/AbFlow/benchmarks/diffab/"
# config_files and results_dirs are paired by position; both are relative to
# diffab_repo_dir. NOTE(review): assumes each config writes into the results
# directory at the same index -- confirm against the DiffAb configs.
config_files = [
    "configs/test/codesign_single.yml",
    "configs/test/fixbb.yml",
]
results_dirs = [
    "results/codesign_single/",
    "results/fixbb/",
]


# Snapshot the inputs so that any extra *.pdb files that show up in pdb_dir
# during the run can be identified and deleted in the `finally` clause.
original_pdb_files = {f for f in os.listdir(pdb_dir) if f.endswith(".pdb")}
original_working_dir = os.getcwd()

try:
    # The script name, configs and results directories are repo-relative.
    os.chdir(diffab_repo_dir)

    # Sorted so that reruns process structures in a reproducible order.
    for pdb_file in sorted(original_pdb_files):
        # Anchoring on the original cwd keeps a relative pdb_dir valid after
        # the chdir above; an absolute pdb_dir passes through unchanged.
        pdb_file_path = os.path.join(original_working_dir, pdb_dir, pdb_file)
        pdb_file_name = pdb_file.split(".")[0]

        for config_file, results_dir in zip(config_files, results_dirs):
            os.makedirs(results_dir, exist_ok=True)

            # Re-listed every time because earlier runs in this loop add
            # entries; an entry whose name before the first "." equals the
            # PDB stem marks the structure as already processed.
            result_file_names = {f.split(".")[0] for f in os.listdir(results_dir)}

            if pdb_file_name in result_file_names:
                # pdb_file already carries its ".pdb" extension.
                print(
                    f"Skipping {pdb_file} as it is already processed in {results_dir}"
                )
                continue

            command = [
                "python",
                "design_pdb.py",
                pdb_file_path,
                "--config",
                config_file,
            ]

            print(f"Running command: {' '.join(command)}")
            # check=True: a failing run aborts the whole batch.
            subprocess.run(command, check=True)

finally:
    # Always restore the working directory and clean up, even on failure.
    os.chdir(original_working_dir)

    current_pdb_files = {f for f in os.listdir(pdb_dir) if f.endswith(".pdb")}
    additional_files = current_pdb_files - original_pdb_files

    for additional_file in additional_files:
        os.remove(os.path.join(pdb_dir, additional_file))

    print("Finished processing all PDB files. Additional files have been removed.")

In [None]:
# clean up diffab folders
import os
import shutil
from glob import glob

# Copy the H_CDR3 designs of every timestamped DiffAb run folder
# ("<case>.pdb_<suffix>") into a sibling folder named "<case>", renaming the
# structures to design_0.pdb, design_1.pdb, ... in sorted order.
BASE_DIR = '/spinning1/sharedby/hz362/AbFlow/data/baseline/diffab'
MODES = ['SG', 'DN']

for mode in MODES:
    mode_dir = os.path.join(BASE_DIR, mode)
    for full_case_path in os.listdir(mode_dir):
        case_path = os.path.join(mode_dir, full_case_path)

        if not os.path.isdir(case_path):
            continue

        if ".pdb_" not in full_case_path:
            print(f"Skipping invalid folder: {full_case_path}")
            continue
        clean_name = full_case_path.split('.pdb_')[0]
        new_case_path = os.path.join(mode_dir, clean_name)

        if os.path.exists(new_case_path):
            print(f"Target folder already exists: {new_case_path}, skipping...")
            continue

        h_cdr3_path = os.path.join(case_path, 'H_CDR3')
        if not os.path.exists(h_cdr3_path):
            print(f"No H_CDR3 found in: {case_path}, skipping...")
            continue

        # Only files whose name starts with four digits are treated as designs.
        pdb_files = sorted(glob(os.path.join(h_cdr3_path, '[0-9][0-9][0-9][0-9]*.pdb')))
        if not pdb_files:
            # Do not create an empty target folder: a later clean-up pass that
            # deletes timestamped folders whenever the clean folder exists
            # would otherwise discard the only copy of this run.
            print(f"No design PDBs found in: {h_cdr3_path}, skipping...")
            continue

        os.makedirs(new_case_path)

        for i, pdb_path in enumerate(pdb_files):
            new_name = f'design_{i}.pdb'
            shutil.copy2(pdb_path, os.path.join(new_case_path, new_name))

        print(f"‚úì Cleaned {full_case_path} ‚Üí {clean_name}")

print("Done cleaning baseline diffab folders.")


In [None]:
import os
import shutil

# Delete timestamped DiffAb run folders ("<case>.pdb_<suffix>") once a cleaned
# sibling folder named "<case>" exists next to them.
BASE_DIR = '/spinning1/sharedby/hz362/AbFlow/data/baseline/diffab'
MODES = ['SG', 'DN']

for mode in MODES:
    mode_dir = os.path.join(BASE_DIR, mode)
    for entry_name in os.listdir(mode_dir):
        case_path = os.path.join(mode_dir, entry_name)

        # Candidates are directories whose name carries the ".pdb_" marker;
        # everything else is left alone.
        if not (os.path.isdir(case_path) and ".pdb_" in entry_name):
            continue

        clean_name = entry_name.split(".pdb_")[0]

        # Never delete a run unless its cleaned counterpart is present.
        if not os.path.exists(os.path.join(mode_dir, clean_name)):
            print(f"‚ö†Ô∏è Clean folder missing for: {clean_name}, skipping deletion")
            continue

        print(f"üóë Removing old folder: {case_path}")
        shutil.rmtree(case_path)

print("Old timestamped folders cleaned up.")


#### MEAN

In [None]:
import os
import subprocess

# Renumber every PDB in input_dir with MEAN's ImmunoPDB.py (scheme "imgt"),
# writing each result under the same file name into output_dir.
input_dir = "/spinning1/sharedby/hz362/AbFlow/data/rabd/pdb"
output_dir = "/spinning1/sharedby/hz362/AbFlow/data/rabd/pdb_imgt"
immunopdb_script = "/spinning1/sharedby/hz362/AbFlow/benchmarks/MEAN/data/ImmunoPDB.py"

os.makedirs(output_dir, exist_ok=True)

for fname in os.listdir(input_dir):
    if not fname.endswith(".pdb"):
        continue

    cmd = [
        "python", immunopdb_script,
        "-i", os.path.join(input_dir, fname),
        "-o", os.path.join(output_dir, fname),
        "-s", "imgt"
    ]

    # Best effort: a failing structure is reported and the batch continues.
    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as e:
        print(f"‚ùå Failed to renumber {fname}: {e}")
    else:
        print(f"‚úÖ Renumbered {fname} ‚Üí pdb_imgt/")

print("üéâ All PDBs processed.")


#### abx (move this notebook to the abx folder)

In [None]:
import os
import glob
from subprocess import run

# === CONFIGURATION ===
# Input structures and AbX model artefacts used by the design cell below.
pdb_input_dir = "/spinning1/sharedby/hz362/AbFlow/data/rabd/pdb"  # Change this to your PDB input folder
# One sub-folder per input PDB is created under this directory.
output_base_dir = "/spinning1/sharedby/hz362/AbFlow/benchmarks/AbX/output/abx_hcdr3_design"
# Passed to design.py as --model, --model_features and --model_config.
model_ckpt = "/spinning1/sharedby/hz362/AbFlow/benchmarks/AbX/trained_model/abx_diffab.ckpt"
model_features = "/spinning1/sharedby/hz362/AbFlow/benchmarks/AbX/config/config_data_feature.json"
model_config = "/spinning1/sharedby/hz362/AbFlow/benchmarks/AbX/config/config_model.json"

# Make sure output directory exists
os.makedirs(output_base_dir, exist_ok=True)

# === Function to run design for each PDB ===
def redesign_hcdr3(pdb_file, output_dir, num_samples=1):
    """Run AbX's design.py in "design" mode on a single PDB file.

    The model checkpoint, feature config and model config are read from the
    module-level ``model_ckpt``, ``model_features`` and ``model_config``.

    Args:
        pdb_file: Path of the input PDB, passed as ``--pdb_file``.
        output_dir: Directory passed as ``--output_dir``.
        num_samples: Value passed as ``--num_samples``; defaults to 1, the
            value that was previously hard-coded.

    Returns:
        The ``subprocess.CompletedProcess`` of the design run, so callers can
        inspect ``returncode``. A non-zero exit status is reported on stdout
        but does not raise, which keeps batch runs going.
    """
    print(f"üß¨ Designing HCDR3 for {os.path.basename(pdb_file)} ...")
    result = run([
        "python", "/spinning1/sharedby/hz362/AbFlow/benchmarks/AbX/design.py",
        "--model", model_ckpt,
        "--model_features", model_features,
        "--model_config", model_config,
        "--batch_size", "1",
        "--num_samples", str(num_samples),
        "--pdb_file", pdb_file,
        "--output_dir", output_dir,
        "--mode", "design"
    ])
    if result.returncode != 0:
        # Previously a failed run was indistinguishable from a successful one.
        print(
            f"Design failed for {os.path.basename(pdb_file)} "
            f"(exit code {result.returncode})"
        )
    return result

# === Batch Process All PDBs ===
# Every *.pdb in pdb_input_dir gets its own output folder named after the
# file stem; structures are processed in sorted order.
pdb_files = sorted(glob.glob(os.path.join(pdb_input_dir, "*.pdb")))

for pdb_path in pdb_files:
    # splitext removes only the trailing extension; str.replace would also
    # strip any ".pdb" that happens to occur inside the name.
    pdb_name = os.path.splitext(os.path.basename(pdb_path))[0]
    out_dir = os.path.join(output_base_dir, pdb_name)
    os.makedirs(out_dir, exist_ok=True)
    redesign_hcdr3(pdb_path, out_dir)

print("‚úÖ Finished redesigning all PDBs.")


In [None]:
import os
import shutil

# Gather one designed structure per case from
# <input_root>/<case>/design/0000/ into <output_dir>/<case>.pdb.
input_root = "/spinning1/sharedby/hz362/AbFlow/benchmarks/AbX/output/abx_hcdr3_design"
output_dir = "/spinning1/sharedby/hz362/AbFlow/benchmarks/AbX/output/abx"

os.makedirs(output_dir, exist_ok=True)

for entry in os.listdir(input_root):
    subdir = os.path.join(input_root, entry, "design", "0000")

    # Entries without a design/0000 folder are skipped without a message.
    if not os.path.isdir(subdir):
        continue

    candidates = [name for name in os.listdir(subdir) if name.endswith(".pdb")]
    if not candidates:
        print(f"‚ö†Ô∏è No PDB found in: {subdir}")
        continue

    # The first listed PDB is taken; os.listdir order is not guaranteed.
    output_path = os.path.join(output_dir, f"{entry}.pdb")
    shutil.copy(os.path.join(subdir, candidates[0]), output_path)
    print(f"‚úÖ Saved: {output_path}")


#### dymean

In [None]:
import sys
import os
from pathlib import Path
from Bio.PDB import PDBParser, PPBuilder

# Add dyMEAN to the Python path so its modules can be imported from here.
sys.path.append("/spinning1/sharedby/hz362/AbFlow/benchmarks/dyMEAN")

import sys
# Also add the parent benchmarks directory, not just dyMEAN itself.
sys.path.append("/spinning1/sharedby/hz362/AbFlow/benchmarks")

from api.design import design
from api.binding_interface import get_interface



In [None]:
import os
import json
from pathlib import Path
from Bio.PDB import PDBParser, PPBuilder
from anarci import number
from api.design import design
from api.binding_interface import get_interface


def extract_chains_from_filename(pdb_filename):
    """Return the heavy, light and antigen chain IDs encoded in a PDB name.

    File names are expected to look like ``4dtg_H_L_K.pdb``: the stem is
    split on ``_`` and fields 1, 2 and 3 are returned; any further fields
    are ignored.

    Args:
        pdb_filename: Path of the PDB file, as a ``str`` or ``pathlib.Path``.

    Returns:
        Tuple ``(heavy_chain, light_chain, antigen_chain)``.

    Raises:
        ValueError: If the stem has fewer than four ``_``-separated fields.
    """
    # Path() makes plain strings work too; Path inputs behave as before.
    name_parts = Path(pdb_filename).stem.split("_")
    if len(name_parts) < 4:
        raise ValueError(f"Unexpected PDB filename format: {pdb_filename}")
    return name_parts[1], name_parts[2], name_parts[3]  # H, L, Ag



def extract_sequences(pdb_path, heavy_chain, light_chain):
    """Read the heavy- and light-chain amino-acid sequences from a PDB file.

    Every chain of every model is scanned; a chain's sequence is the
    concatenation of the peptides ``PPBuilder`` builds for it. If the same
    chain ID occurs in several models, the last one wins. When
    ``heavy_chain`` and ``light_chain`` are equal, only the heavy sequence is
    filled.

    Args:
        pdb_path: Path of the PDB file to parse.
        heavy_chain: Chain ID of the heavy chain.
        light_chain: Chain ID of the light chain.

    Returns:
        Tuple ``(heavy_seq, light_seq)`` of strings; a chain that is not
        found yields an empty string.
    """
    structure = PDBParser(QUIET=True).get_structure("pdb", pdb_path)
    builder = PPBuilder()

    found = {"heavy": "", "light": ""}
    for model in structure:
        for chain in model:
            chain_seq = "".join(
                str(peptide.get_sequence())
                for peptide in builder.build_peptides(chain)
            )
            if chain.id == heavy_chain:
                found["heavy"] = chain_seq
            elif chain.id == light_chain:
                found["light"] = chain_seq

    return found["heavy"], found["light"]


def get_imgt_regions(sequence):
    """Split an antibody variable-domain sequence into seven IMGT regions.

    The sequence is numbered with ANARCI's ``number`` (scheme ``"imgt"``)
    and every non-gap residue is assigned to a region by its IMGT position.
    The regions start at positions 1, 27, 39, 55, 66, 105 and 118; callers in
    this file treat index 5 (positions 105-117) as CDR3.

    Residues are bucketed by position rather than by the index of the
    boundary residue. A boundary position that is missing from the numbering,
    or that carries insertion codes, therefore cannot shift or duplicate
    regions.

    Args:
        sequence: Amino-acid sequence of a single variable domain.

    Returns:
        List of exactly seven strings, one per region; a region with no
        residues is an empty string.

    Raises:
        ValueError: If ANARCI returns no numbering for the sequence.
    """
    numbering, _ = number(sequence, scheme="imgt")
    if not numbering:
        # ANARCI signals failure with a falsy numbering instead of raising.
        raise ValueError(
            f"ANARCI could not assign IMGT numbering to sequence: {sequence!r}"
        )

    # First IMGT position of regions 1..6; region 0 is everything before 27.
    region_starts = (27, 39, 55, 66, 105, 118)
    regions = [[] for _ in range(len(region_starts) + 1)]

    for (position, _insertion), aa in numbering:
        if aa == "-":
            continue
        # Region index = number of region starts at or before this position.
        region_index = sum(position >= start for start in region_starts)
        regions[region_index].append(aa)

    return ["".join(region) for region in regions]


def get_frameworks(heavy_seq, light_seq):
    """Build the framework definition with the heavy-chain CDR3 masked out.

    Both sequences are split with ``get_imgt_regions``. In the heavy chain,
    region 5 is replaced by one ``-`` per residue and regions 0-4 and 6 are
    kept; the light chain is re-joined from all of its regions.

    Args:
        heavy_seq: Heavy-chain sequence.
        light_seq: Light-chain sequence.

    Returns:
        ``[('H', heavy_masked), ('L', light_sequence)]``.
    """
    heavy_regions = get_imgt_regions(heavy_seq)
    light_regions = get_imgt_regions(light_seq)

    # An empty CDR3 region naturally yields an empty mask.
    cdr3_mask = "-" * len(heavy_regions[5])
    heavy_masked = "".join(heavy_regions[:5]) + cdr3_mask + heavy_regions[6]

    return [('H', heavy_masked), ('L', "".join(light_regions))]


def get_epitope(pdb_path, receptor_chains, ligand_chains, out_path, k=48):
    """Compute the epitope with ``get_interface`` and save it as JSON.

    Args:
        pdb_path: PDB file handed to ``get_interface`` as ``pdb``.
        receptor_chains: Chain IDs forwarded as ``receptor_chains``.
        ligand_chains: Chain IDs forwarded as ``ligand_chains``.
        out_path: Destination of the JSON file.
        k: Forwarded as ``num_epitope_residues``.

    The file holds one ``(chain_name, res.get_id())`` pair per epitope entry.
    """
    epitope_entries, _ = get_interface(
        pdb=pdb_path,
        receptor_chains=receptor_chains,
        ligand_chains=ligand_chains,
        num_epitope_residues=k,
    )

    # Each entry is unpacked as (residue, chain name, <unused third field>).
    records = [
        (chain_name, residue.get_id())
        for residue, chain_name, _ in epitope_entries
    ]

    with open(out_path, 'w') as handle:
        json.dump(records, handle)


# === Config ===
pdb_dir = "/spinning1/sharedby/hz362/AbFlow/data/rabd/pdb"
output_dir = "./dymean_results"
ckpt_path = "/spinning1/sharedby/hz362/AbFlow/benchmarks/dyMEAN/checkpoints/cdrh3_design.ckpt"
gpu_id = 0

os.makedirs(output_dir, exist_ok=True)

# === Design HCDR3 for every PDB in pdb_dir ===
# NOTE: there is no per-structure error handling; an exception raised for one
# structure aborts the whole batch.
for pdb_file in Path(pdb_dir).glob("*.pdb"):
    pdb_id = pdb_file.stem
    pdb_str = str(pdb_file)
    print(f"Designing HCDR3 for: {pdb_id}")

    # Chain IDs are encoded in the file name (e.g. 4dtg_H_L_K.pdb).
    h_chain, l_chain, ag_chain = extract_chains_from_filename(pdb_file)

    # Per-structure outputs: <output_dir>/<pdb_id>_epitope.json and
    # the folder <output_dir>/<pdb_id>/.
    epitope_path = os.path.join(output_dir, f"{pdb_id}_epitope.json")
    out_pdb_dir = os.path.join(output_dir, pdb_id)
    os.makedirs(out_pdb_dir, exist_ok=True)

    # The antigen chain is the receptor, the heavy chain the ligand.
    get_epitope(
        pdb_str,
        receptor_chains=[ag_chain],
        ligand_chains=[h_chain],
        out_path=epitope_path,
    )

    heavy_seq, light_seq = extract_sequences(pdb_str, h_chain, l_chain)

    # design() takes parallel lists; here each holds a single structure.
    design_kwargs = dict(
        ckpt=ckpt_path,
        gpu=gpu_id,
        pdbs=[pdb_str],
        epitope_defs=[epitope_path],
        frameworks=[get_frameworks(heavy_seq, light_seq)],
        out_dir=out_pdb_dir,
        identifiers=[pdb_id],
        remove_chains=[[h_chain]],
        enable_openmm_relax=False,
        auto_detect_cdrs=False,
    )
    design(**design_kwargs)


In [None]:
import os
from pathlib import Path
import shutil

# Collect <src_dir>/<case>/<case>.pdb from every result sub-folder into a
# flat "dymean" folder that sits next to src_dir.
src_dir = Path("/spinning1/sharedby/hz362/AbFlow/scripts/analysis/dymean_results")
target_dir = src_dir.parent / "dymean"
target_dir.mkdir(exist_ok=True)

for subdir in src_dir.iterdir():
    # Loose files in src_dir are ignored.
    if not subdir.is_dir():
        continue

    pdb_name = f"{subdir.name}.pdb"
    pdb_path = subdir / pdb_name

    if not pdb_path.exists():
        print(f"‚ùå Missing PDB: {pdb_path}")
        continue

    shutil.copy(pdb_path, target_dir / pdb_name)
    print(f"‚úÖ Copied {pdb_name} to {target_dir}")


#### RosettaAb

In [None]:
import os
import subprocess
from pathlib import Path

# === PATHS ===
# Rosetta antibody_designer binary, input structures, and output root; each
# input gets its own sub-folder under output_root.
rosetta_bin = "/spinning1/sharedby/hz362/AbFlow/benchmarks/rosetta/source/bin/antibody_designer.default.linuxgccrelease"
input_folder = "/spinning1/sharedby/hz362/AbFlow/data/rabd/pdb"
output_root = "/spinning1/sharedby/hz362/AbFlow/benchmarks/rosetta/output"
nstruct = 1  # passed to the binary as -nstruct

# === Ensure output folder exists ===
os.makedirs(output_root, exist_ok=True)

# === Batch process PDBs ===
# Sorted so that reruns process structures in a reproducible order.
for pdb_file in sorted(Path(input_folder).glob("*.pdb")):
    pdb_id = pdb_file.stem
    outdir = Path(output_root) / pdb_id
    outdir.mkdir(parents=True, exist_ok=True)

    # All CDR options target H3 only.
    # NOTE(review): -light_chain is fixed to "kappa" for every structure --
    # confirm this is right for inputs with lambda light chains.
    command = [
        rosetta_bin,
        "-s", str(pdb_file),
        "-primary_cdrs", "H3",
        "-graft_design_cdrs", "H3",
        "-seq_design_cdrs", "H3",
        "-light_chain", "kappa",
        "-nstruct", str(nstruct),
        "-out:path:all", str(outdir),
        "-mintype", "min",
        "-mc_optimize_dG",
        "-disallow_aa", "PRO", "CYS"
    ]

    print(f"Running RAbD on {pdb_file.name}...")
    # Best effort: a failure is reported and the batch continues, instead of
    # being silently ignored.
    result = subprocess.run(command)
    if result.returncode != 0:
        print(f"RAbD failed on {pdb_file.name} (exit code {result.returncode})")

