# Test: Fold Antibody and Antigen Separately

Test run for P10A1_1JPL_WT to verify folding works correctly.

**Input files (already generated):**
- `P10A1_HL.json` - Antibody H+L complex
- `1JPL_WT_antigen.json` - Antigen alone

**Requirements:**
- Run on GPU node: `srun --partition=dhvi-gpu --gres=gpu:1 --mem=10G -c 6 --pty bash`

In [1]:
import os
import subprocess
import json
import time
import shutil
from pathlib import Path
from datetime import datetime, timedelta

# =============================================================================
# Configuration
# =============================================================================

user = os.environ.get("USER", "unknown")

# Runtime directories
os.environ.setdefault("APPTAINER_TMPDIR", f"/scratch/{user}/tmp")
os.environ.setdefault("APPTAINER_CACHEDIR", f"/cwork/{user}/cache")

TMPDIR = Path(f"/scratch/{user}/af3_tmp")
TMPDIR.mkdir(parents=True, exist_ok=True)
os.environ["TMPDIR"] = str(TMPDIR)
os.environ["TEMP"] = str(TMPDIR)
os.environ["TMP"] = str(TMPDIR)

Path(os.environ["APPTAINER_TMPDIR"]).mkdir(parents=True, exist_ok=True)
Path(os.environ["APPTAINER_CACHEDIR"]).mkdir(parents=True, exist_ok=True)

# AF3 paths
AF3_HOME = Path("/opt/apps/community/alphafold3")
DATABASES_DIR = AF3_HOME / "AF3_databases"
MODEL_PARAMETERS_DIR = AF3_HOME / "AF3_models"
AF3_PROGRAMS_DIR = AF3_HOME / "AF3_Programs" / "alphafold3"

# Find SIF image
SIF_CANDIDATES = [
    AF3_HOME / "AF_v3.0.1.sif",
    AF3_HOME / "AF3_v3.0.1.sif",
    AF3_HOME / "AF3_111125.sif",
]
SIF_IMAGE = next((c for c in SIF_CANDIDATES if c.exists()), None)

# Project paths
PROJECT_ROOT = Path(f"/cwork/{user}/ab_seq_bind_analysis")
AF3_INPUT_DIR = PROJECT_ROOT / "data" / "processed" / "af3_inputs_test"
AF3_OUTPUT_DIR = PROJECT_ROOT / "data" / "processed" / "af3_outputs_test"

AF3_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print("Configuration")
print("=" * 60)
print(f"SIF_IMAGE:      {SIF_IMAGE}")
print(f"AF3_INPUT_DIR:  {AF3_INPUT_DIR}")
print(f"AF3_OUTPUT_DIR: {AF3_OUTPUT_DIR}")

Configuration
SIF_IMAGE:      /opt/apps/community/alphafold3/AF3_v3.0.1.sif
AF3_INPUT_DIR:  /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_inputs_test
AF3_OUTPUT_DIR: /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_outputs_test


In [2]:
# List available test inputs
print("Available test inputs:")
print("=" * 60)

input_files = sorted(AF3_INPUT_DIR.glob("*.json"))
for i, f in enumerate(input_files):
    with open(f) as fh:
        data = json.load(fh)
    chains = [s['protein']['id'] for s in data['sequences']]
    print(f"  {i}. {f.name} - Chains: {chains}")

Available test inputs:
  0. 1JPL_WT_antigen.json - Chains: ['A']
  1. P10A1_HL.json - Chains: ['H', 'L']


## Run AF3 Prediction

Select which file to fold by setting `INPUT_INDEX` below.

In [3]:
# =============================================================================
# SELECT INPUT (0 = P10A1_HL, 1 = 1JPL_WT_antigen)
# =============================================================================
INPUT_INDEX = 0  # <-- CHANGE THIS (0 or 1)
# =============================================================================

INPUT_JSON = input_files[INPUT_INDEX]
RUN_NAME = INPUT_JSON.stem

with open(INPUT_JSON) as f:
    input_data = json.load(f)

print(f"Selected: {INPUT_JSON.name}")
print(f"Run name: {RUN_NAME}")
print(f"\nChains:")
for seq in input_data['sequences']:
    chain_id = seq['protein']['id']
    chain_len = len(seq['protein']['sequence'])
    print(f"  {chain_id}: {chain_len} residues")

Selected: 1JPL_WT_antigen.json
Run name: 1JPL_WT_antigen

Chains:
  A: 166 residues


In [4]:
# Validate environment
print("Environment Check")
print("=" * 60)

checks = [
    ("SIF_IMAGE", SIF_IMAGE),
    ("DATABASES_DIR", DATABASES_DIR),
    ("MODEL_PARAMETERS_DIR", MODEL_PARAMETERS_DIR),
    ("INPUT_JSON", INPUT_JSON),
]

for label, path in checks:
    exists = path.exists() if path else False
    status = "[+] OK" if exists else "[X] MISSING"
    print(f"{status} {label}")

# GPU check
result = subprocess.run(["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], 
                       capture_output=True, text=True)
if result.returncode == 0:
    print(f"[+] GPU: {result.stdout.strip()}")
else:
    print("[X] No GPU - run on GPU node first!")

Environment Check
[+] OK SIF_IMAGE
[+] OK DATABASES_DIR
[+] OK MODEL_PARAMETERS_DIR
[+] OK INPUT_JSON
[+] GPU: NVIDIA RTX 5000 Ada Generation


In [5]:
# =============================================================================
# Run AF3 Prediction
# =============================================================================

# Clean temp files
for old_tmp in TMPDIR.glob("tmp*"):
    try:
        if old_tmp.is_dir():
            shutil.rmtree(old_tmp, ignore_errors=True)
        else:
            old_tmp.unlink(missing_ok=True)
    except:
        pass

# Build command
cmd = [
    "singularity", "exec", "--nv",
    "--bind", f"{AF3_INPUT_DIR}:/root/af_input",
    "--bind", f"{AF3_OUTPUT_DIR}:/root/af_output",
    "--bind", f"{MODEL_PARAMETERS_DIR}:/root/models",
    "--bind", f"{DATABASES_DIR}:/root/public_databases",
    "--bind", f"{AF3_PROGRAMS_DIR}:/root/AF3",
    "--bind", f"{TMPDIR}:/tmp",
    str(SIF_IMAGE),
    "python", "/root/AF3/run_alphafold.py",
    f"--json_path=/root/af_input/{INPUT_JSON.name}",
    "--model_dir=/root/models",
    "--db_dir=/root/public_databases",
    "--output_dir=/root/af_output",
]

print(f"Running AF3 for: {RUN_NAME}")
print("=" * 60)
print(f"Started: {datetime.now().isoformat()}")
print("\nThis may take 20-60 minutes...\n")

start_time = time.time()

proc = subprocess.Popen(
    cmd,
    stdout=subprocess.PIPE,
    stderr=subprocess.STDOUT,
    text=True,
    bufsize=1,
    env={**os.environ, "TMPDIR": "/tmp", "TEMP": "/tmp", "TMP": "/tmp"}
)

for line in proc.stdout:
    # Print key progress lines
    if any(x in line for x in ["Finished", "Running", "Error", "error", "Output", "fold job"]):
        elapsed = time.time() - start_time
        print(f"[{timedelta(seconds=int(elapsed))}] {line.rstrip()}")

rc = proc.wait()
elapsed = time.time() - start_time

print("\n" + "=" * 60)
print(f"Total time: {timedelta(seconds=int(elapsed))}")
if rc == 0:
    print("[+] SUCCESS!")
else:
    print(f"[X] FAILED (code {rc})")

Running AF3 for: 1JPL_WT_antigen
Started: 2026-02-03T17:48:41.705126

This may take 20-60 minutes...

[0:00:11] Running AlphaFold 3. Please note that standard AlphaFold 3 model parameters are
[0:00:23] Running fold job 1JPL_WT_antigen...
[0:00:23] Output will be written in /root/af_output/1jpl_wt_antigen
[0:00:23] Running data pipeline...
[0:00:23] Running data pipeline for chain A...
[0:03:50] I0203 17:52:32.373617 140021277378112 subprocess_utils.py:97] Finished Jackhmmer (bfd-first_non_consensus_sequences.fasta) in 207.583 seconds
[0:14:43] I0203 18:03:25.018351 140021294163520 subprocess_utils.py:97] Finished Jackhmmer (uniref90_2022_05.fa) in 860.230 seconds
[0:19:29] I0203 18:08:11.280521 140021268985408 subprocess_utils.py:97] Finished Jackhmmer (uniprot_all_2021_04.fa) in 1146.489 seconds
[0:24:37] I0203 18:13:19.529726 140021285770816 subprocess_utils.py:97] Finished Jackhmmer (mgy_clusters_2022_05.fa) in 1454.740 seconds
[0:24:38] I0203 18:13:20.077552 140029151466624 subproc

In [6]:
# Check outputs
print(f"Outputs for: {RUN_NAME}")
print("=" * 60)

# Find output directory (AF3 lowercases the name)
possible_dirs = [
    AF3_OUTPUT_DIR / RUN_NAME,
    AF3_OUTPUT_DIR / RUN_NAME.lower(),
]

output_subdir = None
for d in possible_dirs:
    if d.exists():
        output_subdir = d
        break

# Also check for timestamped dirs
if not output_subdir:
    for d in AF3_OUTPUT_DIR.glob(f"{RUN_NAME.lower()}*"):
        if d.is_dir():
            output_subdir = d
            break

if output_subdir and output_subdir.exists():
    print(f"Output directory: {output_subdir}")
    files = list(output_subdir.rglob("*"))
    print(f"\nFiles ({len(files)}):")
    for f in sorted(files)[:20]:
        if f.is_file():
            size = f.stat().st_size
            print(f"  {f.name} ({size:,} bytes)")
else:
    print("No output directory found yet.")
    print(f"\nAll dirs in {AF3_OUTPUT_DIR}:")
    for d in AF3_OUTPUT_DIR.iterdir():
        print(f"  {d.name}")

Outputs for: 1JPL_WT_antigen
Output directory: /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_outputs_test/1jpl_wt_antigen

Files (26):
  1jpl_wt_antigen_confidences.json (244,050 bytes)
  1jpl_wt_antigen_data.json (6,413,741 bytes)
  1jpl_wt_antigen_model.cif (120,485 bytes)
  1jpl_wt_antigen_ranking_scores.csv (147 bytes)
  1jpl_wt_antigen_summary_confidences.json (246 bytes)
  TERMS_OF_USE.md (13,036 bytes)
  1jpl_wt_antigen_seed-1_sample-0_confidences.json (244,050 bytes)
  1jpl_wt_antigen_seed-1_sample-0_model.cif (120,485 bytes)
  1jpl_wt_antigen_seed-1_sample-0_summary_confidences.json (246 bytes)
  1jpl_wt_antigen_seed-1_sample-1_confidences.json (244,041 bytes)
  1jpl_wt_antigen_seed-1_sample-1_model.cif (120,485 bytes)
  1jpl_wt_antigen_seed-1_sample-1_summary_confidences.json (246 bytes)
  1jpl_wt_antigen_seed-1_sample-2_confidences.json (244,169 bytes)
  1jpl_wt_antigen_seed-1_sample-2_model.cif (120,485 bytes)
  1jpl_wt_antigen_seed-1_sample-2_summary_confidences.jso

## Quick Reference

**To fold antibody (H+L):** Set `INPUT_INDEX = 0`

**To fold antigen (A):** Set `INPUT_INDEX = 1`

**Expected runtime:**
- Antibody (H+L, ~233 aa): ~30-45 min
- Antigen (A, ~166 aa): ~20-30 min