# Test AlphaFold 3 Setup

Quick verification that AF3 can run on the cluster.

**Requirements:**
- Run on a GPU node (e.g., `srun --partition=dhvi-gpu --gres=gpu:1 --pty bash`)
- Load CUDA before running: `module load CUDA/11.4`
- Singularity/Apptainer available
- AF3 databases and models accessible (paths below match `run_AF3_wk24_glycan.job`)

**What this tests:**
1. Environment validation (paths, image, databases)
2. Singularity can load the AF3 container
3. GPU is accessible inside container
4. (Optional) Run a minimal prediction

In [1]:
import os
import subprocess
import json
from pathlib import Path

# =============================================================================
# Configuration - matches run_AF3_wk24_glycan.job (WieheLab/MMH)
# =============================================================================

user = os.environ.get("USER", "unknown")

# Runtime / Apptainer (set these before running AF3; job script uses these)
os.environ.setdefault("APPTAINER_TMPDIR", f"/scratch/{user}/tmp")
os.environ.setdefault("APPTAINER_CACHEDIR", f"/cwork/{user}/cache")
# IMPORTANT: Set TMPDIR for Python's tempfile module (used by Jackhmmer)
# This prevents cleanup errors when container/host have permission conflicts
TMPDIR = Path(f"/scratch/{user}/af3_tmp")
TMPDIR.mkdir(parents=True, exist_ok=True)
os.environ["TMPDIR"] = str(TMPDIR)
os.environ["TEMP"] = str(TMPDIR)
os.environ["TMP"] = str(TMPDIR)

# Ensure scratch/cache dirs exist (job script does: mkdir -p ...)
Path(os.environ["APPTAINER_TMPDIR"]).mkdir(parents=True, exist_ok=True)
Path(os.environ["APPTAINER_CACHEDIR"]).mkdir(parents=True, exist_ok=True)

# AF3 paths (same as run_AF3_wk24_glycan.job)
AF3_HOME = Path("/opt/apps/community/alphafold3")
DATABASES_DIR = AF3_HOME / "AF3_databases"
MODEL_PARAMETERS_DIR = AF3_HOME / "AF3_models"
AF3_PROGRAMS_DIR = AF3_HOME / "AF3_Programs" / "alphafold3"

# SIF image: job uses AF_v3.0.1.sif
SIF_CANDIDATES = [
    AF3_HOME / "AF_v3.0.1.sif",
    AF3_HOME / "AF3_v3.0.1.sif",
    AF3_HOME / "AF3_111125.sif",
]
SIF_IMAGE = None
for candidate in SIF_CANDIDATES:
    if candidate.exists():
        SIF_IMAGE = candidate
        break

# Input/output base dirs (job convention: /cwork/$USER/AF3_Input, AF3_Output)
AF3_INPUT_BASE = Path(f"/cwork/{user}/AF3_Input")
AF3_OUTPUT_BASE = Path(f"/cwork/{user}/AF3_Output")
# Test run dirs for this repo
TEST_OUTPUT_DIR = Path(f"/cwork/{user}/ab_seq_bind_analysis/data/processed/af3_outputs/test_run")

# SLURM hints (from job: dhvi-gpu, 10G, 6 CPUs)
SLURM_PARTITION = "dhvi-gpu"
SLURM_GRES = "gpu:1"
SLURM_MEM = "10G"
SLURM_CPUS = 6

print("AF3 Configuration (run_AF3_wk24_glycan.job)")
print("=" * 60)
print(f"APPTAINER_TMPDIR:   {os.environ.get('APPTAINER_TMPDIR')}")
print(f"APPTAINER_CACHEDIR: {os.environ.get('APPTAINER_CACHEDIR')}")
print(f"TMPDIR:             {os.environ.get('TMPDIR')}")
print(f"AF3_HOME:           {AF3_HOME}")
print(f"SIF_IMAGE:          {SIF_IMAGE}")
print(f"DATABASES_DIR:      {DATABASES_DIR}")
print(f"MODEL_PARAMS_DIR:   {MODEL_PARAMETERS_DIR}")
print(f"AF3_PROGRAMS_DIR:   {AF3_PROGRAMS_DIR}")
print(f"AF3_INPUT_BASE:     {AF3_INPUT_BASE}")
print(f"AF3_OUTPUT_BASE:    {AF3_OUTPUT_BASE}")
print(f"TEST_OUTPUT_DIR:    {TEST_OUTPUT_DIR}")
print(f"SLURM:              -p {SLURM_PARTITION} --gres={SLURM_GRES} --mem={SLURM_MEM} -c {SLURM_CPUS}")

AF3 Configuration (run_AF3_wk24_glycan.job)
APPTAINER_TMPDIR:   /scratch/hsb26/tmp
APPTAINER_CACHEDIR: /cwork/hsb26/cache
TMPDIR:             /scratch/hsb26/af3_tmp
AF3_HOME:           /opt/apps/community/alphafold3
SIF_IMAGE:          /opt/apps/community/alphafold3/AF3_v3.0.1.sif
DATABASES_DIR:      /opt/apps/community/alphafold3/AF3_databases
MODEL_PARAMS_DIR:   /opt/apps/community/alphafold3/AF3_models
AF3_PROGRAMS_DIR:   /opt/apps/community/alphafold3/AF3_Programs/alphafold3
AF3_INPUT_BASE:     /cwork/hsb26/AF3_Input
AF3_OUTPUT_BASE:    /cwork/hsb26/AF3_Output
TEST_OUTPUT_DIR:    /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_outputs/test_run
SLURM:              -p dhvi-gpu --gres=gpu:1 --mem=10G -c 6


## 1. Validate Environment

In [2]:
def check_path(label, path, required=True):
    """Check if a path exists and print status."""
    exists = path.exists() if path else False
    status = "OK" if exists else "MISSING"
    symbol = "[+]" if exists else "[X]"
    print(f"{symbol} {label}: {path} - {status}")
    if required and not exists:
        raise FileNotFoundError(f"Required path missing: {label} = {path}")
    return exists

print("Environment Check")
print("=" * 60)

# Check critical paths
check_path("AF3_HOME", AF3_HOME)
check_path("SIF_IMAGE", SIF_IMAGE)
check_path("DATABASES_DIR", DATABASES_DIR)
check_path("MODEL_PARAMETERS_DIR", MODEL_PARAMETERS_DIR)
check_path("AF3_PROGRAMS_DIR", AF3_PROGRAMS_DIR)

# Check for singularity/apptainer
print("\nSingularity/Apptainer Check")
print("-" * 40)
result = subprocess.run(["which", "singularity"], capture_output=True, text=True)
if result.returncode == 0:
    print(f"[+] singularity found: {result.stdout.strip()}")
else:
    result = subprocess.run(["which", "apptainer"], capture_output=True, text=True)
    if result.returncode == 0:
        print(f"[+] apptainer found: {result.stdout.strip()}")
    else:
        print("[X] Neither singularity nor apptainer found!")

print("\n" + "=" * 60)
print("Environment validation complete!")

Environment Check
[+] AF3_HOME: /opt/apps/community/alphafold3 - OK
[+] SIF_IMAGE: /opt/apps/community/alphafold3/AF3_v3.0.1.sif - OK
[+] DATABASES_DIR: /opt/apps/community/alphafold3/AF3_databases - OK
[+] MODEL_PARAMETERS_DIR: /opt/apps/community/alphafold3/AF3_models - OK
[+] AF3_PROGRAMS_DIR: /opt/apps/community/alphafold3/AF3_Programs/alphafold3 - OK

Singularity/Apptainer Check
----------------------------------------
[+] singularity found: /usr/bin/singularity

Environment validation complete!


## 2. Test Container Loads

In [3]:
print("Testing container load...")
print("=" * 60)

# Simple test: run python --version inside container
cmd = [
    "singularity", "exec",
    str(SIF_IMAGE),
    "python", "--version"
]

print(f"Command: {' '.join(cmd)}")
result = subprocess.run(cmd, capture_output=True, text=True)

if result.returncode == 0:
    print(f"[+] Container loaded successfully")
    print(f"    Python version: {result.stdout.strip()}")
else:
    print(f"[X] Container failed to load")
    print(f"    Error: {result.stderr}")

Testing container load...
Command: singularity exec /opt/apps/community/alphafold3/AF3_v3.0.1.sif python --version
[+] Container loaded successfully
    Python version: Python 3.11.11


## 3. Test GPU Access

In [4]:
print("Testing GPU access inside container...")
print("=" * 60)

# Test nvidia-smi inside container
cmd = [
    "singularity", "exec", "--nv",
    str(SIF_IMAGE),
    "nvidia-smi", "--query-gpu=name,memory.total,driver_version", "--format=csv"
]

print(f"Command: {' '.join(cmd[:5])} ... nvidia-smi")
result = subprocess.run(cmd, capture_output=True, text=True)

if result.returncode == 0:
    print(f"[+] GPU accessible inside container")
    print(f"\n{result.stdout}")
else:
    print(f"[X] GPU not accessible")
    print(f"    Error: {result.stderr}")
    print("\n    Make sure you're on a GPU node!")
    print(f"    Try: srun --partition={SLURM_PARTITION} --gres={SLURM_GRES} --pty bash")

Testing GPU access inside container...
Command: singularity exec --nv /opt/apps/community/alphafold3/AF3_v3.0.1.sif nvidia-smi ... nvidia-smi
[+] GPU accessible inside container

name, memory.total [MiB], driver_version
NVIDIA RTX 5000 Ada Generation, 32760 MiB, 570.86.10



In [5]:
print("Testing JAX GPU access (used by AF3)...")
print("=" * 60)

# Test JAX can see GPU
jax_test_script = '''
import jax
devices = jax.devices()
print(f"JAX devices: {devices}")
print(f"GPU available: {any('gpu' in str(d).lower() for d in devices)}")
'''

cmd = [
    "singularity", "exec", "--nv",
    str(SIF_IMAGE),
    "python", "-c", jax_test_script
]

result = subprocess.run(cmd, capture_output=True, text=True)

if result.returncode == 0:
    print(f"[+] JAX GPU test passed")
    print(result.stdout)
else:
    print(f"[X] JAX GPU test failed")
    print(f"stdout: {result.stdout}")
    print(f"stderr: {result.stderr}")

Testing JAX GPU access (used by AF3)...
[+] JAX GPU test passed
JAX devices: [CudaDevice(id=0)]
GPU available: False



## 4. Test AF3 Import

In [6]:
print("Testing AF3 module import...")
print("=" * 60)

af3_test_script = '''
import sys
sys.path.insert(0, "/root/AF3")
try:
    from alphafold3 import structure
    print("[+] alphafold3.structure imported successfully")
except ImportError as e:
    print(f"[X] Import failed: {e}")
'''

cmd = [
    "singularity", "exec", "--nv",
    "--bind", f"{AF3_PROGRAMS_DIR}:/root/AF3",
    str(SIF_IMAGE),
    "python", "-c", af3_test_script
]

result = subprocess.run(cmd, capture_output=True, text=True)
print(result.stdout)
if result.stderr:
    print(f"stderr: {result.stderr[:500]}")

Testing AF3 module import...
[+] alphafold3.structure imported successfully



## 5. (Optional) Run Minimal Prediction

This runs a very small test prediction to verify the full pipeline works.

**Warning:** Even a minimal prediction takes significant time and resources.

In [7]:
# Create a minimal test input JSON
# Using a small antibody fragment for quick testing

test_input = {
    "name": "test_minimal",
    "modelSeeds": [1],
    "dialect": "alphafold3",
    "version": 3,
    "sequences": [
        {
            "protein": {
                "id": "A",
                # Short test sequence (~50 residues)
                "sequence": "MKTAYIAKQRQISFVKSHFSRQLEERLGLIEVQAPILSRVGDGTQDNLSGAEKAVQV"
            }
        }
    ]
}

# Create test directories
TEST_INPUT_DIR = Path(f"/cwork/{user}/ab_seq_bind_analysis/data/processed/af3_inputs")
TEST_INPUT_DIR.mkdir(parents=True, exist_ok=True)
TEST_OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

test_json_path = TEST_INPUT_DIR / "P10A1_1JPL_WT.json"
with open(test_json_path, "w") as f:
    json.dump(test_input, f, indent=2)

print(f"Created test input: {test_json_path}")
print(f"Output will go to: {TEST_OUTPUT_DIR}")

Created test input: /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_inputs/P10A1_1JPL_WT.json
Output will go to: /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_outputs/test_run


In [8]:
# Set to True to actually run the prediction
RUN_PREDICTION = True

if not RUN_PREDICTION:
    print("Prediction disabled. Set RUN_PREDICTION = True to run.")
    print("\nThis will run a minimal AF3 prediction to verify the full pipeline.")
else:
    import sys
    import time
    from datetime import datetime, timedelta
    
    print("Running minimal AF3 prediction...")
    print("=" * 60)
    print("This may take 10-30 minutes even for a small protein.\n")
    
    # Clean any leftover temp files from previous failed runs
    import shutil
    for old_tmp in TMPDIR.glob("tmp*"):
        try:
            if old_tmp.is_dir():
                shutil.rmtree(old_tmp, ignore_errors=True)
            else:
                old_tmp.unlink(missing_ok=True)
        except:
            pass
    
    cmd = [
        "singularity", "exec", "--nv",
        "--bind", f"{TEST_INPUT_DIR}:/root/af_input",
        "--bind", f"{TEST_OUTPUT_DIR}:/root/af_output",
        "--bind", f"{MODEL_PARAMETERS_DIR}:/root/models",
        "--bind", f"{DATABASES_DIR}:/root/public_databases",
        "--bind", f"{AF3_PROGRAMS_DIR}:/root/AF3",
        # Bind temp directory to prevent cleanup errors
        "--bind", f"{TMPDIR}:/tmp",
        str(SIF_IMAGE),
        "python", "/root/AF3/run_alphafold.py",
        "--json_path=/root/af_input/P10A1_1JPL_WT.json",
        "--model_dir=/root/models",
        "--db_dir=/root/public_databases",
        "--output_dir=/root/af_output",
    ]
    
    print("Command:")
    print(" ".join(cmd[:10]) + " ...")
    print()
    
    # Progress tracking
    start_time = time.time()
    stage_markers = {
        "Running data pipeline": "MSA Search",
        "Jackhmmer": "Jackhmmer",
        "Finished Jackhmmer": "Jackhmmer done",
        "Running model inference": "Model Inference", 
        "Running structure module": "Structure Module",
        "Writing output": "Writing Output",
    }
    current_stage = "Initializing"
    jackhmmer_count = 0
    jackhmmer_total = 4  # uniref90, bfd, mgy, uniprot
    
    def format_elapsed(seconds):
        return str(timedelta(seconds=int(seconds)))
    
    def print_progress(stage, elapsed, extra=""):
        bar_width = 40
        # Simple spinner
        spinner = ["⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏"]
        spin_char = spinner[int(elapsed) % len(spinner)]
        msg = f"\r{spin_char} [{format_elapsed(elapsed)}] {stage}"
        if extra:
            msg += f" {extra}"
        print(msg + " " * 20, end="", flush=True)
    
    # Run with live output and progress
    proc = subprocess.Popen(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,
        text=True,
        bufsize=1,
        env={**os.environ, "TMPDIR": "/tmp", "TEMP": "/tmp", "TMP": "/tmp"}
    )
    
    print("Progress:")
    print("-" * 60)
    
    last_progress_time = time.time()
    for line in proc.stdout:
        elapsed = time.time() - start_time
        
        # Update stage based on output
        for marker, stage_name in stage_markers.items():
            if marker in line:
                if "Finished Jackhmmer" in line:
                    jackhmmer_count += 1
                    current_stage = f"Jackhmmer ({jackhmmer_count}/{jackhmmer_total})"
                else:
                    current_stage = stage_name
        
        # Print progress update every 2 seconds or on stage change
        if time.time() - last_progress_time >= 2:
            print_progress(current_stage, elapsed)
            last_progress_time = time.time()
        
        # Also print important log lines
        if any(x in line for x in ["[+]", "[X]", "Error", "error", "Finished", "Running fold job", "Output will be written"]):
            print()  # newline before log
            print(line.rstrip())
    
    rc = proc.wait()
    elapsed = time.time() - start_time
    
    print()  # Final newline
    print("\n" + "=" * 60)
    print(f"Total time: {format_elapsed(elapsed)}")
    if rc == 0:
        print("[+] AF3 test prediction completed successfully!")
    else:
        print(f"[X] AF3 test prediction failed with code {rc}")
        # Cleanup any remaining temp files
        print("\nCleaning up temp files...")
        for tmp_dir in TMPDIR.glob("tmp*"):
            try:
                shutil.rmtree(tmp_dir, ignore_errors=True)
            except:
                pass

Running minimal AF3 prediction...
This may take 10-30 minutes even for a small protein.

Command:
singularity exec --nv --bind /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_inputs:/root/af_input --bind /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_outputs/test_run:/root/af_output --bind /opt/apps/community/alphafold3/AF3_models:/root/models --bind ...

Progress:
------------------------------------------------------------
⠸ [0:00:23] Initializing                    
Running fold job test_minimal...

Output will be written in /root/af_output/test_minimal
⠹ [0:03:42] Jackhmmer (2/4)                    
I0202 23:05:48.705152 140504687703616 subprocess_utils.py:97] Finished Jackhmmer (bfd-first_non_consensus_sequences.fasta) in 199.191 seconds
⠼ [0:12:54] Jackhmmer (4/4)                    
I0202 23:15:00.737345 140538401515072 subprocess_utils.py:97] Finished Jackhmmer (uniref90_2022_05.fa) in 751.225 seconds
⠧ [0:18:07] Jackhmmer (6/4)                    
I0202 23:20:13.270

In [9]:
# Check what outputs were generated
print(f"Checking outputs in: {TEST_OUTPUT_DIR}")
print("=" * 60)

if TEST_OUTPUT_DIR.exists():
    files = list(TEST_OUTPUT_DIR.rglob("*"))
    if files:
        print(f"Found {len(files)} files/directories:\n")
        for f in sorted(files)[:20]:
            rel = f.relative_to(TEST_OUTPUT_DIR)
            if f.is_file():
                size = f.stat().st_size
                print(f"  {rel} ({size:,} bytes)")
            else:
                print(f"  {rel}/")
        if len(files) > 20:
            print(f"  ... and {len(files) - 20} more")
    else:
        print("No output files found (prediction may not have run yet).")
else:
    print("Output directory does not exist yet.")

Checking outputs in: /cwork/hsb26/ab_seq_bind_analysis/data/processed/af3_outputs/test_run
Found 29 files/directories:

  test_minimal/
  test_minimal/.ipynb_checkpoints/
  test_minimal/.ipynb_checkpoints/test_minimal_data-checkpoint.json (1,017,017 bytes)
  test_minimal/TERMS_OF_USE.md (13,036 bytes)
  test_minimal/seed-1_sample-0/
  test_minimal/seed-1_sample-0/test_minimal_seed-1_sample-0_confidences.json (32,050 bytes)
  test_minimal/seed-1_sample-0/test_minimal_seed-1_sample-0_model.cif (43,486 bytes)
  test_minimal/seed-1_sample-0/test_minimal_seed-1_sample-0_summary_confidences.json (246 bytes)
  test_minimal/seed-1_sample-1/
  test_minimal/seed-1_sample-1/test_minimal_seed-1_sample-1_confidences.json (32,043 bytes)
  test_minimal/seed-1_sample-1/test_minimal_seed-1_sample-1_model.cif (43,486 bytes)
  test_minimal/seed-1_sample-1/test_minimal_seed-1_sample-1_summary_confidences.json (246 bytes)
  test_minimal/seed-1_sample-2/
  test_minimal/seed-1_sample-2/test_minimal_seed-1_sa

## Summary

If all tests pass:
- Environment is correctly configured
- Container loads and GPU is accessible
- AF3 modules can be imported
- (Optional) Full prediction pipeline works

You're ready to run predictions using `03_run_af3_prediction.ipynb`!