# DiffSBDD Toy Generation Demo

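This notebook walks through a single ligand generation using the DiffSBDD checkpoint included in the repository. It mirrors what `generate_ligands.py` does, but keeps the workflow reproducible inside a notebook. After generation, it extracts the ligand's SMILES with RDKit, writes a Boltz input YAML, and runs the repository's affinity workflow on that input.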


> ⚠️ DiffSBDD uses PyTorch Lightning and OpenBabel and may take several minutes on CPU-only hardware. This example keeps `n_samples=1` to stay lightweight.


In [None]:
from __future__ import annotations

import os
from pathlib import Path

# Resolve the repository root. The current directory is accepted as-is when it
# holds a `scripts` directory; otherwise the current directory and each of its
# ancestors are searched for one that contains `DiffSBDD`, and the process
# changes into the first match.
PROJECT_ROOT = Path.cwd().resolve()
if not PROJECT_ROOT.joinpath("scripts").is_dir():
    located_root = next(
        (
            ancestor
            for ancestor in (PROJECT_ROOT, *PROJECT_ROOT.parents)
            if ancestor.joinpath("DiffSBDD").exists()
        ),
        None,
    )
    if located_root is None:
        raise RuntimeError("Could not locate repository root containing DiffSBDD directory.")
    # Later cells build paths relative to the root, so move there as well.
    os.chdir(located_root)
    PROJECT_ROOT = located_root

print(f"Project root: {PROJECT_ROOT}")


In [None]:
from datetime import datetime

# Locations of the DiffSBDD inputs (checkpoint, example protein, reference
# ligand) and of the directory that collects everything this notebook writes.
DIFFSBDD_DIR = PROJECT_ROOT.joinpath("DiffSBDD")
checkpoint_path = DIFFSBDD_DIR.joinpath("checkpoints", "crossdocked_fullatom_cond.ckpt")
pdb_path = DIFFSBDD_DIR.joinpath("example", "3rfm.pdb")
ref_ligand_path = DIFFSBDD_DIR.joinpath("example", "3rfm_B_CFF.sdf")

output_dir = PROJECT_ROOT.joinpath("scripts", "eval", "toy_generation")
output_dir.mkdir(parents=True, exist_ok=True)

# One line per path, in the same order as they were defined above.
print(
    f"Checkpoint: {checkpoint_path}",
    f"Protein PDB: {pdb_path}",
    f"Reference ligand: {ref_ligand_path}",
    f"Output directory: {output_dir}",
    sep="\n",
)


In [None]:
import shlex
import subprocess
import sys

# The timestamp is embedded in the output file name and reused by later cells
# when naming the Boltz input YAML.
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
outfile = output_dir / f"generated_{timestamp}.sdf"

# Invoke generate_ligands.py with the interpreter that is running this
# notebook, so the subprocess sees the same environment.
cmd = [
    sys.executable,
    str(DIFFSBDD_DIR / "generate_ligands.py"),
    str(checkpoint_path),
    "--pdbfile",
    str(pdb_path),
    "--ref_ligand",
    str(ref_ligand_path),
    "--outfile",
    str(outfile),
    "--n_samples",
    "1",
    "--batch_size",
    "1",
    "--resamplings",
    "5",
    "--timesteps",
    "50",
    "--sanitize",
    "--relax",
]

# Quote each argument so the echoed command stays unambiguous (and can be
# pasted into a shell) even when a path contains spaces.
print("Running:", " ".join(shlex.quote(part) for part in cmd))
result = subprocess.run(cmd, cwd=PROJECT_ROOT, text=True, capture_output=True)
print(result.stdout)
if result.returncode != 0:
    print(result.stderr)
    raise RuntimeError(f"DiffSBDD generation failed with exit code {result.returncode}")

# A zero exit code alone does not show that the SDF exists; report what is
# actually on disk instead of claiming success unconditionally.
if outfile.exists():
    print(f"Ligand written to {outfile}")
else:
    print(result.stderr)
    print(f"DiffSBDD exited with code 0 but no SDF was found at {outfile}")


In [None]:
# Print the first 20 lines of the generated SDF, or say where it was expected
# when the file is missing.
if not outfile.exists():
    print(f"No SDF found at {outfile}")
else:
    with outfile.open("r", encoding="utf-8") as sdf_file:
        # zip() against range(20) stops after at most 20 lines.
        leading_lines = [line for _, line in zip(range(20), sdf_file)]
    head = "".join(leading_lines)
    print("Preview of generated SDF (first 20 lines):\n")
    print(head)


In [None]:
from rdkit import Chem

def extract_smiles_from_sdf(path: Path) -> str:
    """Return the SMILES string of the first molecule in an SDF file.

    Args:
        path: Location of the SDF file to read.

    Returns:
        The string produced by ``Chem.MolToSmiles`` for the first record.

    Raises:
        ValueError: If the file holds no records, or if the first record
            could not be parsed into a molecule.
    """
    supplier = Chem.SDMolSupplier(str(path))
    # Indexing an empty supplier raises a bare IndexError; report the problem
    # with the same exception type as the unreadable-molecule case below.
    if len(supplier) == 0:
        raise ValueError(f"No molecules found in {path}")
    mol = supplier[0]
    if mol is None:
        raise ValueError(f"Could not read molecule from {path}")
    return Chem.MolToSmiles(mol)

ligand_smiles = extract_smiles_from_sdf(outfile)
print(f"Ligand SMILES: {ligand_smiles}")


In [None]:
# Boltz input document: one protein chain ("A"), the generated ligand ("LIG"),
# and an affinity property that names the ligand as the binder.
# NOTE(review): the protein sequence is a hard-coded literal and is not derived
# from `pdb_path` — confirm it is the intended target for the generated ligand.
protein_entry = {
    "id": "A",
    "sequence": "MSEQNNTEMTFQIQRIYTKDISFEAPNAPHVFQKDWLDNEFGYSNWSKIDDEIDDN",
    "msa": "empty",
}
ligand_entry = {
    "id": "LIG",
    "smiles": ligand_smiles,
}
boltz_template = {
    "version": 1,
    "sequences": [
        {"protein": protein_entry},
        {"ligand": ligand_entry},
    ],
    "properties": [{"affinity": {"binder": "LIG"}}],
}

def write_boltz_yaml(path: Path) -> None:
    """Serialize the module-level ``boltz_template`` to ``path`` as YAML.

    Args:
        path: Destination file; it is opened in write mode, so any existing
            content is replaced.
    """
    # No visible cell imports PyYAML, so the original call failed with
    # NameError; import it here to keep the function self-contained.
    import yaml

    with path.open("w", encoding="utf-8") as handle:
        # sort_keys=False keeps the template's key order in the output.
        yaml.safe_dump(boltz_template, handle, sort_keys=False)

# Boltz inputs go in their own subdirectory of the notebook output directory;
# the YAML file name carries the timestamp of the generation run.
boltz_input_dir = output_dir.joinpath("boltz_inputs")
boltz_input_dir.mkdir(parents=True, exist_ok=True)

boltz_yaml_path = boltz_input_dir.joinpath(f"ligand_{timestamp}.yaml")
write_boltz_yaml(path=boltz_yaml_path)
print(f"Boltz YAML written to {boltz_yaml_path}")


In [None]:
import json
import sys

# The `scripts` package lives under PROJECT_ROOT; make sure that directory is
# importable even when the kernel was started somewhere else.
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

from scripts.eval.random import run_random_affinity_workflow

boltz_output_dir = output_dir / "boltz_outputs"
boltz_output_dir.mkdir(parents=True, exist_ok=True)

# Run the repository's affinity workflow on the single YAML written above,
# on CPU, with one diffusion sample per stage.
summary = run_random_affinity_workflow(
    chemical_space=boltz_yaml_path,
    sample_size=1,
    column=None,
    seed=None,
    template_path=None,
    output_dir=boltz_output_dir,
    binder_id="LIG",
    cache_dir=Path("~/.boltz").expanduser(),
    accelerator="cpu",
    sampling_steps=10,
    diffusion_samples=1,
    sampling_steps_affinity=20,
    diffusion_samples_affinity=1,
    keep_inputs=True,
)

# `json` was never imported by the original cells, so this line used to fail
# with NameError; the import at the top of this cell fixes that.
print(json.dumps(summary, indent=2))


In [None]:
# Imported here so this cell works on its own; no other visible cell brings
# `json` into scope.
import json

# Re-read the affinity summary from disk, if a file exists at this path.
summary_path = boltz_output_dir / "summaries" / "affinity_summary.json"
if summary_path.exists():
    with summary_path.open("r", encoding="utf-8") as handle:
        summary_loaded = json.load(handle)
    print("Boltz affinity summary:\n")
    print(json.dumps(summary_loaded, indent=2))
else:
    print(f"No affinity summary produced; check Boltz logs under {boltz_output_dir}")
