In [None]:
# @title Install Dependencies and Boltz2 with CUDA support 
import sys
import subprocess
import threading
import time
import os
import shutil

# ANSI color codes for colored output 
class Color:
    """ANSI escape sequences used to colour the status lines printed by this cell."""

    # Foreground colours.
    CYAN = "\x1b[96m"
    GREEN = "\x1b[92m"
    YELLOW = "\x1b[93m"
    RED = "\x1b[91m"

    # Appended after coloured text to switch colouring off again.
    RESET = "\x1b[0m"

# Directory (relative to the current working directory) that receives the clone.
# It is also the path that gets wiped before cloning, so the clone target and
# the editable-install path below are derived from it instead of repeating the
# literal "boltz" — changing this one value keeps all three in sync.
repo_dir = "boltz"

# Ordered setup pipeline. Every entry provides:
#   "loader": text shown next to the spinner while the command runs,
#   "done":   message printed when the command exits with status 0,
#   "fail":   message printed when the command fails,
#   "cmd":    argv list that is executed for the step.
steps = [
    {
        "loader": f"{Color.CYAN}Cloning repository...{Color.RESET}",
        "done": f"{Color.GREEN}[✔] Repository cloned successfully.{Color.RESET}",
        "fail": f"{Color.RED}[✘] Repository clone failed.{Color.RESET}",
        # Explicit target directory so the clone always lands in `repo_dir`.
        "cmd": ["git", "clone", "https://github.com/jwohlwend/boltz.git", repo_dir]
    },
    {
        "loader": f"{Color.YELLOW}Installing dependencies...{Color.RESET}",
        "done": f"{Color.GREEN}[✔] Dependencies installed successfully.{Color.RESET}",
        "fail": f"{Color.RED}[✘] Dependency installation failed.{Color.RESET}",
        # Editable install of the cloned checkout with its "cuda" extra, plus
        # the helper packages the later cells import.
        "cmd": [sys.executable, "-m", "pip", "install", "-e", f"{repo_dir}[cuda]", "biopython", "numpy", "matplotlib", "pyyaml", "py3Dmol", "--quiet"]
    },
    {
        "loader": f"{Color.CYAN}Validating installation...{Color.RESET}",
        "done": f"{Color.GREEN}[✔] Validation complete.{Color.RESET}",
        "fail": f"{Color.RED}[✘] Validation failed.{Color.RESET}",
        # Runs in a child interpreter; fails if torch cannot be imported.
        "cmd": [sys.executable, "-c", "import torch; print('Torch CUDA available:', torch.cuda.is_available()); print('CUDA device count:', torch.cuda.device_count())"]
    }
]

def loader(msg, stop_event):
    """Animate a one-line console spinner until ``stop_event`` is set.

    Args:
        msg: Text displayed to the right of the spinning symbol.
        stop_event: Object exposing ``is_set()`` (a ``threading.Event`` in this
            notebook); the animation ends once it returns true.

    The line is redrawn in place (leading carriage return) about every 0.1 s.
    When the loop ends, the line is overwritten with blanks so the caller's
    next ``print`` starts on a clean line.
    """
    symbols = ["-", "\\", "|", "/"]
    i = 0
    while not stop_event.is_set():
        sys.stdout.write(f"\r[{symbols[i % len(symbols)]}] {msg}   ")
        sys.stdout.flush()
        time.sleep(0.1)
        i += 1
    sys.stdout.write("\r" + " " * (len(msg) + 10) + "\r")
    # Flush the erase sequence as well; without this it can remain buffered and
    # leave the stale spinner text on screen (the other cells' loaders flush too).
    sys.stdout.flush()

# Step 1: start from a clean slate — a leftover checkout is deleted before cloning.
if os.path.isdir(repo_dir):
    print(
        f"{Color.YELLOW}[i] Repository already exists. "
        f"Removing '{repo_dir}'...{Color.RESET}"
    )
    try:
        shutil.rmtree(repo_dir)
        print(
            f"{Color.GREEN}[✔] Existing repository removed.{Color.RESET}"
        )
    except Exception as removal_error:
        # Report the problem in colour, then re-raise so the cell stops here.
        print(
            f"{Color.RED}[✘] Failed to remove existing repository: "
            f"{removal_error}{Color.RESET}"
        )
        raise

all_success = True

# Main steps: run each command in order while a spinner thread animates.
# Execution stops at the first failing step.
for step in steps:
    stop_event = threading.Event()
    # daemon=True: a spinner thread must never keep the interpreter alive.
    t = threading.Thread(target=loader, args=(step["loader"], stop_event), daemon=True)
    t.start()

    result = None
    run_error = None
    try:
        # Capture output instead of discarding it, so the validation step's
        # report can be shown and failures can include the command's stderr.
        result = subprocess.run(
            step["cmd"], capture_output=True, text=True, errors="replace", check=True
        )
    except Exception as e:
        run_error = e
    finally:
        # Also runs on KeyboardInterrupt (which `except Exception` does not
        # catch), so interrupting the cell cannot leave the spinner running.
        stop_event.set()
        t.join()

    if run_error is not None:
        print(f"{step['fail']} {run_error}")
        # CalledProcessError carries the captured stderr; show its tail.
        stderr_text = (getattr(run_error, "stderr", None) or "").strip()
        if stderr_text:
            print("\n".join(stderr_text.splitlines()[-20:]))
        all_success = False
        break

    print(step["done"])
    # Echo whatever the step wrote to stdout (e.g. the validation step's
    # CUDA availability report).
    stdout_text = (result.stdout or "").strip()
    if stdout_text:
        print(stdout_text)

if all_success:
    print(f"{Color.GREEN}All steps completed successfully.{Color.RESET}")


In [None]:

# @title Download CCD Dataset and Test Boltz2 (silent, loader, colors)
import sys
import threading
import time
import os

# ANSI color codes for colored output
class Color:
    """Named ANSI escape sequences for the coloured console messages in this cell."""

    CYAN = "\u001b[96m"
    GREEN = "\u001b[92m"
    YELLOW = "\u001b[93m"
    RED = "\u001b[91m"
    RESET = "\u001b[0m"  # ends the coloured span

def loader(msg, stop_event):
    """Draw a rotating-bar spinner next to ``msg`` until ``stop_event`` is set.

    Args:
        msg: Text printed after the spinner symbol.
        stop_event: Object whose ``is_set()`` is polled before every frame.

    Each frame rewrites the current console line; once the loop ends the line
    is blanked out and the cursor is returned to column 0.
    """
    frames = "-\\|/"
    tick = 0
    while True:
        if stop_event.is_set():
            break
        frame = frames[tick % len(frames)]
        sys.stdout.write(f"\r[{frame}] {msg}   ")
        sys.stdout.flush()
        time.sleep(0.1)
        tick += 1

    # Overwrite the spinner line with spaces, then go back to the line start.
    blank = " " * (len(msg) + 10)
    sys.stdout.write(f"\r{blank}\r")
    sys.stdout.flush()

# Step 1: Create data directory
os.makedirs("/content/boltz_data", exist_ok=True)

# Step 2: Write YAML file
# The fields of each entity (id, sequence, ccd, smiles) must be indented deeper
# than the entity key ("protein" / "ligand"). At the same indent as the key,
# YAML parses them as sibling keys and the entity itself becomes null.
# Plain string on purpose: there is nothing to interpolate.
yaml_content = """\
version: 1
sequences:
  - protein:
      id: [A, B]
      sequence: MVTPEGNVSLVDESLLVGVTDEDRAVRSAHQF
  - ligand:
      id: [C, D]
      ccd: SAH   # fetch ligand from CCD
  - ligand:
      id: [E, F]
      smiles: 'N[C@@H](Cc1ccc(O)cc1)C(=O)O'
"""
with open("/content/boltz_data/test.yaml", "w") as f:
    f.write(yaml_content)

# Step 3: Run boltz predict (silent)
import subprocess  # this cell does not import it at the top

step_msg = f"{Color.YELLOW}Downloading CCD Dataset...{Color.RESET}"
stop_event = threading.Event()
# daemon=True: a spinner thread must never keep the interpreter alive.
t = threading.Thread(target=loader, args=(step_msg, stop_event), daemon=True)
t.start()

run_error = None
try:
    subprocess.run(
        ["boltz", "predict", "test.yaml", "--use_msa_server"],
        cwd="/content/boltz_data",
        stdout=subprocess.DEVNULL,
        # Keep stderr so a failure can be diagnosed instead of only
        # reporting the exit status.
        stderr=subprocess.PIPE,
        text=True,
        errors="replace",
        check=True
    )
except Exception as e:
    run_error = e
finally:
    # Also runs on KeyboardInterrupt (not caught by `except Exception`), so
    # interrupting the cell cannot leave the spinner thread running.
    stop_event.set()
    t.join()

if run_error is None:
    print(f"{Color.GREEN}[✔] CCD Dataset Downloaded and validated.{Color.RESET}")
else:
    print(f"{Color.RED}[✘] CCD Dataset Download or validation failed: {run_error}{Color.RESET}")
    # CalledProcessError carries the captured stderr; show its tail.
    stderr_text = (getattr(run_error, "stderr", None) or "").strip()
    if stderr_text:
        print("\n".join(stderr_text.splitlines()[-20:]))


In [None]:

# @title Boltz2 Engine
import sys
import threading
import time
import os
import re
import shutil
import numpy as np
import matplotlib.pyplot as plt
from Bio.PDB import MMCIFParser, PDBIO
import py3Dmol
import subprocess

# ANSI color codes for colored output
class Color:
    """Escape-sequence constants for colouring this cell's console output."""

    # Each value is ESC + "[" + code + "m".
    RED = "\x1b[91m"
    GREEN = "\x1b[92m"
    YELLOW = "\x1b[93m"
    CYAN = "\x1b[96m"
    RESET = "\x1b[0m"

def loader(msg, stop_event):
    """Show an in-place console spinner beside ``msg`` while work is running.

    Args:
        msg: Label written after the spinner glyph.
        stop_event: Polled via ``is_set()``; a true result ends the animation.

    After the last frame the line is cleared with spaces and the cursor is
    moved back to the start of the line.
    """
    glyphs = ("-", "\\", "|", "/")
    position = 0
    while not stop_event.is_set():
        glyph = glyphs[position]
        sys.stdout.write(f"\r[{glyph}] {msg}   ")
        sys.stdout.flush()
        time.sleep(0.1)
        # Advance cyclically through the four glyphs.
        position = (position + 1) % len(glyphs)

    eraser = "\r" + " " * (len(msg) + 10) + "\r"
    sys.stdout.write(eraser)
    sys.stdout.flush()

# Set up parameters
# Create the working directory before entering it, so this cell does not fail
# with FileNotFoundError on a runtime where the directory does not exist yet.
os.makedirs("/content/boltz_data/", exist_ok=True)
os.chdir("/content/boltz_data/")
protein_seq = "MALWMRLLPLLALLALWGPDPAAAFVNQHLCGSHLVEALYLVCGERGFFYTPKTRREAEDLQVGQVELGGGPGAGSLQPLALEGSLQKRGIVEQCCTSICSLYQLENYCN"
output_dir = "Insulin"
recycling_steps = 3
diffusion_samples = 1
use_potentials = True
override = True

# Remove results of a previous run so stale files are never picked up.
output_path = f"/content/boltz_data/{output_dir}"
if os.path.exists(output_path):
    shutil.rmtree(output_path)

# Normalise the sequence: drop all whitespace (including line breaks from a
# pasted FASTA body) and upper-case it.
protein_seq = re.sub(r"\s+", "", protein_seq).upper()

# The sequence is interpolated unquoted into the YAML below, so anything other
# than plain letters (or an empty value) would produce a broken input file.
if not re.fullmatch(r"[A-Z]+", protein_seq):
    raise ValueError(
        "protein_seq must be a non-empty string of one-letter residue codes (A-Z)."
    )

# Write YAML file
yaml_content = f"""\
version: 1
sequences:
  - protein:
      id: A
      sequence: {protein_seq}
"""
with open("protein.yaml", "w") as f:
    f.write(yaml_content)
print(f"{Color.GREEN}[✔] Saved as protein.yaml{Color.RESET}")

# Prepare command
# NOTE(review): the PAE heatmap drawn later in this cell reads
# pae_protein_model_<n>.npz. The Boltz CLI documents --write_full_pae as the
# switch that saves that file, so it is requested explicitly here — confirm
# against the installed Boltz version.
write_full_pae = True

cmd = [
    "boltz", "predict", "protein.yaml",
    "--use_msa_server",
    "--out_dir", output_dir,
    "--recycling_steps", str(recycling_steps),
    "--diffusion_samples", str(diffusion_samples),
]
if use_potentials:
    cmd.append("--use_potentials")
if override:
    cmd.append("--override")
if write_full_pae:
    cmd.append("--write_full_pae")

# Run boltz predict with loader and silent output
step_msg = f"{Color.YELLOW}Running Boltz2 prediction...{Color.RESET}"
stop_event = threading.Event()
# daemon=True: a spinner thread must never keep the interpreter alive.
t = threading.Thread(target=loader, args=(step_msg, stop_event), daemon=True)
t.start()

run_error = None
try:
    subprocess.run(
        cmd,
        stdout=subprocess.DEVNULL,
        # Keep stderr so a failure can be diagnosed instead of only
        # reporting the exit status.
        stderr=subprocess.PIPE,
        text=True,
        errors="replace",
        check=True,
    )
except Exception as e:
    run_error = e
finally:
    # Also runs on KeyboardInterrupt (not caught by `except Exception`), so
    # interrupting a long prediction cannot leave the spinner thread running.
    stop_event.set()
    t.join()

if run_error is None:
    print(f"{Color.GREEN}[✔] Boltz2 run finished successfully!{Color.RESET}")
else:
    print(f"{Color.RED}[✘] Boltz2 run failed: {run_error}{Color.RESET}")
    # CalledProcessError carries the captured stderr; show its tail.
    stderr_text = (getattr(run_error, "stderr", None) or "").strip()
    if stderr_text:
        print("\n".join(stderr_text.splitlines()[-20:]))

# Visualize results
def visualize_boltz_results(output_dir, model_id=0):
    """Visualize the Boltz2 results stored under ``/content/boltz_data/<output_dir>``.

    Shows, in order:
      - the 3D protein structure (CIF converted to PDB, rendered with py3Dmol),
      - the pLDDT confidence plot,
      - the PAE heatmap.

    Args:
        output_dir: Name of the results folder inside ``/content/boltz_data``.
        model_id: Index used in the ``*_model_<model_id>`` file names.

    Raises:
        FileNotFoundError: If the predicted structure (CIF) does not exist.

    Side effects:
        Writes ``/content/boltz_data/<output_dir>_model<model_id>.pdb``.

    A missing pLDDT or PAE file is reported and that plot is skipped, so one
    absent optional output does not abort the rest of the visualisation.
    """
    pred_dir = f"/content/boltz_data/{output_dir}/boltz_results_protein/predictions/protein"
    cif_file = f"{pred_dir}/protein_model_{model_id}.cif"
    pdb_file = f"/content/boltz_data/{output_dir}_model{model_id}.pdb"
    plddt_file = f"{pred_dir}/plddt_protein_model_{model_id}.npz"
    pae_file = f"{pred_dir}/pae_protein_model_{model_id}.npz"

    # Fail early with the exact path instead of erroring inside the parser.
    if not os.path.isfile(cif_file):
        raise FileNotFoundError(f"Predicted structure not found: {cif_file}")

    # --- Convert CIF to PDB ---
    parser = MMCIFParser(QUIET=True)
    structure = parser.get_structure("protein", cif_file)
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_file)

    # --- Load PDB into py3Dmol viewer ---
    with open(pdb_file, "r") as f:
        pdb_data = f.read()

    # --- 3D Structure Viewer ---
    print("\n==============================")
    print("🔬 3D Protein Structure")
    print("Shows the predicted protein fold as a cartoon model.\nColors follow spectrum by residue index.")
    print("==============================\n")

    viewer = py3Dmol.view(width=600, height=500)
    viewer.addModel(pdb_data, "pdb")
    viewer.setStyle({"cartoon": {"color": "spectrum"}})
    viewer.zoomTo()
    viewer.show()

    # --- pLDDT Plot ---
    print("\n==============================")
    print("📈 Predicted Local Distance Difference Test (pLDDT)")
    print("Confidence score per residue: Higher = more reliable structure.")
    print("==============================\n")

    if os.path.isfile(plddt_file):
        # Context manager closes the .npz archive once the array is read.
        with np.load(plddt_file) as plddt_npz:
            plddt = plddt_npz["plddt"]
        plt.figure(figsize=(10,4))
        plt.plot(plddt, label="pLDDT", color="blue")
        plt.xlabel("Residue index")
        plt.ylabel("pLDDT score")
        plt.title(f"Model {model_id} | Confidence per residue")
        plt.legend()
        plt.tight_layout(pad=3.0)
        plt.show()
    else:
        print(f"[i] pLDDT file not found, skipping plot: {plddt_file}")

    # --- PAE Heatmap ---
    print("\n==============================")
    print("🗺️ Predicted Aligned Error (PAE) Heatmap")
    print("Shows expected positional error between residue pairs.\nLower values = more reliable alignment.")
    print("==============================\n")

    if os.path.isfile(pae_file):
        with np.load(pae_file) as pae_npz:
            pae = pae_npz["pae"]
        plt.figure(figsize=(6,5))
        plt.imshow(pae, cmap="viridis", origin="lower")
        plt.colorbar(label="Predicted Aligned Error (Å)")
        plt.title(f"Model {model_id} | PAE Heatmap")
        plt.xlabel("Residue index")
        plt.ylabel("Residue index")
        plt.tight_layout(pad=3.0)
        plt.show()
    else:
        print(f"[i] PAE file not found, skipping heatmap: {pae_file}")

# Render the results only when the first model's structure file was produced
# (i.e. the prediction above actually wrote its output).
if os.path.exists(
    f"/content/boltz_data/{output_dir}/boltz_results_protein/predictions/protein/protein_model_0.cif"
):
    visualize_boltz_results(output_dir, 0)


In [None]:
# @title Copy Results to Drive
import shutil, os
from google.colab import drive
from Bio.PDB import MMCIFParser, PDBIO

# Make Google Drive available under /content/drive
drive.mount('/content/drive')

# Source (local) and destination (Drive) folders; `output_dir` comes from the
# prediction cell.
drive_output_dir = f"/content/drive/MyDrive/Boltz2_Results/{output_dir}"
local_output_path = f"/content/boltz_data/{output_dir}"

# The first predicted model is converted from mmCIF to PDB; the PDB is written
# into the local results folder.
cif_file = f"{local_output_path}/boltz_results_protein/predictions/protein/protein_model_0.cif"
pdb_file = f"{local_output_path}/{output_dir}.pdb"

parser = MMCIFParser(QUIET=True)
structure = parser.get_structure("prot", cif_file)
io = PDBIO()
io.set_structure(structure)
io.save(pdb_file)
print(f"✅ PDB saved locally: {pdb_file}")

# copytree needs a destination that does not exist yet, so a previous export
# with the same name is deleted first.
if os.path.exists(drive_output_dir):
    print(f"⚠️ Removing existing folder {drive_output_dir}")
    shutil.rmtree(drive_output_dir)
    print("✅ Old Drive folder removed.")

# Mirror the whole local results folder to Drive
shutil.copytree(local_output_path, drive_output_dir)
print(f"✅ All results copied to Google Drive: {drive_output_dir}")

# Copy the PDB once more by itself (optional, just in case)
drive_pdb_file = os.path.join(drive_output_dir, os.path.basename(pdb_file))
shutil.copy(pdb_file, drive_pdb_file)
print(f"✅ PDB copied to Google Drive: {drive_pdb_file}")


In [None]:
# @title Download Results (.zip)
from google.colab import files
from Bio.PDB import MMCIFParser, PDBIO
import shutil
import os

# Folder whose contents are packaged for download; `output_dir` comes from the
# prediction cell.
local_output_path = f"/content/boltz_data/{output_dir}"

# Input mmCIF of the first model and the PDB file that is written next to the
# other results.
cif_file = f"{local_output_path}/boltz_results_protein/predictions/protein/protein_model_0.cif"
pdb_file = f"{local_output_path}/{output_dir}.pdb"

# mmCIF -> PDB conversion via Biopython
parser = MMCIFParser(QUIET=True)
structure = parser.get_structure("prot", cif_file)
io = PDBIO()
io.set_structure(structure)
io.save(pdb_file)

# Target archive
zip_file = f"/content/{output_dir}.zip"

# Delete an archive left over from an earlier run
if os.path.exists(zip_file):
    os.remove(zip_file)

# Zip the complete results folder; make_archive appends ".zip" to the base
# name, which yields `zip_file`.
shutil.make_archive(f"/content/{output_dir}", 'zip', local_output_path)

# Hand the archive to the browser
files.download(zip_file)

# Success message
print(f"✅ Download successful! All results from '{output_dir}' are saved in '{zip_file}'")
