In [None]:
import os
import time
import signal
import sys
import json
import numpy as np
import matplotlib.pyplot as plt
import ipywidgets as widgets
import py3Dmol
from google.colab import files
from IPython.display import display
from inference.utils import parse_pdb
from colabdesign.rf.utils import get_ca, fix_contigs, fix_partial_contigs, fix_pdb, sym_it
from colabdesign.shared.protein import pdb_to_string
from colabdesign.shared.plot import plot_pseudo_3D

# Ensure necessary directories and dependencies are set up
def setup_environment():
    """Install the tools and fetch the model files this notebook relies on.

    Every stage is skipped when its marker path already exists, so calling
    the function again only performs the stages that are still missing:

    * ``params``             -> install aria2 and start the background downloads
    * ``RFdiffusion``        -> clone the repository and pip-install its requirements
    * ``colabdesign``        -> pip-install ColabDesign and symlink the package locally
    * ``RFdiffusion/models`` -> move the downloaded checkpoints into place
    * ``sys.path``           -> make the ``RFdiffusion`` checkout importable

    Exit codes of the shell commands are not inspected.
    """
    # Stage 1: kick off the large downloads in a background subshell so the
    # installs below can proceed meanwhile. The subshell touches
    # params/done.txt as its last command.
    if not os.path.exists("params"):
        os.system("apt-get install aria2")
        os.mkdir("params")
        background_download = "(\
        aria2c -q -x 16 https://files.ipd.uw.edu/krypton/schedules.zip; \
        aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/6f5902ac237024bdd0c176cb93063dc4/Base_ckpt.pt; \
        aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/e29311f6f1bf1af907f9ef9f44b8328b/Complex_base_ckpt.pt; \
        aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/f572d396fae9206628714fb2ce00f72e/Complex_beta_ckpt.pt; \
        aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar; \
        tar -xf alphafold_params_2022-12-06.tar -C params; \
        touch params/done.txt) &"
        os.system(background_download)

    # Stage 2: RFdiffusion checkout plus its Python dependencies, in order.
    if not os.path.exists("RFdiffusion"):
        print("Installing RFdiffusion...")
        install_steps = (
            "git clone https://github.com/sokrypton/RFdiffusion.git",
            "pip install jedi omegaconf hydra-core icecream pyrsistent pynvml decorator",
            "pip install git+https://github.com/NVIDIA/dllogger#egg=dllogger",
            "pip install --no-dependencies dgl==2.0.0 -f https://data.dgl.ai/wheels/cu121/repo.html",
            "pip install --no-dependencies e3nn==0.3.3 opt_einsum_fx",
            "cd RFdiffusion/env/SE3Transformer; pip install .",
            "wget -qnc https://files.ipd.uw.edu/krypton/ananas",
            "chmod +x ananas",
        )
        for shell_line in install_steps:
            os.system(shell_line)

    # Stage 3: ColabDesign, exposed in the working directory through a symlink.
    if not os.path.exists("colabdesign"):
        print("Installing ColabDesign...")
        for shell_line in (
            "pip -q install git+https://github.com/sokrypton/ColabDesign.git",
            "ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign",
        ):
            os.system(shell_line)

    # Stage 4: wait for aria2 to finish each checkpoint (its "<file>.aria2"
    # control file is gone), then move the checkpoints and unpack the schedules.
    if not os.path.exists("RFdiffusion/models"):
        print("Downloading RFdiffusion models...")
        os.mkdir("RFdiffusion/models")
        checkpoints = ["Base_ckpt.pt", "Complex_base_ckpt.pt", "Complex_beta_ckpt.pt"]
        for checkpoint in checkpoints:
            control_file = f"{checkpoint}.aria2"
            while os.path.exists(control_file):
                time.sleep(5)
        os.system("mv " + " ".join(checkpoints) + " RFdiffusion/models")
        os.system("unzip schedules.zip; rm schedules.zip")

    # Stage 5: make the checkout importable and select the DGL backend.
    if "RFdiffusion" in sys.path:
        return
    os.environ["DGLBACKEND"] = "pytorch"
    sys.path.append("RFdiffusion")

# Fetch a PDB file either by upload or from a database
def get_pdb(pdb_code=None):
    """Resolve ``pdb_code`` to the name of a local structure file.

    Resolution order:

    1. Falsy ``pdb_code``: prompt for an upload through Colab and store the
       first uploaded file as ``tmp.pdb``.
    2. ``pdb_code`` is an existing path: return it unchanged.
    3. Four-character code: use ``<code>.pdb1``, fetching and unpacking it
       from files.rcsb.org when it is not already present.
    4. Anything else: fetch ``AF-<code>-F1-model_v3.pdb`` from
       alphafold.ebi.ac.uk.

    Returns:
        The file name as a string. Download results are not checked, so the
        returned file may not exist if a download failed.
    """
    if not pdb_code:
        uploaded = files.upload()
        first_name = list(uploaded)[0]
        with open("tmp.pdb", "wb") as handle:
            handle.write(uploaded[first_name])
        return "tmp.pdb"

    if os.path.exists(pdb_code):
        return pdb_code

    if len(pdb_code) == 4:
        assembly_file = f"{pdb_code}.pdb1"
        if not os.path.exists(assembly_file):
            os.system(f"wget -qnc https://files.rcsb.org/download/{pdb_code}.pdb1.gz")
            os.system(f"gunzip {pdb_code}.pdb1.gz")
        return assembly_file

    os.system(f"wget -qnc https://alphafold.ebi.ac.uk/files/AF-{pdb_code}-F1-model_v3.pdb")
    return f"AF-{pdb_code}-F1-model_v3.pdb"

# Function to run an external command and track process
def run_command(command):
    """Start ``command`` in the background via the shell and return its PID.

    The shell writes the PID of the backgrounded job (``$!``) to a scratch
    file in /dev/shm; the file is read back, parsed and then removed.

    Args:
        command: Shell command line, launched under ``nohup``.

    Returns:
        The integer PID reported by the shell.
    """
    pid_file = '/dev/shm/pid'
    os.system(f'nohup {command} & echo $! > {pid_file}')
    with open(pid_file, 'r') as handle:
        contents = handle.read()
        background_pid = int(contents.strip())
    os.remove(pid_file)
    return background_pid

def is_process_running(pid):
    """Report whether a process with the given PID currently exists.

    Signal 0 performs the existence and permission checks without
    delivering a signal.

    Args:
        pid: Process ID to probe.

    Returns:
        True if the process exists, including when it is owned by another
        user; False otherwise.
    """
    try:
        os.kill(pid, 0)
    except PermissionError:
        # EPERM: the process exists but we may not signal it. It is running.
        return True
    except OSError:
        # ESRCH (no such process) and any other failure: treat as not running.
        return False
    return True

# Function to visualize the execution progress
def run_task(command, steps, visual="none"):
    """Run ``command`` in the background and display its step-by-step progress.

    The function waits for ``/dev/shm/0.pdb`` ... ``/dev/shm/{steps-1}.pdb``
    to appear in order. For each file it advances a progress bar, optionally
    renders the structure, and deletes the file. If the process has exited
    and the expected file is still missing, the bar is marked "Failed" and
    tracking stops. The call blocks until the process has exited.

    Args:
        command: Shell command handed to ``run_command``.
        steps: Number of per-step PDB files to wait for.
        visual: ``"none"`` for the progress bar only, ``"image"`` for a
            pseudo-3D matplotlib plot, ``"interactive"`` for a py3Dmol viewer.
            Any other value clears the output area but draws nothing.

    Returns:
        None.
    """
    progress = widgets.FloatProgress(min=0, max=1, description='Running', bar_style='info')
    output_widget = widgets.Output()
    display(widgets.VBox([progress, output_widget]))

    pid = run_command(command)
    fail = False

    for n in range(steps):
        frame_path = f"/dev/shm/{n}.pdb"

        while not os.path.exists(frame_path):
            time.sleep(0.1)
            if not is_process_running(pid):
                # The process may have written this frame just before exiting;
                # only report a failure if the file is really missing.
                fail = not os.path.exists(frame_path)
                break

        if fail:
            progress.bar_style = 'danger'
            progress.description = "Failed"
            break

        progress.value = (n + 1) / steps

        if visual != "none":
            with output_widget:
                output_widget.clear_output(wait=True)
                if visual == "image":
                    xyz, bfact = get_ca(frame_path, get_bfact=True)
                    plt.figure(dpi=100, figsize=(6, 6))
                    plot_pseudo_3D(xyz, c=bfact, cmin=0.5, cmax=0.9)
                    plt.show()
                elif visual == "interactive":
                    # Read the frame only when it is needed, and close the handle.
                    with open(frame_path) as frame_file:
                        pdb_str = frame_file.read()
                    viewer = py3Dmol.view()
                    viewer.addModel(pdb_str, "pdb")
                    viewer.setStyle({"cartoon": {"colorscheme": {"prop": "b", "gradient": "roygb", "min": 0.5, "max": 0.9}}})
                    viewer.zoomTo()
                    viewer.show()

        os.remove(frame_path)

    # Block until the background process has exited, also after a failure.
    while is_process_running(pid):
        time.sleep(0.1)

# Run the setup when this cell executes. Each stage is skipped if its marker
# path already exists, so re-running the cell is cheap.
# NOTE(review): the `from inference.utils ...` and `from colabdesign ...`
# imports at the top of this cell execute before this call, yet the packages
# they need are installed (and "RFdiffusion" is added to sys.path) inside
# setup_environment(). On a fresh runtime those imports would presumably fail
# first. Confirm, and move them below this call if so.
setup_environment()


In [None]:
import os
import random
import string

# Unconditional diffusion generates a random protein backbone
# based on previous training data. This script runs RFDiffusion
# with specified parameters to create such a backbone.

def generate_unique_path(base_name):
    """
    Return a run name whose first output PDB does not exist yet.

    ``base_name`` is returned unchanged when ``outputs/<base_name>_0.pdb`` is
    absent. Otherwise candidates of the form ``<base_name>_<5 random
    lowercase letters or digits>`` are tried until one is free.
    """
    alphabet = string.ascii_lowercase + string.digits
    candidate = base_name
    while True:
        if not os.path.exists(f"outputs/{candidate}_0.pdb"):
            return candidate
        suffix = ''.join(random.choices(alphabet, k=5))
        candidate = base_name + "_" + suffix

# Define parameters for the diffusion process
config = {
    "name": "unconditional",  # Base name for the generated structure
    "contigs": "",  # Define residue count and chain structure
    "pdb": "",  # Input PDB file (if applicable)
    "iterations": 25,  # Number of denoising steps
    "hotspot": "",  # Hotspot regions (optional)
    "num_designs": 1,  # Number of backbone designs to generate
    "visual": "interactive",  # Visualization mode
    "symmetry": "none",  # Symmetry constraints
    "order": 1,  # Order of sequence prediction
    "chains": "",  # Specific chain information
    "add_potential": True,  # Whether to add potential-based constraints
    "partial_T": "auto",  # Annealing schedule
    "use_beta_model": False  # Whether to use a beta model variant
}

# Generate a unique output path
config["path"] = generate_unique_path(config["name"])

# Strip single and double quote characters from every string value
for key, value in config.items():
    if isinstance(value, str):
        config[key] = value.replace("'", "").replace('"', "")

# "name" is only used to derive "path". The other run_diffusion call sites in
# this notebook pass contigs/pdb/order/iterations/symmetry/hotspot/path/chains/
# add_potential/num_designs/use_beta_model/visual/partial_T and no "name",
# so it is left out of the keyword arguments here as well.
# NOTE(review): run_diffusion is defined outside this view; confirm it has no
# `name` parameter.
diffusion_kwargs = {key: value for key, value in config.items() if key != "name"}

# Execute RFDiffusion with the defined parameters
print("Starting RFDiffusion process...")
contigs, copies = run_diffusion(**diffusion_kwargs)


In [None]:
# Run ProteinMPNN and AlphaFold
# Requires the unconditional-diffusion cell to have been run first: it provides
# `config`, `contigs` and `copies`.

# Parameters for ProteinMPNN - used to generate multiple sequences for a given protein backbone
num_sequences = 8  # Number of sequences to generate
sampling_temperature = 0.1  # Controls sequence diversity; higher values increase variation
remove_amino_acid = "C"  # Specify amino acids to exclude (e.g., cysteine)
use_soluble_mpnn = False  # Use a model optimized for solubility

# Parameters for AlphaFold - used to validate generated sequences
initial_structure_guess = False  # If True, uses an initial guess for folding
num_recycles = 2  # Number of recycles in AlphaFold structure refinement
use_multimer_model = False  # Whether to use AlphaFold-Multimer for complex structures

# Ensure AlphaFold parameters are available before proceeding
if not os.path.isfile("params/done.txt"):
    print("Downloading AlphaFold parameters...")
    while not os.path.isfile("params/done.txt"):
        time.sleep(5)  # Wait until the download completes

# The diffusion cell stores the output path and design count only inside
# `config`; bind them here so the references below do not raise NameError.
path = config["path"]
num_designs = config["num_designs"]

# Construct command-line options for running ProteinMPNN and AlphaFold
contigs_str = ":".join(contigs)  # Format contigs as a string with colon separators
options = [
    f"--pdb=outputs/{path}_0.pdb",  # Input PDB file for ProteinMPNN
    f"--loc=outputs/{path}",  # Output directory
    f"--contig={contigs_str}",  # Define protein backbone contigs
    f"--copies={copies}",  # Number of copies (if applicable)
    f"--num_seqs={num_sequences}",  # Number of sequences to generate
    f"--num_recycles={num_recycles}",  # AlphaFold refinement steps
    f"--rm_aa={remove_amino_acid}",  # Excluded amino acids
    f"--mpnn_sampling_temp={sampling_temperature}",  # Diversity control
    f"--num_designs={num_designs}"  # Number of final designs
]

# Add optional flags based on user settings
if initial_structure_guess:
    options.append("--initial_guess")
if use_multimer_model:
    options.append("--use_multimer")
if use_soluble_mpnn:
    options.append("--use_soluble")

# Convert list of options into a single command string
command_options = " ".join(options)

# Run ProteinMPNN and AlphaFold
os.system(f"python colabdesign/rf/designability_test.py {command_options}")

# Display AlphaFold results
print("DISPLAYING ALPHAFOLD RESULTS")
if num_designs > 1:
    # Create an interactive dropdown menu to browse multiple designs
    def update_display(change):
        """Redraw the selected design when the dropdown's value changes."""
        if change['name'] == 'value':
            with output:
                output.clear_output(wait=True)
                plot_pdb_final(change['new'])

    dropdown = widgets.Dropdown(
        options=["best"] + [str(k) for k in range(num_designs)],
        value="best",
        description='Design:',
    )
    dropdown.observe(update_display)
    # `output` is created after the callback is registered; the callback only
    # looks it up when it fires, so this order is safe.
    output = widgets.Output()
    display(widgets.VBox([dropdown, output]))

    with output:
        plot_pdb_final(dropdown.value)
else:
    plot_pdb_final()  # Display the single generated structure


In [None]:
# Visualize the PDB structure for ABLE (PDB ID: 6w70)
viewer = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')
pdb_file_path = get_pdb("6w70")  # Fetch the PDB file
pdb_content = open(pdb_file_path, 'r').read()
viewer.addModel(pdb_content, 'pdb', {'hbondCutoff': 4.0})
viewer.setStyle({"model": 0}, {'cartoon': {'colorscheme': {'prop': 'b', 'gradient': 'roygb', 'min': 0, 'max': 100}}})
viewer.zoomTo()  # Focus on the structure
viewer.show()

%%time
# Define and execute the motif scaffolding pipeline
# This will use RFDiffusion to link two helices of ABLE while keeping others intact.
project_name = "scaffold_design"
# Specify the contigs: residues from Chain A to be linked with a gap and others left as-is
contigs_definition = "A1-29/5-5/A98-126 : A30-97"  #@param {type:"string"}
input_pdb = "6W70"  #@param {type:"string"}
num_iterations = 25  #@param ["25", "50", "100", "150", "200"] {type:"raw"}
hotspot_residues = ""
design_count = 1
output_visualization = "interactive"
symmetry_mode = "none"
chain_order = 1
chain_ids = ""
apply_potential = True
temperature_schedule = "auto"
use_beta_version = False

# Generate a unique output path for results
output_path = project_name
while os.path.exists(f"outputs/{output_path}_0.pdb"):
    output_path = project_name + "_" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

# Define parameters for RFDiffusion
rf_flags = {
    "contigs": contigs_definition,
    "pdb": input_pdb,
    "order": chain_order,
    "iterations": num_iterations,
    "symmetry": symmetry_mode,
    "hotspot": hotspot_residues,
    "path": output_path,
    "chains": chain_ids,
    "add_potential": apply_potential,
    "num_designs": design_count,
    "use_beta_model": use_beta_version,
    "visual": output_visualization,
    "partial_T": temperature_schedule
}

# Clean up string parameters for safety
for key, value in rf_flags.items():
    if isinstance(value, str):
        rf_flags[key] = value.replace("'", "").replace('"', "")

print("Executing RFDiffusion...")
contigs_result, copies_result = run_diffusion(**rf_flags)

# Run ProteinMPNN to generate sequences for the designed structure
sequence_count = 1  #@param ["1", "2", "4", "8", "16", "32", "64"] {type:"raw"}
sampling_temperature = 0.1  #@param ["0.0001", "0.1", "0.15", "0.2", "0.25", "0.3", "0.5", "1.0"] {type:"raw"}
excluded_amino_acids = "C"
use_soluble_variant = False

# Validate designs using AlphaFold
recycle_steps = 2  #@param ["0", "1", "2", "3", "6", "12"] {type:"raw"}
enable_multimer_mode = False

# Ensure AlphaFold parameters are downloaded before running
if not os.path.isfile("params/done.txt"):
    print("Downloading AlphaFold parameters...")
    while not os.path.isfile("params/done.txt"):
        time.sleep(5)

# Prepare options for designability testing script
contigs_string_format = ":".join(contigs_definition)
design_opts = [
    f"--pdb=outputs/{output_path}_0.pdb",
    f"--loc=outputs/{output_path}",
    f"--contig={contigs_string_format}",
    f"--copies={copies_result}",
    f"--num_seqs={sequence_count}",
    f"--num_recycles={recycle_steps}",
    f"--rm_aa={excluded_amino_acids}",
    f"--mpnn_sampling_temp={sampling_temperature}",
    f"--num_designs={design_count}"
]

if enable_multimer_mode:
    design_opts.append("--use_multimer")
if use_soluble_variant:
    design_opts.append("--use_soluble")

design_opts_str = ' '.join(design_opts)
!python colabdesign/rf/designability_test.py {design_opts_str}

# Show the AlphaFold validation output: a single plot for one design,
# otherwise a dropdown that switches between "best" and each design index.
print("Displaying AlphaFold Results...")
if design_count <= 1:
    plot_pdb_final()
else:
    def update_display(change):
        """Redraw the chosen design; ignore events for traits other than the value."""
        if change['name'] != 'value':
            return
        with output_widget:
            output_widget.clear_output(wait=True)
            plot_pdb_final(change['new'])

    dropdown_menu = widgets.Dropdown(
        options=["best", *map(str, range(design_count))],
        value="best",
        description='Design:',
    )

    dropdown_menu.observe(update_display)
    # The callback looks up `output_widget` only when it fires, so creating
    # the widget after registering the observer is fine.
    output_widget = widgets.Output()

    display(widgets.VBox([dropdown_menu, output_widget]))

    # Initial render for the default selection
    with output_widget:
        plot_pdb_final(dropdown_menu.value)


In [None]:
# **Binder Design Pipeline**
# This pipeline generates a new binder to stabilize the binding state of ABLE-D.
# The binder is designed to interact with specific hotspots on ABLE-D.

%%time
# Define parameters for RFdiffusion to create a binder for ABLE-D
project_name = "binder_design"
# Specify contigs: regions of ABLE-D and the binder's length
contigs_config = "A1-63:B1-68:100-100"  #@param {type:"string"}
use_prebuilt_able_d = False  #@param {type:"boolean"}

# Use pre-generated ABLE-D structure if specified, otherwise provide custom PDB
if use_prebuilt_able_d:
    input_pdb = "/content/outputs/motif/best.pdb"
else:
    input_pdb = ""  #@param {type:"string"}

# Set diffusion parameters
diffusion_iterations = 25  #@param ["25", "50", "100", "150", "200"] {type:"raw"}
binding_hotspots = "B58, A41, B56, A44, B5, A22, A26, B8"  #@param {type:"string"}
design_count = 1
visualization_mode = "interactive"
symmetry_type = "none"
chain_ordering = 1
chain_ids_to_use = ""
enable_potential_addition = True
temperature_schedule = "auto"
use_beta_version_model = False

# Generate a unique output path for saving results
output_directory = project_name
while os.path.exists(f"outputs/{output_directory}_0.pdb"):
    output_directory = project_name + "_" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

# Configure flags for RFDiffusion
rf_diffusion_flags = {
    "contigs": contigs_config,
    "pdb": input_pdb,
    "order": chain_ordering,
    "iterations": diffusion_iterations,
    "symmetry": symmetry_type,
    "hotspot": binding_hotspots,
    "path": output_directory,
    "chains": chain_ids_to_use,
    "add_potential": enable_potential_addition,
    "num_designs": design_count,
    "use_beta_model": use_beta_version_model,
    "visual": visualization_mode,
    "partial_T": temperature_schedule
}

# Clean up string parameters for safety
for key, value in rf_diffusion_flags.items():
    if isinstance(value, str):
        rf_diffusion_flags[key] = value.replace("'", "").replace('"', "")

print("Starting RFdiffusion to design binder...")
contigs_result, copies_result = run_diffusion(**rf_diffusion_flags)

# Generate sequences for the designed binder using ProteinMPNN
sequence_count = 1  #@param ["1", "2", "4", "8", "16", "32", "64"] {type:"raw"}
sampling_temperature_mpnn = 0.1  #@param ["0.0001", "0.1", "0.15", "0.2", "0.25", "0.3", "0.5", "1.0"] {type:"raw"}
excluded_residues = "C"
use_soluble_variant_mpnn = False

# Validate the designs using AlphaFold
recycle_steps_afold = 2  #@param ["0", "1", "2", "3", "6", "12"] {type:"raw"}
enable_multimer_mode_afold = False

# Ensure AlphaFold parameters are downloaded before starting validation
if not os.path.isfile("params/done.txt"):
    print("Downloading AlphaFold parameters...")
    while not os.path.isfile("params/done.txt"):
        time.sleep(5)

# Prepare command-line options for designability testing script
contigs_as_string = ":".join(contigs_config)
designability_options = [
    f"--pdb=outputs/{output_directory}_0.pdb",
    f"--loc=outputs/{output_directory}",
    f"--contig={contigs_as_string}",
    f"--copies={copies_result}",
    f"--num_seqs={sequence_count}",
    f"--num_recycles={recycle_steps_afold}",
    f"--rm_aa={excluded_residues}",
    f"--mpnn_sampling_temp={sampling_temperature_mpnn}",
    f"--num_designs={design_count}"
]

if enable_multimer_mode_afold:
    designability_options.append("--use_multimer")
if use_soluble_variant_mpnn:
    designability_options.append("--use_soluble")

designability_command = ' '.join(designability_options)
!python colabdesign/rf/designability_test.py {designability_command}

# Show the AlphaFold output for the binder: a single plot for one design,
# otherwise a dropdown that switches between "best" and each design index.
print("Displaying AlphaFold Results...")
if design_count <= 1:
    plot_pdb_final()
else:
    def update_output(change):
        """Redraw the chosen design; ignore events for traits other than the value."""
        if change['name'] != 'value':
            return
        with display_output:
            display_output.clear_output(wait=True)
            plot_pdb_final(change['new'])

    dropdown_selector = widgets.Dropdown(
        options=["best", *map(str, range(design_count))],
        value="best",
        description='Design:',
    )

    dropdown_selector.observe(update_output)
    # The callback looks up `display_output` only when it fires, so creating
    # the widget after registering the observer is fine.
    display_output = widgets.Output()

    display(widgets.VBox([dropdown_selector, display_output]))

    # Initial render for the default selection
    with display_output:
        plot_pdb_final(dropdown_selector.value)
