In [1]:
#@title setup **RFdiffusion** (~3min)
%%time
import os, time, signal
import sys, random, string, re
if not os.path.isdir("params"):
  os.system("apt-get install aria2")
  os.system("mkdir params")
  # send param download into background
  os.system("(\
  aria2c -q -x 16 https://files.ipd.uw.edu/krypton/schedules.zip; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/6f5902ac237024bdd0c176cb93063dc4/Base_ckpt.pt; \
  aria2c -q -x 16 http://files.ipd.uw.edu/pub/RFdiffusion/e29311f6f1bf1af907f9ef9f44b8328b/Complex_base_ckpt.pt; \
  aria2c -q -x 16 https://storage.googleapis.com/alphafold/alphafold_params_2022-12-06.tar; \
  tar -xf alphafold_params_2022-12-06.tar -C params; \
  touch params/done.txt) &")

if not os.path.isdir("RFdiffusion"):
  print("installing RFdiffusion...")
  os.system("git clone https://github.com/sokrypton/RFdiffusion.git")
  os.system("pip install jedi omegaconf hydra-core icecream pyrsistent pynvml decorator")
  os.system("pip install git+https://github.com/NVIDIA/dllogger#egg=dllogger")
  # 17Mar2024: adding --no-dependencies to avoid installing nvidia-cuda-* dependencies
  os.system("pip install --no-dependencies dgl==2.0.0 -f https://data.dgl.ai/wheels/cu121/repo.html")
  os.system("pip install --no-dependencies e3nn==0.3.3 opt_einsum_fx")
  os.system("cd RFdiffusion/env/SE3Transformer; pip install .")
  os.system("wget -qnc https://files.ipd.uw.edu/krypton/ananas")
  os.system("chmod +x ananas")

if not os.path.isdir("colabdesign"):
  print("installing ColabDesign...")
  os.system("pip -q install git+https://github.com/sokrypton/ColabDesign.git@v1.1.1")
  os.system("ln -s /usr/local/lib/python3.*/dist-packages/colabdesign colabdesign")

if not os.path.isdir("RFdiffusion/models"):
  print("downloading RFdiffusion params...")
  os.system("mkdir RFdiffusion/models")
  models = ["Base_ckpt.pt","Complex_base_ckpt.pt"]
  for m in models:
    while os.path.isfile(f"{m}.aria2"):
      time.sleep(5)
  os.system(f"mv {' '.join(models)} RFdiffusion/models")
  os.system("unzip schedules.zip; rm schedules.zip")

if 'RFdiffusion' not in sys.path:
  os.environ["DGLBACKEND"] = "pytorch"
  sys.path.append('RFdiffusion')

from google.colab import files
import json
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML
import ipywidgets as widgets
import py3Dmol

from inference.utils import parse_pdb
from colabdesign.rf.utils import get_ca
from colabdesign.rf.utils import fix_contigs, fix_partial_contigs, fix_pdb, sym_it
from colabdesign.shared.protein import pdb_to_string
from colabdesign.shared.plot import plot_pseudo_3D

def get_pdb(pdb_code=None):
  if pdb_code is None or pdb_code == "":
    upload_dict = files.upload()
    pdb_string = upload_dict[list(upload_dict.keys())[0]]
    with open("tmp.pdb","wb") as out: out.write(pdb_string)
    return "tmp.pdb"
  elif os.path.isfile(pdb_code):
    return pdb_code
  elif len(pdb_code) == 4:
    if not os.path.isfile(f"{pdb_code}.pdb1"):
      os.system(f"wget -qnc https://files.rcsb.org/download/{pdb_code}.pdb1.gz")
      os.system(f"gunzip {pdb_code}.pdb1.gz")
    return f"{pdb_code}.pdb1"
  else:
    os.system(f"wget -qnc https://alphafold.ebi.ac.uk/files/AF-{pdb_code}-F1-model_v3.pdb")
    return f"AF-{pdb_code}-F1-model_v3.pdb"

def run_ananas(pdb_str, path, sym=None):
  pdb_filename = f"outputs/{path}/ananas_input.pdb"
  out_filename = f"outputs/{path}/ananas.json"
  with open(pdb_filename,"w") as handle:
    handle.write(pdb_str)

  cmd = f"./ananas {pdb_filename} -u -j {out_filename}"
  if sym is None: os.system(cmd)
  else: os.system(f"{cmd} {sym}")

  # parse results
  try:
    out = json.loads(open(out_filename,"r").read())
    results,AU = out[0], out[-1]["AU"]
    group = AU["group"]
    chains = AU["chain names"]
    rmsd = results["Average_RMSD"]
    print(f"AnAnaS detected {group} symmetry at RMSD:{rmsd:.3}")

    C = np.array(results['transforms'][0]['CENTER'])
    A = [np.array(t["AXIS"]) for t in results['transforms']]

    # apply symmetry and filter to the asymmetric unit
    new_lines = []
    for line in pdb_str.split("\n"):
      if line.startswith("ATOM"):
        chain = line[21:22]
        if chain in chains:
          x = np.array([float(line[i:(i+8)]) for i in [30,38,46]])
          if group[0] == "c":
            x = sym_it(x,C,A[0])
          if group[0] == "d":
            x = sym_it(x,C,A[1],A[0])
          coord_str = "".join(["{:8.3f}".format(a) for a in x])
          new_lines.append(line[:30]+coord_str+line[54:])
      else:
        new_lines.append(line)
    return results, "\n".join(new_lines)

  except:
    return None, pdb_str

def run(command, steps, num_designs=1, visual="none"):

  def run_command_and_get_pid(command):
    pid_file = '/dev/shm/pid'
    os.system(f'nohup {command} > /dev/null & echo $! > {pid_file}')
    with open(pid_file, 'r') as f:
      pid = int(f.read().strip())
    os.remove(pid_file)
    return pid
  def is_process_running(pid):
    try:
      os.kill(pid, 0)
    except OSError:
      return False
    else:
      return True

  run_output = widgets.Output()
  progress = widgets.FloatProgress(min=0, max=1, description='running', bar_style='info')
  display(widgets.VBox([progress, run_output]))

  # clear previous run
  for n in range(steps):
    if os.path.isfile(f"/dev/shm/{n}.pdb"):
      os.remove(f"/dev/shm/{n}.pdb")

  pid = run_command_and_get_pid(command)
  try:
    fail = False
    for _ in range(num_designs):

      # for each step check if output generated
      for n in range(steps):
        wait = True
        while wait and not fail:
          time.sleep(0.1)
          if os.path.isfile(f"/dev/shm/{n}.pdb"):
            pdb_str = open(f"/dev/shm/{n}.pdb").read()
            if pdb_str[-3:] == "TER":
              wait = False
            elif not is_process_running(pid):
              fail = True
          elif not is_process_running(pid):
            fail = True

        if fail:
          progress.bar_style = 'danger'
          progress.description = "failed"
          break

        else:
          progress.value = (n+1) / steps
          if visual != "none":
            with run_output:
              run_output.clear_output(wait=True)
              if visual == "image":
                xyz, bfact = get_ca(f"/dev/shm/{n}.pdb", get_bfact=True)
                fig = plt.figure()
                fig.set_dpi(100);fig.set_figwidth(6);fig.set_figheight(6)
                ax1 = fig.add_subplot(111);ax1.set_xticks([]);ax1.set_yticks([])
                plot_pseudo_3D(xyz, c=bfact, cmin=0.5, cmax=0.9, ax=ax1)
                plt.show()
              if visual == "interactive":
                view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')
                view.addModel(pdb_str,'pdb')
                view.setStyle({'cartoon': {'colorscheme': {'prop':'b','gradient': 'roygb','min':0.5,'max':0.9}}})
                view.zoomTo()
                view.show()
        if os.path.exists(f"/dev/shm/{n}.pdb"):
          os.remove(f"/dev/shm/{n}.pdb")
      if fail:
        progress.bar_style = 'danger'
        progress.description = "failed"
        break

    while is_process_running(pid):
      time.sleep(0.1)

  except KeyboardInterrupt:
    os.kill(pid, signal.SIGTERM)
    progress.bar_style = 'danger'
    progress.description = "stopped"
def run_diffusion(contigs, path, pdb=None, iterations=50,
                  symmetry="none", order=1, hotspot=None,
                  chains=None, add_potential=False,
                  num_designs=1, visual="none"):

  full_path = f"outputs/{path}"
  os.makedirs(full_path, exist_ok=True)
  opts = [f"inference.output_prefix={full_path}",
          f"inference.num_designs={num_designs}"]

  if chains == "": chains = None

  # determine symmetry type
  if symmetry in ["auto","cyclic","dihedral"]:
    if symmetry == "auto":
      sym, copies = None, 1
    else:
      sym, copies = {"cyclic":(f"c{order}",order),
                     "dihedral":(f"d{order}",order*2)}[symmetry]
  else:
    symmetry = None
    sym, copies = None, 1

  # determine mode
  contigs = contigs.replace(","," ").replace(":"," ").split()
  is_fixed, is_free = False, False
  fixed_chains = []
  for contig in contigs:
    for x in contig.split("/"):
      a = x.split("-")[0]
      if a[0].isalpha():
        is_fixed = True
        if a[0] not in fixed_chains:
          fixed_chains.append(a[0])
      if a.isnumeric():
        is_free = True
  if len(contigs) == 0 or not is_free:
    mode = "partial"
  elif is_fixed:
    mode = "fixed"
  else:
    mode = "free"

  # fix input contigs
  if mode in ["partial","fixed"]:
    pdb_str = pdb_to_string(get_pdb(pdb), chains=chains)
    if symmetry == "auto":
      a, pdb_str = run_ananas(pdb_str, path)
      if a is None:
        print(f'ERROR: no symmetry detected')
        symmetry = None
        sym, copies = None, 1
      else:
        if a["group"][0] == "c":
          symmetry = "cyclic"
          sym, copies = a["group"], int(a["group"][1:])
        elif a["group"][0] == "d":
          symmetry = "dihedral"
          sym, copies = a["group"], 2 * int(a["group"][1:])
        else:
          print(f'ERROR: the detected symmetry ({a["group"]}) not currently supported')
          symmetry = None
          sym, copies = None, 1

    elif mode == "fixed":
      pdb_str = pdb_to_string(pdb_str, chains=fixed_chains)

    pdb_filename = f"{full_path}/input.pdb"
    with open(pdb_filename, "w") as handle:
      handle.write(pdb_str)

    parsed_pdb = parse_pdb(pdb_filename)
    opts.append(f"inference.input_pdb={pdb_filename}")
    if mode in ["partial"]:
      iterations = int(80 * (iterations / 200))
      opts.append(f"diffuser.partial_T={iterations}")
      contigs = fix_partial_contigs(contigs, parsed_pdb)
    else:
      opts.append(f"diffuser.T={iterations}")
      contigs = fix_contigs(contigs, parsed_pdb)
  else:
    opts.append(f"diffuser.T={iterations}")
    parsed_pdb = None
    contigs = fix_contigs(contigs, parsed_pdb)

  if hotspot is not None and hotspot != "":
    opts.append(f"ppi.hotspot_res=[{hotspot}]")

  # setup symmetry
  if sym is not None:
    sym_opts = ["--config-name symmetry", f"inference.symmetry={sym}"]
    if add_potential:
      sym_opts += ["'potentials.guiding_potentials=[\"type:olig_contacts,weight_intra:1,weight_inter:0.1\"]'",
                   "potentials.olig_intra_all=True","potentials.olig_inter_all=True",
                   "potentials.guide_scale=2","potentials.guide_decay=quadratic"]
    opts = sym_opts + opts
    contigs = sum([contigs] * copies,[])

  opts.append(f"'contigmap.contigs=[{' '.join(contigs)}]'")
  opts += ["inference.dump_pdb=True","inference.dump_pdb_path='/dev/shm'"]

  print("mode:", mode)
  print("output:", full_path)
  print("contigs:", contigs)

  opts_str = " ".join(opts)
  cmd = f"./RFdiffusion/run_inference.py {opts_str}"
  print(cmd)

  # RUN
  run(cmd, iterations, num_designs, visual=visual)

  # fix pdbs
  for n in range(num_designs):
    pdbs = [f"outputs/traj/{path}_{n}_pX0_traj.pdb",
            f"outputs/traj/{path}_{n}_Xt-1_traj.pdb",
            f"{full_path}_{n}.pdb"]
    for pdb in pdbs:
      with open(pdb,"r") as handle: pdb_str = handle.read()
      with open(pdb,"w") as handle: handle.write(fix_pdb(pdb_str, contigs))

  return contigs, copies

installing RFdiffusion...
installing ColabDesign...
downloading RFdiffusion params...
CPU times: user 5.44 s, sys: 998 ms, total: 6.44 s
Wall time: 3min 8s


In [5]:
from google.colab import drive
drive.mount('/content/drive')

import os
import shutil
import glob
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import uuid
from datetime import datetime
import re
import torch
from time import time

meta_data_filepath = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/rfdiffusion_il10/generation_metadata_rfdiffusion_tev.csv"

if os.path.exists(meta_data_filepath):
  all_metadata_df = pd.read_csv(meta_data_filepath)
  print("Existing generation metadata read in.")
else:
  all_metadata_df = pd.DataFrame()
  #all_metadata_df.to_csv(meta_data_filepath, index=False)
  print("Created generation metadata dataframe")



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Existing generation metadata read in.


In [6]:
import torch
import time
for i in range(10):
  meta_data = {}
  meta_data["batch_id"] = str(uuid.uuid4())
  meta_data["batch_size"] = 10
  meta_data["Timestamp"] = str(datetime.now())
  meta_data['model'] = 'RFdiffusion_150it'
  meta_data['task'] = 'backbone_pdb_generation'
  #meta_data['conditions'] = 'IL10 (monomer) scaffolding [contigs = "24/A25-50/40/A91-125/59"]'
  #meta_data['conditions'] = 'IL10 (complex) scaffolding [contigs = "24/A25-50/40/A91-125/59 C1-184"]'
  #meta_data['conditions'] = 'tev (monomer) scaffolding [contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16"]'
  meta_data['conditions'] = 'tev (complex) scaffolding [contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16/0 B15-21"]'
  meta_data['wall_time_batch'] = None
  meta_data['gpu'] = 'T4 GPU'

  batch_length = 5

  #name = "IL10"
  #name = "IL10_w_receptor"
  #name = "tev"
  name = "tev_in_complex"
  #contigs = "24/A25-50/40/A91-125/59"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/IL10_Mutant_model1.pdb"
  #contigs = "24/A25-50/40/A91-125/59/0 C1-184"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/monomer_B_complex.pdb"
  #contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_monomer.pdb"
  contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16/0 B15-21"
  pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_complex.pdb"
  iterations = 150
  hotspot = ""
  num_designs = batch_length
  visual = "none"
  symmetry = "none"
  order = 1
  chains = ""
  add_potential = False
  path = name
  while os.path.exists(f"outputs/{path}_0.pdb"):
    path = name + "_" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

  flags = {"contigs":contigs,
          "pdb":pdb,
          "order":order,
          "iterations":iterations,
          "symmetry":symmetry,
          "hotspot":hotspot,
          "path":path,
          "chains":chains,
          "add_potential":add_potential,
          "num_designs":num_designs,
          "visual":visual}

  for k,v in flags.items():
    if isinstance(v,str):
      flags[k] = v.replace("'","").replace('"','')

  start_time = time.time()
  contigs, copies = run_diffusion(**flags)
  end_time = time.time()
  total_job_time = end_time - start_time
  meta_data['wall_time_batch'] = str(total_job_time) + " Seconds"
  meta_data['wall_time_task'] = str(total_job_time/batch_length) + " Seconds (inferred)"

  for filename in os.listdir("./outputs"):
      if filename.endswith(".pdb") and name in filename:
        meta_data['entity_id'] = str(uuid.uuid4())
        #new_name = "rfdiffusion_tev_mono_" + meta_data['entity_id'] + ".pdb"
        new_name = "rfdiffusion_tev_comp_" + meta_data['entity_id'] + ".pdb"
        meta_data['output_file_name'] = new_name
        metadata_entry = pd.Series(meta_data)
        all_metadata_df = pd.concat([all_metadata_df,pd.DataFrame(metadata_entry).T], ignore_index=True)
        cleanup_command = f"""mv ./outputs/{filename} /content/drive/MyDrive/Generative_Models/IL10_scaffolding/rfdiffusion_il10/{new_name}"""
        !{cleanup_command}

  all_metadata_df.to_csv(meta_data_filepath, index=False)
  print("Metadata saved. Cleaning up....")
  !rm -rf ./outputs
  !mkdir outputs


mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....


In [7]:
import torch
import time
for i in range(10):
  meta_data = {}
  meta_data["batch_id"] = str(uuid.uuid4())
  meta_data["batch_size"] = 10
  meta_data["Timestamp"] = str(datetime.now())
  meta_data['model'] = 'RFdiffusion_150it'
  meta_data['task'] = 'backbone_pdb_generation'
  #meta_data['conditions'] = 'IL10 (monomer) scaffolding [contigs = "24/A25-50/40/A91-125/59"]'
  #meta_data['conditions'] = 'IL10 (complex) scaffolding [contigs = "24/A25-50/40/A91-125/59 C1-184"]'
  #meta_data['conditions'] = 'tev (monomer) scaffolding [contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16"]'
  meta_data['conditions'] = 'tev (complex) scaffolding [contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16/0 B15-21"]'
  meta_data['wall_time_batch'] = None
  meta_data['gpu'] = 'T4 GPU'

  batch_length = 5

  #name = "IL10"
  #name = "IL10_w_receptor"
  #name = "tev"
  name = "tev_in_complex"
  #contigs = "24/A25-50/40/A91-125/59"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/IL10_Mutant_model1.pdb"
  #contigs = "24/A25-50/40/A91-125/59/0 C1-184"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/monomer_B_complex.pdb"
  #contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_monomer.pdb"
  contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16/0 B15-21"
  pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_complex.pdb"
  iterations = 150
  hotspot = ""
  num_designs = batch_length
  visual = "none"
  symmetry = "none"
  order = 1
  chains = ""
  add_potential = False
  path = name
  while os.path.exists(f"outputs/{path}_0.pdb"):
    path = name + "_" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

  flags = {"contigs":contigs,
          "pdb":pdb,
          "order":order,
          "iterations":iterations,
          "symmetry":symmetry,
          "hotspot":hotspot,
          "path":path,
          "chains":chains,
          "add_potential":add_potential,
          "num_designs":num_designs,
          "visual":visual}

  for k,v in flags.items():
    if isinstance(v,str):
      flags[k] = v.replace("'","").replace('"','')

  start_time = time.time()
  contigs, copies = run_diffusion(**flags)
  end_time = time.time()
  total_job_time = end_time - start_time
  meta_data['wall_time_batch'] = str(total_job_time) + " Seconds"
  meta_data['wall_time_task'] = str(total_job_time/batch_length) + " Seconds (inferred)"

  for filename in os.listdir("./outputs"):
      if filename.endswith(".pdb") and name in filename:
        meta_data['entity_id'] = str(uuid.uuid4())
        #new_name = "rfdiffusion_tev_mono_" + meta_data['entity_id'] + ".pdb"
        new_name = "rfdiffusion_tev_comp_" + meta_data['entity_id'] + ".pdb"
        meta_data['output_file_name'] = new_name
        metadata_entry = pd.Series(meta_data)
        all_metadata_df = pd.concat([all_metadata_df,pd.DataFrame(metadata_entry).T], ignore_index=True)
        cleanup_command = f"""mv ./outputs/{filename} /content/drive/MyDrive/Generative_Models/IL10_scaffolding/rfdiffusion_il10/{new_name}"""
        !{cleanup_command}

  all_metadata_df.to_csv(meta_data_filepath, index=False)
  print("Metadata saved. Cleaning up....")
  !rm -rf ./outputs
  !mkdir outputs


mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16', 'B15-21']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16 B15-21]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....


In [8]:
import torch
import time
for i in range(10):
  meta_data = {}
  meta_data["batch_id"] = str(uuid.uuid4())
  meta_data["batch_size"] = 10
  meta_data["Timestamp"] = str(datetime.now())
  meta_data['model'] = 'RFdiffusion_150it'
  meta_data['task'] = 'backbone_pdb_generation'
  #meta_data['conditions'] = 'IL10 (monomer) scaffolding [contigs = "24/A25-50/40/A91-125/59"]'
  #meta_data['conditions'] = 'IL10 (complex) scaffolding [contigs = "24/A25-50/40/A91-125/59 C1-184"]'
  meta_data['conditions'] = 'tev (monomer) scaffolding [contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16"]'
  #meta_data['conditions'] = 'tev (complex) scaffolding [contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16/0 B15-21"]'
  meta_data['wall_time_batch'] = None
  meta_data['gpu'] = 'T4 GPU'

  batch_length = 5

  #name = "IL10"
  #name = "IL10_w_receptor"
  #name = "tev"
  name = "tev_in_complex"
  #contigs = "24/A25-50/40/A91-125/59"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/IL10_Mutant_model1.pdb"
  #contigs = "24/A25-50/40/A91-125/59/0 C1-184"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/monomer_B_complex.pdb"
  contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16"
  pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_monomer.pdb"
  #contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16/0 B15-21"
  #pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_complex.pdb"
  iterations = 150
  hotspot = ""
  num_designs = batch_length
  visual = "none"
  symmetry = "none"
  order = 1
  chains = ""
  add_potential = False
  path = name
  while os.path.exists(f"outputs/{path}_0.pdb"):
    path = name + "_" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

  flags = {"contigs":contigs,
          "pdb":pdb,
          "order":order,
          "iterations":iterations,
          "symmetry":symmetry,
          "hotspot":hotspot,
          "path":path,
          "chains":chains,
          "add_potential":add_potential,
          "num_designs":num_designs,
          "visual":visual}

  for k,v in flags.items():
    if isinstance(v,str):
      flags[k] = v.replace("'","").replace('"','')

  start_time = time.time()
  contigs, copies = run_diffusion(**flags)
  end_time = time.time()
  total_job_time = end_time - start_time
  meta_data['wall_time_batch'] = str(total_job_time) + " Seconds"
  meta_data['wall_time_task'] = str(total_job_time/batch_length) + " Seconds (inferred)"

  for filename in os.listdir("./outputs"):
      if filename.endswith(".pdb") and name in filename:
        meta_data['entity_id'] = str(uuid.uuid4())
        new_name = "rfdiffusion_tev_mono_" + meta_data['entity_id'] + ".pdb"
        #new_name = "rfdiffusion_tev_comp_" + meta_data['entity_id'] + ".pdb"
        meta_data['output_file_name'] = new_name
        metadata_entry = pd.Series(meta_data)
        all_metadata_df = pd.concat([all_metadata_df,pd.DataFrame(metadata_entry).T], ignore_index=True)
        cleanup_command = f"""mv ./outputs/{filename} /content/drive/MyDrive/Generative_Models/IL10_scaffolding/rfdiffusion_il10/{new_name}"""
        !{cleanup_command}

  all_metadata_df.to_csv(meta_data_filepath, index=False)
  print("Metadata saved. Cleaning up....")
  !rm -rf ./outputs
  !mkdir outputs


mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....
mode: fixed
output: outputs/tev_in_complex
contigs: ['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/tev_in_complex inference.num_designs=5 inference.input_pdb=outputs/tev_in_complex/input.pdb diffuser.T=150 'contigmap.contigs=[27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....


In [65]:
batch_length = 10


name = "tev_in_complex"
#contigs = "24/A25-50/40/A91-125/59/0 C1-184"
#pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/monomer_B_complex.pdb"
contigs = "27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16/0 B1-7"
pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_complex.pdb"
iterations = 150
hotspot = ""
num_designs = batch_length
visual = "none"
symmetry = "none"
order = 1
chains = ""
add_potential = False
path = name
while os.path.exists(f"outputs/{path}_0.pdb"):
  path = name + "_" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

flags = {"contigs":contigs,
        "pdb":pdb,
        "order":order,
        "iterations":iterations,
        "symmetry":symmetry,
        "hotspot":hotspot,
        "path":path,
        "chains":chains,
        "add_potential":add_potential,
        "num_designs":num_designs,
        "visual":visual}

for k,v in flags.items():
  if isinstance(v,str):
    flags[k] = v.replace("'","").replace('"','')


In [66]:
full_path = f"outputs/{path}"
os.makedirs(full_path, exist_ok=True)
opts = [f"inference.output_prefix={full_path}",
        f"inference.num_designs={num_designs}"]

In [67]:
chains = None
symmetry = None
sym, copies = None, 1
opts

['inference.output_prefix=outputs/tev_in_complex', 'inference.num_designs=10']

In [68]:
contigs = contigs.replace(","," ").replace(":"," ").split()

In [69]:
is_fixed, is_free = False, False
fixed_chains = []
for contig in contigs:
  print(contig)
  for x in contig.split("/"):
    a = x.split("-")[0]
    if a[0].isalpha():
      is_fixed = True
      if a[0] not in fixed_chains:
        fixed_chains.append(a[0])
    if a.isnumeric():
      is_free = True

27/A28-33/13/A47-51/88/A140-152/15/A168-179/32/A212-221/16/0
B1-7


In [70]:
mode

'fixed'

In [71]:
if len(contigs) == 0 or not is_free:
  mode = "partial"
elif is_fixed:
  mode = "fixed"
else:
  mode = "free"

In [None]:
def pdb_to_string(pdb_file, chains=None, models=None):
  '''read pdb file and return as string'''

  if chains is not None:
    if "," in chains: chains = chains.split(",")
    if not isinstance(chains,list): chains = [chains]
  if models is not None:
    if not isinstance(models,list): models = [models]

  modres = {**MODRES}
  lines = []
  seen = []
  model = 1

  if "\n" in pdb_file:
    old_lines = pdb_file.split("\n")
  else:
    with open(pdb_file,"rb") as f:
      old_lines = [line.decode("utf-8","ignore").rstrip() for line in f]
  for line in old_lines:
    if line[:5] == "MODEL":
      model = int(line[5:])
    if models is None or model in models:
      if line[:6] == "MODRES":
        k = line[12:15]
        v = line[24:27]
        if k not in modres and v in residue_constants.restype_3to1:
          modres[k] = v
      if line[:6] == "HETATM":
        k = line[17:20]
        if k in modres:
          line = "ATOM  "+line[6:17]+modres[k]+line[20:]
      if line[:4] == "ATOM":
        chain = line[21:22]
        if chains is None or chain in chains:
          atom = line[12:12+4].strip()
          resi = line[17:17+3]
          resn = line[22:22+5].strip()
          if resn[-1].isalpha(): # alternative atom
            resn = resn[:-1]
            line = line[:26]+" "+line[27:]
          key = f"{model}_{chain}_{resn}_{resi}_{atom}"
          if key not in seen: # skip alternative placements
            lines.append(line)
            seen.append(key)
      if line[:5] == "MODEL" or line[:3] == "TER" or line[:6] == "ENDMDL":
        lines.append(line)
  return "\n".join(lines)

In [None]:
def fix_contigs(contigs,parsed_pdb):
  def fix_contig(contig):
    INF = float("inf")
    X = contig.split("/")
    Y = []
    for n,x in enumerate(X):
      if x[0].isalpha():
        C,x = x[0],x[1:]
        S,E = -INF,INF
        if x.startswith("-"):
          E = int(x[1:])
        elif x.endswith("-"):
          S = int(x[:-1])
        elif "-" in x:
          (S,E) = (int(y) for y in x.split("-"))
        elif x.isnumeric():
          S = E = int(x)
        new_x = ""
        c_,i_ = None,0
        for c, i in parsed_pdb["pdb_idx"]:
          if c == C and i >= S and i <= E:
            if c_ is None:
              new_x = f"{c}{i}"
            else:
              if c != c_ or i != i_+1:
                new_x += f"-{i_}/{c}{i}"
            c_,i_ = c,i
        Y.append(new_x + f"-{i_}")
      elif "-" in x:
        # sample length
        s,e = x.split("-")
        m = np.random.randint(int(s),int(e)+1)
        Y.append(f"{m}-{m}")
      elif x.isnumeric() and x != "0":
        Y.append(f"{x}-{x}")
    return "/".join(Y)
  return [fix_contig(x) for x in contigs]

In [72]:
pdb_str = pdb_to_string(get_pdb(pdb), chains=chains)
pdb_str = pdb_to_string(pdb_str, chains=fixed_chains)
pdb_filename = f"{full_path}/input.pdb"
with open(pdb_filename, "w") as handle:
  handle.write(pdb_str)

In [63]:
pdb

'/content/drive/MyDrive/Generative_Models/IL10_scaffolding/monomer_B_complex.pdb'

In [73]:
parsed_pdb = parse_pdb(pdb_filename)

In [74]:
parsed_pdb

{'xyz': array([[[-35.249, -10.907,  -9.259],
         [-34.205, -10.776,  -8.219],
         [-33.829,  -9.31 ,  -8.181],
         ...,
         [  0.   ,   0.   ,   0.   ],
         [  0.   ,   0.   ,   0.   ],
         [  0.   ,   0.   ,   0.   ]],
 
        [[-34.013,  -8.613,  -7.045],
         [-33.73 ,  -7.185,  -6.994],
         [-32.229,  -6.938,  -7.158],
         ...,
         [  0.   ,   0.   ,   0.   ],
         [  0.   ,   0.   ,   0.   ],
         [  0.   ,   0.   ,   0.   ]],
 
        [[-31.837,  -5.993,  -7.997],
         [-30.449,  -5.554,  -8.077],
         [-30.064,  -4.944,  -6.726],
         ...,
         [  0.   ,   0.   ,   0.   ],
         [  0.   ,   0.   ,   0.   ],
         [  0.   ,   0.   ,   0.   ]],
 
        ...,
 
        [[  3.123,  11.388,   2.359],
         [  2.883,  12.539,   1.5  ],
         [  3.014,  13.817,   2.308],
         ...,
         [  0.   ,   0.   ,   0.   ],
         [  0.   ,   0.   ,   0.   ],
         [  0.   ,   0.   ,   0.   ]],


In [42]:
parsed_pdb = parse_pdb(pdb_filename)
opts.append(f"inference.input_pdb={pdb_filename}")
opts.append(f"diffuser.T={iterations}")
contigs = fix_contigs(contigs, parsed_pdb)

In [43]:
contigs

['27-27/A28-33/13-13/A47-51/88-88/A140-152/15-15/A168-179/32-32/A212-221/16-16',
 '-0']

In [None]:
def run_diffusion(contigs, path, pdb=None, iterations=50,
                  symmetry="none", order=1, hotspot=None,
                  chains=None, add_potential=False,
                  num_designs=1, visual="none"):





  if hotspot is not None and hotspot != "":
    opts.append(f"ppi.hotspot_res=[{hotspot}]")

  # setup symmetry
  if sym is not None:
    sym_opts = ["--config-name symmetry", f"inference.symmetry={sym}"]
    if add_potential:
      sym_opts += ["'potentials.guiding_potentials=[\"type:olig_contacts,weight_intra:1,weight_inter:0.1\"]'",
                   "potentials.olig_intra_all=True","potentials.olig_inter_all=True",
                   "potentials.guide_scale=2","potentials.guide_decay=quadratic"]
    opts = sym_opts + opts
    contigs = sum([contigs] * copies,[])

  opts.append(f"'contigmap.contigs=[{' '.join(contigs)}]'")
  opts += ["inference.dump_pdb=True","inference.dump_pdb_path='/dev/shm'"]

  print("mode:", mode)
  print("output:", full_path)
  print("contigs:", contigs)

  opts_str = " ".join(opts)
  cmd = f"./RFdiffusion/run_inference.py {opts_str}"
  print(cmd)

  # RUN
  run(cmd, iterations, num_designs, visual=visual)

  # fix pdbs
  for n in range(num_designs):
    pdbs = [f"outputs/traj/{path}_{n}_pX0_traj.pdb",
            f"outputs/traj/{path}_{n}_Xt-1_traj.pdb",
            f"{full_path}_{n}.pdb"]
    for pdb in pdbs:
      with open(pdb,"r") as handle: pdb_str = handle.read()
      with open(pdb,"w") as handle: handle.write(fix_pdb(pdb_str, contigs))

  return contigs, copies

In [None]:
import torch
import time

meta_data = {}
meta_data['entity_id'] = str(uuid.uuid4())
meta_data["batch_id"] = str(uuid.uuid4())
meta_data["batch_size"] = 25
meta_data["Timestamp"] = str(datetime.now())
meta_data['model'] = 'RFdiffusion_150it'
meta_data['task'] = 'backbone_pdb_generation'
#meta_data['conditions'] = 'IL10 (monomer) scaffolding [contigs = "24/A25-50/40/A91-125/59"]'
meta_data['conditions'] = 'IL10 (complex) scaffolding [contigs = "24/A25-50/40/A91-125/59 C1-194"]'
meta_data['wall_time_batch'] = None
meta_data['gpu'] = 'T4 GPU'

batch_length = 25

#name = "IL10"
name = "IL10_w_receptor"
#contigs = "24/A25-50/40/A91-125/59"
#pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/IL10_Mutant_model1.pdb"
contigs = "24/A25-50/40/A91-125/59/0 C1-194"
pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/monomer_B_complex.pdb"
iterations = 150
hotspot = ""
num_designs = batch_length
visual = "none"
symmetry = "none"
order = 1
chains = ""
add_potential = False
path = name
while os.path.exists(f"outputs/{path}_0.pdb"):
  path = name + "_" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

flags = {"contigs":contigs,
        "pdb":pdb,
        "order":order,
        "iterations":iterations,
        "symmetry":symmetry,
        "hotspot":hotspot,
        "path":path,
        "chains":chains,
        "add_potential":add_potential,
        "num_designs":num_designs,
        "visual":visual}

for k,v in flags.items():
  if isinstance(v,str):
    flags[k] = v.replace("'","").replace('"','')

start_time = time.time()
contigs, copies = run_diffusion(**flags)
end_time = time.time()
total_job_time = end_time - start_time
meta_data['wall_time_batch'] = str(total_job_time) + " Seconds"
meta_data['wall_time_task'] = str(total_job_time/batch_length) + " Seconds (inferred)"

for filename in os.listdir("./outputs"):
    if filename.endswith(".pdb") and name in filename:
      new_name = "rfdiffusion_il10_comp_" + meta_data['entity_id'] + ".pdb"
      meta_data['output_file_name'] = new_name
      meta_data['entity_id'] = str(uuid.uuid4())
      metadata_entry = pd.Series(meta_data)
      all_metadata_df = pd.concat([all_metadata_df,pd.DataFrame(metadata_entry).T], ignore_index=True)
      cleanup_command = f"""mv ./outputs/{filename} /content/drive/MyDrive/Generative_Models/IL10_scaffolding/rfdiffusion_il10/{new_name}"""
      !{cleanup_command}

all_metadata_df.to_csv(meta_data_filepath, index=False)
print("Metadata saved. Cleaning up....")
!rm -rf ./outputs
!mkdir outputs


mode: fixed
output: outputs/IL10_w_receptor
contigs: ['24-24/A25-50/40-40/A91-125/59-59', 'C21-159/C168-178/C182-194']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/IL10_w_receptor inference.num_designs=25 inference.input_pdb=outputs/IL10_w_receptor/input.pdb diffuser.T=150 'contigmap.contigs=[24-24/A25-50/40-40/A91-125/59-59 C21-159/C168-178/C182-194]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....


In [None]:
import torch
import time

meta_data = {}
meta_data['entity_id'] = str(uuid.uuid4())
meta_data["batch_id"] = str(uuid.uuid4())
meta_data["batch_size"] = 25
meta_data["Timestamp"] = str(datetime.now())
meta_data['model'] = 'RFdiffusion_150it'
meta_data['task'] = 'backbone_pdb_generation'
meta_data['conditions'] = 'IL10 (monomer) scaffolding [contigs = "24/A25-50/40/A91-125/59"]'
#meta_data['conditions'] = 'IL10 (complex) scaffolding [contigs = "24/A25-50/40/A91-125/59 C1-194"]'
meta_data['wall_time_batch'] = None
meta_data['gpu'] = 'T4 GPU'

batch_length = 25

name = "IL10"
#name = "IL10_w_receptor"
contigs = "24/A25-50/40/A91-125/59"
pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/IL10_Mutant_model1.pdb"
#contigs = "24/A25-50/40/A91-125/59/0 C1-194"
#pdb = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/rfdiffusion_il10/complex_AF_monomer.pdb"
iterations = 150
hotspot = ""
num_designs = batch_length
visual = "none"
symmetry = "none"
order = 1
chains = ""
add_potential = False
path = name
while os.path.exists(f"outputs/{path}_0.pdb"):
  path = name + "_" + ''.join(random.choices(string.ascii_lowercase + string.digits, k=5))

flags = {"contigs":contigs,
        "pdb":pdb,
        "order":order,
        "iterations":iterations,
        "symmetry":symmetry,
        "hotspot":hotspot,
        "path":path,
        "chains":chains,
        "add_potential":add_potential,
        "num_designs":num_designs,
        "visual":visual}

for k,v in flags.items():
  if isinstance(v,str):
    flags[k] = v.replace("'","").replace('"','')

start_time = time.time()
contigs, copies = run_diffusion(**flags)
end_time = time.time()
total_job_time = end_time - start_time
meta_data['wall_time_batch'] = str(total_job_time) + " Seconds"
meta_data['wall_time_task'] = str(total_job_time/batch_length) + " Seconds (inferred)"

for filename in os.listdir("./outputs"):
    if filename.endswith(".pdb") and name in filename:
      new_name = "rfdiffusion_il10_mono_" + meta_data['entity_id'] + ".pdb"
      meta_data['output_file_name'] = new_name
      meta_data['entity_id'] = str(uuid.uuid4())
      metadata_entry = pd.Series(meta_data)
      all_metadata_df = pd.concat([all_metadata_df,pd.DataFrame(metadata_entry).T], ignore_index=True)
      cleanup_command = f"""mv ./outputs/{filename} /content/drive/MyDrive/Generative_Models/IL10_scaffolding/rfdiffusion_il10/{new_name}"""
      !{cleanup_command}

all_metadata_df.to_csv(meta_data_filepath, index=False)
print("Metadata saved. Cleaning up....")
!rm -rf ./outputs
!mkdir outputs


mode: fixed
output: outputs/IL10
contigs: ['24-24/A25-50/40-40/A91-125/59-59']
./RFdiffusion/run_inference.py inference.output_prefix=outputs/IL10 inference.num_designs=25 inference.input_pdb=outputs/IL10/input.pdb diffuser.T=150 'contigmap.contigs=[24-24/A25-50/40-40/A91-125/59-59]' inference.dump_pdb=True inference.dump_pdb_path='/dev/shm'


VBox(children=(FloatProgress(value=0.0, bar_style='info', description='running', max=1.0), Output()))

Metadata saved. Cleaning up....


In [None]:
#!ls ./outputs
!mkdir outputs

In [None]:
!rm -rf ./outputs

In [None]:
!kill -9 -1