In [1]:
# Mount Google Drive at /content/drive; later cells read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
import os
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import uuid
from datetime import datetime
import re
import torch

# CSV log holding one row per generated structure.
# Alternative log used for the IL10 runs:
#meta_data_filepath = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/protpardelle_il10/generation_metadata_protpardelle_il10.csv"
meta_data_filepath = "/content/drive/MyDrive/Generative_Models/IL10_scaffolding/protpardelle_il10/generation_metadata_protpardelle_tev.csv"

# Start from an empty frame when no log exists yet (nothing is written to disk
# at this point); otherwise continue from the rows already recorded.
if not os.path.exists(meta_data_filepath):
  all_metadata_df = pd.DataFrame()
  print("Created generation metadata dataframe")
else:
  all_metadata_df = pd.read_csv(meta_data_filepath)
  print("Existing generation metadata read in.")

Existing generation metadata read in.


In [3]:
%%bash
# Install the Python packages required by protpardelle.
pip install torch transformers einops tqdm wandb rotary-embedding-torch biopython scipy torchtyping dm-tree matplotlib seaborn black ipython

# Clone into /content (where later cells expect the checkout) and only when the
# directory is missing: `git clone` exits non-zero on an existing directory,
# which would make this cell raise when it is re-run.
cd /content
[ -d protpardelle ] || git clone https://github.com/ProteinDesignLab/protpardelle
[ -d ProteinMPNN ] || git clone https://github.com/dauparas/ProteinMPNN.git

Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl (43 kB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 43.2/43.2 kB 1.7 MB/s eta 0:00:00
Collecting wandb
  Downloading wandb-0.17.2-py3-none-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.9 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 6.9/6.9 MB 19.1 MB/s eta 0:00:00
Collecting rotary-embedding-torch
  Downloading rotary_embedding_torch-0.6.2-py3-none-any.whl (5.3 kB)
Collecting biopython
  Downloading biopython-1.83-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 3.1/3.1 MB 42.7 MB/s eta 0:00:00
Collecting torchtyping
  Downloading torchtyping-0.1.4-py3-none-any.whl (17 kB)
Collecting black
  Downloading black-24.4.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.8 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 1.8/1.8 MB 50.5 MB/s eta 0:00:00
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from to

Cloning into 'protpardelle'...
Updating files:  32% (8/25)Updating files:  36% (9/25)Updating files:  40% (10/25)Updating files:  44% (11/25)Updating files:  48% (12/25)Updating files:  52% (13/25)Updating files:  56% (14/25)Updating files:  60% (15/25)Updating files:  64% (16/25)Updating files:  68% (17/25)Updating files:  72% (18/25)Updating files:  76% (19/25)Updating files:  80% (20/25)Updating files:  84% (21/25)Updating files:  88% (22/25)Updating files:  92% (23/25)Updating files:  96% (24/25)Updating files: 100% (25/25)Updating files: 100% (25/25), done.
Cloning into 'ProteinMPNN'...


In [4]:
%cd protpardelle

/content/protpardelle


In [5]:
# List the current directory to confirm the repository checkout is in place.
!ls

checkpoints  configs  diffusion.py     evaluation.py  LICENSE	 modules.py		README.md
CODEOWNERS   core     draw_samples.py  inference.py   models.py  protpardelle_pymol.py


In [6]:
# Temporary workaround for a bug introduced by a recent protpardelle commit
# (issue logged upstream): every "sampling.d" occurrence in draw_samples.py is
# rewritten to "inference.d". Re-running the cell is harmless because the
# patched file no longer contains the old text.
script_path = "draw_samples.py"

with open(script_path, "r") as src:
  original_text = src.read()

patched_text = original_text.replace("sampling.d", "inference.d")

with open(script_path, "w") as dst:
  dst.write(patched_text)


In [7]:
import time
batch_size = 50
#generation_command = "python draw_samples.py --type allatom --minlen 184 --maxlen 185 --steplen 1 --perlen 50 --input_pdb /content/drive/MyDrive/Generative_Models/IL10_scaffolding/IL10_Mutant_model1.pdb --resample_idxs 0-24,50-90,126-184"
generation_command = "python draw_samples.py --type allatom --minlen 237 --maxlen 238 --steplen 1 --perlen 50 --input_pdb /content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_monomer.pdb --resample_idxs 0-27,33-46,51-139,152-167,179-211,221-237"
meta_data = {}
meta_data['batch_id'] = str(uuid.uuid4())
meta_data['batch_size'] = str(batch_size)
meta_data['Timestamp'] = str(datetime.now())
meta_data['model'] = 'protpardelle'
meta_data['task'] = 'all_atom_pdb_generation'
#meta_data['conditions'] = 'IL10 (monomer) scaffolding [--resample_idxs 0-24,50-90,126-184]'
meta_data['conditions'] = 'tev (monomer) scaffolding [--resample_idxs 0-27,33-46,51-139,152-167,179-211,221-237]'
meta_data['gpu'] = 'T4 GPU'
start_time = time.time()
!{generation_command}
end_time = time.time()
total_job_time = end_time - start_time
meta_data['wall_time_batch'] = str(total_job_time) + " Seconds"
meta_data['wall_time_task'] = str(total_job_time/batch_size) + " Seconds (inferred)"

for filename in os.listdir("/content/protpardelle/samples"):
    if filename.endswith(".pdb") and "samp" in filename:
      meta_data['entity_id'] = str(uuid.uuid4())
      #meta_data['output_file_name'] = "protpardelle_IL10_" + meta_data['entity_id'] + ".pdb"
      meta_data['output_file_name'] = "protpardelle_tev_" + meta_data['entity_id'] + ".pdb"
      metadata_entry = pd.Series(meta_data)
      all_metadata_df = pd.concat([all_metadata_df,pd.DataFrame(metadata_entry).T], ignore_index=True)
      #cleanup_command = f"mv /content/protpardelle/samples/{filename} /content/drive/MyDrive/Generative_Models/IL10_scaffolding/protpardelle_il10/{meta_data['output_file_name']}"
      cleanup_command = f"mv /content/protpardelle/samples/{filename} /content/drive/MyDrive/Generative_Models/IL10_scaffolding/protpardelle_il10/{meta_data['output_file_name']}"
      !{cleanup_command}
all_metadata_df.to_csv(meta_data_filepath, index=False)
print("Metadata saved. Cleaning up....")
! rm -r /content/protpardelle/samples
torch.cuda.empty_cache()

Namespace(model_checkpoint='checkpoints', mpnnpath='checkpoints/minimpnn_state_dict.pth', modeldir=None, modelepoch=None, type='allatom', param=None, paramval=None, parampath=None, perlen=50, minlen=237, maxlen=238, steplen=1, num_lens=None, targetdir='.', input_pdb='/content/drive/MyDrive/Generative_Models/IL10_scaffolding/tev_monomer.pdb', resample_idxs='0-27,33-46,51-139,152-167,179-211,221-237')
Base directory: .
Samples saved to: ./samples
Model loaded from checkpoints/allatom_state_dict.pth
Beginning sampling for 24-06-21-04-02-24...
Samples drawn for length 237
Sampling concluded after 298.21420526504517 seconds.
Of this, 293.8603000640869 seconds were for actual sampling.
50 total samples were drawn.
Metadata saved. Cleaning up....


In [None]:
%%bash
# Move every generated sample PDB from the local samples directory into the
# protpardelle_tev folder on Drive, keeping the original file names.
dest_dir=/content/drive/MyDrive/Generative_Models/IL10_scaffolding/protpardelle_tev
for file in /content/protpardelle/samples/*_samp*.pdb
  do
  # When nothing matches, bash passes the pattern through literally; skip it
  # instead of letting mv fail on a non-existent file.
  [ -e "$file" ] || continue
  bn=$(basename "$file")
  # Quoted so that paths containing spaces or glob characters stay intact.
  mv "$file" "$dest_dir/$bn"
  done

In [None]:
# Manually re-run the most recent `mv` command; requires `cleanup_command` to
# have been defined by the generation cell earlier in this session.
!{cleanup_command}

In [None]:
# Delete the local samples directory together with everything inside it.
!rm -r /content/protpardelle/samples

In [None]:
# Show what is currently in ./samples (reports an error if the directory is missing).
!ls ./samples

ls: cannot access './samples': No such file or directory


In [None]:
mkdir ./samples

In [None]:
generation_command = "python draw_samples.py --type allatom --minlen 184 --maxlen 185 --steplen 1 --perlen 10 --input_pdb /content/drive/MyDrive/Generative_Models/IL10_scaffolding/IL10Monomer_5f6ba_unrelaxed_rank_005_alphafold2_ptm_model_1_seed_000.pdb --resample_idxs 0-24,50-90,126-184"
!{generation_command}

Namespace(model_checkpoint='checkpoints', mpnnpath='checkpoints/minimpnn_state_dict.pth', modeldir=None, modelepoch=None, type='allatom', param=None, paramval=None, parampath=None, perlen=10, minlen=184, maxlen=185, steplen=1, num_lens=None, targetdir='.', input_pdb='/content/drive/MyDrive/Generative_Models/IL10_scaffolding/IL10Monomer_5f6ba_unrelaxed_rank_005_alphafold2_ptm_model_1_seed_000.pdb', resample_idxs='0-24,50-90,126-184')
Base directory: .
Samples saved to: ./samples
Model loaded from checkpoints/allatom_state_dict.pth
Beginning sampling for 24-05-01-03-32-53...
Samples drawn for length 184
Sampling concluded after 50.22335076332092 seconds.
Of this, 49.60393142700195 seconds were for actual sampling.
10 total samples were drawn.


In [None]:
# Download every length-184 sample PDB from the local samples directory
# through the browser.
from google.colab import files
import re

samples_dir = "./samples"
# The dot is escaped and the pattern is anchored at the end so that only names
# ending in "len184_samp<digits>.pdb" are selected (an unescaped "." matches
# any character, and an unanchored search also accepts e.g. "....pdb.bak").
sample_pattern = re.compile(r"len184_samp(\d+)\.pdb$")

# Sorted so the download order is the same on every run.
for filename in sorted(os.listdir(samples_dir)):
  if sample_pattern.search(filename):
    print(filename)
    files.download(os.path.join(samples_dir, filename))

In [None]:
#depreciated VVVVVVVVVVVVVVVVVVVVVVVVVVVV

In [None]:
%%bash
# Conda-based environment setup: download the Miniconda installer, install it
# non-interactively into /usr/local, then install Python 3.8, dssp and the pip
# dependencies before cloning both repositories.
installer=Miniconda3-latest-Linux-x86_64.sh
prefix=/usr/local
wget "https://repo.anaconda.com/miniconda/${installer}"
chmod +x "${installer}"
"./${installer}" -b -f -p "${prefix}"

conda install python=3.8 dssp pip -y
pip install torch transformers einops tqdm wandb rotary-embedding-torch biopython scipy torchtyping dm-tree matplotlib seaborn black ipython

git clone https://github.com/ProteinDesignLab/protpardelle
git clone https://github.com/dauparas/ProteinMPNN.git