In [1]:
# Mount Google Drive at /content/drive. Later cells read their inputs from,
# and write generated files and metadata to, paths under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import json
import os
import re
import shutil
import time
import uuid
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch

# CSV on Drive that accumulates one row per generated structure.
meta_data_filepath = "/content/drive/MyDrive/Generative_Models/conditional_generation/protpardelle_tev/generation_metadata_protpardelle_tev.csv"

# Resume from the existing catalogue when there is one; otherwise start with an
# empty frame (nothing is written to disk until a batch has been generated).
metadata_file_exists = os.path.exists(meta_data_filepath)
all_metadata_df = pd.read_csv(meta_data_filepath) if metadata_file_exists else pd.DataFrame()
print(
    "Existing generation metadata read in."
    if metadata_file_exists
    else "Created generation metadata dataframe"
)

Existing generation metadata read in.


In [None]:
%%bash
# Environment setup: install the Python dependencies and fetch the protpardelle
# and ProteinMPNN source trees.
pip install torch transformers einops tqdm wandb rotary-embedding-torch biopython scipy torchtyping dm-tree matplotlib seaborn black ipython

# Always clone relative to /content (the location the rest of the notebook
# assumes, e.g. /content/protpardelle/samples), even if this cell is re-run
# after the kernel has changed directory.
cd /content

# `git clone` exits non-zero when the destination already exists, which makes
# a re-run of this cell fail. Only clone what is missing.
[ -d protpardelle ] || git clone https://github.com/ProteinDesignLab/protpardelle
[ -d ProteinMPNN ] || git clone https://github.com/dauparas/ProteinMPNN.git

In [None]:
%cd protpardelle

In [None]:
# List the current working directory as a quick sanity check of its contents.
!ls

In [6]:
# Workaround: a recent protpardelle commit introduced a bug (issue logged upstream).
# Until it is fixed there, rewrite every "sampling.d" reference in draw_samples.py
# to "inference.d" in place.
patch_target = "draw_samples.py"

with open(patch_target, "r") as script_file:
    original_source = script_file.read()

patched_source = original_source.replace("sampling.d", "inference.d")

with open(patch_target, "w") as script_file:
    script_file.write(patched_source)


In [None]:
import time
batch_size = 50
generation_command = "python draw_samples.py --type allatom --minlen 237 --maxlen 238 --steplen 1 --perlen 50 --input_pdb /content/drive/MyDrive/Generative_Models/conditional_generation/tev_monomer.pdb --resample_idxs 0-27,33-46,51-139,152-167,179-211,221-237"
meta_data = {}
meta_data['batch_id'] = str(uuid.uuid4())
meta_data['batch_size'] = str(batch_size)
meta_data['Timestamp'] = str(datetime.now())
meta_data['model'] = 'protpardelle'
meta_data['task'] = 'all_atom_pdb_generation'
meta_data['conditions'] = 'tev (monomer) scaffolding [--resample_idxs 0-27,33-46,51-139,152-167,179-211,221-237]'
meta_data['gpu'] = 'T4 GPU'
start_time = time.time()
!{generation_command}
end_time = time.time()
total_job_time = end_time - start_time
meta_data['wall_time_batch'] = str(total_job_time) + " Seconds"
meta_data['wall_time_task'] = str(total_job_time/batch_size) + " Seconds (inferred)"

for filename in os.listdir("/content/protpardelle/samples"):
    if filename.endswith(".pdb") and "samp" in filename:
      meta_data['entity_id'] = str(uuid.uuid4())
      meta_data['output_file_name'] = "protpardelle_tev_" + meta_data['entity_id'] + ".pdb"
      metadata_entry = pd.Series(meta_data)
      all_metadata_df = pd.concat([all_metadata_df,pd.DataFrame(metadata_entry).T], ignore_index=True)
      cleanup_command = f"mv /content/protpardelle/samples/{filename} /content/drive/MyDrive/Generative_Models/conditional_generation/protpardelle_tev/{meta_data['output_file_name']}"
      !{cleanup_command}
all_metadata_df.to_csv(meta_data_filepath, index=False)
print("Metadata saved. Cleaning up....")
! rm -r /content/protpardelle/samples
torch.cuda.empty_cache()