<a href="https://colab.research.google.com/github/PeptoneLtd/proteinmpnn_ddg/blob/main/ProteinMPNN_ddG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# ProteinMPNN-ddG

Scores all possible point mutations of a protein to identify those which improve stability and/or expression.


In [1]:
#@title Install ProteinMPNN-ddG (and colabdesign)
import os
# Install ProteinMPNN-ddG on first use. Only a missing package triggers the
# install: catching ImportError (instead of a bare `except:`) lets unrelated
# failures such as KeyboardInterrupt propagate rather than silently starting
# a pip install.
try:
  import proteinmpnn_ddg
except ImportError:
  install_status = os.system("pip install -q proteinmpnn_ddg[cuda12]@git+https://github.com/PeptoneLtd/proteinmpnn_ddg.git@paper")
  # os.system does not raise on failure; surface a failed install here with a
  # clear message instead of a confusing ImportError on the next import line.
  if install_status != 0:
    raise RuntimeError(
        f"pip install of proteinmpnn_ddg failed (exit status {install_status})")

from proteinmpnn_ddg import predict_logits_for_all_point_mutations_of_single_pdb

import numpy as np
import pandas as pd

import jax
import jax.numpy as jnp

from google.colab import files
from google.colab import data_table
data_table.disable_dataframe_formatter()

def _fetch_with_wget(url, filename):
  """Download `url` into the working directory with wget and return `filename`.

  wget is invoked with an argument list (no shell), so the URL is never
  interpreted by a shell. `-q` keeps it quiet and `-nc` skips the download when
  the file is already present.

  Raises:
    FileNotFoundError: if `filename` does not exist after wget has run
      (e.g. unknown identifier or no network access).
  """
  import subprocess
  subprocess.run(["wget", "-qnc", url], check=False)
  # wget's exit status is not relied upon; the presence of the file is what
  # the caller actually needs.
  if not os.path.isfile(filename):
    raise FileNotFoundError(
        f"Could not download {url}; expected file '{filename}' was not created")
  return filename

def get_pdb(pdb_code=""):
  """Resolve `pdb_code` to the path of a local PDB file.

  Args:
    pdb_code: one of
      - None or "": prompt for a file upload (via google.colab `files`) and
        save its contents as "tmp.pdb";
      - a path to an existing file: returned unchanged;
      - a 4-character string: downloaded from files.rcsb.org as
        "<pdb_code>.pdb";
      - anything else: downloaded from alphafold.ebi.ac.uk as
        "AF-<pdb_code>-F1-model_v3.pdb".

  Returns:
    Path (str) of the PDB file on local disk.

  Raises:
    ValueError: if the upload prompt returned no file.
    FileNotFoundError: if a download did not produce the expected file.
  """
  if pdb_code is None or pdb_code == "":
    upload_dict = files.upload()
    if not upload_dict:
      raise ValueError("No file was uploaded; cannot determine a PDB file")
    # Use the first uploaded file (upload order is preserved by the dict).
    pdb_string = next(iter(upload_dict.values()))
    with open("tmp.pdb","wb") as out: out.write(pdb_string)
    return "tmp.pdb"
  if os.path.isfile(pdb_code):
    return pdb_code
  if len(pdb_code) == 4:
    return _fetch_with_wget(
        f"https://files.rcsb.org/view/{pdb_code}.pdb", f"{pdb_code}.pdb")
  return _fetch_with_wget(
      f"https://alphafold.ebi.ac.uk/files/AF-{pdb_code}-F1-model_v3.pdb",
      f"AF-{pdb_code}-F1-model_v3.pdb")

In [2]:
import warnings, os, re
warnings.simplefilter(action='ignore', category=FutureWarning)

# Create the output directory without spawning a shell; a no-op if it exists.
os.makedirs("output", exist_ok=True)

# USER OPTIONS
#@markdown # ProteinMPNN options
model_name = "v_48_020" #@param ["v_48_002", "v_48_010", "v_48_020", "v_48_030"]
#@markdown (v_48_020 recommended)


#@markdown # Input Options
pdb='6MRR' #@param {type:"string"}
#@markdown (leave `pdb` blank to get an upload prompt)
chains = "A" #@param {type:"string"}
#@markdown (You can specify several chains, separating by commas e.g. "A,C")

#@markdown Only the chains specified will be loaded from the PDB file for prediction
# chains_to_predict = "" #@param {type:"string"}
# #@markdown (Leave `chains_to_predict` empty to predict all chains)

nrepeats = 1
seed = 42

# cleaning user options
# Split the form string on any run of non-alphanumeric characters and drop empty
# pieces, so inputs such as "A, C", "A,C," or " A" never produce an empty chain
# ID. Digits are kept because PDB chain identifiers may be numeric.
chains = [chain_id for chain_id in re.split("[^A-Za-z0-9]+", chains) if chain_id]
if not chains:
  raise ValueError('No chain IDs given; set `chains` to e.g. "A" or "A,C"')

# Resolve the `pdb` option (upload / local path / RCSB / AlphaFold) to a local file.
pdb_path = get_pdb(pdb)

In [3]:
#@title Run ProteinMPNN-ddG

#@markdown (Positive values are good mutations, which strengthen stability and expression)

# Settings shared by every per-chain prediction call.
shared_kwargs = dict(
    nrepeat=nrepeats,
    seed=seed,
    pad_inputs=False,
    apply_ddG_correction=True,
)
output_columns = ['chain','pre','pos','post','proteinmpnn_ddg_score']

# Score each requested chain in turn (all chains in `chains` are passed to the
# model on every call) and tag the rows with the chain they belong to.
dfs = []
for chain in chains:
  chain_df = predict_logits_for_all_point_mutations_of_single_pdb(
      model_name, chains, pdb_path, chain_to_predict=chain, **shared_kwargs)
  chain_df['chain'] = chain
  dfs.append(chain_df)

# Combine the per-chain tables, expose the score under its public name, keep
# only the reported columns, and save the full table.
df = (
    pd.concat(dfs)
    .rename(columns={'logit_difference_ddg': 'proteinmpnn_ddg_score'})
    [output_columns]
)
df.to_csv('predictions.csv')

# Show the ten highest-scoring mutations, index hidden, one decimal place.
top_mutations = df.sort_values('proteinmpnn_ddg_score', ascending=False).head(10)
display(top_mutations.style.hide().format(precision=1, decimal="."))

chain,pre,pos,post,proteinmpnn_ddg_score
A,R,10,V,4.1
A,V,36,I,3.4
A,H,9,Y,3.4
A,S,3,D,3.3
A,S,3,N,3.1
A,G,41,C,2.5
A,W,2,M,2.4
A,H,9,W,2.4
A,H,9,F,2.3
A,V,38,M,2.2


In [4]:
#@title download predictions (optional)
# Trigger a browser download of the CSV written by the prediction cell.
from google.colab import files
files.download('predictions.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>