## Install Libraries

In [None]:
!pip install aiohttp

# Set password for this notebook

In [None]:
import os
from getpass import getpass

# The shared password must not be stored in the notebook itself: take it from
# the PROTEIN_DESIGN_AUTH_TOKEN environment variable when that is set,
# otherwise prompt for it without echoing what is typed.
auth_token = os.environ.get("PROTEIN_DESIGN_AUTH_TOKEN") or getpass("Notebook password: ")
auth_token = auth_token.strip()
if not auth_token:
    raise ValueError("A password is required to call the hosted model endpoints.")

# Request headers passed to every session.post(...) call in this notebook.
headers = {
        "Authorization": f"Bearer {auth_token}",
        "Content-Type": "application/json"
}

# Cleaning and Renumbering your PDB

Sometimes a PDB file starts at a position other than 1 (for example, position 20). This is because PDB files often contain only a subcomponent of a larger protein sequence (you can refer to [UniProt](https://www.uniprot.org/) for the full sequence). If you want to renumber your protein so that it is numbered from position 1, use the following code. This will also clean your protein by fixing any chain breaks etc.

In [None]:
import aiohttp
import asyncio
from google.colab import files

#@markdown #### Upload a PDB File
upload_dict = files.upload()
# Nothing uploaded (e.g. the dialog was dismissed): stop with a clear message
# instead of failing on an out-of-range index below.
if not upload_dict:
    raise ValueError("No file was uploaded; please select a PDB file and run this cell again.")
# Only the first uploaded file is used; its content is written in binary mode.
pdb_string = next(iter(upload_dict.values()))
with open("tmp.pdb", "wb") as out:
    out.write(pdb_string)


async def post_clean_pdb(url: str, pdb_string: str):
    """POST the text of a PDB file to the cleaning endpoint and return the JSON reply.

    Args:
        url: Endpoint to POST to.
        pdb_string: Text content of the PDB file, sent as the ``pdb_string`` field.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        RuntimeError: If the server answers with an HTTP status of 400 or above.
    """
    payload = {
        "pdb_string": pdb_string
    }
    # One hour overall budget, expressed with aiohttp's ClientTimeout type.
    request_timeout = aiohttp.ClientTimeout(total=60 * 60)
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, timeout=request_timeout, headers=headers) as response:
            # Report auth/server failures here, with the server's own message,
            # rather than as a KeyError or JSON decoding error in the caller.
            if response.status >= 400:
                detail = await response.text()
                raise RuntimeError(
                    f"PDB cleaning request failed with HTTP {response.status}: {detail[:500]}"
                )
            return await response.json()

url = "https://molecularmachines--pdbfixer-fastapi-app.modal.run/fixpdb"
response_data = await post_clean_pdb(url, open("tmp.pdb","r").read())
filename = list(upload_dict.keys())[0]
filename_fixed= f"{filename.replace('.pdb','')}_fixed.pdb"
with open(filename_fixed,"w") as f:
  f.write(response_data["data"])

# ProteinMPNN : Generating Sequences From a Protein Structure

This code runs the ProteinMPNN model and gives you a set of sequences for the PDB structure provided. If your PDB has more than one chain, then you should also specify the chain that you want to make proposal sequences for. Note that these sequences are predicted to fold into the same structure as the provided PDB, but there is no guarantee that they will perform the same function.

In [None]:
import aiohttp
import asyncio
from google.colab import files

# Form inputs for the ProteinMPNN request. Every value below is forwarded in
# the request's "params" config; temp and num_seqs are entered as strings and
# converted with float()/int() when that config is built further down.
#@markdown #### Upload a PDB File
chains = "A" #@param {type:"string"}
temp = "0.1" #@param {type:"string"}
num_seqs = "8" #@param {type:"string"}
#@markdown #### Fix some positions in the protein sequence and only sample new amino acids at other positions.
fix_pos = "A20-45" #@param {type:"string"}
#@markdown - specify which positions to keep fixed in the sequence (example: `1,2-10`)
#@markdown - you can also specify chain specific constraints (example: `A1-10,B1-20`)
#@markdown - you can also specify to fix entire chain(s) (example: `A`)

#@markdown #### If you want to do the inverse of the fix positions where you only sample at the residues specified but not at the other locations check the following box
inverse = False #@param {type:"boolean"}


upload_dict = files.upload()
# Nothing uploaded (e.g. the dialog was dismissed): stop with a clear message
# instead of failing on an out-of-range index below.
if not upload_dict:
    raise ValueError("No file was uploaded; please select a PDB file and run this cell again.")
# Only the first uploaded file is used; its content is written in binary mode.
pdb_string = next(iter(upload_dict.values()))
with open("tmp.pdb", "wb") as out:
    out.write(pdb_string)


async def post_sample_request(url: str, pdb_string: str, mpnn_config: dict):
    """POST a structure and sampling configuration to the endpoint and return the JSON reply.

    Args:
        url: Endpoint to POST to.
        pdb_string: Text content of the PDB file, sent as the ``pdb_string`` field.
        mpnn_config: Sampling options, sent unchanged as the ``params`` field.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        RuntimeError: If the server answers with an HTTP status of 400 or above.
    """
    payload = {
        "pdb_string": pdb_string,
        "params": mpnn_config
    }
    # One hour overall budget, expressed with aiohttp's ClientTimeout type.
    request_timeout = aiohttp.ClientTimeout(total=60 * 60)
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, timeout=request_timeout, headers=headers) as response:
            # Report auth/server failures here, with the server's own message,
            # instead of handing an error payload back as if it were a result.
            if response.status >= 400:
                detail = await response.text()
                raise RuntimeError(
                    f"Sequence sampling request failed with HTTP {response.status}: {detail[:500]}"
                )
            return await response.json()

url = "https://molecularmachines--colabdesign-fastapi-app.modal.run/sample"
pdb_string = open("tmp.pdb","r").read()

mpnn_config = {
    "fix_pos": fix_pos,
    "inverse": inverse,
    "temp": float(temp),
    "batch": int(num_seqs),
    "chains": chains
}
response_data = await post_sample_request(url, pdb_string, mpnn_config)
response_data

# AlphaFold : Fold a protein using its sequence

This code will run the AlphaFold model and return a PDB structure that is the predicted structure for that sequence. You can evaluate how good the predicted structure is by looking at how confident AlphaFold is about its prediction. High confidence is usually a good indicator of agreement with the ground truth structure.

In [None]:
import aiohttp
import asyncio

#@markdown #### Input the sequence you would like to fold. Output will be in the files under ```output_predicted.pdb```
# Sequence that this cell posts as the "sequence" field of the folding request.
sequence = "NAFTVTVPKDLYVVEYGSNMTIECKFPVEKQLDLAALIVYWEMEDKNIIQFVHGEEDLKVQHSSYRQRARLLKDQLSLGNAALQITDVKLQDAGVYRCMISYGGADYKRITVKVNAPYAAALEHHHHHH" #@param {type:"string"}

async def post_fold_request(url:str,sequence:str):
    """POST a sequence to the folding endpoint and return the JSON reply.

    Args:
        url: Endpoint to POST to.
        sequence: Sequence to fold, sent as the ``sequence`` field.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        RuntimeError: If the server answers with an HTTP status of 400 or above.
    """
    payload = {
        "sequence": sequence
    }
    # One hour overall budget, expressed with aiohttp's ClientTimeout type.
    request_timeout = aiohttp.ClientTimeout(total=60 * 60)
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers, timeout=request_timeout) as response:
            # Report auth/server failures here, with the server's own message,
            # rather than as a KeyError or JSON decoding error in the caller.
            if response.status >= 400:
                detail = await response.text()
                raise RuntimeError(
                    f"Folding request failed with HTTP {response.status}: {detail[:500]}"
                )
            return await response.json()

alphafold_url = "https://molecularmachines--colabfold-fastapi-app.modal.run/alphafold/monomer"

response_data = await post_fold_request(alphafold_url, sequence)
with open("output_predicted.pdb","w") as f:
  f.write(response_data['results']['predicted_output'])

# AlphaFold-Multimer : Fold multiple sequences to predict protein complex structure

This code will run the AlphaFold model and return a PDB structure that is the predicted structure of the complex formed by two sequences. You can evaluate how good the predicted structure is by looking at how confident AlphaFold is about its prediction. High confidence is usually a good indicator of agreement with the ground truth structure.

In [None]:
import aiohttp
import asyncio

#@markdown #### Input the sequences you would like to fold. Output will be in the files under ```output_predicted_complex.pdb```
# sequence1 is sent inside the request's "sequences" list and sequence2 as its
# "target_sequence" field.
sequence1 = "NAFTVTVPKDLYVVEYGSNMTIECKFPVEKQLDLAALIVYWEMEDKNIIQFVHGEEDLKVQHSSYRQRARLLKDQLSLGNAALQITDVKLQDAGVYRCMISYGGADYKRITVKVNAPYAAALEHHHHHH" #@param {type:"string"}
sequence2 = "SMEEEIEEAYDLVEEAEKTGDTSLLKKAKELLDKVAEEATKSGNPILLIRVIIILIKIVRNSGDPSVAALARELLEKLEEIAEKEGNRFIEAMGEALRTQIERAL" #@param {type:"string"}

async def post_multimer_request(url:str,sequence1:str,sequence2:str):
    """POST two sequences to the multimer endpoint and return the JSON reply.

    Args:
        url: Endpoint to POST to.
        sequence1: Sent as the single element of the ``sequences`` list.
        sequence2: Sent as the ``target_sequence`` field.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        RuntimeError: If the server answers with an HTTP status of 400 or above.
    """
    payload = {
        "sequences": [sequence1],
        "target_sequence":sequence2
    }
    # One hour overall budget, expressed with aiohttp's ClientTimeout type.
    request_timeout = aiohttp.ClientTimeout(total=60 * 60)
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers, timeout=request_timeout) as response:
            # Report auth/server failures here, with the server's own message,
            # rather than as a KeyError or JSON decoding error in the caller.
            if response.status >= 400:
                detail = await response.text()
                raise RuntimeError(
                    f"Multimer folding request failed with HTTP {response.status}: {detail[:500]}"
                )
            return await response.json()
url = "https://molecularmachines--colabfold-fastapi-app.modal.run/alphafold/multimer"

response_data = await post_multimer_request(url, sequence1, sequence2)
pdb_string = response_data["results"][0]["pdb_str"]
with open("output_predicted_complex.pdb","w") as f:
  f.write(pdb_string)

# Embed a protein sequence with ESMC model

Embeds a protein sequence with ESMC (https://www.evolutionaryscale.ai/blog/esm-cambrian) and gives you a list of numbers, which is the embedding of that sequence.

In [None]:
import aiohttp
import asyncio

#@markdown #### Input the sequence you would like to embed. The cell displays the pseudo log likelihood score returned for it.
# Sequence that this cell posts as the "sequence" field of the embedding request.
sequence = "SMEEEIEEAYDLVEEAEKTGDTSLLKKAKELLDKVAEEATKSGNPILLIRVIIILIKIVRNSGDPSVAALARELLEKLEEIAEKEGNRFIEAMGEALRTQIERAL" #@param {type:"string"}

async def post_embedding(url:str,sequence:str):
    """POST a sequence to the embedding endpoint and return the JSON reply.

    Args:
        url: Endpoint to POST to.
        sequence: Sequence to embed, sent as the ``sequence`` field.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        RuntimeError: If the server answers with an HTTP status of 400 or above.
    """
    payload = {
        "sequence": sequence
    }

    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers) as response:
            # Report auth/server failures here, with the server's own message,
            # rather than as a KeyError or JSON decoding error in the caller.
            if response.status >= 400:
                detail = await response.text()
                raise RuntimeError(
                    f"Embedding request failed with HTTP {response.status}: {detail[:500]}"
                )
            return await response.json()

esm_url = "https://molecularmachines--esm-fast-app.modal.run/esm/embedding"

response_data = await post_embedding(esm_url, sequence)
embedding = response_data
f"pseudo log likelihood score is {embedding['pll']}"

# Make a binder with BindCraft

Make a binder for the provided target pdb

In [None]:
import aiohttp
import asyncio
from google.colab import files
#@markdown #### Upload the target PDB file you would like to design a binder for. The response is displayed below the cell.
upload_dict = files.upload()
# Nothing uploaded (e.g. the dialog was dismissed): stop with a clear message
# instead of failing on an out-of-range index below.
if not upload_dict:
    raise ValueError("No file was uploaded; please select a PDB file and run this cell again.")
# Only the first uploaded file is used; its content is written in binary mode.
pdb_string = next(iter(upload_dict.values()))
with open("tmp.pdb", "wb") as out:
    out.write(pdb_string)


# Form inputs for the binder-design request. They are read as globals when the
# payload is built: chains -> "chains", binder_length_range -> "lengths" (sent
# as the string entered here), num_seqs -> "num_samples" (converted with
# int()), target_hotspot_residues -> "target_hotspot_residues".
chains = "A" #@param {type:"string"}
binder_length_range = "60,70" #@param {type:"string"}
num_seqs = "2" #@param {type:"string"}
target_hotspot_residues = "A1-100" # @param {"type":"string","placeholder":""}

async def post_embedding(url:str):
    """POST a binder-design request for the structure in ``tmp.pdb`` and return the JSON reply.

    The payload is assembled from ``tmp.pdb`` in the working directory and from
    the notebook-level form variables ``chains``, ``binder_length_range``,
    ``num_seqs`` and ``target_hotspot_residues``.

    Args:
        url: Endpoint to POST to.

    Returns:
        The response body decoded with ``response.json()``.

    Raises:
        RuntimeError: If the server answers with an HTTP status of 400 or above.
    """
    # Read the target structure with a context manager so the handle is closed.
    with open("tmp.pdb", "r") as pdb_file:
        pdb_text = pdb_file.read()
    payload = {
        "pdb_str": pdb_text,
        "chains": chains,
        "lengths": binder_length_range,
        "num_samples": int(num_seqs),
        "fold_conditioning": "Default",
        "target_hotspot_residues": target_hotspot_residues
    }

    # One hour overall budget, expressed with aiohttp's ClientTimeout type.
    request_timeout = aiohttp.ClientTimeout(total=60 * 60)
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers, timeout=request_timeout) as response:
            # Report auth/server failures here, with the server's own message,
            # instead of handing an error payload back as if it were a result.
            if response.status >= 400:
                detail = await response.text()
                raise RuntimeError(
                    f"Binder design request failed with HTTP {response.status}: {detail[:500]}"
                )
            return await response.json()

# NOTE(review): the variable is still called esm_url, but in this cell it holds
# the BindCraft binder-design endpoint.
esm_url = "https://molecularmachines--bindcraft-fast-app.modal.run/bindcraft/design_binder_wrapper"

# Send the request and show the raw reply as the cell output.
response_data = await post_embedding(esm_url)
response_data

# Fold a protein and ligand with boltz-1



In [None]:
import aiohttp
import asyncio
from google.colab import files
import json
import aiohttp


# Form inputs read as globals when the request payload is built; they are sent
# as the "protein_sequence" and "ligand_sequence" fields.
protein_sequence = "MVTPEGNVSLVDESLLVGVTDEDRAVRSAHQFYERLIGLWAPAVMEAAHELGVFAALAEAPADSGELARRLDCDARAMRVLLDALYAYDVIDRIHDTNGFRYLLSAEARECLLPGTLFSLVGKFMHDINVAWPAWRNLAEVVRHGARDTSGAESPNGIAQEDYESLVGGINFWAPPIVTTLSRKLRASGRSGDATASVLDVGCGTGLYSQLLLREFPRWTATGLDVERIATLANAQALRLGVEERFATRAGDFWRGGWGTGYDLVLFANIFHLQTPASAVRLMRHAAACLAPDGLVAVVDQIVDADREPKTPQDRFALLFAASMTNTGGGDAYTFQEYEEWFTAAGLQRIETLDTPMHRILLARRATEPSAVPEGQASENLYFQ" #@param {type:"string"}
# NOTE(review): the default value looks like a SMILES string — confirm the format the endpoint expects.
ligand_sequence = "N[C@@H](Cc1ccc(O)cc1)C(=O)O" #@param {type:"string"}
async def post_embedding(url:str):
    """POST the protein/ligand pair to the endpoint and save the reply as ``boltz_output.zip``.

    The payload is built from the notebook-level variables ``protein_sequence``
    and ``ligand_sequence``.

    Args:
        url: Endpoint to POST to.

    Returns:
        The string ``"Response written to boltz_output.zip"`` once the response
        body has been written to disk.

    Raises:
        RuntimeError: If the server answers with an HTTP status of 400 or
            above; nothing is written to disk in that case.
    """
    payload = {
        "protein_sequence": protein_sequence,
        "ligand_sequence": ligand_sequence
    }

    # One hour overall budget, expressed with aiohttp's ClientTimeout type.
    request_timeout = aiohttp.ClientTimeout(total=60 * 60)
    async with aiohttp.ClientSession() as session:
        async with session.post(url, json=payload, headers=headers, timeout=request_timeout) as response:
            # Check the status first: otherwise an error body would be saved
            # under a .zip name and reported as a successful download.
            if response.status >= 400:
                detail = await response.text()
                raise RuntimeError(
                    f"Boltz-1 request failed with HTTP {response.status}: {detail[:500]}"
                )
            output_bytes = await response.read()

            # Write the received bytes to a file named 'boltz_output.zip'
            with open("boltz_output.zip", "wb") as f:
                f.write(output_bytes)

            return "Response written to boltz_output.zip"

# NOTE(review): the variable is still called esm_url, but in this cell it holds
# the Boltz-1 inference endpoint.
esm_url = "https://molecularmachines--boltz1-fastapi-app.modal.run/infer"

# Send the request; the value displayed is the status string returned by
# post_embedding after it has written boltz_output.zip.
response_data = await post_embedding(esm_url)
response_data