# Function Prediction for 104M protein



In [1]:
from huggingface_hub import notebook_login
from esm.models.esm3 import ESM3
import biotite.structure as bs
import py3Dmol
from esm.sdk.api import ESMProtein, GenerationConfig
from esm.sdk.experimental import ESM3GuidedDecoding, GuidedDecodingScoringFunction
from esm.sdk.api import ESM3InferenceClient

In [2]:
# Create scoring function (e.g. PTM scoring function)
class PTMScoringFunction(GuidedDecodingScoringFunction):
    def __call__(self, protein: ESMProtein) -> float:
        # Minimal example of a scoring function that scores proteins based on their pTM score
        # Given that ESM3 already has a pTM prediction head, we can directly access the pTM score
        assert protein.ptm is not None, "Protein must have pTM scores to be scored"
        return float(protein.ptm)

In [None]:
token= "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"
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [4]:
model: ESM3InferenceClient = ESM3.from_pretrained().to("cuda") # or "cpu"

## On Forge with larger ESM3 models
#from getpass import getpass

#from esm.sdk import client

#token = getpass("Token from Forge console: ")
#model = client(
#    model="esm3-medium-2024-08", url="https://forge.evolutionaryscale.ai", token=token
#)


Fetching 22 files:   0%|          | 0/22 [00:00<?, ?it/s]

AssertionError: Torch not compiled with CUDA enabled

In [None]:
def get_sample_protein() -> ESMProtein:
    protein = ProteinChain.from_rcsb("1utn")
    protein = ESMProtein.from_protein_chain(protein)
    protein.function_annotations = [
        # Peptidase S1A, chymotrypsin family: https://www.ebi.ac.uk/interpro/structure/PDB/1utn/
        FunctionAnnotation(label="peptidase", start=100, end=114),
        FunctionAnnotation(label="chymotrypsin", start=190, end=202),
    ]
    return protein



In [5]:
ptm_guided_decoding = ESM3GuidedDecoding(
    client=model, scoring_function=PTMScoringFunction()
)
# Start from a fully masked protein
PROTEIN_LENGTH = 256
starting_protein = ESMProtein(sequence="_" * PROTEIN_LENGTH)

# Call guided_generate
generated_protein = ptm_guided_decoding.guided_generate(
    protein=starting_protein,
    num_decoding_steps=len(starting_protein) // 8,
    num_samples_per_step=10,
)


Current score: -1:   0%|          | 0/32 [01:05<?, ?it/s]


KeyboardInterrupt: 

In [None]:
# Generate a protein WITHOUT guidance
generated_protein_no_guided: ESMProtein = model.generate(
    input=starting_protein,
    config=GenerationConfig(track="sequence", num_steps=len(starting_protein) // 8),
)  # type: ignore

# Fold
generated_protein_no_guided: ESMProtein = model.generate(
    input=generated_protein_no_guided,
    config=GenerationConfig(track="structure", num_steps=1),
)  # type: ignore

In [None]:
print(f"pTM Without guidance: {generated_protein_no_guided.ptm:.3f}")
print(f"pTM With guidance: {generated_protein.ptm:.3f}")

In [None]:
# Create a 1x2 grid of viewers (1 row, 2 columns)
view = py3Dmol.view(width=1000, height=500, viewergrid=(1, 2))

# Convert ESMProtein objects to ProteinChain objects
protein_chain1 = generated_protein_no_guided.to_protein_chain()
protein_chain2 = generated_protein.to_protein_chain()

# Add models to respective panels
view.addModel(protein_chain1.to_pdb_string(), "pdb", viewer=(0, 0))
view.addModel(protein_chain2.to_pdb_string(), "pdb", viewer=(0, 1))

# Set styles for each protein
view.setStyle({}, {"cartoon": {"color": "spectrum"}}, viewer=(0, 0))
view.setStyle({}, {"cartoon": {"color": "spectrum"}}, viewer=(0, 1))

# Zoom and center the view
view.zoomTo()
view.show()