In [18]:
from abc import ABC, abstractmethod
import torch
import torch.nn.functional as F
from tqdm import tqdm
import attr
import random
import os
import sys
from datetime import datetime

from esm.sdk.api import (
    ESM3InferenceClient,
    ESMProtein,
    ESMProteinError,
    ESMProteinTensor,
    SamplingConfig,
    SamplingTrackConfig,
    LogitsConfig,
)
from esm.models.esm3 import ESM3
from esm.tokenization import get_esm3_model_tokenizers

# Import classes from denoising_strategies.py
# Import the Tee and PrintFormatter classes from denoising_strategies.py
from denoising_strategies import Tee, PrintFormatter

# Import the BaseDenoising and MaxProbBasedDenoising classes
from denoising_strategies import BaseDenoising, MaxProbBasedDenoising, EntropyBasedDenoising


In [23]:
## On Forge with larger ESM3 models
# Build the remote inference client used by every denoiser in this notebook.
from esm.sdk import client

# The API key is read from the environment so it never appears in the notebook.
token = os.getenv("ESM_FORGE_API_KEY")
if not token:
    # os.getenv returns None when the variable is unset; fail here with an explicit
    # message instead of handing a missing token on to the client.
    raise RuntimeError(
        "ESM_FORGE_API_KEY is not set; export a Forge API token before running this cell."
    )
model = client(model="esm3-open", url="https://forge.evolutionaryscale.ai", token=token)

In [24]:
# --- Configuration ---
# Settings shared by the denoising cells below; later cells read these names directly.
TEST_SEQUENCE = "ACDE"  # four-residue toy sequence wrapped in an ESMProtein below
NOISE_PERCENTAGE = 50.0 # Mask 50% initially (2 positions for length 4)
NUM_DECODING_STEPS = 2 # Number of steps to unmask
TEMPERATURE = 0.0  # passed to denoise() as its temperature argument
TRACK = "sequence"  # track name handed to the denoisers and to denoise()
# --- End Configuration ---

# Create a dummy protein from the test sequence and echo it for reference.
protein = ESMProtein(sequence=TEST_SEQUENCE)
print(f"Original Protein: {protein.sequence}\n")

Original Protein: ACDE



In [25]:
# Instantiate the entropy-based denoiser around `model`. Note that `model` is the
# Forge client created earlier in this notebook (a remote endpoint), not a locally
# loaded ESM3 model.
denoiser = EntropyBasedDenoising(model)
denoiser.track = TRACK # Set track for prints

In [None]:
# Run the entropy-based denoiser on the test protein with the configuration constants.
# Arguments are positional; NOTE(review): presumably they map to (protein,
# noise_percentage, num_decoding_steps, temperature, track) as in the keyword call
# used for MaxProbBasedDenoising — confirm against denoising_strategies.py.
# The return value is not stored; as the cell's last expression it is only displayed.
denoiser.denoise(protein, NOISE_PERCENTAGE, NUM_DECODING_STEPS, TEMPERATURE, TRACK)

In [17]:
# What does the full output of a forward_and_sample call look like?
# Encode the test sequence, run one forward_and_sample call through the denoiser's
# client, and print the top-5 log-probabilities requested for the sequence and
# structure tracks.
protein = ESMProtein(sequence=TEST_SEQUENCE)
protein_tensor = denoiser.client.encode(protein)
# The ESM SDK client may return an ESMProteinError object instead of raising
# (see the SDK client API); stop here with a readable message rather than failing
# later on an unrelated attribute access.
if isinstance(protein_tensor, ESMProteinError):
    raise RuntimeError(f"encode failed: {protein_tensor}")

outputs = denoiser.client.forward_and_sample(
    protein_tensor,
    sampling_configuration=SamplingConfig(
        sequence=SamplingTrackConfig(topk_logprobs=5),
        structure=SamplingTrackConfig(topk_logprobs=5),
    ),
)
# Same guard for the forward pass: an error object has no topk_logprob to print.
if isinstance(outputs, ESMProteinError):
    raise RuntimeError(f"forward_and_sample failed: {outputs}")
print(outputs.topk_logprob)

ForwardTrackData(sequence=tensor([[-0.5568, -2.1173, -2.6212, -3.2794, -3.7599],
        [-1.2986, -1.3376, -1.3611, -3.4197, -3.5407],
        [-2.3516, -2.4317, -2.5255, -2.5967, -2.7017],
        [-2.4503, -2.4600, -2.4893, -2.5323, -2.6172],
        [-0.7971, -2.9670, -3.0451, -3.0646, -3.0969],
        [-0.9229, -2.9093, -2.9268, -2.9991, -3.1368]]), structure=tensor([[-3.0251, -3.2751, -3.2751, -3.2751, -3.2751],
        [-2.1125, -2.3625, -2.3625, -2.6125, -3.1125],
        [-4.1307, -4.3807, -4.3807, -4.3807, -4.3807],
        [-3.9972, -3.9972, -4.2472, -4.2472, -4.2472],
        [-2.7718, -2.7718, -3.0218, -3.2718, -3.2718],
        [-3.3501, -3.3501, -3.3501, -3.3501, -3.6001]]), secondary_structure=None, sasa=None, function=None)


In [7]:
# Smoke-test the max-probability denoising strategy on the same protein and
# configuration constants, with verbose tracing enabled.
# NOTE(review): the recorded run of this cell stops during "Computing position
# probabilities" with AttributeError: 'NoneType' object has no attribute 'sequence'.
# The traceback is truncated, so the failing attribute is not visible here; the
# printed ForwardAndSampleOutput shows logits=None, so possibly the strategy reads a
# field that forward_and_sample did not populate — TODO confirm in
# denoising_strategies.py.
print("\n=== Testing MaxProbBasedDenoising ===")
max_prob_denoiser = MaxProbBasedDenoising(model)
max_prob_denoiser.track = TRACK

# Keyword arguments make the mapping from configuration constants explicit.
denoised_protein_maxprob = max_prob_denoiser.denoise(
    protein=protein,
    noise_percentage=NOISE_PERCENTAGE,
    num_decoding_steps=NUM_DECODING_STEPS,
    temperature=TEMPERATURE,
    track=TRACK,
    verbose=True
)


=== Testing MaxProbBasedDenoising ===
Starting max-probability denoising process
├── Adding noise to protein tensor
│   ├── Masked positions (sequence): [4, 1]
└── Resulting tensor: tensor([ 0, 32, 23, 13, 32,  2])
├── Initial sequence: _CD_
├── Starting denoising steps:


Denoising:   0%|          | 0/2 [00:00<?, ?it/s]

│   ├── Step 1/2


Denoising:   0%|          | 0/2 [00:00<?, ?it/s]

│   │   ├── Computing position probabilities
Output: ForwardAndSampleOutput(logits=None, embeddings=None, residue_annotation_logits=None, hidden_states=None, protein_tensor=ESMProteinTensor(sequence=tensor([ 0, 20, 23, 13, 24,  2]), structure=tensor([4098, 3425, 1339, 1310, 2048, 4097]), secondary_structure=tensor([0, 0, 0, 0, 0, 0]), sasa=tensor([0, 0, 0, 0, 0, 0]), function=tensor([[0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0]]), residue_annotations=None, coordinates=None, potential_sequence_of_concern=False), entropy=ForwardTrackData(sequence=tensor([1.8691, 1.5893, 2.9100, 2.9065, 2.5071, 2.6606]), structure=tensor([4.2285, 3.7313, 5.3115, 5.3432, 4.9553, 5.1686]), secondary_structure=None, sasa=None, function=None), prob=ForwardTrackData(sequence=tensor([1.0000, 0.3015, 1.0000, 1.0000, 0.3553, 1.0000]), structure=tensor([1.0000, 0.081




AttributeError: 'NoneType' object has no attribute 'sequence'