<a href="https://colab.research.google.com/github/Angelique28/Designing-Protein-Binding-Peptides---CECAM-Workshop/blob/main/notebooks/5_CoFolding_Evaluation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Workshop Notebook 5: Co-folding & Evaluation

In this notebook, we will:
1. Co-fold MDM2 with our generated peptide candidates using Boltz-2.
2. Extract binding metrics (contacts, iPAE, pLDDT).
3. Rank peptide candidates based on binding quality.


In [1]:
#@title **Set up our environment**

import torch
if not torch.cuda.is_available():
    print("⚠️ Warning: GPU runtime not detected. Please go to Runtime > Change runtime type > select GPU.")
else:
    print("✅ GPU detected:", torch.cuda.get_device_name(0))

from google.colab import drive
from pathlib import Path
import requests
import yaml

#@title Install dependencies

runtime = "GPU(L4 or T4)"

import os
import subprocess
import sys

print('Installing dependencies... ', end='')
dependencies = "torch torchvision torchaudio numpy hydra-core pytorch-lightning "
dependencies += "rdkit dm-tree requests pandas types-requests einops einx fairscale "
dependencies += "mashumaro modelcif wandb click pyyaml biopython scipy numba gemmi "
dependencies += "scikit-learn chembl_structure_pipeline py3Dmol "
dependencies2 = "cuequivariance_ops_cu12 cuequivariance_ops_torch_cu12 cuequivariance_torch tqdm biopandas"

if runtime == "GPU(L4 or T4)":
    precision = '32-true'
else:
    precision = 'bf16-true'

subprocess.run("pip install ipywidgets torch torchvision torchaudio", shell=True)
subprocess.run("git clone https://github.com/jwohlwend/boltz.git", shell=True)
subprocess.run(f"sed -i 's/bf16-mixed/{precision}/g' /content/boltz/src/boltz/main.py", shell=True)
subprocess.run(f"pip install {dependencies}", shell=True)
subprocess.run(f"pip install {dependencies2}", shell=True)
subprocess.run("cd boltz; pip install --no-deps .", shell=True)
sys.path.insert(0, '/content/boltz/src/')

import ipywidgets as widgets
from IPython.display import display, HTML
import os
import re
import requests
from rdkit import Chem, RDLogger
from rdkit.Chem import Draw, AllChem
from google.colab import files
from Bio.PDB import MMCIFParser
from Bio.PDB.Polypeptide import is_aa
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import pandas as pd
from tqdm import tqdm
import boltz.main
from boltz.main import predict
from biopandas.pdb import PandasPdb
from scipy.spatial.distance import cdist
import numpy as np
from typing import Optional, Any, Literal
print('done.')


✅ GPU detected: Tesla T4
Installing dependencies... 

[10:41:32] Initializing Normalizer


done.


In [2]:
#@title **Set up our Paths and mount a Google Drive folder**

#@markdown We will set a project ID so that we can keep separate executions separated, and a step ID so that we can keep the outputs of each step separate
PROJECT_ID = "MDM2" #@param {type:"string"}
STEP_ID = "5"

#@markdown We will use Google Drive mounts for persistence between multiple notebooks in this tutorial.

#@markdown Log in with your Google account and give permissions to access the drive.
WORKSHOP_DIRECTORY = Path('/content/drive/MyDrive/cecam_workshop_2025_generative')
drive.mount(str(WORKSHOP_DIRECTORY.parent.parent))
STEP_PATH = WORKSHOP_DIRECTORY / 'projects' / PROJECT_ID / STEP_ID
STEP_PATH.mkdir(exist_ok = True, parents = True)

Mounted at /content/drive


In [3]:
#@title Let's Retrieve our Generated Peptide Sequences

target_record = SeqIO.read(str(STEP_PATH.parent / 'sequences' / 'target.fasta'), "fasta")
target_sequence, target_id = str(target_record.seq), target_record.id
peptide_sequences = []
for filename in (STEP_PATH.parent / 'sequences').glob("*.fasta"):
  for record in SeqIO.parse(filename, 'fasta'):
    if record.id.startswith('tgt_'):
      continue
    peptide_sequences.append((record.id, str(record.seq), ))

df = pd.DataFrame(peptide_sequences)
df.columns = ['peptide_id', 'peptide_sequence']

df = pd.concat([df, pd.DataFrame([{'peptide_id' : 'exp_p53', 'peptide_sequence' : 'SQETFSDLWKLLPEN'}])])
df

Unnamed: 0,peptide_id,peptide_sequence
0,rfp_27ad120,REEMERRGSQ
1,rfp_8082671,EEAARQERAR
2,rfp_d37d154,AEEVARRHAQ
3,rfp_76e9645,QEETQRERAK
4,rfp_a182988,SSHWLQTLQS
5,rfp_a3d6af1,TDVTREAEAQ
6,rfp_a12da38,SATKREAAER
7,rfp_06f484e,TEQEERERLS
8,rfp_627ecd4,EEQQRLREKE
9,rfp_4f57d33,ASQKLEESTR


In [5]:
#@title Load Boltz-2 Model Code

def load_boltz_model(
    # predict_args
    recycling_steps: int = 3,
    sampling_steps: int = 200,
    diffusion_samples: int = 1,
    sampling_steps_affinity: int = 200,
    diffusion_samples_affinity: int = 3,
    max_parallel_samples: Optional[int] = None,
    write_full_pae: bool = True,
    write_full_pde: bool = True,

    # diffusion_params
    step_scale: Optional[float] = None,

    # steering args
    use_potentials: bool = False,

    # msa args
    subsample_msa: bool = True,
    num_subsampled_msa: int = 1024,

    ):

    predict_args = {
        "recycling_steps": recycling_steps,
        "sampling_steps": sampling_steps,
        "diffusion_samples": diffusion_samples,
        "max_parallel_samples": max_parallel_samples,
        "write_confidence_summary": True,
        "write_full_pae": write_full_pae,
        "write_full_pde": write_full_pde,
    }

    from boltz.main import Boltz2DiffusionParams, BoltzSteeringParams, MSAModuleArgs, PairformerArgsV2, Boltz2, download_boltz2
    from dataclasses import asdict

    diffusion_params = Boltz2DiffusionParams()
    step_scale = 1.5 if step_scale is None else step_scale
    diffusion_params.step_scale = step_scale

    steering_args = BoltzSteeringParams()
    steering_args.fk_steering = use_potentials
    steering_args.physical_guidance_update = use_potentials

    msa_args = MSAModuleArgs(
        subsample_msa=subsample_msa,
        num_subsampled_msa=num_subsampled_msa,
        use_paired_feature=True,
    )

    pairformer_args = PairformerArgsV2()

    cache = Path('/content/cache')
    cache.mkdir(exist_ok=True, parents=True)
    checkpoint = cache / "boltz2_conf.ckpt"
    if not checkpoint.exists():
      download_boltz2(cache)
    model = Boltz2.load_from_checkpoint(
        checkpoint,
        strict=True,
        predict_args=predict_args,
        map_location="cpu",
        diffusion_process_args=asdict(diffusion_params),
        ema=False,
        use_kernels=True,
        pairformer_args=asdict(pairformer_args),
        msa_args=asdict(msa_args),
        steering_args=asdict(steering_args),
    )
    model.eval()
    return model

def predict(  # noqa: C901, PLR0915, PLR0912
    data: str,
    out_dir: str,
    model_instance: Any,
    devices: int = 1,
    accelerator: str = "gpu",
    output_format: Literal["pdb", "mmcif"] = "mmcif",
    num_workers: int = 2,
    override: bool = False,
    use_msa_server: bool = False,
    msa_server_url: str = "https://api.colabfold.com",
    msa_pairing_strategy: str = "greedy",
    msa_server_username: Optional[str] = None,
    msa_server_password: Optional[str] = None,
    api_key_header: Optional[str] = None,
    api_key_value: Optional[str] = None,

    model: Literal["boltz1", "boltz2"] = "boltz2",
    method: Optional[str] = None,
    affinity_mw_correction: Optional[bool] = False,
    preprocessing_threads: int = 1,
    max_msa_seqs: int = 8192,

    write_embeddings: bool = False,
) -> None:
    """Run predictions with Boltz."""
    import torch
    from pytorch_lightning import Trainer
    from pytorch_lightning.strategies import DDPStrategy

    from boltz.main import check_inputs, const, process_inputs, Manifest, filter_inputs_structure, BoltzProcessedInput, BoltzWriter
    from boltz.data.module.inferencev2 import Boltz2InferenceDataModule
    import platform

    # Set no grad
    torch.set_grad_enabled(False)

    # Ignore matmul precision warning
    torch.set_float32_matmul_precision("highest")

    # Set rdkit pickle logic
    Chem.SetDefaultPickleProperties(Chem.PropertyPickleOptions.AllProps)

    for key in ["CUEQ_DEFAULT_CONFIG", "CUEQ_DISABLE_AOT_TUNING"]:
        # Disable kernel tuning by default,
        # but do not modify envvar if already set by caller
        os.environ[key] = os.environ.get(key, "1")

    # Get MSA server credentials from environment variables if not provided
    if use_msa_server:
        if msa_server_username is None:
            msa_server_username = os.environ.get("BOLTZ_MSA_USERNAME")
        if msa_server_password is None:
            msa_server_password = os.environ.get("BOLTZ_MSA_PASSWORD")
        if api_key_value is None:
            api_key_value = os.environ.get("MSA_API_KEY_VALUE")

    # Create output directories
    data = Path(data).expanduser()
    out_dir = Path(out_dir).expanduser()
    out_dir = out_dir / f"boltz_results_{data.stem}"
    out_dir.mkdir(parents=True, exist_ok=True)

    # Validate inputs
    data = check_inputs(data)

    # Check method
    if method is not None:
        if model == "boltz1":
            msg = "Method conditioning is not supported for Boltz-1."
            raise ValueError(msg)
        if method.lower() not in const.method_types_ids:
            method_names = list(const.method_types_ids.keys())
            msg = f"Method {method} not supported. Supported: {method_names}"
            raise ValueError(msg)

    # Process inputs
    ccd_path = Path('/content/cache') / "ccd.pkl"
    mol_dir = Path('/content/cache') / "mols"
    process_inputs(
        data=data,
        out_dir=out_dir,
        ccd_path=ccd_path,
        mol_dir=mol_dir,
        use_msa_server=use_msa_server,
        msa_server_url=msa_server_url,
        msa_pairing_strategy=msa_pairing_strategy,
        msa_server_username=msa_server_username,
        msa_server_password=msa_server_password,
        api_key_header=api_key_header,
        api_key_value=api_key_value,
        boltz2=model == "boltz2",
        preprocessing_threads=preprocessing_threads,
        max_msa_seqs=max_msa_seqs,
    )

    # Load manifest
    manifest = Manifest.load(out_dir / "processed" / "manifest.json")

    # Filter out existing predictions
    filtered_manifest = filter_inputs_structure(
        manifest=manifest,
        outdir=out_dir,
        override=override,
    )

    # Load processed data
    processed_dir = out_dir / "processed"
    processed = BoltzProcessedInput(
        manifest=filtered_manifest,
        targets_dir=processed_dir / "structures",
        msa_dir=processed_dir / "msa",
        constraints_dir=(
            (processed_dir / "constraints")
            if (processed_dir / "constraints").exists()
            else None
        ),
        template_dir=(
            (processed_dir / "templates")
            if (processed_dir / "templates").exists()
            else None
        ),
        extra_mols_dir=(
            (processed_dir / "mols") if (processed_dir / "mols").exists() else None
        ),
    )

    # Set up trainer
    strategy = "auto"
    if (isinstance(devices, int) and devices > 1) or (
        isinstance(devices, list) and len(devices) > 1
    ):
        start_method = "fork" if platform.system() != "win32" and platform.system() != "Windows" else "spawn"
        strategy = DDPStrategy(start_method=start_method)
        if len(filtered_manifest.records) < devices:
            if isinstance(devices, list):
                devices = devices[: max(1, len(filtered_manifest.records))]
            else:
                devices = max(1, min(len(filtered_manifest.records), devices))

    # Create prediction writer
    pred_writer = BoltzWriter(
        data_dir=processed.targets_dir,
        output_dir=out_dir / "predictions",
        output_format=output_format,
        boltz2=model == "boltz2",
        write_embeddings=write_embeddings,
    )

    # Set up trainer
    trainer = Trainer(
        default_root_dir=out_dir,
        strategy=strategy,
        callbacks=[pred_writer],
        accelerator=accelerator,
        devices=devices,
        precision=32 if model == "boltz1" else "bf16-mixed",
    )

    if filtered_manifest.records:
        # Create data module
        data_module = Boltz2InferenceDataModule(
            manifest=processed.manifest,
            target_dir=processed.targets_dir,
            msa_dir=processed.msa_dir,
            mol_dir=mol_dir,
            num_workers=num_workers,
            constraints_dir=processed.constraints_dir,
            template_dir=processed.template_dir,
            extra_mols_dir=processed.extra_mols_dir,
            override_method=method,
        )

        # Compute structure predictions
        trainer.predict(
            model_instance,
            datamodule=data_module,
            return_predictions=False,
        )

In [6]:
#@title **Co-Fold our Target with the peptides using Boltz-2**

def fix_msa(msa_path: Path):
  rewrite = False
  with msa_path.open('r') as f:
    msa = list(f.readlines())
    if msa[-1] == '\x00':
      msa = msa[:-1]
      rewrite = True
  if rewrite:
    with msa_path.open('w') as f:
      for line in msa:
        f.write(line)

if len(target_sequence) == 491 and target_sequence.startswith('MCN') and target_sequence.endswith('YFP'):
  msa_path = str(WORKSHOP_DIRECTORY / 'data' / 'uniref_MDM2_full.a3m')
elif len(target_sequence) == 83 and target_sequence.startswith('TLV') and target_sequence.endswith('NLV'):
  msa_path = str(WORKSHOP_DIRECTORY / 'data' / 'uniref_MDM2.a3m')
else:
  msa_path = (WORKSHOP_DIRECTORY / 'projects' / PROJECT_ID / '1' / 'boltz_results_target' / 'msa' / 'target_unpaired_tmp_env' / 'uniref.a3m')
if msa_path:
  fix_msa(Path(msa_path))

def write_boltz_yaml(
    peptide_sequence: str,
    peptide_id: str,
    target_sequence: str = target_sequence,
    target_id: str = target_id,
    ) -> Path:

  data = f'''
sequences:
  - protein:
      id: [A1]
      sequence: {target_sequence}
      msa: {str(msa_path)}
  - protein:
      id: [B1]
      sequence: {peptide_sequence}
      msa: empty
'''
  yaml_path = STEP_PATH / 'boltz_yaml' / f'{target_id}-{peptide_id}.yaml'
  yaml_path.parent.mkdir(exist_ok=True, parents=True)
  with yaml_path.open('w') as f:
    f.write(str(data))
  return yaml_path

def fix_pdb(pdb_path: Path) -> Path:
  fixed_path = pdb_path.with_suffix(".fixed.pdb")
  with pdb_path.open('r') as f:
      with fixed_path.open('w') as g:
          for line in f:
              line = line[:22] + line[23:] #remove the 1 from the chain id # A1 -> A
              g.write(line)
  return fixed_path

#@title Run prediction using Boltz-2
output_format = 'pdb'
num_workers = 0
#@markdown Lower the step scale to increase the diversity of result. (default: 1.638)
step_scale = 1.638 #@param {type:"slider", min:1, max:2, step:0.001}
#@markdown Number of diffusion samples to be generated. (default: 1, AlphaFold3: 5)
diffusion_samples = 1 #@param {type:"slider", min:1, max:10, step:1}
#@markdown Number of recycling steps for the prediction. (default: 3, AlphaFold3: 10)
recycling_steps = 3 #@param {type:"slider", min:1, max:25, step:1}
#@markdown Number of sampling steps for structure prediction. (default: 200)
sampling_steps = 50 #@param {type:"slider", min:50, max:400, step:50}
#@markdown Maximum number of MSA sequences to be used
max_msa_seqs = 8192 #@param [32, 64, 128, 256, 512, 1024, 2048, 4096, 8192]
#@markdown Subsample MSA?
subsample_msa = False #@param {type:"boolean"}
#@markdown Number of subsampled MSA
num_subsampled_msa = 1024 #@param [4, 8, 16, 32, 64, 128, 256, 512, 1024]
#@markdown MSA pairing strategy
msa_pairing_strategy = 'greedy' #@param ['greedy', 'complete']

try:
  model_instance
except NameError:
  model_instance = load_boltz_model(
    recycling_steps = recycling_steps,

    sampling_steps = sampling_steps,
    diffusion_samples = diffusion_samples,
    write_full_pae = True,
    write_full_pde = True,
    step_scale = step_scale
)

for i, row in tqdm(df.iterrows()):
  peptide_id = row['peptide_id']
  structure_path = STEP_PATH / 'boltz_results' / f'boltz_results_{target_id}-{peptide_id}' / 'predictions' / f'{target_id}-{peptide_id}' / f'{target_id}-{peptide_id}_model_0.pdb'
  if structure_path.exists():
    continue
  yaml_path = write_boltz_yaml(
      peptide_sequence = row['peptide_sequence'],
      peptide_id = row['peptide_id'],
      target_sequence = target_sequence,
      target_id = target_id,
  )

  predict(data = str(yaml_path),
    out_dir = str(STEP_PATH / 'boltz_results'),
    model_instance = model_instance,
    use_msa_server = False,
    output_format = output_format,
    num_workers = 4,
    override = True,
    max_msa_seqs = max_msa_seqs,
    msa_pairing_strategy = msa_pairing_strategy,
  )

Downloading the CCD data to /content/cache/mols.tar. This may take a bit of time. You may change the cache directory with the --cache flag.
Extracting the CCD data to /content/cache/mols. This may take a bit of time. You may change the cache directory with the --cache flag.
Downloading the Boltz-2 weights to /content/cache/boltz2_conf.ckpt. You may change the cache directory with the --cache flag.
Downloading the Boltz-2 affinity weights to /content/cache/boltz2_aff.ckpt. You may change the cache directory with the --cache flag.


0it [00:00, ?it/s]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  6.45it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


1it [00:21, 21.98s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 11.26it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


2it [00:32, 15.40s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.59it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


3it [00:43, 13.13s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  8.69it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


4it [00:53, 11.88s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  6.71it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


5it [01:03, 11.44s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  6.24it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


6it [01:14, 11.14s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.68it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


7it [01:25, 11.07s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.74it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


8it [01:35, 10.90s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.51it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


9it [01:46, 10.69s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  7.33it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


10it [01:56, 10.62s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.33it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


11it [02:07, 10.59s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.49it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


12it [02:17, 10.64s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  9.00it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


13it [02:28, 10.71s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.56it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


14it [02:38, 10.54s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  5.90it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


15it [02:49, 10.58s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  9.15it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


16it [02:59, 10.54s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  8.55it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


17it [03:10, 10.68s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.26it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


18it [03:21, 10.68s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  7.89it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


19it [03:32, 10.60s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  6.83it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


20it [03:42, 10.63s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  8.36it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


21it [03:53, 10.59s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  7.63it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


22it [04:05, 11.18s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.58it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


23it [04:16, 11.16s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  8.05it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


24it [04:27, 11.00s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  6.82it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


25it [04:37, 10.81s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  5.86it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


26it [04:48, 10.80s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  9.32it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


27it [04:59, 10.78s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  6.76it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


28it [05:10, 10.91s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.


100%|██████████| 1/1 [00:00<00:00, 10.27it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


29it [05:21, 10.88s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  3.18it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


30it [05:32, 10.91s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  4.17it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


31it [05:43, 10.93s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  7.36it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


32it [05:54, 10.99s/it]

Checking input data.
Processing 1 inputs with 1 threads.



  0%|          | 0/1 [00:00<?, ?it/s][A

Found explicit empty MSA for some proteins, will run these in single sequence mode. Keep in mind that the model predictions will be suboptimal without an MSA.



100%|██████████| 1/1 [00:00<00:00,  7.66it/s]
INFO:pytorch_lightning.utilities.rank_zero:Using bfloat16 Automatic Mixed Precision (AMP)
INFO:pytorch_lightning.utilities.rank_zero:💡 Tip: For seamless cloud uploads and versioning, try installing [litmodels](https://pypi.org/project/litmodels/) to enable LitModelCheckpoint, which syncs automatically with the Lightning model registry.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: True (cuda), used: True
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.accelerators.cuda:LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Predicting: |          | 0/? [00:00<?, ?it/s]

Number of failed examples: 0


33it [06:05, 11.08s/it]


In [7]:
target_id = f'tgt_{PROJECT_ID}'

#@markdown Enter target residues, comma separated. Eg. 12,13,14
TARGET_RESIDUES = '68,29,36,50,74,33' #@param {type:"string"}
TARGET_RESIDUES = [int(i.strip()) for i in TARGET_RESIDUES.split(",")]
#@markdown ---
RANK_ON = "scaled_score" #@param ["n_contacts", "pae_mean", "n_contacts_scaled", "pae_mean_scaled", "scaled_score"]


def get_structure_path(peptide_id: str) -> str:
  structure_path = STEP_PATH / 'boltz_results' / f'boltz_results_{target_id}-{peptide_id}' / 'predictions' / f'{target_id}-{peptide_id}' / f'{target_id}-{peptide_id}_model_0.pdb'
  fix_pdb(structure_path)
  return structure_path.with_suffix('.fixed.pdb')

def get_pae_path(peptide_id: str) -> str:
  structure_path = get_structure_path(peptide_id)
  pae_path = structure_path.parent / f'pae_{structure_path.name.split(".")[0]}.npz'
  return pae_path

def analyse_structure(structure_path: Path):
  structure_df = PandasPdb().read_pdb(str(structure_path)).df['ATOM']
  target_structure = structure_df[structure_df['chain_id'] == 'A']
  target_structure = target_structure[target_structure['residue_number'].isin(TARGET_RESIDUES)]
  peptide_structure = structure_df[structure_df['chain_id'] == 'B']
  target_coords = target_structure[['x_coord','y_coord','z_coord']].to_numpy()
  peptide_coords = peptide_structure[['x_coord','y_coord','z_coord']].to_numpy()

  dist_arr = cdist(target_coords, peptide_coords)
  contacts = np.where(dist_arr <= 5.0)[1]
  n_contacts = contacts.shape[0]
  return n_contacts

def analyse_pae(pae_path:Path, target_sequence: str = target_sequence) -> float:
  pae = np.load(pae_path)['pae']

  target_indices = [i-1 for i in TARGET_RESIDUES]
  pae_1 = pae[target_indices, len(target_sequence):]
  pae_2 = pae[len(target_sequence):, target_indices]
  return (np.mean(pae_1) + np.mean(pae_2)) / 2

df['structure_path'] = df['peptide_id'].apply(get_structure_path)
df['pae_path'] = df['peptide_id'].apply(get_pae_path)

df['n_contacts'] = df['structure_path'].apply(analyse_structure)
df['pae_mean'] = df['pae_path'].apply(analyse_pae)

from sklearn.preprocessing import StandardScaler
def apply_standard_scaler(arr):
  arr = arr.to_numpy().reshape(-1,1)
  scaler = StandardScaler()
  return scaler.fit_transform(arr).flatten()

df['n_contacts_scaled'] = apply_standard_scaler(df['n_contacts'])
df['pae_mean_scaled'] = apply_standard_scaler(df['pae_mean'] * -1) # A low PAE is good. Multiply by -1 so that a high scaled value is good, like with n_contacts
df['scaled_score'] = df[['n_contacts_scaled','pae_mean_scaled']].mean(axis=1)
df = df.sort_values(
    RANK_ON,
    ascending=True if RANK_ON == 'pae_mean' else False
    ).reset_index(drop = True)
df[['peptide_id', 'peptide_sequence', 'n_contacts', 'n_contacts_scaled', 'pae_mean', 'pae_mean_scaled', 'scaled_score']]

Unnamed: 0,peptide_id,peptide_sequence,n_contacts,n_contacts_scaled,pae_mean,pae_mean_scaled,scaled_score
0,rfe_3d6becf,SLNALWQQQQ,123,1.690707,2.181496,0.369326,1.030016
1,exp_p53,SQETFSDLWKLLPEN,114,1.345356,1.52558,0.57843,0.961893
2,rfp_a182988,SSHWLQTLQS,112,1.268612,2.180602,0.369611,0.819111
3,rfe_f13a56a,PEQQDWNDRR,107,1.07675,1.616639,0.549401,0.813075
4,rfe_275bcd9,QIENNMHQAI,94,0.57791,1.879106,0.465727,0.521818
5,rfe_262985b,MLFDVLKQMK,89,0.386049,1.510678,0.58318,0.484615
6,rfp_3c6b025,EEALYDVKTS,85,0.232559,1.791348,0.493704,0.363132
7,rfe_5ae7318,AIRALIARLQ,85,0.232559,1.944942,0.444739,0.338649
8,rfp_eb1a1cd,VDEELRKQQQ,87,0.309304,2.270447,0.340968,0.325136
9,rfp_e0c1341,KLEIIRQLLK,83,0.155815,1.793488,0.493021,0.324418


In [8]:
#@title Visualize Top Peptides {run: "auto"}
RANK = "0" #@param {type:"string"}
RANK = int(RANK)

import py3Dmol
from string import ascii_uppercase, ascii_lowercase

pymol_color_list = ["#33ff33","#00ffff","#ff33cc","#ffff00","#ff9999","#e5e5e5","#7f7fff","#ff7f00",
                    "#7fff7f","#199999","#ff007f","#ffdd5e","#8c3f99","#b2b2b2","#007fff","#c4b200",
                    "#8cb266","#00bfbf","#b27f7f","#fcd1a5","#ff7f7f","#ffbfdd","#7fffff","#ffff7f",
                    "#00ff7f","#337fcc","#d8337f","#bfff3f","#ff7fff","#d8d8ff","#3fffbf","#b78c4c",
                    "#339933","#66b2b2","#ba8c84","#84bf00","#b24c66","#7f7f7f","#3f3fa5","#a5512b"]
alphabet_list = list(ascii_uppercase+ascii_lowercase)

def show_pdb(pdb_str, show_sidechains=False, show_mainchains=False,
             color="pLDDT", chains=2, vmin=50, vmax=90,
             size=(800,480), hbondCutoff=4.0,
             Ls=None,
             animate=False):

  structure_format = 'pdb'


  view = py3Dmol.view(js='https://3Dmol.org/build/3Dmol-min.js', width=size[0], height=size[1])
  if animate:
    view.addModelsAsFrames(pdb_str, structure_format,{'hbondCutoff':hbondCutoff})
  else:
    view.addModel(pdb_str, structure_format) #, {'hbondCutoff':hbondCutoff})


  if color == "pLDDT":
    view.setStyle({'cartoon': {'colorscheme': {'prop':'b','gradient': 'roygb','min':vmin,'max':vmax}}})
  elif color == "rainbow":
    view.setStyle({'cartoon': {'color':'spectrum'}})
  elif color == "chain":
    for n,chain,color in zip(range(chains),alphabet_list,pymol_color_list):
       view.setStyle({'chain':chain},{'cartoon': {'color':color}})

  if show_sidechains:
    BB = ['C','O','N']
    view.addStyle({'and':[{'resn':["GLY","PRO"],'invert':True},{'atom':BB,'invert':True}]},
                  {'stick':{'colorscheme':f"WhiteCarbon",'radius':0.3}})
    view.addStyle({'and':[{'resn':"GLY"},{'atom':'CA'}]},
                  {'sphere':{'colorscheme':f"WhiteCarbon",'radius':0.3}})
    view.addStyle({'and':[{'resn':"PRO"},{'atom':['C','O'],'invert':True}]},
                  {'stick':{'colorscheme':f"WhiteCarbon",'radius':0.3}})

  if show_mainchains:
    BB = ['C','O','N','CA']
    view.addStyle({'atom':BB},{'stick':{'colorscheme':f"WhiteCarbon",'radius':0.3}})

  color_scheme = {'prop':'b','gradient': 'rwb','min':0,'max':100}
  for residue_number in TARGET_RESIDUES:
      view.addStyle({'and':[{'chain':'A'},{'resi':residue_number},{'resn':["GLY","PRO"],'invert':True},{'atom':['C','O','N'],'invert':True}]},
                    {'stick':{'colorscheme':color_scheme,'radius':0.3}})
      view.addStyle({'and':[{'chain':'A'},{'resi':residue_number},{'resn':"GLY"},{'atom':'CA'}]},
                    {'sphere':{'colorscheme':color_scheme,'radius':0.3}})
      view.addStyle({'and':[{'chain':'A'},{'resi':residue_number},{'resn':"PRO"},{'atom':['C','O'],'invert':True}]},
                    {'stick':{'colorscheme':color_scheme,'radius':0.3}})


  view.zoomTo()

  view.setHoverable(
    {},
    True,
    '''function(atom,viewer,event,container) {
        if(!atom.label) {
        atom.label = viewer.addLabel(atom.resn + atom.resi,{position: atom, backgroundColor: 'mintcream', fontColor:'black'});
        }}''',
    '''function(atom,viewer) {
        if(atom.label) {
        viewer.removeLabel(atom.label);
        delete atom.label;
        }
    }''',
    viewer=(0, 1)
  )

  if animate: view.animate()
  return view

color = "chain" #@param ["confidence", "rainbow", "chain"]
if color == "confidence": color = "pLDDT"
show_sidechains = False #@param {type:"boolean"}
show_mainchains = False #@param {type:"boolean"}

with Path(df.iloc[RANK]['structure_path']).open('r') as f:
  pdb_str = f.read()

show_pdb(pdb_str,
         color=color,
         show_sidechains=show_sidechains,
         show_mainchains=show_mainchains,
         ).show()


### Notebook Summary
- We co-folded MDM2 with our candidate peptides using Boltz-2.
- Extracted binding metrics (contacts, iPAE, pLDDT).
- Ranked peptides based on their predicted binding quality.
- Identified a top candidate for MDM2 binding.

➡️ End of workflow: We now have a full pipeline from target analysis → peptide design → co-folding → evaluation.
