In [1]:
%load_ext autoreload
%autoreload 2

In [3]:
from scripts import predict
from scripts import util
from scripts import mmseqs2

import random
import os

# Use absl logging at DEBUG verbosity so that DEBUG-level messages (such as
# the "Prediction parameters" lines visible in the cell outputs) are shown.
from absl import logging
logging.set_verbosity(logging.DEBUG)

In [4]:
# Job name and query sequence handed to the MMSeqs2 runner.
# The sequence is given in one-letter codes, split into chunks for readability.
jobname = 'T4_lysozyme'
sequence = "".join( (
    "MNIFEMLRIDEGLRLKIYKDTEGYYTIGIGHLLTKSPSLNAAKSELDKAIGRNCNGVIT",
    "KDEAEKLFNQDVDAAVRGILRNAKLKPVYDSLDAVRRCALINMVFQMGETGVAGFTNSL",
    "RMLQQKRWDEAAVNLAKSRWYNQTPNRAKRVITTFRTGTWDAYKNL",
) )

# Template PDB IDs: uppercase, with the chain ID specified.
# Left empty here (no user-specified templates).
pdbs = list()

# Hyperparameters: number of models generated per MSA depth, and the
# list of MSA depths to iterate over.
n_models, msa_depths = 1, [ 32 ]

In [7]:
# The MMSeqs2Runner object submits the amino acid sequence to
# the MMSeqs2 server, generates a directory, and populates it with
# data retrieved from the server. Templates may be specified by the user.
# All templates are fetched if none are provided or the list is empty.
mmseqs2_runner = mmseqs2.MMSeqs2Runner( jobname, sequence )

# Fetch sequences and download data. The job is run once and the MSA is
# bound to `a3m_lines`, so that cells referring to that name work on a
# fresh kernel (Restart & Run All).
# NOTE(review): the second return value is presumably the template path
# expected by predict.predict_structure_from_templates -- confirm against
# scripts/mmseqs2.py.
a3m_lines, template_path = mmseqs2_runner.run_job( templates = pdbs )

# Aliases kept so that any cell still using the older names keeps running
# without submitting the same job a second time.
a3m_lines1 = a3m_lines2 = a3m_lines

DEBUG:absl:ID: hruUTlhiL4nBtw-UNTrXC5D1FvAONwQ_iUJq5g
INFO:absl:seq	pdb	cid	evalue
INFO:absl:seq	pdb	cid	evalue


In [6]:
# A nested loop in which `n_models` models are generated per MSA depth value
# In our manuscript we use three MSA depths: 32 sequences, 128, and 5120
for nseq in msa_depths:
  for n_model in range( n_models ):

    # Models 1 and 2 are the two AlphaFold neural networks capable of using
    # templates. Model 2 is used here; to pick one of the two at random for
    # each prediction, use `random.choice( ( 1, 2 ) )` instead.
    # In our experience, model 1 is more sensitive to input templates.
    # However, this observation is purely anecdotal and not backed up by hard numbers.
    model_id = 2

    # Specify the name of the output PDB
    outname = f"{ n_model }_{ nseq }.pdb"

    # Template-based prediction (disabled). To use it, uncomment the call
    # below and comment out the template-free call that follows it.
    # `template_path` must hold the second value returned by
    # mmseqs2_runner.run_job (presumably the template path -- confirm
    # against scripts/mmseqs2.py).
    #
    # predict.predict_structure_from_templates(
    #     mmseqs2_runner.seq, # NOTE mmseqs2_runner removes whitespace from seq
    #     outname,
    #     a3m_lines1,
    #     template_path = template_path,
    #     model_id = model_id,
    #     max_msa_clusters = nseq // 2,
    #     max_extra_msa = nseq,
    #     max_recycles = 1
    # )

    # Template-free prediction: run the job and save it as a PDB.
    # The MSA comes from `a3m_lines1`, the name bound by the run_job call
    # (the previously used `a3m_lines` was never defined in the notebook).
    predict.predict_structure_no_templates(
        sequence,
        outname,
        a3m_lines1,
        model_id = model_id,
        max_msa_clusters = nseq // 2,
        max_extra_msa = nseq,
        max_recycles = 1
    )

2024-09-14 14:52:39.478496: W external/xla/xla/service/gpu/nvptx_compiler.cc:836] The NVIDIA driver's CUDA version is 12.2 which is older than the PTX compiler version (12.6.68). Because the driver is older than the PTX compiler version, XLA is disabling parallel compilation, which may slow down compilation. You should update your NVIDIA driver or use the NVIDIA-provided CUDA forward compatibility packages.
DEBUG:absl:Prediction parameters:
DEBUG:absl:	Model ID: 2
DEBUG:absl:	Using templates: False
DEBUG:absl:	Maximum MSA clusters: 16
DEBUG:absl:	Maximum extra MSA clusters: 32
DEBUG:absl:	Number recycling iterations: 1
DEBUG:absl:	Number of structure module repeats: 8
INFO:absl:Running predict with shape(feat) = {'aatype': (2, 164), 'residue_index': (2, 164), 'seq_length': (2,), 'is_distillation': (2,), 'seq_mask': (2, 164), 'msa_mask': (2, 16, 164), 'msa_row_mask': (2, 16), 'random_crop_to_size_seed': (2, 2), 'atom14_atom_exists': (2, 164, 14), 'residx_atom14_to_atom37': (2, 164, 14),