In [None]:
import os
import subprocess
from IPython.utils import io
from IPython import get_ipython

In [None]:
#@title Download RGN2

#@markdown Please execute this cell by pressing the *Play* button on
#@markdown the left.
#@markdown Kernel will restart automatically after execution.

import os
import sys
import subprocess
from IPython.utils import io

# --- Installation layout -----------------------------------------------------
# Directory the RGN2 repository is cloned into (wiped and re-created below).
WORKDIR = '/content/rgn2'
# Git repository that is cloned into WORKDIR.
GIT_REPO = 'https://github.com/aqlaboratory/rgn2'
# requirements.txt expected inside the clone; fed to `pip install -r`.
REQUIREMENTS = os.path.join(WORKDIR, 'requirements.txt')
# Repository cloned with `git lfs clone` into RGN2_PARAMS_DIR.
RGN2_PARAM_SOURCE_URL = 'https://huggingface.co/christinafl/rgn2'
RGN2_PARAMS_DIR = os.path.join(WORKDIR, 'resources')
# ModRefiner archive: downloaded to REFINER_PATH, unpacked into REFINER_DIR,
# then the archive itself is deleted.
REFINER_DIR = os.path.join(WORKDIR, 'ter2pdb')
REFINER_PATH = os.path.join(REFINER_DIR, 'ModRefiner-l.zip')
REFINER_URL = 'https://zhanggroup.org/ModRefiner/ModRefiner-l.zip'

# All output produced inside the `with` block is captured rather than shown.
# The except clause below expects a failing %shell command to surface as
# subprocess.CalledProcessError; in that case the captured output is printed
# and the error is re-raised so the cell visibly fails.
try:
  with io.capture_output() as captured:
    # Leave WORKDIR before deleting it so the current directory stays valid.
    %cd '/content'

    # Start from a clean checkout: remove any previous clone, then clone anew.
    %shell rm -rf {WORKDIR}
    %shell git clone {GIT_REPO} {WORKDIR}

    # Install the required versions of all dependencies.
    %shell pip install -r {REQUIREMENTS}

    # Fetch the contents of RGN2_PARAM_SOURCE_URL into <WORKDIR>/resources.
    %shell git lfs clone "{RGN2_PARAM_SOURCE_URL}" "{RGN2_PARAMS_DIR}"

    # Download the ModRefiner archive, unpack it in place (overwriting existing
    # files), and remove the archive afterwards.
    %shell wget -O {REFINER_PATH} {REFINER_URL}
    %shell unzip -o {REFINER_PATH} -d {REFINER_DIR}
    %shell rm {REFINER_PATH}
except subprocess.CalledProcessError:
  # Show what the failed installation printed before propagating the error.
  print(captured)
  raise

# Only reached when every step above succeeded. The kernel is shut down with
# the restart flag set, so this is the last statement that runs in this kernel.
# NOTE(review): the restart is presumably needed so the freshly installed
# package versions are the ones imported by the following cells - confirm.
print("🔁 Restarting kernel after installation...")
get_ipython().kernel.do_shutdown(True)

In [None]:
#@title Import Python packages
#@markdown Please execute this cell by pressing the *Play* button on 
#@markdown the left.

# Work from inside the RGN2 checkout: every relative path defined in this cell
# (and the `rgn/protling.py` script run later) is resolved against it.
%cd '/content/rgn2'

import os
import sys
import subprocess
from google.colab import files

# Reload edited modules automatically, and select the TensorFlow 1.x line
# before the project modules below are imported.
# NOTE(review): the project imports presumably pull in TensorFlow, which is why
# the version magic has to run first - confirm.
%reload_ext autoreload
%autoreload 2
%tensorflow_version 1.x

# Put <cwd>/aminobert on the import path in addition to the working directory,
# so modules inside that directory can also be imported by their bare name.
sys.path.append(os.path.join(os.getcwd(), 'aminobert'))
from aminobert.prediction import aminobert_predict_sequence
from data_processing.aminobert_postprocessing import aminobert_postprocess
from ter2pdb import ter2pdb


# Directory passed as `data_dir` to both AminoBERT helpers; the per-sequence
# FASTA path (<DATA_DIR>/<seq_id>.fa) is also built from it in later cells.
DATA_DIR = 'aminobert_output'
%shell mkdir -p aminobert_output

# Passed as `dataset_name` to aminobert_postprocess.
DATASET_NAME = '1'
# Passed as `prepend_m` to both AminoBERT helpers.
PREPEND_M = True
# Passed as `checkpoint` to aminobert_predict_sequence.
AMINOBERT_CHKPT_DIR = 'resources/aminobert_checkpoint/AminoBERT_runs_v2_uniparc_dataset_v2_5-1024_fresh_start_model.ckpt-1100000'
# Directory holding the `configuration` file handed to rgn/protling.py.
RGN_CHKPT_DIR = 'resources/rgn2_runs/15106000'
# Base directory for the predicted .tertiary file and the `output` folder.
RUN_DIR = 'runs/15106000'

In [None]:
#@markdown ### Enter the amino acid sequence to fold ⬇️
seq_id = 'T1071'  #@param {type:"string"}
sequence = 'MQSLAKLLVIEDDAAIRLNLSVILEFVGEQCEVIESTQIDQINWSAVWGGCILGSLRGQALSEQLIQSLTKANHIPLLVANKQPYSLEEFPNYVGELDFPLNYPQLSDALRHCKEFLGRKGFQVLATARKNTLFRSLVGQSMGIQEVRHLIEQVSTTEANVLILGESGTGKEVVARNIHYHSGRRNGPFVPINCGAIPAELLESELFGHEKGAFTGAITARKGRFELAEGGTLFLDEIGDMPMSMQVKLLRVLQERCFERVGGNSTIKANVRVIAATHRNLEEMIDGQKFREDLYYRLNVFPIEMPALRDRIDDIPLLLQELMTRMEAEGAQPICFTPRAINSMMEHDWPGNVRELANLVERMVILYPNSLVDVNHLPTKYRYSDIPEFQPEPSRFSSVEEQERDVLEGIFAEDFNFEEPQEFVPDIDAPQALPPEGVNLKELLADLEVNLINQALEAQGGVVARAADMLGMRRTTLVEKMRKYNMQR'  #@param {type:"string"}

# Longest sequence accepted by this notebook.
# NOTE(review): 1023 presumably leaves room for the extra residue added when
# PREPEND_M is set - confirm against the AminoBERT model limits.
MAX_SEQUENCE_LENGTH = 1023

aatypes = set('ACDEFGHIKLMNPQRSTVWY')  # 20 standard aatypes


def validate_sequence(raw_sequence, max_length=MAX_SEQUENCE_LENGTH):
  """Normalise an amino-acid sequence and check that it can be folded.

  Args:
    raw_sequence: Sequence as typed into the form; may contain any whitespace
      (spaces, tabs, line breaks including '\\r') and lower-case letters.
    max_length: Maximum number of residues allowed after normalisation.

  Returns:
    The sequence with all whitespace removed and upper-cased.

  Raises:
    ValueError: If the normalised sequence is empty, contains characters
      other than the 20 standard amino-acid letters, or is longer than
      `max_length`.
  """
  # str.split() with no argument splits on every whitespace character, so
  # carriage returns from pasted text are dropped as well.
  cleaned = ''.join(raw_sequence.split()).upper()
  if not cleaned:
    raise ValueError('Input sequence is empty. Please enter an amino acid sequence.')
  # Sorted so the message is stable from run to run.
  invalid = sorted(set(cleaned) - aatypes)
  if invalid:
    raise ValueError(f'Input sequence contains non-amino acid letters: {", ".join(invalid)}. RGN2 only supports 20 standard amino acids as inputs.')
  if len(cleaned) > max_length:
    raise ValueError(f'Input sequence is too long: {len(cleaned)} amino acids, while the maximum is {max_length}.')
  return cleaned


sequence = validate_sequence(sequence)

# Folder that receives the generated structure files; created up front and
# tolerant of re-runs (exist_ok).
output_dir = os.path.join(RUN_DIR, 'output')
os.makedirs(output_dir, exist_ok=True)

# Per-sequence inputs consumed by the ter2pdb calls in the later cells.
# NOTE(review): the literal '1' path component presumably mirrors
# DATASET_NAME - confirm before changing either.
seq_path = os.path.join(DATA_DIR, f'{seq_id}.fa')
ter_path = os.path.join(
    RUN_DIR, '1', 'outputsTesting', f'{seq_id}.tertiary')

In [None]:
#@title Generate AminoBERT Embeddings
#@markdown Please execute this cell by pressing the *Play* button on 
#@markdown the left.

# Step 1: run the AminoBERT prediction helper on the entered sequence, using
# seq_id as the record header and DATA_DIR as its data directory.
aminobert_predict_sequence(
    seq=sequence,
    header=seq_id,
    prepend_m=PREPEND_M,
    checkpoint=AMINOBERT_CHKPT_DIR,
    data_dir=DATA_DIR,
)

# Step 2: post-process the contents of DATA_DIR under the configured dataset
# name, with the same prepend_m setting as step 1.
aminobert_postprocess(
    data_dir=DATA_DIR,
    dataset_name=DATASET_NAME,
    prepend_m=PREPEND_M,
)

In [None]:
#@title Run RGN2
#@markdown Please execute this cell by pressing the *Play* button on 
#@markdown the left.
# Command line for the RGN2 prediction script, built as an argument list so it
# is executed directly (no shell): the configuration path is passed verbatim
# even if it ever contains spaces or shell metacharacters.
cmd = [
    'python', 'rgn/protling.py',
    os.path.join(RGN_CHKPT_DIR, 'configuration'),
    '-p', '-e', 'weighted_testing', '-a', '-g', '0',
]
# stderr is folded into stdout so a single log can be shown on failure.
rgn_proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

if rgn_proc.returncode != 0:
    # errors='replace' keeps a stray non-UTF-8 byte in the log from raising
    # UnicodeDecodeError and hiding the real failure.
    print(rgn_proc.stdout.decode('UTF-8', errors='replace'))
    raise RuntimeError('Prediction failed!')

print('Prediction completed!')

# Convert the predicted .tertiary file for this sequence and offer the result
# for download. The download expects ter2pdb to have written
# <output_dir>/<seq_id>_<CA_TRACE_FNAME>.
ter2pdb.predicted_ter2pdb(seq_path=seq_path, ter_path=ter_path,
                          output_dir=output_dir, seq_id=seq_id)
files.download(os.path.join(output_dir, f'{seq_id}_{ter2pdb.CA_TRACE_FNAME}'))

In [None]:
#@title Refinement
#@markdown Please execute this cell by pressing the *Play* button on 
#@markdown the left.

# Value forwarded to ter2pdb.run as `timeout`.
# NOTE(review): units are not visible here - presumably seconds; confirm.
REFINEMENT_TIMEOUT = 420

# Recompute the per-sequence input paths from the current seq_id so this cell
# does not rely on the values left behind by the sequence-entry cell.
fasta_file = f'{seq_id}.fa'
tertiary_file = f'{seq_id}.tertiary'
seq_path = os.path.join(DATA_DIR, fasta_file)
ter_path = os.path.join(RUN_DIR, '1', 'outputsTesting', tertiary_file)

ter2pdb.run(
    seq_path=seq_path,
    ter_path=ter_path,
    output_dir=output_dir,
    timeout=REFINEMENT_TIMEOUT,
    seq_id=seq_id,
)

print('Refinement completed!')

# Offer the refined structure file written to output_dir for download.
refined_pdb_path = os.path.join(
    output_dir, f'{seq_id}_{ter2pdb.EMPR_CA_TRACE_FNAME}')
files.download(refined_pdb_path)