# OmegaFold structure prediction

Installation of necessary software:

In [1]:
from IPython.utils import io

In [2]:
with io.capture_output() as captured:
    !pip install -q torch
    !conda install -y requests
    !conda install -y biopython
    !conda install -y pandas
    !pip install -q git+https://github.com/jvkersch/tmtools.git
    !conda install -y Prody
    !git clone --branch beta --quiet https://github.com/sokrypton/OmegaFold.git
    !pip -q install py3Dmol
    !apt-get install aria2 -qq > /dev/null
    !aria2c -q -x 16 https://helixon.s3.amazonaws.com/release1.pt
    !mkdir -p ~/.cache/omegafold_ckpt
    !mv release1.pt ~/.cache/omegafold_ckpt/model.pt

print(captured.stdout if ("Error" in captured.stdout) or ("ERROR" in captured.stdout) else "Installation successful")

Installation successful


In [3]:
import os, sys, re, torch, requests, json, Bio
import pandas as pd

## Load `proteins` and `chains` dataframes from .csv files

In [4]:
# Per-protein table read from disk. The `Unnamed: 0` column visible in the
# output is presumably an index that was saved together with the CSV.
proteins = pd.read_csv(filepath_or_buffer='proteins/proteins.csv')
proteins

Unnamed: 0,pdb_id,label,original_pdb_path,original_fasta_path,unique_chains,inferred_fasta_path,masks_path
0,8XPV,monomer,proteins/original_pdb_files/8XPV.pdb,proteins/original_fasta_files/8XPV.fasta,['8XPV:A'],proteins/inferred_fasta_files/8XPV.fasta,proteins/mask_files/8XPV_mask.fasta
1,8GQ4,monomer,proteins/original_pdb_files/8GQ4.pdb,proteins/original_fasta_files/8GQ4.fasta,['8GQ4:A'],proteins/inferred_fasta_files/8GQ4.fasta,proteins/mask_files/8GQ4_mask.fasta
2,8TIF,monomer,proteins/original_pdb_files/8TIF.pdb,proteins/original_fasta_files/8TIF.fasta,['8TIF:A'],proteins/inferred_fasta_files/8TIF.fasta,proteins/mask_files/8TIF_mask.fasta
3,8H3Z,monomer,proteins/original_pdb_files/8H3Z.pdb,proteins/original_fasta_files/8H3Z.fasta,['8H3Z:A'],proteins/inferred_fasta_files/8H3Z.fasta,proteins/mask_files/8H3Z_mask.fasta
4,8ALL,monomer,proteins/original_pdb_files/8ALL.pdb,proteins/original_fasta_files/8ALL.fasta,['8ALL:A'],proteins/inferred_fasta_files/8ALL.fasta,proteins/mask_files/8ALL_mask.fasta
...,...,...,...,...,...,...,...
744,8HNE,synthetic,proteins/original_pdb_files/8HNE.pdb,proteins/original_fasta_files/8HNE.fasta,['8HNE:A'],proteins/inferred_fasta_files/8HNE.fasta,proteins/mask_files/8HNE_mask.fasta
745,8FIN,synthetic,proteins/original_pdb_files/8FIN.pdb,proteins/original_fasta_files/8FIN.fasta,['8FIN:A'],proteins/inferred_fasta_files/8FIN.fasta,proteins/mask_files/8FIN_mask.fasta
746,8J0A,synthetic,proteins/original_pdb_files/8J0A.pdb,proteins/original_fasta_files/8J0A.fasta,['8J0A:A'],proteins/inferred_fasta_files/8J0A.fasta,proteins/mask_files/8J0A_mask.fasta
747,8HDU,synthetic,proteins/original_pdb_files/8HDU.pdb,proteins/original_fasta_files/8HDU.fasta,['8HDU:A'],proteins/inferred_fasta_files/8HDU.fasta,proteins/mask_files/8HDU_mask.fasta


In [5]:
# Per-chain table read from disk; it has a single `chain_id` per row where the
# protein table has a `unique_chains` list.
chains = pd.read_csv(filepath_or_buffer='proteins/chains.csv')
chains

Unnamed: 0,pdb_id,label,original_pdb_path,original_fasta_path,chain_id,inferred_fasta_path,masks_path
0,8XPV,monomer,proteins/original_pdb_files/8XPV.pdb,proteins/original_fasta_files/8XPV.fasta,8XPV:A,proteins/inferred_fasta_files/8XPV.fasta,proteins/mask_files/8XPV_mask.fasta
1,8GQ4,monomer,proteins/original_pdb_files/8GQ4.pdb,proteins/original_fasta_files/8GQ4.fasta,8GQ4:A,proteins/inferred_fasta_files/8GQ4.fasta,proteins/mask_files/8GQ4_mask.fasta
2,8TIF,monomer,proteins/original_pdb_files/8TIF.pdb,proteins/original_fasta_files/8TIF.fasta,8TIF:A,proteins/inferred_fasta_files/8TIF.fasta,proteins/mask_files/8TIF_mask.fasta
3,8H3Z,monomer,proteins/original_pdb_files/8H3Z.pdb,proteins/original_fasta_files/8H3Z.fasta,8H3Z:A,proteins/inferred_fasta_files/8H3Z.fasta,proteins/mask_files/8H3Z_mask.fasta
4,8ALL,monomer,proteins/original_pdb_files/8ALL.pdb,proteins/original_fasta_files/8ALL.fasta,8ALL:A,proteins/inferred_fasta_files/8ALL.fasta,proteins/mask_files/8ALL_mask.fasta
...,...,...,...,...,...,...,...
1042,8HNE,synthetic,proteins/original_pdb_files/8HNE.pdb,proteins/original_fasta_files/8HNE.fasta,8HNE:A,proteins/inferred_fasta_files/8HNE.fasta,proteins/mask_files/8HNE_mask.fasta
1043,8FIN,synthetic,proteins/original_pdb_files/8FIN.pdb,proteins/original_fasta_files/8FIN.fasta,8FIN:A,proteins/inferred_fasta_files/8FIN.fasta,proteins/mask_files/8FIN_mask.fasta
1044,8J0A,synthetic,proteins/original_pdb_files/8J0A.pdb,proteins/original_fasta_files/8J0A.fasta,8J0A:A,proteins/inferred_fasta_files/8J0A.fasta,proteins/mask_files/8J0A_mask.fasta
1045,8HDU,synthetic,proteins/original_pdb_files/8HDU.pdb,proteins/original_fasta_files/8HDU.fasta,8HDU:A,proteins/inferred_fasta_files/8HDU.fasta,proteins/mask_files/8HDU_mask.fasta


## Predict the structures

Prepare directory:

In [6]:
# Directory that receives the predicted .pdb files written by OmegaFold.
OF_PREDICTED_PDB_FILES_DIRECTORY = "proteins/omegafold_prediction_pdb_files"
# exist_ok=True keeps this cell idempotent: re-running it when the directory
# already exists is a no-op instead of an error (same intent as `mkdir -p`).
os.makedirs(OF_PREDICTED_PDB_FILES_DIRECTORY, exist_ok=True)

In [7]:
# Expected location of each chain's predicted structure: one
# `<chain_id>.pdb` file inside the OmegaFold output directory.
chains["OF_prediction_pdb_path"] = [
    f"{OF_PREDICTED_PDB_FILES_DIRECTORY}/{chain_id}.pdb"
    for chain_id in chains["chain_id"]
]
chains

Unnamed: 0,pdb_id,label,original_pdb_path,original_fasta_path,chain_id,inferred_fasta_path,masks_path,OF_prediction_pdb_path
0,8XPV,monomer,proteins/original_pdb_files/8XPV.pdb,proteins/original_fasta_files/8XPV.fasta,8XPV:A,proteins/inferred_fasta_files/8XPV.fasta,proteins/mask_files/8XPV_mask.fasta,proteins/omegafold_prediction_pdb_files/8XPV:A...
1,8GQ4,monomer,proteins/original_pdb_files/8GQ4.pdb,proteins/original_fasta_files/8GQ4.fasta,8GQ4:A,proteins/inferred_fasta_files/8GQ4.fasta,proteins/mask_files/8GQ4_mask.fasta,proteins/omegafold_prediction_pdb_files/8GQ4:A...
2,8TIF,monomer,proteins/original_pdb_files/8TIF.pdb,proteins/original_fasta_files/8TIF.fasta,8TIF:A,proteins/inferred_fasta_files/8TIF.fasta,proteins/mask_files/8TIF_mask.fasta,proteins/omegafold_prediction_pdb_files/8TIF:A...
3,8H3Z,monomer,proteins/original_pdb_files/8H3Z.pdb,proteins/original_fasta_files/8H3Z.fasta,8H3Z:A,proteins/inferred_fasta_files/8H3Z.fasta,proteins/mask_files/8H3Z_mask.fasta,proteins/omegafold_prediction_pdb_files/8H3Z:A...
4,8ALL,monomer,proteins/original_pdb_files/8ALL.pdb,proteins/original_fasta_files/8ALL.fasta,8ALL:A,proteins/inferred_fasta_files/8ALL.fasta,proteins/mask_files/8ALL_mask.fasta,proteins/omegafold_prediction_pdb_files/8ALL:A...
...,...,...,...,...,...,...,...,...
1042,8HNE,synthetic,proteins/original_pdb_files/8HNE.pdb,proteins/original_fasta_files/8HNE.fasta,8HNE:A,proteins/inferred_fasta_files/8HNE.fasta,proteins/mask_files/8HNE_mask.fasta,proteins/omegafold_prediction_pdb_files/8HNE:A...
1043,8FIN,synthetic,proteins/original_pdb_files/8FIN.pdb,proteins/original_fasta_files/8FIN.fasta,8FIN:A,proteins/inferred_fasta_files/8FIN.fasta,proteins/mask_files/8FIN_mask.fasta,proteins/omegafold_prediction_pdb_files/8FIN:A...
1044,8J0A,synthetic,proteins/original_pdb_files/8J0A.pdb,proteins/original_fasta_files/8J0A.fasta,8J0A:A,proteins/inferred_fasta_files/8J0A.fasta,proteins/mask_files/8J0A_mask.fasta,proteins/omegafold_prediction_pdb_files/8J0A:A...
1045,8HDU,synthetic,proteins/original_pdb_files/8HDU.pdb,proteins/original_fasta_files/8HDU.fasta,8HDU:A,proteins/inferred_fasta_files/8HDU.fasta,proteins/mask_files/8HDU_mask.fasta,proteins/omegafold_prediction_pdb_files/8HDU:A...


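Predict the structures by running OmegaFold once per row of the `proteins` dataframe (on that protein's inferred FASTA file). If a run reports an error, the `OF_prediction_pdb_path` of all chains of that protein in the `chains` dataframe is set to `None`: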

In [8]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for index, row in proteins.iterrows():
    fasta_path = row["inferred_fasta_path"]
    with io.capture_output() as captured:
        !python OmegaFold/main.py --device={device} {fasta_path} {OF_PREDICTED_PDB_FILES_DIRECTORY}
    if "Error" in captured.stdout:
        chains.loc[chains["pdb_id"] == row["pdb_id"], "OF_prediction_pdb_path"] = None
        print(f"Prediction of structure {row['pdb_id']} was unsuccessful.")
    elif (index + 1) % 25 == 0:
        print(f"Successfully predicted the structure of {index + 1} proteins.")

chains

Successfully predicted the structure of 25 proteins.
Successfully predicted the structure of 50 proteins.
Successfully predicted the structure of 75 proteins.
Successfully predicted the structure of 100 proteins.
Successfully predicted the structure of 125 proteins.
Successfully predicted the structure of 150 proteins.
Successfully predicted the structure of 175 proteins.
Successfully predicted the structure of 200 proteins.
Successfully predicted the structure of 225 proteins.
Successfully predicted the structure of 250 proteins.
Successfully predicted the structure of 275 proteins.
Successfully predicted the structure of 300 proteins.
Successfully predicted the structure of 325 proteins.
Successfully predicted the structure of 350 proteins.
Successfully predicted the structure of 375 proteins.
Successfully predicted the structure of 400 proteins.
Successfully predicted the structure of 425 proteins.
Successfully predicted the structure of 450 proteins.
Successfully predicted the stru

Unnamed: 0,pdb_id,label,original_pdb_path,original_fasta_path,chain_id,inferred_fasta_path,masks_path,OF_prediction_pdb_path
0,8XPV,monomer,proteins/original_pdb_files/8XPV.pdb,proteins/original_fasta_files/8XPV.fasta,8XPV:A,proteins/inferred_fasta_files/8XPV.fasta,proteins/mask_files/8XPV_mask.fasta,proteins/omegafold_prediction_pdb_files/8XPV:A...
1,8GQ4,monomer,proteins/original_pdb_files/8GQ4.pdb,proteins/original_fasta_files/8GQ4.fasta,8GQ4:A,proteins/inferred_fasta_files/8GQ4.fasta,proteins/mask_files/8GQ4_mask.fasta,proteins/omegafold_prediction_pdb_files/8GQ4:A...
2,8TIF,monomer,proteins/original_pdb_files/8TIF.pdb,proteins/original_fasta_files/8TIF.fasta,8TIF:A,proteins/inferred_fasta_files/8TIF.fasta,proteins/mask_files/8TIF_mask.fasta,proteins/omegafold_prediction_pdb_files/8TIF:A...
3,8H3Z,monomer,proteins/original_pdb_files/8H3Z.pdb,proteins/original_fasta_files/8H3Z.fasta,8H3Z:A,proteins/inferred_fasta_files/8H3Z.fasta,proteins/mask_files/8H3Z_mask.fasta,proteins/omegafold_prediction_pdb_files/8H3Z:A...
4,8ALL,monomer,proteins/original_pdb_files/8ALL.pdb,proteins/original_fasta_files/8ALL.fasta,8ALL:A,proteins/inferred_fasta_files/8ALL.fasta,proteins/mask_files/8ALL_mask.fasta,proteins/omegafold_prediction_pdb_files/8ALL:A...
...,...,...,...,...,...,...,...,...
1042,8HNE,synthetic,proteins/original_pdb_files/8HNE.pdb,proteins/original_fasta_files/8HNE.fasta,8HNE:A,proteins/inferred_fasta_files/8HNE.fasta,proteins/mask_files/8HNE_mask.fasta,proteins/omegafold_prediction_pdb_files/8HNE:A...
1043,8FIN,synthetic,proteins/original_pdb_files/8FIN.pdb,proteins/original_fasta_files/8FIN.fasta,8FIN:A,proteins/inferred_fasta_files/8FIN.fasta,proteins/mask_files/8FIN_mask.fasta,proteins/omegafold_prediction_pdb_files/8FIN:A...
1044,8J0A,synthetic,proteins/original_pdb_files/8J0A.pdb,proteins/original_fasta_files/8J0A.fasta,8J0A:A,proteins/inferred_fasta_files/8J0A.fasta,proteins/mask_files/8J0A_mask.fasta,proteins/omegafold_prediction_pdb_files/8J0A:A...
1045,8HDU,synthetic,proteins/original_pdb_files/8HDU.pdb,proteins/original_fasta_files/8HDU.fasta,8HDU:A,proteins/inferred_fasta_files/8HDU.fasta,proteins/mask_files/8HDU_mask.fasta,proteins/omegafold_prediction_pdb_files/8HDU:A...


## Save the `chains` dataframe as a .csv file

In [9]:
# Write the chain table, including the OmegaFold prediction paths, to disk
# without adding the DataFrame index as an extra column.
chains.to_csv(
    path_or_buf="proteins/chains_omegafold.csv",
    index=False,
    sep=",",
)