# Omegafold - test predictions, obtaining information about chains from fasta files

Installation of necessary software:

In [1]:
from IPython.utils import io

In [1]:
with io.capture_output() as captured:
    !pip install -q torch
    !conda install -y requests
    !conda install -y biopython
    !conda install -y pandas
    !pip install -q git+https://github.com/jvkersch/tmtools.git
    !conda install -y Prody
    !git clone --branch beta --quiet https://github.com/sokrypton/OmegaFold.git
    !pip -q install py3Dmol
    !apt-get install aria2 -qq > /dev/null
    !aria2c -q -x 16 https://helixon.s3.amazonaws.com/release1.pt
    !mkdir -p ~/.cache/omegafold_ckpt
    !mv release1.pt ~/.cache/omegafold_ckpt/model.pt

print(captured.stderr if captured.stderr != "" else "Installation successful")

Installation successful


In [2]:
import os, sys, re, torch, requests, json, Bio
import pandas as pd

## Load `proteins` and `chains` dataframes from csv files

In [3]:
proteins = pd.read_csv('proteins/proteins.csv')
proteins

Unnamed: 0,pdb_id,original_pdb_path,original_fasta_path,unique_chains,inferred_fasta_path
0,8EIO,proteins/original_pdb_files/8EIO.pdb,proteins/original_fasta_files/8EIO.fasta,"{'8EIO:B', '8EIO:A'}",proteins/inferred_fasta_files/8EIO.fasta
1,8FMN,proteins/original_pdb_files/8FMN.pdb,proteins/original_fasta_files/8FMN.fasta,"{'8FMN:A', '8FMN:B', '8FMN:C'}",proteins/inferred_fasta_files/8FMN.fasta
2,7XBG,proteins/original_pdb_files/7XBG.pdb,proteins/original_fasta_files/7XBG.fasta,"{'7XBG:A', '7XBG:B'}",proteins/inferred_fasta_files/7XBG.fasta
3,7UFZ,proteins/original_pdb_files/7UFZ.pdb,proteins/original_fasta_files/7UFZ.fasta,{'7UFZ:A'},proteins/inferred_fasta_files/7UFZ.fasta
4,8D20,proteins/original_pdb_files/8D20.pdb,proteins/original_fasta_files/8D20.fasta,{'8D20:A'},proteins/inferred_fasta_files/8D20.fasta


In [4]:
chains = pd.read_csv('proteins/chains.csv')
chains

Unnamed: 0,pdb_id,original_pdb_path,original_fasta_path,chain_id,inferred_fasta_path
0,8EIO,proteins/original_pdb_files/8EIO.pdb,proteins/original_fasta_files/8EIO.fasta,8EIO:B,proteins/inferred_fasta_files/8EIO.fasta
1,8EIO,proteins/original_pdb_files/8EIO.pdb,proteins/original_fasta_files/8EIO.fasta,8EIO:A,proteins/inferred_fasta_files/8EIO.fasta
2,8FMN,proteins/original_pdb_files/8FMN.pdb,proteins/original_fasta_files/8FMN.fasta,8FMN:A,proteins/inferred_fasta_files/8FMN.fasta
3,8FMN,proteins/original_pdb_files/8FMN.pdb,proteins/original_fasta_files/8FMN.fasta,8FMN:B,proteins/inferred_fasta_files/8FMN.fasta
4,8FMN,proteins/original_pdb_files/8FMN.pdb,proteins/original_fasta_files/8FMN.fasta,8FMN:C,proteins/inferred_fasta_files/8FMN.fasta
5,7XBG,proteins/original_pdb_files/7XBG.pdb,proteins/original_fasta_files/7XBG.fasta,7XBG:A,proteins/inferred_fasta_files/7XBG.fasta
6,7XBG,proteins/original_pdb_files/7XBG.pdb,proteins/original_fasta_files/7XBG.fasta,7XBG:B,proteins/inferred_fasta_files/7XBG.fasta
7,7UFZ,proteins/original_pdb_files/7UFZ.pdb,proteins/original_fasta_files/7UFZ.fasta,7UFZ:A,proteins/inferred_fasta_files/7UFZ.fasta
8,8D20,proteins/original_pdb_files/8D20.pdb,proteins/original_fasta_files/8D20.fasta,8D20:A,proteins/inferred_fasta_files/8D20.fasta


## Predicting the structures

Prepare directory:

In [5]:
OF_PREDICTED_PDB_FILES_DIRECTORY = "proteins/omegafold_predicted_pdb_files"
!mkdir {OF_PREDICTED_PDB_FILES_DIRECTORY}

mkdir: cannot create directory ‘proteins/omegafold_predicted_pdb_files’: File exists


In [7]:
chains["OF_prediction_pdb_path"] = chains.apply(lambda row: f"{OF_PREDICTED_PDB_FILES_DIRECTORY}/{row['chain_id']}.pdb", axis = 1)

Predict the structure of all the chains in the `chains` dataframe:

In [53]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

for index, row in proteins.iterrows():
    fasta_path = row["original_fasta_path"]
    with io.capture_output() as captured:
        !python OmegaFold/main.py --device={device} {fasta_path} {OF_PREDICTED_PDB_FILES_DIRECTORY}
    if "Error" in captured.stdout:
        chains.loc[chains["pdb_id"] == row["pdb_id"], "OF_prediction_pdb_path"] = None
        print(f"Prediction of structure {row['pdb_id']} was unsuccessful.")
    elif index % 10 == 0 or index == 749:
        print(f"Successfully predicted the structure of {index + 1} proteins.")


Prediction of structure 8EIO was unsuccessful
Successfully predicted the structure of 8FMN
Successfully predicted the structure of 7XBG
Successfully predicted the structure of 7UFZ
Successfully predicted the structure of 8D20


## Save chain dataframe as csv file

In [None]:
chains.to_csv("proteins/chains_omegafold.csv", sep = ",", index = False)