In [1]:
import os
import sys
import argparse
import requests
import numpy as np
import networkx as nx

sys.path.append('../pyprot/')
import pyprot.graph_models as graph_models
from pyprot.downloader import PdbDownloader, ConsurfDBDownloader
from pyprot.protein import Protein
from pyprot.structure import Perseus




In [2]:
# Build the Protein object for PDB entry 1RFQ from the local file; the path is
# relative to the notebook's working directory.
protein = Protein("data/1RFQ.pdb")



In [3]:
def _is_alpha_carbon(row):
    """Return True when the atom name held in the row's ``full_id`` is "CA"."""
    atom_name = row["full_id"][4][0]
    return atom_name == "CA"


# Build the structure from the CA atoms only, then run Perseus on the protein.
structure = protein.generate_structure(_is_alpha_carbon)
perseus = Perseus()
perseus.execute_persistent_hom(protein)

<pyprot.protein.Protein at 0x7f1d0c0395c0>

In [5]:
structure_model = graph_models.StructureGraphGenerator()
# The graph "step" parameter is read from the "b3_step" persistent-homology parameter.
graph_params = {"step": structure.persistent_hom_params["b3_step"]}
protein.generate_graph(structure_model, graph_params)

<networkx.classes.graph.Graph at 0x7f1cc5242668>

In [6]:
# calculate_depth returns a pair; only the first element (a mapping of node index
# to depth) is used here, the second is discarded.
# NOTE(review): binding `_` in a notebook shadows IPython's last-output variable.
depths, _ = structure.calculate_depth(protein.graph)
# Store each depth on its graph node under the "depth" attribute.
for node_idx, depth in depths.items():
    protein.graph.nodes[node_idx]["depth"] = depth
    

In [7]:
# Spot-check one node (index 654) to confirm the "depth" attribute was attached.
protein.graph.nodes[654]

{'full_id': ('1RFQ', 0, 'B', (' ', 308, ' '), ('CA', ' ')),
 'depth': 16.327796624076356}

In [9]:
# Report the graph size as "<node count> <edge count>".
print(f"{protein.graph.number_of_nodes()} {protein.graph.number_of_edges()}")

722 4691


In [10]:
# Annotate every atom with its distance to the nearest ATP (ligand) atom.

# Drop atoms without coordinates. The explicit .copy() makes the filtered frame
# independent, so adding the "distance" column below does not trigger pandas'
# SettingWithCopyWarning.
protein.df = protein.df[protein.df.coord.notnull()].copy()
ATP_coords = protein.df[protein.df.resname == "ATP"].coord.to_list()
if len(ATP_coords) == 0:
    # This may happen because of an oddly formatted PDB file which Bio cannot read
    # correctly. Actually skip the distance computation: min() over an empty
    # ATP_coords would otherwise raise "min() arg is an empty sequence".
    # Note that no "distance" column is created in this case.
    print("WARNING: no ATP atoms found. Skipping.")
else:
    # Euclidean distance from each atom to its closest ATP atom.
    protein.df["distance"] = protein.df.coord.apply(
        lambda atom: min(np.linalg.norm(atom - atp_atom) for atp_atom in ATP_coords)
    )
    # Sanity check: at least one non-ligand atom should lie within 4.0 of the
    # ligand. An empty selection (only ATP atoms present) also counts as
    # "nothing linked" instead of crashing on min() of an empty sequence.
    non_ligand_distances = protein.df.loc[protein.df.resname != "ATP", "distance"]
    if non_ligand_distances.empty or non_ligand_distances.min() > 4.0:
        print("WARNING: no atoms are linked to ligand")


In [11]:
# Columns of protein.df that are handed to the graph model as features.
feature_columns = [
    "bfactor",
    "score",
    "color",
    "color_confidence_interval_high",
    "color_confidence_interval_low",
    "score_confidence_interval_high",
    "score_confidence_interval_low",
    "resname",
    "coord",
    "distance",
]
structure_model.add_features(protein.df, columns=feature_columns)

<networkx.classes.graph.Graph at 0x7f1cc5242668>

In [12]:
# Build the diffused graph with 3 steps; presumably this is what yields the
# *_1 .. *_3 feature columns seen in the dataframe output further down — TODO confirm.
G = structure_model.get_diffused_graph(steps=3)

In [20]:
# Convert the diffused graph to a dataframe and keep every column whose name
# does not contain "distance_".
propagated_df = (
    structure_model.graph_to_dataframe(G)
    .loc[:, lambda frame: [name for name in frame.columns if "distance_" not in name]]
)
propagated_df

Unnamed: 0,depth_1,depth_2,depth_3,bfactor_1,bfactor_2,bfactor_3,full_id,depth,bfactor,resname,coord,distance
0,7.505823,10.851894,16.724897,43.120000,42.840278,44.998824,"(1RFQ, 0, A, ( , 5, ), (CA, ))",0.000000,51.81,THR,"[-5.392, 114.001, 32.936]",27.010250
1,4.684672,8.531199,14.303858,42.742500,42.058750,41.714151,"(1RFQ, 0, A, ( , 6, ), (CA, ))",3.833863,50.49,THR,"[-6.744, 113.2, 36.433]",23.668327
2,8.017853,10.870894,16.536269,42.129167,41.884359,44.551111,"(1RFQ, 0, A, ( , 7, ), (CA, ))",6.542878,46.23,ALA,"[-8.795, 115.902, 38.191]",20.512384
3,7.835647,10.586502,15.825033,40.701765,41.640714,43.823582,"(1RFQ, 0, A, ( , 8, ), (CA, ))",10.082987,44.33,LEU,"[-8.341, 116.951, 41.841]",17.373285
4,10.887662,11.304131,16.614147,37.583571,41.962041,43.371481,"(1RFQ, 0, A, ( , 9, ), (CA, ))",11.268626,41.73,VAL,"[-11.187, 117.564, 44.296]",13.774267
5,12.839416,11.610803,15.081044,40.410714,40.455000,43.342048,"(1RFQ, 0, A, ( , 10, ), (CA, ))",15.069179,40.36,CYS,"[-10.722, 119.828, 47.313]",11.129080
6,14.996313,14.723988,14.959623,39.176923,39.856140,43.164466,"(1RFQ, 0, A, ( , 11, ), (CA, ))",16.505294,42.58,ASP,"[-13.633, 120.453, 49.743]",7.451199
7,18.331867,15.264799,13.463964,40.400000,40.554098,43.978710,"(1RFQ, 0, A, ( , 12, ), (CA, ))",20.311471,44.10,ASN,"[-12.529, 123.397, 51.888]",6.710636
8,17.986887,15.747365,13.568161,40.306000,40.986774,44.164636,"(1RFQ, 0, A, ( , 13, ), (CA, ))",20.261405,43.74,GLY,"[-14.531, 122.84, 55.079]",3.095815
9,18.833352,16.053503,12.923311,40.536923,41.524898,44.511622,"(1RFQ, 0, A, ( , 14, ), (CA, ))",22.177136,42.41,SER,"[-14.38, 125.198, 58.053]",3.676152
