In [1]:
from uuid import uuid4
from pdb2sql import pdb2sql
import numpy as np
from typing import List
from scipy.spatial import distance_matrix
from deeprankcore.molstruct.structure import Chain, PDBStructure
from deeprankcore.molstruct.atom import Atom
from deeprankcore.molstruct.pair import AtomicContact, ResidueContact
from deeprankcore.molstruct.variant import SingleResidueVariant
from deeprankcore.utils.graph import Edge, Graph
from deeprankcore.utils.buildgraph import get_structure
from deeprankcore.features.contact import add_features, _intra_partners
from deeprankcore.domain.aminoacidlist import alanine
from deeprankcore.domain import edgestorage as Efeat
from tests.features.test_contact import _get_atom



In [2]:
def _abs_distance(positions, atom1: List[int], atom2: List[int]):
    pos1, pos2 = positions[atom1], positions[atom2]
    sum_of_squares = 0
    for i in range(3):
        sum_of_squares += (pos1[i]-pos2[i])**2
    return np.sqrt(sum_of_squares)


In [3]:
# Parse the PDB file into a structure; the pdb2sql handle is closed even if
# building the structure raises.
pdb = pdb2sql("../data/pdb/1ak4/1ak4.pdb")
try:
    # NOTE(review): the id "101m" differs from the 1ak4 file name - confirm it
    # is only used as a label.
    structure = get_structure(pdb, "101m")
finally:
    pdb._close() # pylint: disable=protected-access

chain_C, chain_D = structure.get_chain('C'), structure.get_chain('D')

# All-against-all distances between the atoms of the structure.
atoms = structure.get_atoms()
positions = np.array([member.position for member in atoms])
distances = distance_matrix(positions, positions)

count_atoms = len(atoms)

# Matrix that the assertions below expect to be truthy for atom pairs at most
# three bonds apart (second argument of _intra_partners).
intra_matrix = _intra_partners(distances, 3)

assert intra_matrix.shape == (count_atoms, count_atoms)


def _atom_index(chain, residue_number, atom_name):
    """Return the index in ``atoms`` of the atom found by ``_get_atom``."""
    return atoms.index(_get_atom(chain, residue_number, atom_name))


def _show_distance(label, first, second):
    """Print ``label`` followed by the distance between two atom indices."""
    print(label, _abs_distance(positions, first, second))


index_C_phe60_CE1 = _atom_index(chain_C, 60, "CE1")
index_C_trp121_CZ2 = _atom_index(chain_C, 121, "CZ2")
index_C_asn102_O = _atom_index(chain_C, 102, "O")
index_D_leu111_CG = _atom_index(chain_D, 111, "CG")
index_D_pro93_CA = _atom_index(chain_D, 93, "CA")
index_D_pro93_CB = _atom_index(chain_D, 93, "CB")
index_D_pro93_CG = _atom_index(chain_D, 93, "CG")
index_D_pro93_CD = _atom_index(chain_D, 93, "CD")
index_D_ala92_CA = _atom_index(chain_D, 92, "CA")
index_D_ala92_CB = _atom_index(chain_D, 92, "CB")
index_D_gly89_N = _atom_index(chain_D, 89, "N")

# Separated by one bond: flagged in both directions.
_show_distance('1', index_D_pro93_CA, index_D_pro93_CB)
assert intra_matrix[index_D_pro93_CA, index_D_pro93_CB]
assert intra_matrix[index_D_pro93_CB, index_D_pro93_CA]

# Separated by two bonds: flagged in both directions.
_show_distance('2', index_D_pro93_CA, index_D_pro93_CG)
assert intra_matrix[index_D_pro93_CA, index_D_pro93_CG]
assert intra_matrix[index_D_pro93_CG, index_D_pro93_CA]

# Separated by three bonds: flagged in both directions.
_show_distance('3', index_D_pro93_CA, index_D_ala92_CA)
assert intra_matrix[index_D_pro93_CA, index_D_ala92_CA]
assert intra_matrix[index_D_ala92_CA, index_D_pro93_CA]

# Separated by four bonds: must not be flagged.
_show_distance('4', index_D_pro93_CA, index_D_ala92_CB)
assert not intra_matrix[index_D_pro93_CA, index_D_ala92_CB]

# Hydrogen bonded across chains: must not be flagged.
_show_distance('Hbond, diff chain', index_D_gly89_N, index_C_asn102_O)
assert not intra_matrix[index_D_gly89_N, index_C_asn102_O]

# Spatially close but not connected: must not be flagged.
_show_distance('close, but not connected', index_C_trp121_CZ2, index_C_phe60_CE1)
assert not intra_matrix[index_C_trp121_CZ2, index_C_phe60_CE1]

# Far apart: must not be flagged.
_show_distance('far', index_D_leu111_CG, index_D_pro93_CA)
assert not intra_matrix[index_D_leu111_CG, index_D_pro93_CA]


1 1.5269060219934996
2 2.3760523984121265
3 3.8166208876439427
4 4.347795993374118
Hbond, diff chain 3.0299316823981353
close, but not connected 4.094302504700892
far 19.142930261587434


In [8]:
# Pair masks over all atom pairs, derived from the distance matrix.

# Atom pairs closer than 2.1 (presumably directly bonded, i.e. 1-2 pairs;
# TODO confirm this threshold matches the one used inside _intra_partners).
pair_12 = distances < 2.1

# Pairs reported by _intra_partners for a hop limit of 2 and of 3.
pair_13_max = _intra_partners(distances, 2)
pair_14_max = _intra_partners(distances, 3)

# Pairs present in exactly one of the two neighbouring masks.
pair_13_exact = np.logical_xor(pair_13_max, pair_12)
pair_14_exact = np.logical_xor(pair_13_max, pair_14_max)

# Complements: pairs not covered by the 2-hop / 3-hop masks.
beyond_13, beyond_14 = np.logical_not(pair_13_max), np.logical_not(pair_14_max)


In [11]:
# 1-3 pairing
# Measures how well a plain distance threshold reproduces the pair_13_max mask.

cutoff_13 = 2.9
# Build the mask from the named cutoff so that the value applied here is
# always the value reported in the printed summary.
within_cutoff_13 = distances < cutoff_13
beyond_cutoff_13 = np.logical_not(within_cutoff_13)

# Confusion masks: "positive" means within the cutoff, "true" means the
# cutoff agrees with pair_13_max / beyond_13.
tp13 = np.logical_and(within_cutoff_13, pair_13_max)
fp13 = np.logical_and(within_cutoff_13, beyond_13)
tn13 = np.logical_and(beyond_cutoff_13, beyond_13)
fn13 = np.logical_and(beyond_cutoff_13, pair_13_max)

# Rates are fractions of the reference class: pair_13_max entries for tp/fn,
# beyond_13 entries for fp/tn.
tp13_rate = np.sum(tp13)/np.sum(pair_13_max)
fp13_rate = np.sum(fp13)/np.sum(beyond_13)
tn13_rate = np.sum(tn13)/np.sum(beyond_13)
fn13_rate = np.sum(fn13)/np.sum(pair_13_max)

# Only the true- and false-positive rates are reported; tn13_rate and
# fn13_rate stay available for inspection.
print(f'1-3 pairing at distance cutoff {cutoff_13} Angstrom')
print(f'true positives: {tp13_rate*100:.2f}%')
print(f'false positives: {fp13_rate*100:.2f}%')


1-3 pairing at distance cutoff 2.9 Angstrom
true positives: 100.00%
false positives: 0.03%


In [14]:
# 1-4 pairing
# Measures how well a plain distance threshold reproduces the pair_14_max mask.

cutoff_14 = 4.2
within_cutoff_14 = distances < cutoff_14
beyond_cutoff_14 = np.logical_not(within_cutoff_14)

# Confusion masks: cutoff decision (within / beyond) against the reference
# masks (pair_14_max / beyond_14).
tp14 = np.logical_and(within_cutoff_14, pair_14_max)
fn14 = np.logical_and(beyond_cutoff_14, pair_14_max)
fp14 = np.logical_and(within_cutoff_14, beyond_14)
tn14 = np.logical_and(beyond_cutoff_14, beyond_14)

# Reference class sizes, used as denominators for the rates.
_count_pairs_14 = np.sum(pair_14_max)
_count_beyond_14 = np.sum(beyond_14)

tp14_rate = np.sum(tp14)/_count_pairs_14
fp14_rate = np.sum(fp14)/_count_beyond_14
tn14_rate = np.sum(tn14)/_count_beyond_14
fn14_rate = np.sum(fn14)/_count_pairs_14

# Only the true- and false-positive rates are reported; tn14_rate and
# fn14_rate stay available for inspection.
print(
    f'1-4 pairing at distance cutoff {cutoff_14} Angstrom',
    f'true positives: {tp14_rate*100:.2f}%',
    f'false positives: {fp14_rate*100:.2f}%',
    sep='\n',
)


1-4 pairing at distance cutoff 4.2 Angstrom
true positives: 100.00%
false positives: 0.24%
99.76069904628423
