In [None]:
from uuid import uuid4

from pdb2sql import pdb2sql
import numpy
import pandas as pd

from deeprankcore.models.structure import Chain, Atom
from deeprankcore.models.contact import AtomicContact, ResidueContact
from deeprankcore.models.graph import Edge, Graph
from deeprankcore.tools.pdb import get_structure
from deeprankcore.models.amino_acid import alanine
from deeprankcore.models.variant import SingleResidueVariant
from deeprankcore.domain.features import edgefeats as Efeat

from typing import List
import logging
import numpy as np
from scipy.spatial import distance_matrix
from deeprankcore.models.structure import Atom
from deeprankcore.models.graph import Graph, Edge
from deeprankcore.models.contact import ResidueContact, AtomicContact
from deeprankcore.domain.features import edgefeats as Efeat
from deeprankcore.domain.forcefield import atomic_forcefield, COULOMB_CONSTANT, EPSILON0, MAX_COVALENT_DISTANCE
from deeprankcore.models.forcefield.vanderwaals import VanderwaalsParam
from deeprankcore.models.error import UnknownAtomError

from deeprankcore.feature.atomic_contact import get_coulomb_potentials, get_lennard_jones_potentials
from tests.feature.test_atomic_contact import _get_atom, _wrap_in_graph

# Location of the 101M structure file used throughout this notebook.
# NOTE(review): this is an absolute path inside one user's home directory, so the
# cell only resolves on a machine that has the checkout at exactly this location.
pdb_path = "/home/dbodor/git/DeepRank/deeprank-core/tests/data/pdb/101M/101M.pdb"

# Build the structure object from the PDB file. `pdb._close()` sits in `finally`,
# so it also runs when get_structure() raises.
pdb = pdb2sql(pdb_path)
try:
    structure = get_structure(pdb, "101m")
finally:
    pdb._close() # pylint: disable=protected-access


In [None]:
# Positions, within the first chain's residue list, of the two residues to pair up.
RES1 = 0
RES2 = 2

# Pair the two residues in a ResidueContact and wrap that contact in an Edge;
# both names are read by later cells.
contact = ResidueContact(
    structure.chains[0].residues[RES1], structure.chains[0].residues[RES2]
)
edge = Edge(contact)


In [None]:
def LJPOT(distances: np.ndarray, atoms: "List[Atom]",
          vanderwaals_parameters: "List[VanderwaalsParam]") -> np.ndarray:
    """Calculate pairwise Lennard-Jones potentials.

    For every atom pair (i, j) the pair parameters are combined as
    ``sigma_ij = (sigma_i + sigma_j) / 2`` and ``epsilon_ij = sqrt(epsilon_i * epsilon_j)``,
    and the potential is ``4 * epsilon_ij * ((sigma_ij / d_ij)**12 - (sigma_ij / d_ij)**6)``.
    The ``inter_*`` parameters are used when the two atoms belong to different residues,
    the ``intra_*`` parameters when they belong to the same residue.

    Warning: there's no distance cutoff here. The radius of influence is assumed to be infinite.
    Zero distances are not special-cased; the division is carried out as-is.

    Args:
        distances: square matrix of interatomic distances, one row and one column per atom.
        atoms: the atoms, in the same order as the rows/columns of ``distances``;
            only their ``residue`` attribute is read.
        vanderwaals_parameters: one parameter object per atom, in the same order as ``atoms``.

    Returns:
        A matrix of the same shape as ``distances`` holding the pairwise potentials.

    Raises:
        ValueError: if the number of atoms, the number of parameter objects and the
            dimensions of ``distances`` do not agree.
    """

    # check for the correct data shapes
    atom_count = len(atoms)
    if atom_count != len(vanderwaals_parameters):
        raise ValueError(f"The number of atoms ({atom_count}) does not match the number of vanderwaals parameters ({len(vanderwaals_parameters)})")
    if atom_count != distances.shape[0] or atom_count != distances.shape[1]:
        shape_text = "x".join(str(d) for d in distances.shape)
        raise ValueError(f"Cannot calculate potentials between {atom_count} atoms and {shape_text} distances")

    # collect parameters: *1 holds the row atom's value, *2 the column atom's value
    sigmas1 = np.empty((atom_count, atom_count))
    sigmas2 = np.empty((atom_count, atom_count))
    epsilons1 = np.empty((atom_count, atom_count))
    epsilons2 = np.empty((atom_count, atom_count))
    for atom1_index, (atom1, parameters1) in enumerate(zip(atoms, vanderwaals_parameters)):
        for atom2_index, (atom2, parameters2) in enumerate(zip(atoms, vanderwaals_parameters)):

            # Which parameter we take, depends on whether the contact is intra- or intermolecular.
            if atom1.residue != atom2.residue:
                sigmas1[atom1_index, atom2_index] = parameters1.inter_sigma
                sigmas2[atom1_index, atom2_index] = parameters2.inter_sigma

                epsilons1[atom1_index, atom2_index] = parameters1.inter_epsilon
                epsilons2[atom1_index, atom2_index] = parameters2.inter_epsilon
            else:
                sigmas1[atom1_index, atom2_index] = parameters1.intra_sigma
                sigmas2[atom1_index, atom2_index] = parameters2.intra_sigma

                epsilons1[atom1_index, atom2_index] = parameters1.intra_epsilon
                epsilons2[atom1_index, atom2_index] = parameters2.intra_epsilon

    # calculate potentials
    sigmas = 0.5 * (sigmas1 + sigmas2)
    # the well depth is the geometric mean of the two epsilons (not of the sigmas)
    epsilons = np.sqrt(epsilons1 * epsilons2)
    potentials = 4.0 * epsilons * ((sigmas / distances) ** 12 - (sigmas / distances) ** 6)

    return potentials

In [None]:



def add_features_for_residues(edges): # pylint: disable=too-many-locals
    """Set distance, van der Waals, electrostatic, covalent and same-chain features on residue edges.

    Every edge's ``id`` is read as a contact with ``residue1`` and ``residue2``. All atoms of
    all contacts are gathered, their pairwise distances and potentials are computed once
    (via ``get_coulomb_potentials`` and ``get_lennard_jones_potentials``), and the per-edge
    features are derived from the residue1-atom x residue2-atom sub-block:

    * ``Efeat.DISTANCE``: minimum of the value already stored (default 1e99) and all pair distances.
    * ``Efeat.VANDERWAALS`` / ``Efeat.ELECTROSTATIC``: reset to 0, then summed over all pairs.
    * ``Efeat.COVALENT``: 1.0 if any pair is closer than ``MAX_COVALENT_DISTANCE``;
      otherwise 0.0 unless a value is already stored.
    * ``Efeat.SAMECHAIN``: 1.0 if both residues report the same chain id, else 0.0.

    Atoms unknown to the force field get zero charge and all-zero van der Waals parameters.

    Args:
        edges: iterable of edges whose ``features`` mapping is updated in place.

    Returns:
        Tuple ``(dist_df, electrostat_df, vdw_df)`` of DataFrames with the full atom x atom
        distance, electrostatic and van der Waals matrices; columns are labelled by atom.
    """
    # materialise once: the edges are walked twice below
    edges = list(edges)

    # get a list of all the distinct atoms involved
    atoms = list({
        atom
        for edge in edges
        for atom in (edge.id.residue1.atoms + edge.id.residue2.atoms)
    })

    # get all atomic parameters
    atom_indices = {}
    positions = []
    atom_charges = []
    atom_vanderwaals_parameters = []
    for atom_index, atom in enumerate(atoms):

        try:
            charge = atomic_forcefield.get_charge(atom)
            vanderwaals = atomic_forcefield.get_vanderwaals_parameters(atom)

        except UnknownAtomError:
            # print() does not interpolate %s placeholders, so format the message explicitly
            print(f"Ignoring atom {atom}, because it's unknown to the forcefield")

            # set parameters to zero, so that the potential becomes zero
            charge = 0.0
            vanderwaals = VanderwaalsParam(0.0, 0.0, 0.0, 0.0)

        atom_charges.append(charge)
        atom_vanderwaals_parameters.append(vanderwaals)
        positions.append(atom.position)
        atom_indices[atom] = atom_index

    # calculate the distance matrix for those atoms
    interatomic_distances = distance_matrix(positions, positions, p=2)
    dist_df = pd.DataFrame(columns=atoms, data=interatomic_distances)

    # calculate potentials
    interatomic_electrostatic_potentials = get_coulomb_potentials(interatomic_distances, atom_charges)
    interatomic_vanderwaals_potentials = get_lennard_jones_potentials(interatomic_distances, atoms, atom_vanderwaals_parameters)

    electrostat_df = pd.DataFrame(columns=atoms, data=interatomic_electrostatic_potentials)
    vdw_df = pd.DataFrame(columns=atoms, data=interatomic_vanderwaals_potentials)

    # determine which atoms are close enough to form a covalent bond
    covalent_neighbours = interatomic_distances < MAX_COVALENT_DISTANCE

    # set the edge features
    for edge in edges:
        contact = edge.id

        # restart the sums, so that they only contain this call's contributions
        edge.features[Efeat.ELECTROSTATIC] = 0
        edge.features[Efeat.VANDERWAALS] = 0

        for atom1 in contact.residue1.atoms:
            atom1_index = atom_indices[atom1]

            for atom2 in contact.residue2.atoms:
                atom2_index = atom_indices[atom2]

                edge.features[Efeat.DISTANCE] = min(edge.features.get(Efeat.DISTANCE, 1e99),
                                                    interatomic_distances[atom1_index, atom2_index])

                edge.features[Efeat.VANDERWAALS] += interatomic_vanderwaals_potentials[atom1_index, atom2_index]

                edge.features[Efeat.ELECTROSTATIC] += interatomic_electrostatic_potentials[atom1_index, atom2_index]

                if covalent_neighbours[atom1_index, atom2_index]:
                    edge.features[Efeat.COVALENT] = 1.0
                elif Efeat.COVALENT not in edge.features:
                    edge.features[Efeat.COVALENT] = 0.0

        # Decide this once per edge from the residues themselves, instead of from
        # whichever atom indices happened to be left over from the loops above
        # (which are unbound or stale when residue2 has no atoms).
        same_chain = contact.residue1.chain.id == contact.residue2.chain.id
        edge.features[Efeat.SAMECHAIN] = 1.0 if same_chain else 0.0

    return dist_df, electrostat_df, vdw_df


# Run the feature calculation on the edge built above; the call fills
# `edge.features` in place and returns the three atom-by-atom frames.
dist_df, electrostat_df, vdw_df = add_features_for_residues(_wrap_in_graph(edge).edges)

# Show the three scalar features that were stored on the edge.
print('distance', edge.features[Efeat.DISTANCE])
print('vdw', edge.features[Efeat.VANDERWAALS])
print('electrostat', edge.features[Efeat.ELECTROSTATIC])


# original: 1.3296811647910176



In [None]:
# Report the dimensions of the three frames returned by add_features_for_residues.
print(dist_df.shape)
print(electrostat_df.shape)
print(vdw_df.shape)

# Last expression of the cell: display the electrostatic frame.
electrostat_df

In [None]:
# redefined distances thingy


def get_residue_features(edges):
    """Print per-contact distance, covalency, same-chain flag and summed Coulomb potential.

    For every edge, ``edge.id`` is read as a contact between ``residue1`` and ``residue2``.
    Only the residue1-atom x residue2-atom distances are computed (not the full
    all-atom matrix). Nothing is stored on the edges and nothing is returned; the
    values are only printed.

    Args:
        edges: iterable of edges whose ``id`` is a residue contact.
    """
    for current_edge in edges:
        pair = current_edge.id
        first_atoms, second_atoms = pair.residue1.atoms, pair.residue2.atoms

        # distance & covalentness: rows follow residue1's atoms, columns residue2's
        pairwise = distance_matrix(
            [atom.position for atom in first_atoms],
            [atom.position for atom in second_atoms],
        )
        closest = np.min(pairwise)
        is_covalent = closest < MAX_COVALENT_DISTANCE

        first_chain_id = pair.residue1.chain.id
        on_same_chain = first_chain_id == pair.residue2.chain.id

        # electrostatic: column vector of residue1 charges times row vector of residue2 charges
        charge_column = np.expand_dims([atomic_forcefield.get_charge(atom) for atom in first_atoms], axis=1)
        charge_row = np.expand_dims([atomic_forcefield.get_charge(atom) for atom in second_atoms], axis=0)
        coulomb = charge_column * charge_row * COULOMB_CONSTANT / (EPSILON0 * pairwise)

        # vdw: the parameters are looked up but not used for anything yet
        for atom in (*first_atoms, *second_atoms):
            atomic_forcefield.get_vanderwaals_parameters(atom)

        print(first_chain_id)
        report = (
            ('distance:', closest),
            ('distances shape:', pairwise.shape),
            ('covalent:', is_covalent),
            ('samechain:', on_same_chain),
            ('electrostat:', np.sum(coulomb)),
        )
        for label, value in report:
            print(label, value)


# Print the per-contact values for the single edge currently bound to `edge`.
get_residue_features(_wrap_in_graph(edge).edges)



In [None]:
# Scratch check: a zero-filled array with 7 rows and 8 columns.
np.zeros((7,8))

In [None]:


# Toy data for comparing two ways of writing the charge product:
# five "charges" and a 5x5 "distance" matrix.
A = [.25,.63,.13,.64,.15]
dist = np.array([[.21,.42,.63,.24,.15],
                 [.41,.52,.83,.41,.55],
                 [.11,.62,.73,.4,.25],
                 [.13,.52,.93,.34,.65],
                 [.31,.21,.13,.64,.85],
                    ])

# print(dist.shape)

# A0 has shape (1, 5) and A1 has shape (5, 1), so A0 * A1 broadcasts to the
# 5x5 table of all pairwise products A[i] * A[j].
A0 = np.expand_dims(A, axis=0)
A1 = np.expand_dims(A, axis=1)


# `ori` uses the pairwise products. `new` uses np.square(A), a length-5 vector that
# is broadcast along the rows, so every entry in column j is scaled by A[j]**2.
# The two only agree on the diagonal.
ori = A0 * A1 * COULOMB_CONSTANT / (EPSILON0*dist)
new = np.square(A) * COULOMB_CONSTANT / (EPSILON0*dist)

print(A0*A1)
print(np.square(A))

ori_df = pd.DataFrame(data=ori)
new_df = pd.DataFrame(data=new)

# numpy.testing.assert_array_equal (ori,new)

# print(ori_df)
# print(new_df)

# Only the shapes are compared here, not the values.
assert ori_df.shape == new_df.shape



In [None]:

def add_features_for_residues(edges): # pylint: disable=too-many-locals
    """Set contact features on residue edges and return Coulomb matrices computed two ways.

    NOTE(review): a function with this same name is defined earlier in the notebook;
    running this cell replaces that earlier definition.

    Every edge's ``id`` is read as a contact with ``residue1`` and ``residue2``. All atoms of
    all contacts are gathered, their pairwise distances and potentials are computed once,
    and the per-edge features are derived from the residue1-atom x residue2-atom sub-block:

    * ``Efeat.DISTANCE``: minimum of the value already stored (default 1e99) and all pair distances.
    * ``Efeat.VANDERWAALS`` / ``Efeat.ELECTROSTATIC``: pair potentials added onto the value
      already stored (default 0.0); the sums are not reset first.
    * ``Efeat.COVALENT``: 1.0 if any pair is closer than ``MAX_COVALENT_DISTANCE``;
      otherwise 0.0 unless a value is already stored.
    * ``Efeat.SAMECHAIN``: 1.0 if both residues report the same chain id, else 0.0.

    Atoms unknown to the force field get zero charge and all-zero van der Waals parameters.

    Args:
        edges: iterable of edges whose ``features`` mapping is updated in place.

    Returns:
        Tuple ``(interatomic_distances, df_pot_ori, df_pot_new)``: the atom x atom distance
        matrix, a DataFrame of the pairwise-product Coulomb terms, and a DataFrame of the
        squared-charge variant. Both DataFrames have their columns labelled by atom.
    """
    # materialise once: the edges are walked twice below
    edges = list(edges)

    # get a list of all the distinct atoms involved
    atoms = list({
        atom
        for edge in edges
        for atom in (edge.id.residue1.atoms + edge.id.residue2.atoms)
    })

    # get all atomic parameters
    atom_indices = {}
    positions = []
    atom_charges = []
    atom_vanderwaals_parameters = []
    for atom_index, atom in enumerate(atoms):

        try:
            charge = atomic_forcefield.get_charge(atom)
            vanderwaals = atomic_forcefield.get_vanderwaals_parameters(atom)

        except UnknownAtomError:
            # no module-level `_log` exists in this notebook, so fetch the logger here
            logging.getLogger(__name__).warning("Ignoring atom %s, because it's unknown to the forcefield", atom)

            # set parameters to zero, so that the potential becomes zero
            charge = 0.0
            vanderwaals = VanderwaalsParam(0.0, 0.0, 0.0, 0.0)

        atom_charges.append(charge)
        atom_vanderwaals_parameters.append(vanderwaals)
        positions.append(atom.position)
        atom_indices[atom] = atom_index

    # calculate the distance matrix for those atoms
    interatomic_distances = distance_matrix(positions, positions, p=2)

    # calculate potentials from the imported library function
    # (the previously referenced COULOMB_POT is not defined anywhere in the notebook)
    interatomic_electrostatic_potentials = get_coulomb_potentials(interatomic_distances, atom_charges)
    interatomic_vanderwaals_potentials = get_lennard_jones_potentials(interatomic_distances, atoms, atom_vanderwaals_parameters)

    # calculate the potentials here as well, for comparison:
    # pairwise products q_i * q_j ...
    charge_row = np.expand_dims(atom_charges, axis=0)
    charge_column = np.expand_dims(atom_charges, axis=1)
    potentials = charge_row * charge_column * COULOMB_CONSTANT / (EPSILON0 * interatomic_distances)
    # ... versus squared charges: the 1-D vector is broadcast along the rows, so
    # column j is scaled by q_j ** 2 (not the same as the pairwise product off the diagonal)
    potentials_sq = np.square(atom_charges) * COULOMB_CONSTANT / (EPSILON0 * interatomic_distances)

    df_pot_ori = pd.DataFrame(columns=atoms, data=potentials)
    df_pot_new = pd.DataFrame(columns=atoms, data=potentials_sq)

    # determine which atoms are close enough to form a covalent bond
    covalent_neighbours = interatomic_distances < MAX_COVALENT_DISTANCE

    # set the edge features
    for edge in edges:
        contact = edge.id

        for atom1 in contact.residue1.atoms:
            atom1_index = atom_indices[atom1]

            for atom2 in contact.residue2.atoms:
                atom2_index = atom_indices[atom2]

                edge.features[Efeat.DISTANCE] = min(edge.features.get(Efeat.DISTANCE, 1e99),
                                                    interatomic_distances[atom1_index, atom2_index])

                edge.features[Efeat.VANDERWAALS] = (edge.features.get(Efeat.VANDERWAALS, 0.0) +
                                                    interatomic_vanderwaals_potentials[atom1_index, atom2_index])

                edge.features[Efeat.ELECTROSTATIC] = (edge.features.get(Efeat.ELECTROSTATIC, 0.0) +
                                                      interatomic_electrostatic_potentials[atom1_index, atom2_index])

                if covalent_neighbours[atom1_index, atom2_index]:
                    edge.features[Efeat.COVALENT] = 1.0
                elif Efeat.COVALENT not in edge.features:
                    edge.features[Efeat.COVALENT] = 0.0

        # Decide this once per edge from the residues themselves, instead of from
        # whichever atom indices happened to be left over from the loops above
        # (which are unbound or stale when residue2 has no atoms).
        same_chain = contact.residue1.chain.id == contact.residue2.chain.id
        edge.features[Efeat.SAMECHAIN] = 1.0 if same_chain else 0.0

    return interatomic_distances, df_pot_ori, df_pot_new

# check that we can calculate residue contacts
# (this rebinds `contact` and `edge` to the pair of residues 0 and 1 of the first chain)
contact = ResidueContact(
    structure.chains[0].residues[0], structure.chains[0].residues[1]
)
edge = Edge(contact)


# Run the most recently defined add_features_for_residues on the new edge and
# keep the distance matrix plus the two Coulomb frames for inspection.
interatomic_distances, df_pot_ori, df_pot_new = add_features_for_residues(_wrap_in_graph(edge).edges)


# print (edge.features[Efeat.DISTANCE])
# original: 1.3296811647910176



In [None]:
# `df` only ever existed as a local variable inside add_features_for_residues, so it
# is not defined at notebook level. Rebuild the distance frame from the matrix that
# the function returned (columns are plain integer positions here).
df = pd.DataFrame(data=interatomic_distances)

# only the last expression of a cell is displayed, so print the shape explicitly
print(df.shape)
df

In [None]:
def dummy():
    """Return the distances from three fixed 2-D points to one fixed reference point.

    The points are (3, 3), (4, 4) and (1, 2); the reference point is (4, 4).

    Returns:
        Array of shape (3, 1): one row per point, one column for the reference point,
        as produced by ``distance_matrix`` with its default settings.
    """
    points = [[3, 3], [4, 4], [1, 2]]
    reference = [[4, 4]]

    return distance_matrix(points, reference)

dummy()