Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Commit

Permalink
Merge pull request #101 from DeepRank/issue92_atomdensity
Browse files Browse the repository at this point in the history
Update atom densities feature calculation
  • Loading branch information
CunliangGeng committed Sep 18, 2019
2 parents 5bc7968 + fb37bed commit e53f60b
Show file tree
Hide file tree
Showing 15 changed files with 188 additions and 120 deletions.
1 change: 1 addition & 0 deletions deeprank/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .chemicals import AA_codes, AA_codes_3to1, AA_codes_1to3
from .chemicals import AA_codes_pssm_ordered
from .chemicals import AA_properties
from .chemicals import atom_vdw_radius, atom_vdw_radius_noH

# Debug
DEBUG = False
Expand Down
17 changes: 16 additions & 1 deletion deeprank/config/chemicals.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,19 @@
'THR': 'polar',
'TRP': 'polar',
'TYR': 'polar'
}
}


# atom vdw radius
# William M Haynes. CRC Handbook of Chemistry and Physics.
# ISBN 9781482208689.
# URL: https://books.google.no/books?id=bNDMBQAAQBAJ.

atom_vdw_radius_noH = {
"C": 1.7,
"N": 1.55,
"O": 1.52,
"S": 1.8,
}

atom_vdw_radius = {**atom_vdw_radius_noH, "H": 1.1}
9 changes: 6 additions & 3 deletions deeprank/generate/DataGenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -803,7 +803,7 @@ def map_features(self, grid_info={},
>>> grid_info = {
>>> 'number_of_points' : [30,30,30],
>>> 'resolution' : [1.,1.,1.],
>>> 'atomic_densities' : {'CA':3.5,'N':3.5,'O':3.5,'C':3.5},
>>> 'atomic_densities' : {'C':1.7, 'N':1.55, 'O':1.52, 'S':1.8},
>>> }
>>>
>>> database.map_features(grid_info,try_sparse=True,time=False,prog_bar=True)
Expand Down Expand Up @@ -1303,7 +1303,7 @@ def _add_pdb(molgrp, pdbfile, name):
for line in fi if line.startswith('ATOM')]
# PDB default line length is 80
# http://www.wwpdb.org/documentation/file-format
data = np.array(data).astype('|S73')
data = np.array(data).astype('|S78')
molgrp.create_dataset(name, data=data)


Expand Down Expand Up @@ -1341,6 +1341,8 @@ def _add_aug_pdb(molgrp, pdbfile, name, axis, angle):
# close the db
sqldb.close()

# TODO the output does not obey PDB format
# TODO should not strip them!
# export the data to h5
data = []
for d in sqldata:
Expand All @@ -1358,6 +1360,7 @@ def _add_aug_pdb(molgrp, pdbfile, name, axis, angle):
line += '{: 8.3f}'.format(d[7]) # x
line += '{: 8.3f}'.format(d[8]) # y
line += '{: 8.3f}'.format(d[9]) # z
# TODO add the element
try:
line += '{: 6.2f}'.format(d[10]) # occ
line += '{: 6.2f}'.format(d[11]) # temp
Expand All @@ -1366,7 +1369,7 @@ def _add_aug_pdb(molgrp, pdbfile, name, axis, angle):
line += '{: 6.2f}'.format(0) # temp
data.append(line)

data = np.array(data).astype('|S73')
data = np.array(data).astype('|S78')
molgrp.create_dataset(name, data=data)

return center
Expand Down
31 changes: 11 additions & 20 deletions deeprank/generate/GridTools.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def __init__(self, molgrp,
number_of_points(int, optional): number of points we want in
each direction of the grid.
resolution(float, optional): distance(in Angs) between two points.
atomic_densities(dict, optional): dictionary of atom types with
their vdw radius, e.g. {'CA':1.7, 'C':1.7, 'N':1.55, 'O':1.52}
atomic_densities(dict, optional): dictionary of element types with
their vdw radius, see deeprank.config.atom_vdw_radius_noH
atomic_densities_mode(str, optional): Mode for mapping
(deprecated must be 'ind').
feature(None, optional): Name of the features to be mapped.
Expand Down Expand Up @@ -336,24 +336,15 @@ def map_atomic_densities(self, only_contact=True):
self.sqldb.get('rowID', chainID='B'))

# loop over all the data we want
for atomtype, vdw_rad in self.local_tqdm(
for elementtype, vdw_rad in self.local_tqdm(
self.atomic_densities.items()):

t0 = time()

# get the contact atom that of the correct type on both chains
if only_contact:
xyzA = np.array(self.sqldb.get(
'x,y,z', rowID=index[0], name=atomtype))
xyzB = np.array(self.sqldb.get(
'x,y,z', rowID=index[1], name=atomtype))

else:
# get the atom that are of the correct type on both chains
xyzA = np.array(self.sqldb.get(
'x,y,z', chainID='A', name=atomtype))
xyzB = np.array(self.sqldb.get(
'x,y,z', chainID='B', name=atomtype))
xyzA = np.array(self.sqldb.get(
'x,y,z', rowID=index[0], element=elementtype))
xyzB = np.array(self.sqldb.get(
'x,y,z', rowID=index[1], element=elementtype))

tprocess = time() - t0

Expand Down Expand Up @@ -404,16 +395,16 @@ def map_atomic_densities(self, only_contact=True):

# create the final grid : A - B
if mode == 'diff':
self.atdens[atomtype] = atdensA - atdensB
self.atdens[elementtype] = atdensA - atdensB

# create the final grid : A + B
elif mode == 'sum':
self.atdens[atomtype] = atdensA + atdensB
self.atdens[elementtype] = atdensA + atdensB

# create the final grid : A and B
elif mode == 'ind':
self.atdens[atomtype + '_chainA'] = atdensA
self.atdens[atomtype + '_chainB'] = atdensB
self.atdens[elementtype + '_chainA'] = atdensA
self.atdens[elementtype + '_chainB'] = atdensB
else:
raise ValueError(f'Atomic density mode {mode} not recognized')

Expand Down
48 changes: 25 additions & 23 deletions deeprank/learn/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import numpy as np
from tqdm import tqdm

from deeprank import config
from deeprank.config import logger
from deeprank.generate import MinMaxParam, NormalizeData, NormParam
from deeprank.tools import pdb2sql, sparse
Expand Down Expand Up @@ -65,8 +66,7 @@ def __init__(self, train_database, valid_database=None, test_database=None,
Select the features used in the learning.
if mapfly is True:
{'AtomDensities': 'all', 'Features': 'all'}
{'AtomicDensities': {
'CA': 1.7, 'C': 1.7, 'N': 1.55, 'O': 1.52},
{'AtomicDensities': config.atom_vdw_radius_noH,
'Features': ['PSSM_*', 'pssm_ic_*']}
if mapfly is False:
{'AtomDensities_ind': 'all',
Expand Down Expand Up @@ -196,10 +196,10 @@ def __init__(self, train_database, valid_database=None, test_database=None,
@staticmethod
def _get_database_name(database):
"""Get the list of hdf5 database file names.
Args:
database(None, str or list(str)): hdf5 database name(s).
Returns:
list: hdf5 file names
"""
Expand Down Expand Up @@ -310,6 +310,7 @@ def __getitem__(self, index):
"""

fname, mol, angle, axis = self.index_complexes[index]
print(fname, mol)

if self.mapfly:
feature, target = self.map_one_molecule(fname, mol, angle, axis)
Expand Down Expand Up @@ -500,7 +501,6 @@ def filter(self, molgrp):
Args:
molgrp (str): group name of the molecule in the hdf5 file
Returns:
bool: True if we keep the complex False otherwise
Expand Down Expand Up @@ -541,7 +541,7 @@ class parameter self.select_feature examples:
- {'Feature_ind': ['PSSM_*', 'pssm_ic_*']}
Feature type must be: 'AtomicDensities_ind' or 'Feature_ind'.
Raises:
KeyError: Wrong feature type.
KeyError: Wrong feature type.
Expand Down Expand Up @@ -632,12 +632,10 @@ def get_raw_feature_name(self):
class parameter self.select_feature examples:
- 'all'
- {'AtomicDensities': 'all', 'Features':all}
- {'AtomicDensities': {
'CA': 1.7, 'C': 1.7, 'N': 1.55, 'O': 1.52},
- {'AtomicDensities': config.atom_vaw_radius_noH,
'Features': ['PSSM_*', 'pssm_ic_*']}
Feature type must be: 'AtomicDensities' or 'Features'.
Raises:
KeyError: Wrong feature type.
KeyError: Wrong feature type.
Expand All @@ -650,8 +648,7 @@ class parameter self.select_feature examples:
# if we select all the features
if self.select_feature == "all":
self.select_feature = {}
self.select_feature['AtomicDensities'] = {
'CA': 1.7, 'C': 1.7, 'N': 1.55, 'O': 1.52}
self.select_feature['AtomicDensities'] = config.atom_vdw_radius_noH
self.select_feature['Features'] = [
name for name in raw_data.keys()]

Expand All @@ -663,8 +660,8 @@ class parameter self.select_feature examples:
# if for a given type we need all the feature
if feat_names == 'all':
if feat_type == 'AtomicDensities':
self.select_feature['AtomicDensities'] = {
'CA': 1.7, 'C': 1.7, 'N': 1.55, 'O': 1.52}
self.select_feature['AtomicDensities'] = \
config.atom_vdw_radius_noH
elif feat_type == 'Features':
self.select_feature[feat_type] = list(raw_data.keys())
else:
Expand Down Expand Up @@ -1220,13 +1217,13 @@ def make_feature_pair(feature, op):

def get_grid(self, mol_data):
"""Get meshed grids and number of pointgs
Args:
mol_data(h5 group): HDF5 moleucle group
Raises:
ValueError: Grid points not found in mol_data.
Returns:
tuple, tuple: meshgrid, npts
"""
Expand Down Expand Up @@ -1269,9 +1266,9 @@ def get_grid(self, mol_data):
def map_atomic_densities(
self, feat_names, mol_data, grid, npts, angle, axis):
"""Map atomic densities.
Args:
feat_names(dict): Atom type and vdw radius
feat_names(dict): Element type and vdw radius
mol_data(h5 group): HDF5 molecule group
grid(tuple): mesh grid of x,y,z
npts(tuple): number of points on axis x,y,z
Expand All @@ -1289,16 +1286,21 @@ def map_atomic_densities(
center = [np.mean(g) for g in grid]

densities = []
for atomtype, vdw_rad in feat_names.items():
for elementtype, vdw_rad in feat_names.items():

# get pos of the contact atoms of correct type
xyzA = np.array(sql.get('x,y,z', rowID=index[0], name=atomtype))
xyzB = np.array(sql.get('x,y,z', rowID=index[1], name=atomtype))
xyzA = np.array(sql.get(
'x,y,z', rowID=index[0], element=elementtype))
xyzB = np.array(sql.get(
'x,y,z', rowID=index[1], element=elementtype))

# rotate if necessary
if angle is not None:
xyzA = self._rotate_coord(xyzA, center, angle, axis)
xyzB = self._rotate_coord(xyzB, center, angle, axis)
if xyzA != np.array([]):
xyzA = self._rotate_coord(xyzA, center, angle, axis)

if xyzB != np.array([]):
xyzB = self._rotate_coord(xyzB, center, angle, axis)

# init the grid
atdensA = np.zeros(npts)
Expand Down
22 changes: 11 additions & 11 deletions deeprank/tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ Here are located all the generic tools used during one or across multiple steps

## PDB2SQL

The file pdb2sql.py contains a class named pdb2sql that allows using sqlite3 to manipulate PDB files. The use of SQL queries makes it very easy to extract information from the PDB file using only one line of code.
The file pdb2sql.py contains a class named pdb2sql that allows using sqlite3 to manipulate PDB files. The use of SQL queries makes it very easy to extract information from the PDB file using only one line of code.

### Create a SQl data base

Expand All @@ -27,11 +27,11 @@ After its creation the database contains 13 columns and one line for each atom i
* resName : the name of the residue the atom belongs to
* chaiID : the ID of the chain the atom belongs to
* resSeq : the residue number the atom belongs to
* iCode : Code for insertion of residue
* iCode : Code for insertion of residue
* x : x coordinate of the atom
* y : y coordinate of the atom
* z : z coordinate of the atom
* occ : occupancy
* occ : occupancy
* temp : temperature factor


Expand Down Expand Up @@ -138,19 +138,19 @@ sqldb.close(rmdb=False)

## FeatureClass

The file FeatureClass.py contain a super class that all feature calculations should subclass. So far the super class only contains one method **FatureClass.export_data()** that is used to export the data of the feature to a file. This ensure that we keep the same syntax for all the features. The class has 3 attributes
The file FeatureClass.py contain a super class that all feature calculations should subclass. So far the super class only contains one method **FatureClass.export_data()** that is used to export the data of the feature to a file. This ensure that we keep the same syntax for all the features. The class has 3 attributes


* self.type : "Atomic" or "Residue"
* self.feature_data : dictionary {feature_name : feature_dict}

feature_name is the name of the feature e.g. 'coulomb' or 'vdwaals'

feature_dict is a dictionary. The format of the key depends on the type of feature

residue-based feature
{(chainID, residue_name(3-letter), residue_number) : [values1, values2, ....]}

atomic-based feature
{(chainID, residue_name(3-letter), residue_number, atom_name) : [values1, values2, ....]}

Expand Down Expand Up @@ -191,11 +191,11 @@ The file atomic_feature.py contains a class named atomicFeature that allows comp
* a file containing the vdw parameters
* evantually a patch file for the force field parameters

An example of use is provided in ./example/grid/atomicfeature.py.
An example of use is provided in ./example/grid/atomicfeature.py.

```python
```python
from deeprank.tools import atomicFeature

PDB = 'complex.pdb'
FF = './forcefield/'

Expand Down Expand Up @@ -225,4 +225,4 @@ atfeat.sqldb.close()
```


In this example we compute the pair interactions and the atomic charges of the complex given in the example folder and using the force field parameters also located there. The pair interactions are outputed on the screen. For the charges, the contact atom list is extended to all the residues that contains at least one contact atom.
In this example we compute the pair interactions and the atomic charges of the complex given in the example folder and using the force field parameters also located there. The pair interactions are outputed on the screen. For the charges, the contact atom list is extended to all the residues that contains at least one contact atom.

0 comments on commit e53f60b

Please sign in to comment.