Merge dd4c69e into a4d203f

DeepRank · Oct 27, 2020 · 69bfe18 · 69bfe18
2 parents a4d203f + dd4c69e
commit 69bfe18
Show file tree

Hide file tree

Showing 28 changed files with 6,762 additions and 6,418 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,9 +1,13 @@
 Byte-compiled / optimized
 *__pycache__*
 
+
+
 # distribution
 deeprank.egg-info
 database*
+dist
+build 
 
 # specific architure files
 deeprank/learn/arch_*

diff --git a/README.md b/README.md
@@ -2,10 +2,13 @@
 
 **Deep Learning for ranking protein-protein conformations**
 
+[![PyPI](https://img.shields.io/pypi/v/deeprank)](https://pypi.org/project/deeprank/)
+[![Documentation Status](https://readthedocs.org/projects/deeprank/badge/?version=latest)](http://deeprank.readthedocs.io/?badge=latest)
+[![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3735042.svg)](https://doi.org/10.5281/zenodo.3735042)
 [![Build Status](https://secure.travis-ci.org/DeepRank/deeprank.svg?branch=master)](https://travis-ci.org/DeepRank/deeprank)
 [![Codacy Badge](https://api.codacy.com/project/badge/Grade/9252e59633cf46a7ada0c3c614c175ea)](https://www.codacy.com/app/NicoRenaud/deeprank?utm_source=github.com&utm_medium=referral&utm_content=DeepRank/deeprank&utm_campaign=Badge_Grade)
 [![Coverage Status](https://coveralls.io/repos/github/DeepRank/deeprank/badge.svg?branch=master)](https://coveralls.io/github/DeepRank/deeprank?branch=master)
-[![Documentation Status](https://readthedocs.org/projects/deeprank/badge/?version=latest)](http://deeprank.readthedocs.io/?badge=latest) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.3735042.svg)](https://doi.org/10.5281/zenodo.3735042)
+
 
 The documentation of the module can be found on readthedocs :
 <http://deeprank.readthedocs.io/en/latest/>

diff --git a/deeprank/features/BSA.py b/deeprank/features/BSA.py
@@ -117,13 +117,9 @@ def get_contact_residue_sasa(self, cutoff=5.5):
             # define the xyz key : (chain,x,y,z)
             chain = {'A': 0, 'B': 1}[res[0]]
 
-            atcenter = 'CB'
-            if res[2] == 'GLY':
-                atcenter = 'CA'
-            xyz = self.sql.get(
-                'x,y,z', resSeq=res[1], chainID=res[0], name=atcenter)[0]
-            # xyz = np.mean(self.sql.get('x,y,z',resSeq=r[1],chainID=r[0]),0)
-            xyzkey = tuple([chain] + xyz)
+            # get the residue center (CB, else CA, else mean of all atoms)
+            _, xyz = self.get_residue_center(self.sql, res=res)
+            xyzkey = tuple([chain] + xyz[0])
 
             # put the data in dict
             self.bsa_data[res] = [bsa]

diff --git a/deeprank/features/FeatureClass.py b/deeprank/features/FeatureClass.py
@@ -1,12 +1,14 @@
 import numpy as np
 
-
 class FeatureClass(object):
 
     def __init__(self, feature_type):
-        """Master class from which all the other feature classes should be
-        derived.
+        """Master class from which all the other feature classes should be derived.
+
+        Arguments:
+            feature_type(str): 'Atomic' or 'Residue'
 
+        Notes:
             Each subclass must compute:
 
             - self.feature_data: dictionary of features in
@@ -28,22 +30,25 @@ def __init__(self, feature_type):
                 {'coulomb': data_dict_clb, 'vdwaals': data_dict_vdw}
                     data_dict_clb = {xyz_info: [values]}
                         xyz_info = (chainNum, x, y, z)
-
-        Args:
-            feature_type(str): 'Atomic' or 'Residue'
         """
+
         self.type = feature_type
         self.feature_data = {}
         self.feature_data_xyz = {}
 
     def export_data_hdf5(self, featgrp):
-        """Export the data in human readable format to HDF5's group.
-
-        - For atomic features, the format of the data must be:
-            {(chainID, resSeq, resName, name): [values]}
-        - For residue features, the format must be:
-            {(chainID, resSeq, resName): [values]}
+        """Export the data in human readable format in an HDF5 file group.
+        
+        Arguments:
+            featgrp {[hdf5_group]} -- The hdf5 group of the feature
+        
+        Notes:
+            - For atomic features, the format of the data must be:
+                {(chainID, resSeq, resName, name): [values]}
+            - For residue features, the format must be:
+                {(chainID, resSeq, resName): [values]}
         """
+
         # loop through the datadict and name
         for name, data in self.feature_data.items():
 
@@ -84,22 +89,14 @@ def export_data_hdf5(self, featgrp):
             else:
                 featgrp.create_dataset(name + '_raw', data=ds)
 
-    ########################################
-    #
-    # export the data in an HDF5 file group
-    # the format of the data is here
-    # PRO : fast when mapping
-    # CON : only usefull for deeprank
-    #
-    ########################################
-
+
     def export_dataxyz_hdf5(self, featgrp):
         """Export the data in xyz-val format in an HDF5 file group.
-
-        For atomic and residue the format of the data must be:
-        {(chainNum(0 or 1), x, y, z): [values]}
+        
+        Arguments:
+            featgrp {[hdf5_group]} -- The hdf5 group of the feature
         """
-
+        
         # loop through the datadict and name
         for name, data in self.feature_data_xyz.items():
 
@@ -112,3 +109,77 @@ def export_dataxyz_hdf5(self, featgrp):
                 old[...] = ds
             else:
                 featgrp.create_dataset(name, data=ds)
+
+    @staticmethod
+    def get_residue_center(sql, centers=('CB', 'CA', 'mean'), res=None):
+        """Compute one center per residue, trying each option in order.
+
+        For every residue the entries of ``centers`` are tried in order and
+        the first one that yields exactly one position is kept.
+
+        Arguments:
+            sql {pdb2sql} -- The pdb2sql instance
+
+        Keyword Arguments:
+            centers {list} -- 'CB' / 'CA' (position of that atom) and/or
+                'mean' (mean position of all atoms of the residue); a
+                single string is accepted (default: {('CB', 'CA', 'mean')})
+            res {list} -- residues to consider, [[chainID, resSeq, resName]]
+                or one residue; all residues of sql if None
+
+        Raises:
+            ValueError: a center name is not recognized, several atoms match
+                a center, or no center is found for a residue
+
+        Returns:
+            tuple -- (list of residues, list of [x, y, z]), index-aligned
+        """
+        if res is None:
+            # one row per atom (chainID, resSeq, resName): drop duplicates,
+            # dict.fromkeys keeps the first-seen order
+            rows = sql.get('chainID,resSeq,resName')
+            res = list(dict.fromkeys(tuple(x) for x in rows))
+
+        # a single residue starts with its chainID (a string) whereas a
+        # list of residues starts with a list or a tuple
+        elif len(res) > 0 and not isinstance(res[0], (list, tuple)):
+            res = [res]
+
+        # list('CB') would give ['C', 'B'], so wrap a bare string instead
+        centers = [centers] if isinstance(centers, str) else list(centers)
+
+        xyz = []
+
+        for r in res:
+            sel = dict(chainID=r[0], resSeq=r[1], resName=r[2])
+
+            for ctr in centers:
+                if ctr in ('CB', 'CA'):
+                    xyz_res = sql.get('x,y,z', **sel, name=ctr)
+
+                elif ctr == 'mean':
+                    atoms = sql.get('x,y,z', **sel)
+                    # no atoms: stay empty instead of averaging to NaN
+                    xyz_res = []
+                    if len(atoms) > 0:
+                        xyz_res = [np.mean(atoms, axis=0).tolist()]
+
+                else:
+                    raise ValueError('Center %s not recognized' % ctr)
+
+                if len(xyz_res) == 1:
+                    xyz.append(xyz_res[0])
+                    break
+                if len(xyz_res) > 1:
+                    raise ValueError('Residue center not found: %d %s atoms '
+                                     'for %s' % (len(xyz_res), ctr, tuple(r)))
+
+            else:
+                # skipping the residue would misalign res and xyz
+                raise ValueError('No center found for %s' % (tuple(r),))
+
+        if len(xyz) == 0:
+            raise ValueError('Center not found')
+
+        return res, xyz
+
diff --git a/deeprank/features/FullPSSM.py b/deeprank/features/FullPSSM.py
@@ -166,12 +166,7 @@ def get_feature_value(self, cutoff=5.5):
         sql = pdb2sql.interface(self.pdb_file)
 
         # set achors for all residues and get their xyz
-        xyz_info = sql.get('chainID,resSeq,resName', name='CB')
-        xyz_info += sql.get('chainID,resSeq,resName', name='CA',
-                            resName='GLY')
-
-        xyz = sql.get('x,y,z', name='CB')
-        xyz += sql.get('x,y,z', name='CA', resName='GLY')
+        xyz_info, xyz = self.get_residue_center(sql)
 
         xyz_dict = {}
         for pos, info in zip(xyz, xyz_info):
@@ -212,6 +207,7 @@ def get_feature_value(self, cutoff=5.5):
                 f"{self.mol_name}: The following interface residues have "
                 f" no pssm value:\n {ctc_res_wo_pssm}"
             )
+
         else:
             ctc_res_with_pssm = ctc_res