Merge 1c35cb7 into 8ee54ac

DeepRank · Apr 10, 2020 · 95f4cbb · 95f4cbb
2 parents 8ee54ac + 1c35cb7
commit 95f4cbb
Show file tree

Hide file tree

Showing 6 changed files with 104 additions and 51 deletions.
diff --git a/deeprank/features/BSA.py b/deeprank/features/BSA.py
@@ -117,13 +117,9 @@ def get_contact_residue_sasa(self, cutoff=5.5):
             # define the xyz key : (chain,x,y,z)
             chain = {'A': 0, 'B': 1}[res[0]]
 
-            atcenter = 'CB'
-            if res[2] == 'GLY':
-                atcenter = 'CA'
-            xyz = self.sql.get(
-                'x,y,z', resSeq=res[1], chainID=res[0], name=atcenter)[0]
-            # xyz = np.mean(self.sql.get('x,y,z',resSeq=r[1],chainID=r[0]),0)
-            xyzkey = tuple([chain] + xyz)
+            # get the center            
+            _, xyz = self.get_residue_center(self.sql, res=res)
+            xyzkey = tuple([chain] + xyz[0])
 
             # put the data in dict
             self.bsa_data[res] = [bsa]

diff --git a/deeprank/features/FeatureClass.py b/deeprank/features/FeatureClass.py
@@ -1,12 +1,14 @@
 import numpy as np
 
-
 class FeatureClass(object):
 
     def __init__(self, feature_type):
-        """Master class from which all the other feature classes should be
-        derived.
+        """Master class from which all the other feature classes should be derived.
+
+        Arguments:
+            feature_type(str): 'Atomic' or 'Residue'
 
+        Notes:
             Each subclass must compute:
 
             - self.feature_data: dictionary of features in
@@ -28,22 +30,25 @@ def __init__(self, feature_type):
                 {'coulomb': data_dict_clb, 'vdwaals': data_dict_vdw}
                     data_dict_clb = {xyz_info: [values]}
                         xyz_info = (chainNum, x, y, z)
-
-        Args:
-            feature_type(str): 'Atomic' or 'Residue'
         """
+
         self.type = feature_type
         self.feature_data = {}
         self.feature_data_xyz = {}
 
     def export_data_hdf5(self, featgrp):
-        """Export the data in human readable format to HDF5's group.
-
-        - For atomic features, the format of the data must be:
-            {(chainID, resSeq, resName, name): [values]}
-        - For residue features, the format must be:
-            {(chainID, resSeq, resName): [values]}
+        """Export the data in human readable format in an HDF5 file group.
+        
+        Arguments:
+            featgrp {[hdf5_group]} -- The hdf5 group of the feature
+        
+        Notes:
+            - For atomic features, the format of the data must be:
+                {(chainID, resSeq, resName, name): [values]}
+            - For residue features, the format must be:
+                {(chainID, resSeq, resName): [values]}
         """
+
         # loop through the datadict and name
         for name, data in self.feature_data.items():
 
@@ -84,22 +89,14 @@ def export_data_hdf5(self, featgrp):
             else:
                 featgrp.create_dataset(name + '_raw', data=ds)
 
-    ########################################
-    #
-    # export the data in an HDF5 file group
-    # the format of the data is here
-    # PRO : fast when mapping
-    # CON : only usefull for deeprank
-    #
-    ########################################
-
+
     def export_dataxyz_hdf5(self, featgrp):
         """Export the data in xyz-val format in an HDF5 file group.
-
-        For atomic and residue the format of the data must be:
-        {(chainNum(0 or 1), x, y, z): [values]}
+        
+        Arguments:
+            featgrp {[hdf5_group]} -- The hdf5 group of the feature
         """
-
+        
         # loop through the datadict and name
         for name, data in self.feature_data_xyz.items():
 
@@ -112,3 +109,77 @@ def export_dataxyz_hdf5(self, featgrp):
                 old[...] = ds
             else:
                 featgrp.create_dataset(name, data=ds)
+
+    @staticmethod
+    def get_residue_center(sql, centers=['CB','CA','mean'], res=None):
+        """Compute the center of each residue by trying different options.
+
+        Arguments:
+            sql {pdb2sql} -- The pdb2sql instance
+
+        Keyword Arguments:
+            centers {list} -- center types tried in order for each residue:
+                'CB', 'CA' or 'mean' (default: {['CB','CA','mean']})
+            res {list} -- one residue [chainID, resSeq, resName] or a list
+                of them (default: {None}, all residues found in sql)
+
+        Raises:
+            ValueError: unknown center type, missing or ambiguous center
+
+        Returns:
+            tuple -- (list of residues, list of xyz), in matching order
+        """
+
+        # get all residues if None were provided
+        # [chainID, resSeq, resName], duplicates dropped, order kept
+        if res is None:
+            res = [tuple(x) for x in sql.get('chainID,resSeq,resName')]
+            res = sorted(set(res), key=res.index)
+
+        # make sure that we have a list of res
+        # even if only 1 res was provided
+        # res=[chainID, resSeq, resName] -> res=[[chainID, resSeq, resName]]
+        elif not isinstance(res[0], (list, tuple)):
+            res = [res]
+
+        # a single center given as a string must not be split into chars
+        centers = [centers] if isinstance(centers, str) else list(centers)
+
+        xyz = []
+
+        for r in res:
+            for ctr in centers:
+
+                if ctr in ['CB','CA']:
+                    xyz_res = sql.get('x,y,z',
+                                      chainID=r[0],
+                                      resSeq=r[1],
+                                      resName=r[2],
+                                      name=ctr)
+
+                elif ctr == 'mean':
+                    xyz_res = sql.get('x,y,z',
+                                      chainID=r[0],
+                                      resSeq=r[1],
+                                      resName=r[2])
+                    # the mean of an empty selection would be nan
+                    if len(xyz_res) > 0:
+                        xyz_res = [np.mean(xyz_res, axis=0).tolist()]
+
+                else:
+                    raise ValueError('Center %s not recognized' % ctr)
+
+                if len(xyz_res) == 1:
+                    xyz.append(xyz_res[0])
+                    break
+                elif len(xyz_res) > 1:
+                    raise ValueError('Several %s atoms in %s' % (ctr, r))
+            else:
+                # no center matched: skipping r would misalign res and xyz
+                raise ValueError('Center not found for %s' % (r,))
+
+        if len(xyz) == 0:
+            raise ValueError('Center not found')
+
+        return res, xyz
+
diff --git a/deeprank/features/FullPSSM.py b/deeprank/features/FullPSSM.py
@@ -166,12 +166,7 @@ def get_feature_value(self, cutoff=5.5):
         sql = pdb2sql.interface(self.pdb_file)
 
         # set achors for all residues and get their xyz
-        xyz_info = sql.get('chainID,resSeq,resName', name='CB')
-        xyz_info += sql.get('chainID,resSeq,resName', name='CA',
-                            resName='GLY')
-
-        xyz = sql.get('x,y,z', name='CB')
-        xyz += sql.get('x,y,z', name='CA', resName='GLY')
+        xyz_info, xyz = self.get_residue_center(sql)
 
         xyz_dict = {}
         for pos, info in zip(xyz, xyz_info):
@@ -212,6 +207,7 @@ def get_feature_value(self, cutoff=5.5):
                 f"{self.mol_name}: The following interface residues have "
                 f" no pssm value:\n {ctc_res_wo_pssm}"
             )
+
         else:
             ctc_res_with_pssm = ctc_res
 

diff --git a/deeprank/features/ResidueDensity.py b/deeprank/features/ResidueDensity.py
@@ -123,17 +123,10 @@ def extract_features(self):
             # total density in raw format
             self.feature_data['RCD_total'][key] = [res.density['total']]
 
-            # get the type of the center
-            atcenter = 'CB'
-            if key[2] == 'GLY':
-                atcenter = 'CA'
+            # get the center
+            _, xyz = self.get_residue_center(self.sql, res=key)
+            xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz[0])
 
-            # get the xyz of the center atom
-            xyz = self.sql.get(
-                'x,y,z', resSeq=key[1], chainID=key[0], name=atcenter)[0]
-            #xyz = np.mean(self.sql.get('x,y,z',resSeq=key[1],chainID=key[0]),0).tolist()
-
-            xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz)
             self.feature_data_xyz['RCD_total'][xyz_key] = [
                 res.density['total']]
 

diff --git a/deeprank/generate/DataGenerator.py b/deeprank/generate/DataGenerator.py
@@ -251,7 +251,6 @@ def create_database(
 
             # names of the molecule
             mol_name = os.path.splitext(os.path.basename(cplx))[0]
-            mol_name = mol_name.replace('-', '_')
             mol_aug_name_list = []
 
             try:

diff --git a/test/test_generate.py b/test/test_generate.py
@@ -257,8 +257,6 @@ def test_7_realign(self):
         database = DataGenerator(hdf5=copy_name)
         database.realign_complexes(align={'axis':'z'})
 
-
-
 if __name__ == "__main__":
 
     # unittest.main()