introduced get_residue_center

DeepRank · Apr 10, 2020 · 1564d11 · 1564d11
1 parent 79d4c15
commit 1564d11
Show file tree

Hide file tree

Showing 6 changed files with 152 additions and 72 deletions.
diff --git a/deeprank/features/BSA.py b/deeprank/features/BSA.py
@@ -117,19 +117,21 @@ def get_contact_residue_sasa(self, cutoff=5.5):
             # define the xyz key : (chain,x,y,z)
             chain = {'A': 0, 'B': 1}[res[0]]
 
-            atcenter = 'CB'
-            if res[2] == 'GLY':
-                atcenter = 'CA'
+            # atcenter = 'CB'
+            # if res[2] == 'GLY':
+            #     atcenter = 'CA'
 
-            try :
-                xyz = self.sql.get(
-                    'x,y,z', resSeq=res[1], chainID=res[0], name=atcenter)[0]
-            except IndexError :
-                warnings .warn('Atom ', atcenter, ' not found for residue ', key[1], \
-                               '. Use residue center as feature center')            
-                xyz = np.mean(self.sql.get('x,y,z',resSeq=r[1],chainID=r[0]),0)
-
-            xyzkey = tuple([chain] + xyz)
+            # try :
+            #     xyz = self.sql.get(
+            #         'x,y,z', resSeq=res[1], chainID=res[0], name=atcenter)[0]
+            # except IndexError :
+            #     warnings.warn('Atom ', atcenter, ' not found for residue ', key[1], \
+            #                    '. Use residue center as feature center')            
+            #     xyz = np.mean(self.sql.get('x,y,z',resSeq=res[1],chainID=res[0]),0)
+            # xyzkey = tuple([chain] + xyz)
+
+            _, xyz = self.get_residue_center(self.sql, res=res)
+            xyzkey = tuple([chain] + xyz[0])
 
             # put the data in dict
             self.bsa_data[res] = [bsa]

diff --git a/deeprank/features/FeatureClass.py b/deeprank/features/FeatureClass.py
@@ -1,12 +1,15 @@
 import numpy as np
-
+import warnings
 
 class FeatureClass(object):
 
     def __init__(self, feature_type):
-        """Master class from which all the other feature classes should be
-        derived.
+        """Master class from which all the other feature classes should be derived.
+
+        Arguments
+            feature_type(str): 'Atomic' or 'Residue'
 
+        Notes:
             Each subclass must compute:
 
             - self.feature_data: dictionary of features in
@@ -28,22 +31,25 @@ def __init__(self, feature_type):
                 {'coulomb': data_dict_clb, 'vdwaals': data_dict_vdw}
                     data_dict_clb = {xyz_info: [values]}
                         xyz_info = (chainNum, x, y, z)
-
-        Args:
-            feature_type(str): 'Atomic' or 'Residue'
         """
+
         self.type = feature_type
         self.feature_data = {}
         self.feature_data_xyz = {}
 
     def export_data_hdf5(self, featgrp):
-        """Export the data in human readable format to HDF5's group.
-
-        - For atomic features, the format of the data must be:
-            {(chainID, resSeq, resName, name): [values]}
-        - For residue features, the format must be:
-            {(chainID, resSeq, resName): [values]}
+        """Export the data in xyz-val format in an HDF5 file group.
+        
+        Arguments:
+            featgrp {[hdf5_group]} -- The hdf5 group of the feature
+        
+        Notes:
+            - For atomic features, the format of the data must be:
+                {(chainID, resSeq, resName, name): [values]}
+            - For residue features, the format must be:
+                {(chainID, resSeq, resName): [values]}
         """
+
         # loop through the datadict and name
         for name, data in self.feature_data.items():
 
@@ -84,22 +90,14 @@ def export_data_hdf5(self, featgrp):
             else:
                 featgrp.create_dataset(name + '_raw', data=ds)
 
-    ########################################
-    #
-    # export the data in an HDF5 file group
-    # the format of the data is here
-    # PRO : fast when mapping
-    # CON : only usefull for deeprank
-    #
-    ########################################
-
+
     def export_dataxyz_hdf5(self, featgrp):
         """Export the data in xyz-val format in an HDF5 file group.
-
-        For atomic and residue the format of the data must be:
-        {(chainNum(0 or 1), x, y, z): [values]}
+        
+        Arguments:
+            featgrp {[hdf5_group]} -- The hdf5 group of the feature
         """
-
+        
         # loop through the datadict and name
         for name, data in self.feature_data_xyz.items():
 
@@ -112,3 +110,77 @@ def export_dataxyz_hdf5(self, featgrp):
                 old[...] = ds
             else:
                 featgrp.create_dataset(name, data=ds)
+
+    @staticmethod
+    def get_residue_center(sql, centers=['CB','CA','mean'], res=None):
+        """Compute the center of each residue, trying several options in order.
+
+        Arguments:
+            sql {pdb2sql} -- The pdb2sql instance
+
+        Keyword Arguments:
+            centers {list} -- center options tried in order for each residue:
+                'CB', 'CA' or 'mean' (default: {['CB','CA','mean']})
+            res {list} -- one residue (chainID, resSeq, resName) or a list of
+                them; all residues found in sql when None (default: {None})
+
+        Raises:
+            ValueError: unknown center option, several atoms matching one
+                option, or no center found for a residue
+
+        Returns:
+            tuple -- (list of residues, list of [x, y, z]) in matching order
+        """
+        if res is None:
+            # every residue once, in order of first appearance
+            res = [tuple(x) for x in sql.get('chainID,resSeq,resName')]
+            res = list(dict.fromkeys(res))
+
+        # a single residue starts with its chainID, not with a nested sequence:
+        # res=(chainID, resSeq, resName) -> res=[(chainID, resSeq, resName)]
+        elif not isinstance(res[0], (list, tuple)):
+            res = [res]
+
+        # a bare string is one option; list('CB') would split it into letters
+        if isinstance(centers, str):
+            centers = [centers]
+
+        xyz = []
+
+        for r in res:
+            for ctr in centers:
+                if ctr in ['CB','CA']:
+                    xyz_res = sql.get('x,y,z',
+                                      chainID=r[0],
+                                      resSeq=r[1],
+                                      resName=r[2],
+                                      name=ctr)
+
+                elif ctr == 'mean':
+                    # average position of all the atoms of the residue
+                    xyz_res = [np.mean(sql.get('x,y,z',
+                                       chainID=r[0],
+                                       resSeq=r[1],
+                                       resName=r[2]),axis=0).tolist()]
+
+                else:
+                    raise ValueError('Center %s not recognized' % (ctr,))
+
+                # option not available for this residue: try the next one
+                if len(xyz_res) == 0:
+                    continue
+                if len(xyz_res) > 1:
+                    raise ValueError('Several %s atoms in residue %s' % (ctr, r))
+
+                xyz.append(xyz_res[0])
+                break
+
+            else:
+                # no option worked: fail rather than misalign res and xyz
+                raise ValueError('Center not found for residue %s' % (r,))
+
+        if len(xyz) == 0:
+            raise ValueError('Center not found')
+
+        return res, xyz
+
diff --git a/deeprank/features/FullPSSM.py b/deeprank/features/FullPSSM.py
@@ -166,12 +166,13 @@ def get_feature_value(self, cutoff=5.5):
         sql = pdb2sql.interface(self.pdb_file)
 
         # set achors for all residues and get their xyz
-        xyz_info = sql.get('chainID,resSeq,resName', name='CB')
-        xyz_info += sql.get('chainID,resSeq,resName', name='CA',
-                            resName='GLY')
+        # xyz_info = sql.get('chainID,resSeq,resName', name='CB')
+        # xyz_info += sql.get('chainID,resSeq,resName', name='CA',
+        #                     resName='GLY')
+        # xyz = sql.get('x,y,z', name='CB')
+        # xyz += sql.get('x,y,z', name='CA', resName='GLY')
 
-        xyz = sql.get('x,y,z', name='CB')
-        xyz += sql.get('x,y,z', name='CA', resName='GLY')
+        xyz_info, xyz = self.get_residue_center(sql)
 
         xyz_dict = {}
         for pos, info in zip(xyz, xyz_info):
@@ -212,14 +213,16 @@ def get_feature_value(self, cutoff=5.5):
                 f"{self.mol_name}: The following interface residues have "
                 f" no pssm value:\n {ctc_res_wo_pssm}"
             )
-        elif len(pssm_res_set.difference(ctc_res_set)) > 0:
-            # can happen if CA/CB is missing in the res
-            pssm_res_wo_ctc = pssm_res_set.difference(ctc_res_set)
-            ctc_res_with_pssm = pssm_res_set.intersection(ctc_res_set)
-            warnings.warn(
-                f"{self.mol_name}: The following interface residues have "
-                f" a CA or CB missing :\n {pssm_res_wo_ctc}"
-            )
+
+        # elif len(pssm_res_set.difference(ctc_res_set)) > 0:
+        #     # can happen if CA/CB is missing in the res
+        #     pssm_res_wo_ctc = pssm_res_set.difference(ctc_res_set)
+        #     ctc_res_with_pssm = pssm_res_set.intersection(ctc_res_set)
+        #     warnings.warn(
+        #         f"{self.mol_name}: The following interface residues have "
+        #         f" a CA or CB missing :\n {pssm_res_wo_ctc}"
+        #     )
+
         else:
             ctc_res_with_pssm = ctc_res
 

diff --git a/deeprank/features/ResidueDensity.py b/deeprank/features/ResidueDensity.py
@@ -123,21 +123,24 @@ def extract_features(self):
             # total density in raw format
             self.feature_data['RCD_total'][key] = [res.density['total']]
 
-            # get the type of the center
-            atcenter = 'CB'
-            if key[2] == 'GLY':
-                atcenter = 'CA'
-
-            # get the xyz of the center atom
-            try:
-                xyz = self.sql.get(
-                    'x,y,z', resSeq=key[1], chainID=key[0], name=atcenter)[0]
-            except IndexError :
-                warnings .warn('Atom ', atcenter, ' not found for residue ', key[1], \
-                               '. Use residue center as feature center')
-                xyz = np.mean(self.sql.get('x,y,z',resSeq=key[1],chainID=key[0]),0).tolist()
-
-            xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz)
+            # # get the type of the center
+            # atcenter = 'CB'
+            # if key[2] == 'GLY':
+            #     atcenter = 'CA'
+
+            # # get the xyz of the center atom
+            # try:
+            #     xyz = self.sql.get(
+            #         'x,y,z', resSeq=key[1], chainID=key[0], name=atcenter)[0]
+            # except IndexError :
+            #     warnings.warn('Atom ', atcenter, ' not found for residue ', key[1], \
+            #                    '. Use residue center as feature center')
+            #     xyz = np.mean(self.sql.get('x,y,z',resSeq=key[1],chainID=key[0]),0).tolist()
+            # xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz)
+
+            _, xyz = self.get_residue_center(self.sql, res=key)
+            xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz[0])
+
             self.feature_data_xyz['RCD_total'][xyz_key] = [
                 res.density['total']]
 

diff --git a/deeprank/generate/DataGenerator.py b/deeprank/generate/DataGenerator.py
@@ -251,7 +251,7 @@ def create_database(
 
             # names of the molecule
             mol_name = os.path.splitext(os.path.basename(cplx))[0]
-            mol_name = mol_name.replace('-', '_')
+            #mol_name = mol_name.replace('-', '_')
             mol_aug_name_list = []
 
             try:

diff --git a/test/test_generate.py b/test/test_generate.py
@@ -262,9 +262,9 @@ def test_7_realign(self):
     # unittest.main()
     inst = TestGenerateData()
     inst.test_1_generate()
-    inst.test_1_generate_mapfly()
-    inst.test_3_add_unique_target()
-    inst.test_4_add_feature()
-    inst.test_5_align()
-    inst.test_6_align_interface()
-    inst.test_7_realign()
+    # inst.test_1_generate_mapfly()
+    # inst.test_3_add_unique_target()
+    # inst.test_4_add_feature()
+    # inst.test_5_align()
+    # inst.test_6_align_interface()
+    # inst.test_7_realign()