DeepRank · CunliangGeng · Oct 23, 2019 · Sep 20, 2019 · Oct 1, 2019 · Oct 1, 2019
diff --git a/deeprank/features/AtomicFeature.py b/deeprank/features/AtomicFeature.py
@@ -630,6 +630,8 @@ def evaluate_pair_interaction(self, print_interactions=False,
 
             # store in matrix form so that
             # we don't have to recalculate for B
+            # NOTE: this assumes the chain IDs are ordered A, B, ...;
+            # otherwise rowID will not match the matrix index
             indb_matrix = [i - natA for i in indsB]
             matrix_elec[iA, indb_matrix] = ec
             matrix_vdw[iA, indb_matrix] = evdw

diff --git a/deeprank/features/FeatureClass.py b/deeprank/features/FeatureClass.py
@@ -71,7 +71,11 @@ def export_data_hdf5(self, featgrp):
                 # append
                 ds.append(feat)
 
-            ds = np.array(ds).astype('|S' + str(len(ds[0])))
+            if ds:
+                ds = np.array(ds).astype('|S' + str(len(ds[0])))
+            else:
+                ds = np.array(ds)
+
 
             # create the dataset
             if name + '_raw' in featgrp:

diff --git a/deeprank/features/FullPSSM.py b/deeprank/features/FullPSSM.py
@@ -48,16 +48,17 @@ def __init__(self, mol_name=None, pdb_file=None, pssm_path=None,
         self.mol_name = mol_name
         self.pdb_file = pdb_file
         self.pssm_path = pssm_path
-        self.ref_mol_name = self.get_ref_mol_name(mol_name)
         self.pssm_format = pssm_format
         self.out_type = out_type.lower()
 
         if isinstance(pdb_file, str) and mol_name is None:
-            self.mol_name = os.path.splitext(pdb_file)[0]
+            self.mol_name = os.path.basename(pdb_file).split('.')[0]
+
+        self.ref_mol_name = self.get_ref_mol_name(self.mol_name)
 
         if self.out_type == 'pssmic' and not self.pssm_format == 'new':
             raise ValueError(f"You must provide 'new' format PSSM files"
-                             f" to generate PSSM IC features.")
+                             f" to generate PSSM IC features for {self.mol_name}")
 
         if self.out_type == 'pssmvalue':
             # the residue order in res_names must be consistent with
@@ -74,6 +75,7 @@ def __init__(self, mol_name=None, pdb_file=None, pssm_path=None,
             self.feature_data[name] = {}
             self.feature_data_xyz[name] = {}
 
+
     @staticmethod
     def get_ref_mol_name(mol_name):
         """Get the bared mol name."""
@@ -83,7 +85,10 @@ def read_PSSM_data(self):
         """Read the PSSM data into a dictionary."""
 
         names = os.listdir(self.pssm_path)
-        fnames = list(filter(lambda x: self.ref_mol_name in x, names))
+        fnames = list(filter(lambda x: self.mol_name in x, names))
+        # if no decoy PSSM files exist, fall back to the reference PSSM files
+        if not fnames:
+            fnames = list(filter(lambda x: self.ref_mol_name in x, names))
         num_pssm_files = len(fnames)
 
         if num_pssm_files == 0:
@@ -113,7 +118,7 @@ def read_PSSM_data(self):
                                 for r in self.pssm_res_id]
             self.pssm_data = np.array(raw_data)[:, 3:].astype(np.float)
 
-        # new format with 2 files (each chain has one file)
+        # new format with ≥2 files (each chain has one file)
         # and aligned mapping and IC (i.e. the iScore format)
         elif self.pssm_format == 'new':
 
@@ -182,28 +187,29 @@ def get_feature_value(self, cutoff=5.5):
         total_res = len(ctc_res)
         if total_res == 0:
             raise ValueError(
-                f"No interface residue found with the cutoff {cutoff}Å."
-                f" Failed to calculate the features of FullPSSM/PSSM_IC")
+                f"{self.mol_name}: No interface residue found with the "
+                f"cutoff {cutoff}Å."
+                f" Failed to calculate the features of FullPSSM/PSSM_IC.")
         elif total_res < 5:  # this is an empirical value
             warnings.warn(
-                f"Only {total_res} interface residues found with "
-                f"cutoff {cutoff}Å. Be careful with using the features "
-                f" FullPSSM/PSSM_IC")
+                f"{self.mol_name}: Only {total_res} interface residues found"
+                f" with cutoff {cutoff}Å. Be careful with"
+                f" using the features FullPSSM/PSSM_IC")
 
         # check if interface residues have pssm values
         ctc_res_set = set(ctc_res)
         pssm_res_set = set(self.pssm.keys())
         if len(ctc_res_set.intersection(pssm_res_set)) == 0:
             raise ValueError(
-                f"All interface residues have no pssm values."
-                f"Check residue chainID/ID/name consistency "
+                f"{self.mol_name}: All interface residues have no pssm values."
+                f" Check residue chainID/ID/name consistency "
                 f"between PDB and PSSM files"
             )
         elif len(ctc_res_set.difference(pssm_res_set)) > 0:
             ctc_res_wo_pssm = ctc_res_set.difference(pssm_res_set)
             ctc_res_with_pssm = ctc_res_set - ctc_res_wo_pssm
             warnings.warn(
-                f"The following interface residues have "
+                f"{self.mol_name}: The following interface residues have "
                 f" no pssm value:\n {ctc_res_wo_pssm}"
             )
         else:
@@ -267,12 +273,13 @@ def __compute_feature__(pdb_data, featgrp, featgrp_raw, out_type='pssmvalue'):
     t0 = time()
     base_path = os.path.dirname(os.path.dirname(os.path.dirname(
         os.path.realpath(__file__))))
-    pdb_file = os.path.join(base_path, "test/1AK4/native/1AK4.pdb")
+    # pdb_file = os.path.join(base_path, "test/1AK4/native/1AK4.pdb")
+    pdb_file = os.path.join(base_path, "test/1AK4/decoys/1AK4_cm-itw_238w.pdb")
     path = os.path.join(base_path, "test/1AK4/pssm_new")
 
     # pssm = FullPSSM(mol_name='1AK4', pdb_file=pdb_file, pssm_path=path,
     #                 pssm_format='new', out_type='pssmic')
-    pssm = FullPSSM(mol_name='1AK4', pdb_file=pdb_file, pssm_path=path,
+    pssm = FullPSSM(pdb_file=pdb_file, pssm_path=path,
                     pssm_format='new', out_type='pssmvalue')
 
     # get the pssm smoothed sum score

diff --git a/deeprank/learn/NeuralNet.py b/deeprank/learn/NeuralNet.py
@@ -639,12 +639,18 @@ def _train(self, index_train, index_valid, index_test,
             logger.info(f'\n: epoch {epoch:03d} / {nepoch:03d} {"-"*45}')
             t0 = time.time()
 
+            # train the model
+            logger.info(f"\n\t=> train the model\n")
+            train_loss, self.data['train'] = self._epoch(
+                train_loader, train_model=True)
+            self.losses['train'].append(train_loss)
+            if self.save_classmetrics:
+                for i in self.metricnames:
+                    self.classmetrics[i]['train'].append(self.data['train'][i])
+
             # validate the model
             if _valid_:
-
-                sys.stdout.flush()
                 logger.info(f"\n\t=> validate the model\n")
-
                 valid_loss, self.data['valid'] = self._epoch(
                     valid_loader, train_model=False)
                 self.losses['valid'].append(valid_loss)
@@ -655,9 +661,7 @@ def _train(self, index_train, index_valid, index_test,
 
             # test the model
             if _test_:
-                sys.stdout.flush()
                 logger.info(f"\n\t=> test the model\n")
-
                 test_loss, self.data['test'] = self._epoch(
                     test_loader, train_model=False)
                 self.losses['test'].append(test_loss)
@@ -666,16 +670,6 @@ def _train(self, index_train, index_valid, index_test,
                         self.classmetrics[i]['test'].append(
                             self.data['test'][i])
 
-            # train the model
-            sys.stdout.flush()
-            logger.info(f"\n\t=> train the model\n")
-            train_loss, self.data['train'] = self._epoch(
-                train_loader, train_model=True)
-            self.losses['train'].append(train_loss)
-            if self.save_classmetrics:
-                for i in self.metricnames:
-                    self.classmetrics[i]['train'].append(self.data['train'][i])
-
             # talk a bit about losse
             logger.info(f'\n  train loss       : {train_loss:1.3e}')
             if _valid_:

diff --git a/deeprank/tools/pdb2sql.py b/deeprank/tools/pdb2sql.py
@@ -655,7 +655,7 @@ def get_contact_atoms(self,
 
         # if no atoms were found
         if len(index_contact_1) == 0:
-            raise ValueError(f"No contact atoms found with cutoff {cutoff}Å")
+            warnings.warn(f"No contact atoms found with cutoff {cutoff}Å")
 
         # extend the list to entire residue
         if extend_to_residue: