This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Merge aebf829 into ff10817
CunliangGeng committed Nov 25, 2019
2 parents ff10817 + aebf829 commit b7a9495
Showing 24 changed files with 158 additions and 3,082 deletions.
5 changes: 2 additions & 3 deletions .gitignore
@@ -21,9 +21,8 @@ example/*.hdf5
example/*.pdb

# some test file
-test/out_2d
-test/out_3d
-test/out_3d_class
+test/out_2d*
+test/out_3d*
test/out_test
test/*.pckl
test/*.hdf5
2 changes: 1 addition & 1 deletion .travis.yml
@@ -19,7 +19,7 @@ before_install:
# pytest
- conda install -c anaconda pytest
- conda install -c conda-forge pytest-cov
-  - conda install python=3.6
+  - conda install python=3.7

# codacy-coverage
- pip install -q --upgrade pip
54 changes: 24 additions & 30 deletions README.md
@@ -3,35 +3,35 @@
**Deep Learning for ranking protein-protein conformations**

[![Build Status](https://secure.travis-ci.org/DeepRank/deeprank.svg?branch=master)](https://travis-ci.org/DeepRank/deeprank)
[![Codacy Badge](https://api.codacy.com/project/badge/Grade/9252e59633cf46a7ada0c3c614c175ea)](https://www.codacy.com/app/NicoRenaud/deeprank?utm_source=github.com&utm_medium=referral&utm_content=DeepRank/deeprank&utm_campaign=Badge_Grade)
[![Documentation Status](https://readthedocs.org/projects/deeprank/badge/?version=latest)](http://deeprank.readthedocs.io/?badge=latest)
[![Coverage Status](https://coveralls.io/repos/github/DeepRank/deeprank/badge.svg?branch=master)](https://coveralls.io/github/DeepRank/deeprank?branch=master)

The documentation of the module can be found on readthedocs:
-http://deeprank.readthedocs.io/en/latest/
+<http://deeprank.readthedocs.io/en/latest/>

![alt-text](./pics/deeprank.png)

## 1 . Installation

Minimal information to install the module:

-* clone the repository `git clone https://github.com/DeepRank/deeprank.git`
-* go there `cd deeprank`
-* install the module `pip install -e ./`
-* go int the test dir `cd test`
-* run the test suite `pytest`
+- clone the repository `git clone https://github.com/DeepRank/deeprank.git`
+- go there `cd deeprank`
+- install the module `pip install -e ./`
+- go into the test dir `cd test`
+- run the test suite `pytest`
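
The same installation steps as a single copy-paste block:

```
git clone https://github.com/DeepRank/deeprank.git
cd deeprank
pip install -e ./
cd test
pytest
```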

## 2 . Tutorial

-We give here the tutorial like introduction to the DeepRank machinery. More informatoin can be found in the documentation http://deeprank.readthedocs.io/en/latest/. We quickly illsutrate here the two main steps of Deeprank :
-* the generation of the data
-* running deep leaning experiments.
+We give here a tutorial-like introduction to the DeepRank machinery. More information can be found in the documentation <http://deeprank.readthedocs.io/en/latest/>. We quickly illustrate here the two main steps of DeepRank:
+
+- the generation of the data
+- running deep learning experiments.

### A . Generate the data set (using MPI)

Generating the data only requires the PDB files of the decoys and their natives, plus the PSSM files if needed. All the features/targets and the features mapped onto grid points will be automatically calculated and stored in an HDF5 file.

```python
from deeprank.generate import *
```

@@ -79,39 +79,36 @@ grid_info = {
This script can be executed using, for example, 4 MPI processes with the command:

```
NP=4
mpiexec -n $NP python generate.py
```


-In the first part of the script we define the path where to find the PDBs of the decoys and natives that we want to have in the dataset. All the .pdb files present in *pdb_source* will be used in the dataset. We need to specify where to find the native conformations to be able to compute RMSD and the dockQ score. For each pdb file detected in *pdb_source*, the code will try to find a native conformation in *pdb_native*.
+In the first part of the script we define the paths where to find the PDBs of the decoys and natives that we want in the dataset. All the .pdb files present in _pdb_source_ will be used in the dataset. We need to specify where to find the native conformations to be able to compute the RMSD and the DockQ score. For each pdb file detected in _pdb_source_, the code will try to find a native conformation in _pdb_native_.

We then initialize the `DataGenerator` object. This object (defined in `deeprank/generate/DataGenerator.py`) needs a few input parameters, illustrated in the sketch after this list:

-* pdb_source : where to find the pdb to include in the dataset
-* pdb_native : where to find the corresponding native conformations
-* compute_targets : list of modules used to compute the targets
-* compute_features : list of modules used to compute the features
-* hdf5 : Name of the HDF5 file to store the data set
+- pdb_source : where to find the pdbs to include in the dataset
+- pdb_native : where to find the corresponding native conformations
+- compute_targets : list of modules used to compute the targets
+- compute_features : list of modules used to compute the features
+- hdf5 : name of the HDF5 file to store the data set
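
A minimal sketch of this initialization; the paths and the exact feature/target module names are illustrative placeholders, not part of this commit:

```python
from deeprank.generate import DataGenerator

# Illustrative paths and module lists; adapt them to your own data.
database = DataGenerator(
    pdb_source=['./decoys/'],                    # pdbs to include in the dataset
    pdb_native=['./natives/'],                   # corresponding native conformations
    compute_targets=['deeprank.targets.dockQ'],  # modules computing the targets
    compute_features=['deeprank.features.AtomicFeature',
                      'deeprank.features.BSA'],  # modules computing the features
    hdf5='dataset.hdf5')                         # output HDF5 data set
```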

We then create the database with the command `database.create_database()`. This function automatically creates an HDF5 file where each pdb has its own group. In each group we can find the pdb of the complex and its native form, the calculated features and the calculated targets. We can now map the features to a grid. This is done via the command `database.map_features()`, which requires a dictionary as input containing the instructions to map the data (see the sketch after this list):

-* number_of_points: the number of points in each direction
-* resolution : the resolution in Angs
-* atomic_densities : {'atom_name' : vvdw_radius} the atomic densities required
+- number_of_points : the number of points in each direction
+- resolution : the resolution in Angstrom
+- atomic_densities : {'atom_name' : vdw_radius} the atomic densities required
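
A plausible `grid_info` dictionary following this scheme; the values are illustrative, not prescribed by this commit:

```python
grid_info = {
    'number_of_points': [30, 30, 30],  # grid points along x, y and z
    'resolution': [1., 1., 1.],        # grid spacing in Angstrom
    # atom name -> van der Waals radius used for the atomic densities
    'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
}
database.map_features(grid_info)
```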

The atomic densities are mapped following the [protein-ligand paper](https://arxiv.org/abs/1612.02751). The other features are mapped to the grid points using a Gaussian function, as sketched below (other modes are possible but currently hard-coded).
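
As an illustration of the Gaussian mode, a feature value carried by one atom could be spread over the grid roughly as follows. This is a standalone sketch, not DeepRank's internal code, and the kernel width `sigma` is an assumption:

```python
import numpy as np

def gaussian_map(grid_xyz, atom_xyz, value, sigma=1.0):
    """Spread one atomic feature value onto grid points with a Gaussian kernel."""
    dist2 = np.sum((grid_xyz - atom_xyz) ** 2, axis=1)
    return value * np.exp(-dist2 / (2 * sigma ** 2))

# Example: three grid points along x, one atom at the origin.
grid = np.array([[0., 0., 0.], [1., 0., 0.], [2., 0., 0.]])
print(gaussian_map(grid, np.array([0., 0., 0.]), value=0.5))
```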

#### Visualization of the mapped features

-To explore the HDf5 file and vizualize the features you can use the dedicated browser https://github.com/DeepRank/DeepXplorer. This tool saloows to dig through the hdf5 file and to directly generate the files required to vizualie the features in VMD or PyMol. An iPython comsole is also embedded to analyze the feature values, plot them etc ....
+To explore the HDF5 file and visualize the features you can use the dedicated browser <https://github.com/DeepRank/DeepXplorer>. This tool allows you to dig through the hdf5 file and to directly generate the files required to visualize the features in VMD or PyMOL. An iPython console is also embedded to analyze the feature values, plot them, etc.

### B . Deep Learning

The HDF5 files generated above can be used as input for deep learning experiments. You can take a look at the file `test/test_learn.py` for some examples. We give here a quick overview of the process.


```python
from deeprank.learn import *
from deeprank.learn.model3d import cnn as cnn3d
```

@@ -145,9 +145,6 @@ model.optimizer = optim.SGD(model.net.parameters(),

```python
model.train(nepoch = 50,divide_trainset=0.8, train_batch_size = 5,num_workers=0)
```

In the first part of the script we create a Torch dataset from the HDF5 file. We can specify one or several HDF5 files and even select only some conformations using the `dict_filter` argument. Other options of `DataSet` can be used to specify the features/targets, the normalization, etc.

We then create a `NeuralNet` instance that takes the dataset as its input argument. Several options are available to specify the task, the use of the GPU, etc. We then simply have to train the model, as in the sketch below.
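
A minimal sketch of these two objects; the file name, target name and `dict_filter` threshold are illustrative assumptions:

```python
from deeprank.learn import DataSet, NeuralNet
from deeprank.learn.model3d import cnn as cnn3d

# Keep only the conformations whose target value passes the filter expression.
data_set = DataSet('dataset.hdf5',
                   select_target='DOCKQ',
                   dict_filter={'DOCKQ': '>0.1'})

# 3D CNN doing regression on the selected target.
model = NeuralNet(data_set, cnn3d, model_type='3d', task='reg')
model.train(nepoch=50, divide_trainset=0.8, train_batch_size=5, num_workers=0)
```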

4 changes: 2 additions & 2 deletions deeprank/features/AtomicFeature.py
@@ -2,9 +2,9 @@
import warnings

import numpy as np
+import pdb2sql

from deeprank.features import FeatureClass
-from deeprank.tools import pdb2sql


class AtomicFeature(FeatureClass):
@@ -81,7 +81,7 @@ def __init__(self, pdbfile, param_charge=None, param_vdw=None,
self.atom_key = 'chainID, resSeq, resName, name'

# read the pdb as an sql
-self.sqldb = pdb2sql(self.pdbfile)
+self.sqldb = pdb2sql.pdb2sql(self.pdbfile)

# read the force field
self.read_charge_file()
10 changes: 5 additions & 5 deletions deeprank/features/BSA.py
@@ -1,7 +1,8 @@
import warnings

+import pdb2sql

from deeprank.features import FeatureClass
-from deeprank.tools import pdb2sql

try:
import freesasa
@@ -33,7 +34,7 @@ def __init__(self, pdb_data, chainA='A', chainB='B'):
>>> bsa.sql.close()
"""
self.pdb_data = pdb_data
-self.sql = pdb2sql(pdb_data)
+self.sql = pdb2sql.interface(pdb_data)
self.chains_label = [chainA, chainB]

self.feature_data = {}
@@ -83,9 +84,8 @@ def get_contact_residue_sasa(self, cutoff=5.5):
self.bsa_data = {}
self.bsa_data_xyz = {}

-# res = ([chain1 residues], [chain2 residues])
-ctc_res = self.sql.get_contact_residue(cutoff=cutoff)
-ctc_res = ctc_res[0] + ctc_res[1]
+ctc_res = self.sql.get_contact_residues(cutoff=cutoff)
+ctc_res = ctc_res["A"] + ctc_res["B"]

# handle with small interface or no interface
total_res = len(ctc_res)
10 changes: 5 additions & 5 deletions deeprank/features/FullPSSM.py
@@ -2,10 +2,10 @@
import warnings

import numpy as np
+import pdb2sql

from deeprank import config
from deeprank.features import FeatureClass
-from deeprank.tools import pdb2sql

########################################################################
#
@@ -163,7 +163,7 @@ def read_PSSM_data(self):
def get_feature_value(self, cutoff=5.5):
"""get the feature value."""

-sql = pdb2sql(self.pdb_file)
+sql = pdb2sql.interface(self.pdb_file)

# set anchors for all residues and get their xyz
xyz_info = sql.get('chainID,resSeq,resName', name='CB')
@@ -178,10 +178,10 @@ def get_feature_value(self, cutoff=5.5):
xyz_dict[tuple(info)] = pos

# get interface contact residues
-# ctc_res = ([chain 1 residues], [chain2 residues])
-ctc_res = sql.get_contact_residue(cutoff=cutoff)
+# ctc_res = {"A":[chain 1 residues], "B": [chain2 residues]}
+ctc_res = sql.get_contact_residues(cutoff=cutoff)
sql.close()
-ctc_res = ctc_res[0] + ctc_res[1]
+ctc_res = ctc_res["A"] + ctc_res["B"]

# handle with small interface or no interface
total_res = len(ctc_res)
7 changes: 4 additions & 3 deletions deeprank/features/NaivePSSM.py
@@ -2,9 +2,10 @@
from time import time

import numpy as np
+import pdb2sql

from deeprank.features import FeatureClass
-from deeprank.tools import SASA, pdb2sql
+from deeprank.tools import SASA


def printif(string, cond): return print(string) if cond else None
@@ -148,7 +149,7 @@ def _smooth_pssm(pssm_data, msmooth=3):
def get_feature_value(self, contact_only=True):
"""get the feature value."""

-sql = pdb2sql(self.pdbfile)
+sql = pdb2sql.interface(self.pdbfile)
xyz_info = sql.get('chainID,resSeq,resName', name='CB')
xyz = sql.get('x,y,z', name='CB')

@@ -157,7 +158,7 @@ def get_feature_value(self, contact_only=True):
xyz_dict[tuple(info)] = pos

contact_residue = sql.get_contact_residue(cutoff=5.5)
-contact_residue = contact_residue[0] + contact_residue[1]
+contact_residue = contact_residue["A"] + contact_residue["B"]
sql.close()

pssm_data_xyz = {}
6 changes: 3 additions & 3 deletions deeprank/features/ResidueDensity.py
@@ -1,8 +1,8 @@
import itertools
import warnings
+import pdb2sql

from deeprank.features import FeatureClass
-from deeprank.tools import pdb2sql
from deeprank import config


@@ -23,7 +23,7 @@ def __init__(self, pdb_data, chainA='A', chainB='B'):
"""

self.pdb_data = pdb_data
-self.sql = pdb2sql(pdb_data)
+self.sql = pdb2sql.interface(pdb_data)
self.chains_label = [chainA, chainB]

self.feature_data = {}
@@ -40,7 +40,7 @@ def get(self, cutoff=5.5):
# res = {('chainA,resSeq,resName'): set(
# ('chainB,res1Seq,res1Name),
# ('chainB,res2Seq,res2Name'))}
-res = self.sql.get_contact_residue(chain1=self.chains_label[0],
+res = self.sql.get_contact_residues(chain1=self.chains_label[0],
chain2=self.chains_label[1],
cutoff=cutoff,
return_contact_pairs=True)
