Merge 0236e62 into 6375c88

DeepRank · Jan 11, 2021 · d36f034 · d36f034
2 parents 6375c88 + 0236e62
commit d36f034
Show file tree

Hide file tree

Showing 97 changed files with 329 additions and 30,753 deletions.
diff --git a/.gitignore b/.gitignore
@@ -44,23 +44,22 @@ test/*.pdb
 *.ref_pairs
 
 # docs
-#docs/_build
-#docs/_static
+docs/_build
+# docs/_static
 #docs/_templates
 
 
 # vscode setting
 .vscode
 
-.DS_Store
 test/1AK4/atomic_features/test_1AK4_100w.dat
 test/2OUL/atomic_features/test_2OUL_1.dat
 test/atomic_pair_interaction.dat
 
 # Mac OSX files
-.DS_Store
+*.DS_Store
 
 # test coverage
 htmlcov
 coverage.xml
-.coverage
+.coverage
diff --git a/README.md b/README.md
@@ -19,12 +19,17 @@ The documentation of the module can be found on readthedocs :
 
 Minimal information to install the module
 
+Installation with pypi:
 
--   clone the repository `git clone https://github.com/DeepRank/deeprank.git`
--   go there             `cd deeprank`
--   install the module   `pip install -e ./`
--   go int the test dir `cd test`
--   run the test suite `pytest`
+-   Install the module `pip install deeprank`
+
+Installation from GitHub repository:
+
+-   Clone the repository `git clone https://github.com/DeepRank/deeprank.git`
+-   Go there             `cd deeprank`
+-   Install the module   `pip install -e ./`
+-   Go into the test dir `cd test`
+-   Run the test suite `pytest`
 
 
 ## 2 . Tutorial
@@ -39,46 +44,64 @@ We give here the tutorial like introduction to the DeepRank machinery. More info
 The generation of the data requires only the PDB files of the decoys and their native structures, and the PSSM files if needed. All the features/targets and the features mapped onto grid points will be automatically calculated and stored in an HDF5 file.
 
 ```python
-from deeprank.generate import *
-from mpi4py import MPI
-
-comm = MPI.COMM_WORLD
-
-# adress of the BM4 folder
-BM4 = '/path/to/BM4/data/'
-
-# sources to assemble the data base
-pdb_source     = ['./1AK4/decoys/']
-pdb_native     = ['./1AK4/native/']
-pssm_source    = ['./1AK4/pssm_new/']
-
-# output file
-h5file = './1ak4.hdf5'
-
-#init the data assembler
-database = DataGenerator(pdb_source=pdb_source,
-                         pdb_native=pdb_native,
-                         pssm_source=pssm_source,
-                         data_augmentation = 1,
-                         compute_targets  = ['deeprank.targets.dockQ','deeprank.targets.binary_class'],
-                         compute_features = ['deeprank.features.AtomicFeature',
-                                             'deeprank.features.FullPSSM',
-                                             'deeprank.features.PSSM_IC',
-                                             'deeprank.features.BSA',
-                                             'deeprank.features.ResidueDensity'],
-                         hdf5=h5file,mpi_comm=comm)
-
-#create new files
-database.create_database(prog_bar=True)
-
-# map the features
-grid_info = {
-  'number_of_points' : [30,30,30],
-  'resolution' : [1.,1.,1.],
-  'atomic_densities' : {'CA':3.5,'N':3.5,'O':3.5,'C':3.5},
-}
-
- database.map_features(grid_info,try_sparse=True,time=False,prog_bar=True)
+from deeprank.generate import * 
+from mpi4py import MPI 
+
+comm = MPI.COMM_WORLD 
+
+# name of the hdf5 to generate 
+h5file = './hdf5/1ak4.hdf5' 
+
+# for each hdf5 file where to find the pdbs 
+pdb_source = ['../test/1AK4/decoys/'] 
+
+
+# where to find the native conformations 
+# pdb_native is only used to calculate i-RMSD, dockQ and so on. 
+# The native pdb files will not be saved in the hdf5 file 
+pdb_native = ['../test/1AK4/native/'] 
+
+
+# where to find the pssm 
+pssm_source = '../test/1AK4/pssm_new/' 
+
+
+# initialize the database 
+database = DataGenerator(
+    chain1='C', chain2='D', 
+    pdb_source=pdb_source, 
+    pdb_native=pdb_native, 
+    pssm_source=pssm_source, 
+    data_augmentation=0, 
+    compute_targets=[ 
+        'deeprank.targets.dockQ', 
+        'deeprank.targets.binary_class'], 
+    compute_features=[ 
+        'deeprank.features.AtomicFeature', 
+        'deeprank.features.FullPSSM', 
+        'deeprank.features.PSSM_IC', 
+        'deeprank.features.BSA', 
+        'deeprank.features.ResidueDensity'], 
+    hdf5=h5file, 
+    mpi_comm=comm) 
+
+
+# create the database 
+# compute features/targets for all complexes 
+print('{:25s}'.format('Create new database') + database.hdf5) 
+database.create_database(prog_bar=True) 
+
+
+# define the 3D grid 
+grid_info = {
+    'number_of_points': [30, 30, 30],
+    'resolution': [1., 1., 1.],
+    'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
+}
+
+# Map the features 
+database.map_features(grid_info,try_sparse=True, time=False, prog_bar=True) 
+
 ```
 
 This script can be executed using for example 4 MPI processes with the command:
@@ -116,25 +139,34 @@ The HDF5 files generated above can be used as input for deep learning experiment
 
 ```python
 from deeprank.learn import *
-from deeprank.learn.model3d import cnn as cnn3d
+from deeprank.learn.model3d import cnn_reg
 import torch.optim as optim
+import numpy as np
 
 # input database
 database = '1ak4.hdf5'
 
+# output directory
+out = './my_DL_test/'
+
 # declare the dataset instance
 data_set = DataSet(database,
-            grid_shape=(30,30,30),
-            select_feature={'AtomicDensities_ind' : 'all',
-                            'Feature_ind' : ['coulomb','vdwaals','charge','pssm'] },
+            chain1='C',
+            chain2='D',
+            grid_info={
+                'number_of_points': (10, 10, 10),
+                'resolution': (3, 3, 3)},
+            select_feature={
+                'AtomicDensities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
+                'Features': ['coulomb', 'vdwaals', 'charge', 'PSSM_*']},
             select_target='DOCKQ',
             normalize_features = True, normalize_targets=True,
             pair_chain_feature=np.add,
-            dict_filter={'IRMSD':'<4. or >10.'})
+            dict_filter={'DOCKQ':'<1'})
 
 
-# create the networkt
-model = NeuralNet(data_set,cnn3d,model_type='3d',task='reg',
+# create the network
+model = NeuralNet(data_set,cnn_reg,model_type='3d',
                   cuda=False,plot=True,outdir=out)
 
 # change the optimizer (optional)

diff --git a/deeprank/features/AtomicFeature.py b/deeprank/features/AtomicFeature.py
@@ -44,28 +44,28 @@ def __init__(self, pdbfile, chain1='A', chain2='B', param_charge=None,
             verbose (bool): print or not.
 
         Examples:
-        >>> pdb = '1AK4_100w.pdb'
-        >>>
-        >>> # get the force field included in deeprank
-        >>> # if another FF has been used to compute the ref
-        >>> # change also this path to the correct one
-        >>> FF = pkg_resources.resource_filename(
-        >>>     'deeprank.features','') + '/forcefield/'
-        >>>
-        >>> # declare the feature calculator instance
-        >>> atfeat = AtomicFeature(pdb,
-        >>>    param_charge = FF + 'protein-allhdg5-4_new.top',
-        >>>    param_vdw    = FF + 'protein-allhdg5-4_new.param',
-        >>>    patch_file   = FF + 'patch.top')
-        >>>
-        >>> # assign parameters
-        >>> atfeat.assign_parameters()
-        >>>
-        >>> # only compute the pair interactions here
-        >>> atfeat.evaluate_pair_interaction(save_interactions=test_name)
-        >>>
-        >>> # close the db
-        >>> atfeat.sqldb._close()
+            >>> pdb = '1AK4_100w.pdb'
+            >>>
+            >>> # get the force field included in deeprank
+            >>> # if another FF has been used to compute the ref
+            >>> # change also this path to the correct one
+            >>> FF = pkg_resources.resource_filename(
+            >>>     'deeprank.features','') + '/forcefield/'
+            >>>
+            >>> # declare the feature calculator instance
+            >>> atfeat = AtomicFeature(pdb,
+            >>>    param_charge = FF + 'protein-allhdg5-4_new.top',
+            >>>    param_vdw    = FF + 'protein-allhdg5-4_new.param',
+            >>>    patch_file   = FF + 'patch.top')
+            >>>
+            >>> # assign parameters
+            >>> atfeat.assign_parameters()
+            >>>
+            >>> # only compute the pair interactions here
+            >>> atfeat.evaluate_pair_interaction(save_interactions=test_name)
+            >>>
+            >>> # close the db
+            >>> atfeat.sqldb._close()
         """
 
         super().__init__("Atomic")
@@ -975,7 +975,7 @@ def __compute_feature__(pdb_data, featgrp, featgrp_raw, chain1, chain2):
     atfeat.assign_parameters()
     atfeat.evaluate_pair_interaction()
     atfeat.evaluate_charges(extend_contact_to_residue=True)
-    atfeat.sqldb.close()
+    atfeat.sqldb._close()
 
     # export in the hdf5 file
     pprint(atfeat.feature_data)

diff --git a/deeprank/features/BSA.py b/deeprank/features/BSA.py
@@ -22,16 +22,16 @@ def __init__(self, pdb_data, chain1='A', chain2='B'):
         as a separate module. They can be installed using
         >>> pip install freesasa
 
-        Args :
+        Args:
             pdb_data (list(byte) or str): pdb data or pdb filename
             chain1 (str, optional): name of the first chain
             chain2 (str, optional): name of the second chain
 
-        Example :
-        >>> bsa = BSA('1AK4.pdb')
-        >>> bsa.get_structure()
-        >>> bsa.get_contact_residue_sasa()
-        >>> bsa.sql._close()
+        Example:
+            >>> bsa = BSA('1AK4.pdb')
+            >>> bsa.get_structure()
+            >>> bsa.get_contact_residue_sasa()
+            >>> bsa.sql._close()
         """
         self.pdb_data = pdb_data
         self.sql = pdb2sql.interface(pdb_data)

diff --git a/deeprank/features/ResidueDensity.py b/deeprank/features/ResidueDensity.py
@@ -16,10 +16,10 @@ def __init__(self, pdb_data, chain1='A', chain2='B'):
             chain1 (str): First chain ID. Defaults to 'A'
             chain2 (str): Second chain ID. Defaults to 'B'
 
-        Example :
-        >>> rcd = ResidueDensity('1EWY_100w.pdb')
-        >>> rcd.get(cutoff=5.5)
-        >>> rcd.extract_features()
+        Example:
+            >>> rcd = ResidueDensity('1EWY_100w.pdb')
+            >>> rcd.get(cutoff=5.5)
+            >>> rcd.extract_features()
         """
 
         self.pdb_data = pdb_data

diff --git a/deeprank/generate/DataGenerator.py b/deeprank/generate/DataGenerator.py
@@ -67,23 +67,25 @@ def __init__(self, chain1, chain2,
 
         Example :
 
-        >>> from deeprank.generate import *
-        >>> # sources to assemble the data base
-        >>> pdb_source     = ['./1AK4/decoys/']
-        >>> pdb_native     = ['./1AK4/native/']
-        >>> h5file = '1ak4.hdf5'
-        >>>
-        >>> #init the data assembler
-        >>> database = DataGenerator(chain1='A',
-        >>>                          chain2='B',
-        >>>                          pdb_source=pdb_source,
-        >>>                          pdb_native=pdb_native,
-        >>>                          data_augmentation=None,
-        >>>                          compute_targets=['deeprank.targets.dockQ'],
-        >>>                          compute_features=['deeprank.features.AtomicFeature',
-        >>>                                            'deeprank.features.PSSM_IC',
-        >>>                                            'deeprank.features.BSA'],
-        >>>                          hdf5=h5file)
+            >>> from deeprank.generate import *
+            >>> # sources to assemble the data base
+            >>> pdb_source     = ['./1AK4/decoys/']
+            >>> pdb_native     = ['./1AK4/native/']
+            >>> pssm_source    = ['./1AK4/pssm_new/']
+            >>> h5file = '1ak4.hdf5'
+            >>>
+            >>> #init the data assembler
+            >>> database = DataGenerator(chain1='C',
+            >>>                          chain2='D',
+            >>>                          pdb_source=pdb_source,
+            >>>                          pdb_native=pdb_native,
+            >>>                          pssm_source=pssm_source,
+            >>>                          data_augmentation=None,
+            >>>                          compute_targets=['deeprank.targets.dockQ'],
+            >>>                          compute_features=['deeprank.features.AtomicFeature',
+            >>>                                            'deeprank.features.PSSM_IC',
+            >>>                                            'deeprank.features.BSA'],
+            >>>                          hdf5=h5file)
         """
 
         self.chain1 = chain1
@@ -192,10 +194,16 @@ def create_database(
         >>> # sources to assemble the data base
         >>> pdb_source     = ['./1AK4/decoys/']
         >>> pdb_native     = ['./1AK4/native/']
+        >>> pssm_source    = ['./1AK4/pssm_new/']
         >>> h5file = '1ak4.hdf5'
         >>>
         >>> #init the data assembler
-        >>> database = DataGenerator(pdb_source=pdb_source,pdb_native=pdb_native,data_augmentation=None,
+        >>> database = DataGenerator(chain1='C',
+        >>>                          chain2='D',
+        >>>                          pdb_source=pdb_source,
+        >>>                          pdb_native=pdb_native,
+        >>>                          pssm_source=pssm_source,
+        >>>                          data_augmentation=None,
         >>>                          compute_targets  = ['deeprank.targets.dockQ'],
         >>>                          compute_features = ['deeprank.features.AtomicFeature',
         >>>                                              'deeprank.features.PSSM_IC',

diff --git a/deeprank/generate/NormalizeData.py b/deeprank/generate/NormalizeData.py
@@ -25,8 +25,8 @@ def __init__(self, fname, shape=None):
 
         Example:
 
-        >>> norm = NormalizeData('1ak4.hdf5')
-        >>> norm.get()
+            >>> norm = NormalizeData('1ak4.hdf5')
+            >>> norm.get()
         """
         self.fname = fname
         self.parameters = {'features': {}, 'targets': {}}