Merge pull request #143 from DeepRank/tidy_examples

Tidy examples
DeepRank · Apr 7, 2020 · 8ee54ac · 8ee54ac
2 parents b7d7aae + 719940c
commit 8ee54ac
Show file tree

Hide file tree

Showing 6 changed files with 6,047 additions and 5,846 deletions.
diff --git a/deeprank/utils/plot_utils.py b/deeprank/utils/plot_utils.py
@@ -463,6 +463,7 @@ def prepare_df(deeprank_h5FL, HS_h5FL, epoch, scenario):
         label caseID               modelID target                                          sourceFL        DR      irmsd         HS
         Test   1AVX  1AVX_ranair-it0_5286      0  /home/lixue/DBs/BM5-haddock24/hdf5/000_1AVX.hdf5  0.503823  25.189108   6.980802
         Test   1AVX     1AVX_ti5-itw_354w      1  /home/lixue/DBs/BM5-haddock24/hdf5/000_1AVX.hdf5  0.502845   3.668682 -95.158100
+
     '''
 
     print ("=== Prepare the df ===")
@@ -762,6 +763,9 @@ def main(HS_h5FL='/projects/0/deeprank/BM5/docked_models/stats.h5'): # on cartes
     df.to_csv(rawdataFL, sep = '\t', index = False, float_format = '%.5f')
     print(f'{rawdataFL} generated.\n')
 
+    #rawdataFL=f'{scenario}.rawdata.tsv'
+    #df = pd.read_csv(rawdataFL, sep='\t')
+
     # -- report the number of hits for train/valid/test
     hit_statistics(df)
 

diff --git a/example/generate_dataset_alignH5.py b/example/generate_dataset_alignH5.py
@@ -0,0 +1,67 @@
+from deeprank.generate import *
+from mpi4py import MPI
+
+comm = MPI.COMM_WORLD
+
+# name of the hdf5 to generate
+h5file = './hdf5/1ak4.hdf5'
+
+# where to find the pdbs for the hdf5 file
+pdb_source = '../test/1AK4/decoys/'
+
+# where to find the native conformations
+# pdb_native is only used to calculate i-RMSD, dockQ and so on.
+# The native pdb files will not be saved in the hdf5 file
+pdb_native = '../test/1AK4/native/'
+
+
+# where to find the pssm
+pssm_source = '../test/1AK4/pssm_new/'
+
+# # initialize the database
+# database = DataGenerator(
+#     pdb_source=pdb_source,
+#     pdb_native=pdb_native,
+#     pssm_source=pssm_source,
+#     data_augmentation=2,
+#     compute_targets=[
+#         'deeprank.targets.dockQ',
+#         'deeprank.targets.binary_class'],
+#     compute_features=[
+#         'deeprank.features.AtomicFeature',
+#         'deeprank.features.FullPSSM',
+#         'deeprank.features.PSSM_IC',
+#         'deeprank.features.BSA',
+#         'deeprank.features.ResidueDensity'],
+#     hdf5=h5file,
+#     mpi_comm=comm)
+
+
+# # create the database
+# # compute features/targets for all complexes
+# print('{:25s}'.format('Create new database') + database.hdf5)
+# database.create_database(prog_bar=True)
+
+newdb = DataGenerator(hdf5=h5file)
+newdb.realign_complexes(align={'axis':'z'})
+
+
+# define the 3D grid
+# grid_info = {
+#   'number_of_points' : [30,30,30],
+#   'resolution' : [1.,1.,1.],
+#   'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
+# }
+
+# generate the grid
+#print('{:25s}'.format('Generate the grid') + database.hdf5)
+#database.precompute_grid(grid_info,try_sparse=True, time=False, prog_bar=True)
+
+
+# print('{:25s}'.format('Map features in database') + database.hdf5)
+# database.map_features(grid_info,try_sparse=True, time=False, prog_bar=True)
+
+# # get the normalization of the features
+# print('{:25s}'.format('Normalization') + database.hdf5)
+# norm = NormalizeData(database.hdf5)
+# norm.get()
diff --git a/example/generate_dataset_alignPDB.py b/example/generate_dataset_alignPDB.py
@@ -0,0 +1,62 @@
+from deeprank.generate import *
+from mpi4py import MPI
+
+comm = MPI.COMM_WORLD
+
+# name of the hdf5 to generate
+h5file = './hdf5/1ak4_xue.hdf5'
+
+# where to find the pdbs for the hdf5 file
+pdb_source = '../test/1AK4/decoys/'
+
+# where to find the native conformations
+# pdb_native is only used to calculate i-RMSD, dockQ and so on.
+# The native pdb files will not be saved in the hdf5 file
+pdb_native = '../test/1AK4/native/'
+
+# where to find the pssm
+pssm_source = '../test/1AK4/pssm_new/'
+
+# initialize the database
+database = DataGenerator(
+    pdb_source=pdb_source,
+    pdb_native=pdb_native,
+    pssm_source=pssm_source,
+    align={"axis":'x','export':False},
+    data_augmentation=None,
+    compute_targets=[
+        'deeprank.targets.binary_class'],
+    compute_features=[
+        'deeprank.features.AtomicFeature',
+        'deeprank.features.FullPSSM',
+        'deeprank.features.PSSM_IC',
+        'deeprank.features.BSA',
+        'deeprank.features.ResidueDensity'],
+    hdf5=h5file,
+    mpi_comm=comm)
+
+# compute the features/targets and write them to the hdf5 file
+print('{:25s}'.format('Create new database') + database.hdf5)
+database.create_database(prog_bar=True)
+
+
+# define the 3D grid
+grid_info = {
+  'number_of_points' : [30,30,30],
+  'resolution' : [1.,1.,1.],
+  'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
+}
+
+# generate the grid
+#print('{:25s}'.format('Generate the grid') + database.hdf5)
+#database.precompute_grid(grid_info,try_sparse=True, time=False, prog_bar=True)
+
+print('{:25s}'.format('Map features in database') + database.hdf5)
+database.map_features(grid_info,try_sparse=True, time=False, prog_bar=True)
+
+# get the normalization of the features.
+# This step can also be done by the DataSet class (see learn.py)
+#
+# print('{:25s}'.format('Normalization') + database.hdf5)
+# norm = NormalizeData(database.hdf5)
+# norm.get()
diff --git a/example/generate_dataset_noalign.py b/example/generate_dataset_noalign.py
@@ -0,0 +1,68 @@
+from deeprank.generate import *
+from mpi4py import MPI
+
+comm = MPI.COMM_WORLD
+
+# name of the hdf5 to generate
+h5file = './hdf5/1ak4.hdf5'
+
+# where to find the pdbs for the hdf5 file
+pdb_source = '../test/1AK4/decoys/'
+
+# where to find the native conformations
+# pdb_native is only used to calculate i-RMSD, dockQ and so on.
+# The native pdb files will not be saved in the hdf5 file
+pdb_native = '../test/1AK4/native/'
+
+
+# where to find the pssm
+pssm_source = '../test/1AK4/pssm_new/'
+
+# initialize the database
+database = DataGenerator(
+    pdb_source=pdb_source,
+    pdb_native=pdb_native,
+    pssm_source=pssm_source,
+    data_augmentation=2,
+    compute_targets=[
+        'deeprank.targets.dockQ',
+        'deeprank.targets.binary_class'],
+    compute_features=[
+        'deeprank.features.AtomicFeature',
+        'deeprank.features.FullPSSM',
+        'deeprank.features.PSSM_IC',
+        'deeprank.features.BSA',
+        'deeprank.features.ResidueDensity'],
+    hdf5=h5file,
+    mpi_comm=comm)
+
+
+# create the database
+# compute features/targets for all complexes
+print('{:25s}'.format('Create new database') + database.hdf5)
+database.create_database(prog_bar=True)
+
+
+# define the 3D grid
+grid_info = {
+  'number_of_points' : [30,30,30],
+  'resolution' : [1.,1.,1.],
+  'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
+}
+
+# generate the grid and write the grid coordinates to hdf5
+# This step can be skipped and is covered by map_features() below.
+#
+# print('{:25s}'.format('Generate the grid') + database.hdf5)
+# database.precompute_grid(grid_info,try_sparse=True, time=False, prog_bar=True)
+
+# map features to the 3D grid
+print('{:25s}'.format('Map features in database') + database.hdf5)
+database.map_features(grid_info,try_sparse=True, time=False, prog_bar=True)
+
+# get the normalization of the features.
+# This step can also be done by the DataSet class (see learn.py)
+#
+# print('{:25s}'.format('Normalization') + database.hdf5)
+# norm = NormalizeData(database.hdf5)
+# norm.get()