Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Commit

Permalink
Merge pull request #143 from DeepRank/tidy_examples
Browse files Browse the repository at this point in the history
Tidy examples
  • Loading branch information
NicoRenaud authored Apr 7, 2020
2 parents b7d7aae + 719940c commit 8ee54ac
Show file tree
Hide file tree
Showing 6 changed files with 6,047 additions and 5,846 deletions.
4 changes: 4 additions & 0 deletions deeprank/utils/plot_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,7 @@ def prepare_df(deeprank_h5FL, HS_h5FL, epoch, scenario):
label caseID modelID target sourceFL DR irmsd HS
Test 1AVX 1AVX_ranair-it0_5286 0 /home/lixue/DBs/BM5-haddock24/hdf5/000_1AVX.hdf5 0.503823 25.189108 6.980802
Test 1AVX 1AVX_ti5-itw_354w 1 /home/lixue/DBs/BM5-haddock24/hdf5/000_1AVX.hdf5 0.502845 3.668682 -95.158100
'''

print ("=== Prepare the df ===")
Expand Down Expand Up @@ -762,6 +763,9 @@ def main(HS_h5FL='/projects/0/deeprank/BM5/docked_models/stats.h5'): # on cartes
df.to_csv(rawdataFL, sep = '\t', index = False, float_format = '%.5f')
print(f'{rawdataFL} generated.\n')

#rawdataFL=f'{scenario}.rawdata.tsv'
#df = pd.read_csv(rawdataFL, sep='\t')

# -- report the number of hits for train/valid/test
hit_statistics(df)

Expand Down
67 changes: 67 additions & 0 deletions example/generate_dataset_alignH5.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
from deeprank.generate import *
from mpi4py import MPI

# Example: realign the complexes stored in an existing HDF5 database.
#
# Only the two DataGenerator calls in the "realign" section are live; every
# other step is kept as commented-out reference code that can be re-enabled.

# MPI communicator; in this script only the commented-out set-up uses it.
comm = MPI.COMM_WORLD

# HDF5 file this example works on.
h5file = './hdf5/1ak4.hdf5'

# Location of the decoy PDB files.
pdb_source = '../test/1AK4/decoys/'

# Location of the native conformations. They are only used to calculate
# i-RMSD, dockQ and so on; the native PDB files are not saved in the HDF5 file.
pdb_native = '../test/1AK4/native/'

# Location of the PSSM files.
pssm_source = '../test/1AK4/pssm_new/'

# ---------------------------------------------------------------------------
# Optional: initialize the database from the PDB/PSSM sources.
# ---------------------------------------------------------------------------
# database = DataGenerator(
#     pdb_source=pdb_source,
#     pdb_native=pdb_native,
#     pssm_source=pssm_source,
#     data_augmentation=2,
#     compute_targets=[
#         'deeprank.targets.dockQ',
#         'deeprank.targets.binary_class'],
#     compute_features=[
#         'deeprank.features.AtomicFeature',
#         'deeprank.features.FullPSSM',
#         'deeprank.features.PSSM_IC',
#         'deeprank.features.BSA',
#         'deeprank.features.ResidueDensity'],
#     hdf5=h5file,
#     mpi_comm=comm)
#
# Optional: create the database, i.e. compute the features/targets for all
# complexes.
# print('{:25s}'.format('Create new database') + database.hdf5)
# database.create_database(prog_bar=True)

# ---------------------------------------------------------------------------
# Realign the complexes of the existing HDF5 file.
# ---------------------------------------------------------------------------
# NOTE(review): presumably this orients the complexes along the z axis;
# confirm against DataGenerator.realign_complexes.
alignment = {'axis': 'z'}
existing_db = DataGenerator(hdf5=h5file)
existing_db.realign_complexes(align=alignment)

# ---------------------------------------------------------------------------
# Optional: grid definition, grid generation, feature mapping, normalization.
# These steps rely on the commented-out `database` object above.
# ---------------------------------------------------------------------------
# Definition of the 3D grid:
# grid_info = {
#     'number_of_points' : [30,30,30],
#     'resolution' : [1.,1.,1.],
#     'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
# }
#
# Generation of the grid:
# print('{:25s}'.format('Generate the grid') + database.hdf5)
# database.precompute_grid(grid_info,try_sparse=True, time=False, prog_bar=True)
#
# Mapping of the features onto the grid:
# print('{:25s}'.format('Map features in database') + database.hdf5)
# database.map_features(grid_info,try_sparse=True, time=False, prog_bar=True)
#
# Normalization of the features:
# print('{:25s}'.format('Normalization') + database.hdf5)
# norm = NormalizeData(database.hdf5)
# norm.get()
62 changes: 62 additions & 0 deletions example/generate_dataset_alignPDB.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
from deeprank.generate import *
from mpi4py import MPI

# Example: generate an HDF5 database while passing alignment options to
# DataGenerator, then map the features onto a 3D grid.

# MPI communicator handed to DataGenerator.
comm = MPI.COMM_WORLD

# Name of the HDF5 file to generate.
h5file = './hdf5/1ak4_xue.hdf5'

# Location of the decoy PDB files.
pdb_source = '../test/1AK4/decoys/'

# Location of the native conformations. They are only used to calculate
# i-RMSD, dockQ and so on; the native PDB files are not saved in the HDF5 file.
pdb_native = '../test/1AK4/native/'

# Location of the PSSM files.
pssm_source = '../test/1AK4/pssm_new/'

# Settings passed to DataGenerator, named here for readability.
# NOTE(review): 'axis'/'export' semantics are defined by DataGenerator;
# confirm there before changing them.
align_options = {'axis': 'x', 'export': False}
target_modules = [
    'deeprank.targets.binary_class',
]
feature_modules = [
    'deeprank.features.AtomicFeature',
    'deeprank.features.FullPSSM',
    'deeprank.features.PSSM_IC',
    'deeprank.features.BSA',
    'deeprank.features.ResidueDensity',
]

# Initialize the database.
database = DataGenerator(
    pdb_source=pdb_source,
    pdb_native=pdb_native,
    pssm_source=pssm_source,
    align=align_options,
    data_augmentation=None,
    compute_targets=target_modules,
    compute_features=feature_modules,
    hdf5=h5file,
    mpi_comm=comm,
)


def announce(step):
    """Print `step` padded to 25 characters, followed by the HDF5 path."""
    print('{:25s}'.format(step) + database.hdf5)


# Compute the features/targets and write them to the HDF5 file.
announce('Create new database')
database.create_database(prog_bar=True)

# Definition of the 3D grid.
grid_info = {
    'number_of_points': [30, 30, 30],
    'resolution': [1., 1., 1.],
    'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
}

# Optional: generate the grid as a separate step.
# announce('Generate the grid')
# database.precompute_grid(grid_info,try_sparse=True, time=False, prog_bar=True)

# Map the features onto the 3D grid.
announce('Map features in database')
database.map_features(grid_info, try_sparse=True, time=False, prog_bar=True)

# Optional: get the normalization of the features.
# This step can also be done by the DataSet class (see learn.py).
#
# announce('Normalization')
# norm = NormalizeData(database.hdf5)
# norm.get()
68 changes: 68 additions & 0 deletions example/generate_dataset_noalign.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from deeprank.generate import *
from mpi4py import MPI

# Example: generate an HDF5 database without any alignment option, then map
# the features onto a 3D grid.

# MPI communicator handed to DataGenerator.
comm = MPI.COMM_WORLD

# Name of the HDF5 file to generate.
h5file = './hdf5/1ak4.hdf5'

# Location of the decoy PDB files.
pdb_source = '../test/1AK4/decoys/'

# Location of the native conformations. They are only used to calculate
# i-RMSD, dockQ and so on; the native PDB files are not saved in the HDF5 file.
pdb_native = '../test/1AK4/native/'

# Location of the PSSM files.
pssm_source = '../test/1AK4/pssm_new/'

# Targets and features to compute, named here for readability.
target_modules = [
    'deeprank.targets.dockQ',
    'deeprank.targets.binary_class',
]
feature_modules = [
    'deeprank.features.AtomicFeature',
    'deeprank.features.FullPSSM',
    'deeprank.features.PSSM_IC',
    'deeprank.features.BSA',
    'deeprank.features.ResidueDensity',
]

# Initialize the database.
database = DataGenerator(
    pdb_source=pdb_source,
    pdb_native=pdb_native,
    pssm_source=pssm_source,
    data_augmentation=2,
    compute_targets=target_modules,
    compute_features=feature_modules,
    hdf5=h5file,
    mpi_comm=comm,
)


def announce(step):
    """Print `step` padded to 25 characters, followed by the HDF5 path."""
    print('{:25s}'.format(step) + database.hdf5)


# Create the database: compute the features/targets for all complexes.
announce('Create new database')
database.create_database(prog_bar=True)

# Definition of the 3D grid.
grid_info = {
    'number_of_points': [30, 30, 30],
    'resolution': [1., 1., 1.],
    'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
}

# Optional: generate the grid and write the grid coordinates to the HDF5 file.
# This step can be skipped, since map_features() below covers it.
#
# announce('Generate the grid')
# database.precompute_grid(grid_info,try_sparse=True, time=False, prog_bar=True)

# Map the features onto the 3D grid.
announce('Map features in database')
database.map_features(grid_info, try_sparse=True, time=False, prog_bar=True)

# Optional: get the normalization of the features.
# This step can also be done by the DataSet class (see learn.py).
#
# announce('Normalization')
# norm = NormalizeData(database.hdf5)
# norm.get()
Loading

0 comments on commit 8ee54ac

Please sign in to comment.