Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Commit

Permalink
Merge 0236e62 into 6375c88
Browse files Browse the repository at this point in the history
  • Loading branch information
DarioMarzella committed Jan 11, 2021
2 parents 6375c88 + 0236e62 commit d36f034
Show file tree
Hide file tree
Showing 97 changed files with 329 additions and 30,753 deletions.
9 changes: 4 additions & 5 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,23 +44,22 @@ test/*.pdb
*.ref_pairs

# docs
#docs/_build
#docs/_static
docs/_build
# docs/_static
#docs/_templates


# vscode setting
.vscode

.DS_Store
test/1AK4/atomic_features/test_1AK4_100w.dat
test/2OUL/atomic_features/test_2OUL_1.dat
test/atomic_pair_interaction.dat

# Mac OSX files
.DS_Store
*.DS_Store

# test coverage
htmlcov
coverage.xml
.coverage
.coverage
136 changes: 84 additions & 52 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,17 @@ The documentation of the module can be found on readthedocs :

Minimal information to install the module

Installation from PyPI:

- clone the repository `git clone https://github.com/DeepRank/deeprank.git`
- go there `cd deeprank`
- install the module `pip install -e ./`
- go int the test dir `cd test`
- run the test suite `pytest`
- Install the module `pip install deeprank`

Installation from GitHub repository:

- Clone the repository `git clone https://github.com/DeepRank/deeprank.git`
- Go there `cd deeprank`
- Install the module `pip install -e ./`
- Go into the test dir `cd test`
- Run the test suite `pytest`


## 2 . Tutorial
Expand All @@ -39,46 +44,64 @@ We give here the tutorial like introduction to the DeepRank machinery. More info
The generation of the data requires only the PDB files of the decoys and their native structures, and the PSSM files if needed. All the features/targets and the features mapped onto grid points will be automatically calculated and stored in an HDF5 file.

```python
from deeprank.generate import *
from mpi4py import MPI

comm = MPI.COMM_WORLD

# adress of the BM4 folder
BM4 = '/path/to/BM4/data/'

# sources to assemble the data base
pdb_source = ['./1AK4/decoys/']
pdb_native = ['./1AK4/native/']
pssm_source = ['./1AK4/pssm_new/']

# output file
h5file = './1ak4.hdf5'

#init the data assembler
database = DataGenerator(pdb_source=pdb_source,
pdb_native=pdb_native,
pssm_source=pssm_source,
data_augmentation = 1,
compute_targets = ['deeprank.targets.dockQ','deeprank.targets.binary_class'],
compute_features = ['deeprank.features.AtomicFeature',
'deeprank.features.FullPSSM',
'deeprank.features.PSSM_IC',
'deeprank.features.BSA',
'deeprank.features.ResidueDensity'],
hdf5=h5file,mpi_comm=comm)

#create new files
database.create_database(prog_bar=True)

# map the features
grid_info = {
'number_of_points' : [30,30,30],
'resolution' : [1.,1.,1.],
'atomic_densities' : {'CA':3.5,'N':3.5,'O':3.5,'C':3.5},
}

database.map_features(grid_info,try_sparse=True,time=False,prog_bar=True)
from deeprank.generate import *
from mpi4py import MPI

comm = MPI.COMM_WORLD

# name of the hdf5 to generate
h5file = './hdf5/1ak4.hdf5'

# for each hdf5 file where to find the pdbs
pdb_source = ['../test/1AK4/decoys/']


# where to find the native conformations
# pdb_native is only used to calculate i-RMSD, dockQ and so on.
# The native pdb files will not be saved in the hdf5 file
pdb_native = ['../test/1AK4/native/']


# where to find the pssm
pssm_source = '../test/1AK4/pssm_new/'


# initialize the database
database = DataGenerator(
chain1='C', chain2='D',
pdb_source=pdb_source,
pdb_native=pdb_native,
pssm_source=pssm_source,
data_augmentation=0,
compute_targets=[
'deeprank.targets.dockQ',
'deeprank.targets.binary_class'],
compute_features=[
'deeprank.features.AtomicFeature',
'deeprank.features.FullPSSM',
'deeprank.features.PSSM_IC',
'deeprank.features.BSA',
'deeprank.features.ResidueDensity'],
hdf5=h5file,
mpi_comm=comm)


# create the database
# compute features/targets for all complexes
print('{:25s}'.format('Create new database') + database.hdf5)
database.create_database(prog_bar=True)


# define the 3D grid
grid_info = {
'number_of_points' : [30,30,30],
'resolution' : [1.,1.,1.],
'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
}

# Map the features
database.map_features(grid_info,try_sparse=True, time=False, prog_bar=True)

```

This script can be executed using, for example, 4 MPI processes with the command:
Expand Down Expand Up @@ -116,25 +139,34 @@ The HDF5 files generated above can be used as input for deep learning experiment

```python
from deeprank.learn import *
from deeprank.learn.model3d import cnn as cnn3d
from deeprank.learn.model3d import cnn_reg
import torch.optim as optim
import numpy as np

# input database
database = '1ak4.hdf5'

# output directory
out = './my_DL_test/'

# declare the dataset instance
data_set = DataSet(database,
grid_shape=(30,30,30),
select_feature={'AtomicDensities_ind' : 'all',
'Feature_ind' : ['coulomb','vdwaals','charge','pssm'] },
chain1='C',
chain2='D',
grid_info={
'number_of_points': (10, 10, 10),
'resolution': (3, 3, 3)},
select_feature={
'AtomicDensities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
'Features': ['coulomb', 'vdwaals', 'charge', 'PSSM_*']},
select_target='DOCKQ',
normalize_features = True, normalize_targets=True,
pair_chain_feature=np.add,
dict_filter={'IRMSD':'<4. or >10.'})
dict_filter={'DOCKQ':'<1'})


# create the networkt
model = NeuralNet(data_set,cnn3d,model_type='3d',task='reg',
# create the network
model = NeuralNet(data_set,cnn_reg,model_type='3d',
cuda=False,plot=True,outdir=out)

# change the optimizer (optional)
Expand Down
46 changes: 23 additions & 23 deletions deeprank/features/AtomicFeature.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,28 +44,28 @@ def __init__(self, pdbfile, chain1='A', chain2='B', param_charge=None,
verbose (bool): print or not.
Examples:
>>> pdb = '1AK4_100w.pdb'
>>>
>>> # get the force field included in deeprank
>>> # if another FF has been used to compute the ref
>>> # change also this path to the correct one
>>> FF = pkg_resources.resource_filename(
>>> 'deeprank.features','') + '/forcefield/'
>>>
>>> # declare the feature calculator instance
>>> atfeat = AtomicFeature(pdb,
>>> param_charge = FF + 'protein-allhdg5-4_new.top',
>>> param_vdw = FF + 'protein-allhdg5-4_new.param',
>>> patch_file = FF + 'patch.top')
>>>
>>> # assign parameters
>>> atfeat.assign_parameters()
>>>
>>> # only compute the pair interactions here
>>> atfeat.evaluate_pair_interaction(save_interactions=test_name)
>>>
>>> # close the db
>>> atfeat.sqldb._close()
>>> pdb = '1AK4_100w.pdb'
>>>
>>> # get the force field included in deeprank
>>> # if another FF has been used to compute the ref
>>> # change also this path to the correct one
>>> FF = pkg_resources.resource_filename(
>>> 'deeprank.features','') + '/forcefield/'
>>>
>>> # declare the feature calculator instance
>>> atfeat = AtomicFeature(pdb,
>>> param_charge = FF + 'protein-allhdg5-4_new.top',
>>> param_vdw = FF + 'protein-allhdg5-4_new.param',
>>> patch_file = FF + 'patch.top')
>>>
>>> # assign parameters
>>> atfeat.assign_parameters()
>>>
>>> # only compute the pair interactions here
>>> atfeat.evaluate_pair_interaction(save_interactions=test_name)
>>>
>>> # close the db
>>> atfeat.sqldb._close()
"""

super().__init__("Atomic")
Expand Down Expand Up @@ -975,7 +975,7 @@ def __compute_feature__(pdb_data, featgrp, featgrp_raw, chain1, chain2):
atfeat.assign_parameters()
atfeat.evaluate_pair_interaction()
atfeat.evaluate_charges(extend_contact_to_residue=True)
atfeat.sqldb.close()
atfeat.sqldb._close()

# export in the hdf5 file
pprint(atfeat.feature_data)
Expand Down
12 changes: 6 additions & 6 deletions deeprank/features/BSA.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,16 @@ def __init__(self, pdb_data, chain1='A', chain2='B'):
as a separate module. They can be installed using
>>> pip install freesasa
Args :
Args:
pdb_data (list(byte) or str): pdb data or pdb filename
chain1 (str, optional): name of the first chain
chain2 (str, optional): name of the second chain
Example :
>>> bsa = BSA('1AK4.pdb')
>>> bsa.get_structure()
>>> bsa.get_contact_residue_sasa()
>>> bsa.sql._close()
Example:
>>> bsa = BSA('1AK4.pdb')
>>> bsa.get_structure()
>>> bsa.get_contact_residue_sasa()
>>> bsa.sql._close()
"""
self.pdb_data = pdb_data
self.sql = pdb2sql.interface(pdb_data)
Expand Down
8 changes: 4 additions & 4 deletions deeprank/features/ResidueDensity.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@ def __init__(self, pdb_data, chain1='A', chain2='B'):
chain1 (str): First chain ID. Defaults to 'A'
chain2 (str): Second chain ID. Defaults to 'B'
Example :
>>> rcd = ResidueDensity('1EWY_100w.pdb')
>>> rcd.get(cutoff=5.5)
>>> rcd.extract_features()
Example:
>>> rcd = ResidueDensity('1EWY_100w.pdb')
>>> rcd.get(cutoff=5.5)
>>> rcd.extract_features()
"""

self.pdb_data = pdb_data
Expand Down
44 changes: 26 additions & 18 deletions deeprank/generate/DataGenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,23 +67,25 @@ def __init__(self, chain1, chain2,
Example :
>>> from deeprank.generate import *
>>> # sources to assemble the data base
>>> pdb_source = ['./1AK4/decoys/']
>>> pdb_native = ['./1AK4/native/']
>>> h5file = '1ak4.hdf5'
>>>
>>> #init the data assembler
>>> database = DataGenerator(chain1='A',
>>> chain2='B',
>>> pdb_source=pdb_source,
>>> pdb_native=pdb_native,
>>> data_augmentation=None,
>>> compute_targets=['deeprank.targets.dockQ'],
>>> compute_features=['deeprank.features.AtomicFeature',
>>> 'deeprank.features.PSSM_IC',
>>> 'deeprank.features.BSA'],
>>> hdf5=h5file)
>>> from deeprank.generate import *
>>> # sources to assemble the data base
>>> pdb_source = ['./1AK4/decoys/']
>>> pdb_native = ['./1AK4/native/']
>>> pssm_source = ['./1AK4/pssm_new/']
>>> h5file = '1ak4.hdf5'
>>>
>>> #init the data assembler
>>> database = DataGenerator(chain1='C',
>>> chain2='D',
>>> pdb_source=pdb_source,
>>> pdb_native=pdb_native,
>>> pssm_source=pssm_source,
>>> data_augmentation=None,
>>> compute_targets=['deeprank.targets.dockQ'],
>>> compute_features=['deeprank.features.AtomicFeature',
>>> 'deeprank.features.PSSM_IC',
>>> 'deeprank.features.BSA'],
>>> hdf5=h5file)
"""

self.chain1 = chain1
Expand Down Expand Up @@ -192,10 +194,16 @@ def create_database(
>>> # sources to assemble the data base
>>> pdb_source = ['./1AK4/decoys/']
>>> pdb_native = ['./1AK4/native/']
>>> pssm_source = ['./1AK4/pssm_new/']
>>> h5file = '1ak4.hdf5'
>>>
>>> #init the data assembler
>>> database = DataGenerator(pdb_source=pdb_source,pdb_native=pdb_native,data_augmentation=None,
>>> database = DataGenerator(chain1='C',
>>> chain2='D',
>>> pdb_source=pdb_source,
>>> pdb_native=pdb_native,
>>> pssm_source=pssm_source,
>>> data_augmentation=None,
>>> compute_targets = ['deeprank.targets.dockQ'],
>>> compute_features = ['deeprank.features.AtomicFeature',
>>> 'deeprank.features.PSSM_IC',
Expand Down
4 changes: 2 additions & 2 deletions deeprank/generate/NormalizeData.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def __init__(self, fname, shape=None):
Example:
>>> norm = NormalizeData('1ak4.hdf5')
>>> norm.get()
>>> norm = NormalizeData('1ak4.hdf5')
>>> norm.get()
"""
self.fname = fname
self.parameters = {'features': {}, 'targets': {}}
Expand Down
Loading

0 comments on commit d36f034

Please sign in to comment.