Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Commit

Permalink
Merge 70041ec into 5b3cd21
Browse files Browse the repository at this point in the history
  • Loading branch information
manonreau committed Dec 22, 2021
2 parents 5b3cd21 + 70041ec commit 03fa703
Show file tree
Hide file tree
Showing 6 changed files with 105 additions and 51 deletions.
6 changes: 4 additions & 2 deletions deeprank/learn/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -1168,7 +1168,8 @@ def load_one_molecule(self, fname, mol=None):
try:
target = mol_data.get('targets/' + self.select_target)[()]
except Exception:
logger.exception(f'No target value for: {fname} - not required for the test set')
target = None
logger.warning(f'No target value for: {fname} - not required for the test set')

# close
fh5.close()
Expand Down Expand Up @@ -1217,7 +1218,8 @@ def map_one_molecule(self, fname, mol=None, angle=None, axis=None):
try:
target = mol_data.get('targets/' + self.select_target)[()]
except Exception:
logger.exception(f'No target value for: {fname} - not required for the test set')
target = None
logger.warning(f'No target value for: {fname} - not required for the test set')

# close
fh5.close()
Expand Down
41 changes: 26 additions & 15 deletions deeprank/learn/NeuralNet.py
Original file line number Diff line number Diff line change
Expand Up @@ -244,9 +244,10 @@ def __init__(self, data_set, model,
if self.state['cuda']:
for paramname in list(self.state['state_dict'].keys()):
paramname_new = paramname.lstrip('module.')
self.state['state_dict'][paramname_new] = \
self.state['state_dict'][paramname]
del self.state['state_dict'][paramname]
if paramname != paramname_new:
self.state['state_dict'][paramname_new] = \
self.state['state_dict'][paramname]
del self.state['state_dict'][paramname]
self.load_model_params()

# multi-gpu
Expand Down Expand Up @@ -397,12 +398,16 @@ def convertSeconds2Days(time):
seconds = time
return '%02d-%02d:%02d:%02d' % (day, hour, minutes, seconds)

def test(self, hdf5='test_data.hdf5', hit_cutoff=None):
def test(self, hdf5='test_data.hdf5', hit_cutoff=None, has_target=False):
"""Test a predefined model on a new dataset.
Args:
hdf5 (str, optional): hdf5 file to store the test results
hit_cutoff (float, optional): the cutoff used to define hit by
comparing with docking models' target value, e.g. IRMSD value
has_target (bool, optional): specify the presence (True) or absence (False) of
target values in the test set. No metrics can be computed if False.
Examples:
>>> # address of the database
>>> database = '1ak4.hdf5'
Expand All @@ -428,11 +433,11 @@ def test(self, hdf5='test_data.hdf5', hit_cutoff=None):
# define the target value threshold to compute the hits if save_hitrate is True
if self.save_hitrate and hit_cutoff is not None:
self.hit_cutoff = hit_cutoff
logger.info(f'Use hit cutoff {self.hit_cutoff}')
logger.info(f'Use hit cutoff {self.hit_cutoff}')

# do test
self.data = {}
_, self.data['test'] = self._epoch(loader, train_model=False)
_, self.data['test'] = self._epoch(loader, train_model=False, has_target=has_target)

# plot results
if self.plot is True :
Expand Down Expand Up @@ -494,7 +499,10 @@ def load_nn_params(self):
"""Get NeuralNet parameters from a saved model."""
self.task = self.state['task']
self.criterion = self.state['criterion']
self.hit_cutoff = self.state['hit_cutoff']
try:
self.hit_cutoff = self.state['hit_cutoff']
except Exception:
logger.warning(f'No "hit_cutoff" found in {self.pretrained_model}. Please set it in function "test()" when doing benchmark"')

def load_data_params(self):
"""Get dataset parameters from a saved model."""
Expand Down Expand Up @@ -766,7 +774,8 @@ def _train(self, index_train, index_valid, index_test,
return torch.cat([param.data.view(-1)
for param in self.net.parameters()], 0)

def _epoch(self, data_loader, train_model):

def _epoch(self, data_loader, train_model, has_target=True):
"""Perform one single epoch iteration over a data loader.
Args:
Expand Down Expand Up @@ -821,9 +830,10 @@ def _epoch(self, data_loader, train_model):
if self.task == 'class':
targets = targets.view(-1)

# evaluate loss
loss = self.criterion(outputs, targets)
running_loss += loss.data.item() # pytorch1 compatible
if has_target:
# evaluate loss
loss = self.criterion(outputs, targets)
running_loss += loss.data.item() # pytorch1 compatible
n += len(inputs)

# zero + backward + step
Expand Down Expand Up @@ -858,12 +868,12 @@ def _epoch(self, data_loader, train_model):
data['mol'] = np.array(data['mol'], dtype=object)

# get the relevance of the ranking
if self.save_hitrate:
if self.save_hitrate and has_target:
logger.info(f'Use hit cutoff {self.hit_cutoff}')
data['hit'] = self._get_relevance(data, self.hit_cutoff)

# get classification metrics
if self.save_classmetrics:
if self.save_classmetrics and has_target:
for i in self.metricnames:
data[i] = self._get_classmetrics(data, i)

Expand All @@ -875,6 +885,7 @@ def _epoch(self, data_loader, train_model):

return running_loss, data


def _get_variables(self, inputs, targets):
# xue: why not put this step to DataSet.py?
"""Convert the feature/target in torch.Variables.
Expand All @@ -896,7 +907,7 @@ def _get_variables(self, inputs, targets):
inputs = inputs.cuda(non_blocking=True)
targets = targets.cuda(non_blocking=True)

# get the varialbe as float by default
# get the variable as float by default
inputs, targets = Variable(inputs).float(), Variable(targets).float()

# change the targets to long for classification
Expand Down
Binary file modified paper_pretrained_models/3DeepFace/best_model.pt
Binary file not shown.
63 changes: 34 additions & 29 deletions paper_pretrained_models/3DeepFace/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,34 +10,39 @@
# to set your own architecture
from arch_001_02 import cnn_class as cnn3d_class

################################################################################
# input and output settings
################################################################################
pdb_source = '../../test/1AK4/decoys'
pssm_source = '../../test/1AK4/pssm_new'

database = DataGenerator(pdb_source=pdb_source,
chain1='A', chain2='B',
pssm_source=pssm_source,
compute_features = ['deeprank.features.AtomicFeature',
'deeprank.features.FullPSSM',
'deeprank.features.PSSM_IC',
'deeprank.features.BSA',
'deeprank.features.ResidueDensity'],
data_augmentation = 30, # rotate complexes
hdf5=output.hdf5)

# compute features/targets, and write to hdf5 file
print('{:25s}'.format('Create new database') + database.hdf5)
database.create_database()

# map the features
grid_info = {
'number_of_points': [10, 10, 10],
'resolution': [3., 3., 3.],
'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
}

# map the features to the 3D grid
print('{:25s}'.format('Map features in database') + database.hdf5)
database.map_features(grid_info, try_sparse=True, time=False, prog_bar=True)

# You need to add the path to the dataset
database = glob.glob('test/*hdf5')
output = open('errors.txt', 'w')

for complx in database :
# You need to set it as your output path
name=complx.split('/')[-1].split('.')[0]

if os.path.exists('./prediction/{}'.format(name)) :
continue

else:
os.mkdir('./prediction/{}'.format(name))

outpath = './prediction/{}'.format(name)

################################################################################
# Start the training
################################################################################
try:
model = NeuralNet(complx,cnn3d_class,pretrained_model='best_model.pt', outdir=outpath)
model.test()

except:
output.write('prediction on {} failed'.format(name))

output.close()
database = glob.glob('*hdf5')

model = NeuralNet(database,cnn3d_class,pretrained_model='best_model.pt', outdir=./)

# test the pre-trained model on new data
model.test()
Binary file not shown.
46 changes: 41 additions & 5 deletions paper_pretrained_models/docking_models_scoring/test.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,48 @@
import torch
from mpi4py import MPI
import sys
import os
import re
import glob
from time import time

from deeprank.generate import *
from deeprank.learn import NeuralNet
from model_280619 import cnn_class
import glob
model_data = 'best_train_model.pt'

### Change the path to point to your own graphs
database = glob.glob('./test/*.hdf5')
comm = MPI.COMM_WORLD
pdb_source = '../../test/1AK4/decoys/'
pssm_source = '../../test/1AK4/pssm_new/'

database = DataGenerator(pdb_source= pdb_source, #path to the models
pssm_source= pssm_source, #path to the pssm data
data_augmentation = None,
chain1='C', chain2='D',
compute_features = ['deeprank.features.AtomicFeature', 'deeprank.features.FullPSSM',
'deeprank.features.PSSM_IC', 'deeprank.features.BSA', 'deeprank.features.ResidueDensity'],
hdf5='output.hdf5',mpi_comm=comm)


# compute features/targets, and write to hdf5 file
print('{:25s}'.format('Create new database') + database.hdf5)
database.create_database(prog_bar=True)

# define the 3D grid
grid_info = {
'number_of_points' : [30,30,30],
'resolution' : [1.,1.,1.],
'atomic_densities': {'C': 1.7, 'N': 1.55, 'O': 1.52, 'S': 1.8},
}

# map the features to the 3D grid
print('{:25s}'.format('Map features in database') + database.hdf5)
database.map_features(grid_info,try_sparse=True, time=False, prog_bar=True)

# select the pre-trained model
model_data = 'best_train_model.pt'
database = glob.glob('*.hdf5')

model = NeuralNet(database, cnn_class,
pretrained_model=model_data, save_hitrate=False)

# test the pre-trained model on new data
model.test()

0 comments on commit 03fa703

Please sign in to comment.