Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Update to read decoy-specific pssm files #107

Merged
merged 9 commits into from
Oct 23, 2019
Merged
2 changes: 2 additions & 0 deletions deeprank/features/AtomicFeature.py
Original file line number Diff line number Diff line change
Expand Up @@ -630,6 +630,8 @@ def evaluate_pair_interaction(self, print_interactions=False,

# store in matrix form so that
# we don't have to recalculate for B
# this assumes that the chainID order is A,B...
# otherwise rowID will differ from the matrix index
indb_matrix = [i - natA for i in indsB]
matrix_elec[iA, indb_matrix] = ec
matrix_vdw[iA, indb_matrix] = evdw
Expand Down
6 changes: 5 additions & 1 deletion deeprank/features/FeatureClass.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def export_data_hdf5(self, featgrp):
# append
ds.append(feat)

ds = np.array(ds).astype('|S' + str(len(ds[0])))
if ds:
ds = np.array(ds).astype('|S' + str(len(ds[0])))
else:
ds = np.array(ds)


# create the dataset
if name + '_raw' in featgrp:
Expand Down
37 changes: 22 additions & 15 deletions deeprank/features/FullPSSM.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,16 +48,17 @@ def __init__(self, mol_name=None, pdb_file=None, pssm_path=None,
self.mol_name = mol_name
self.pdb_file = pdb_file
self.pssm_path = pssm_path
self.ref_mol_name = self.get_ref_mol_name(mol_name)
self.pssm_format = pssm_format
self.out_type = out_type.lower()

if isinstance(pdb_file, str) and mol_name is None:
self.mol_name = os.path.splitext(pdb_file)[0]
self.mol_name = os.path.basename(pdb_file).split('.')[0]

self.ref_mol_name = self.get_ref_mol_name(self.mol_name)

if self.out_type == 'pssmic' and not self.pssm_format == 'new':
raise ValueError(f"You must provide 'new' format PSSM files"
f" to generate PSSM IC features.")
f" to generate PSSM IC features for {self.mol_name}")

if self.out_type == 'pssmvalue':
# the residue order in res_names must be consistent with
Expand All @@ -74,6 +75,7 @@ def __init__(self, mol_name=None, pdb_file=None, pssm_path=None,
self.feature_data[name] = {}
self.feature_data_xyz[name] = {}


@staticmethod
def get_ref_mol_name(mol_name):
"""Get the bare mol name."""
Expand All @@ -83,7 +85,10 @@ def read_PSSM_data(self):
"""Read the PSSM data into a dictionary."""

names = os.listdir(self.pssm_path)
fnames = list(filter(lambda x: self.ref_mol_name in x, names))
fnames = list(filter(lambda x: self.mol_name in x, names))
# if decoy-specific pssm files do not exist, fall back to the reference pssm files
if not fnames:
fnames = list(filter(lambda x: self.ref_mol_name in x, names))
num_pssm_files = len(fnames)

if num_pssm_files == 0:
Expand Down Expand Up @@ -113,7 +118,7 @@ def read_PSSM_data(self):
for r in self.pssm_res_id]
self.pssm_data = np.array(raw_data)[:, 3:].astype(np.float)

# new format with 2 files (each chain has one file)
# new format with 2 files (each chain has one file)
# and aligned mapping and IC (i.e. the iScore format)
elif self.pssm_format == 'new':

Expand Down Expand Up @@ -182,28 +187,29 @@ def get_feature_value(self, cutoff=5.5):
total_res = len(ctc_res)
if total_res == 0:
raise ValueError(
f"No interface residue found with the cutoff {cutoff}Å."
f" Failed to calculate the features of FullPSSM/PSSM_IC")
f"{self.mol_name}: No interface residue found with the "
f"cutoff {cutoff}Å."
f" Failed to calculate the features of FullPSSM/PSSM_IC.")
elif total_res < 5: # this is an empirical value
warnings.warn(
f"Only {total_res} interface residues found with "
f"cutoff {cutoff}Å. Be careful with using the features "
f" FullPSSM/PSSM_IC")
f"{self.mol_name}: Only {total_res} interface residues found"
f" with cutoff {cutoff}Å. Be careful with"
f" using the features FullPSSM/PSSM_IC")

# check if interface residues have pssm values
ctc_res_set = set(ctc_res)
pssm_res_set = set(self.pssm.keys())
if len(ctc_res_set.intersection(pssm_res_set)) == 0:
raise ValueError(
f"All interface residues have no pssm values."
f"Check residue chainID/ID/name consistency "
f"{self.mol_name}: All interface residues have no pssm values."
f" Check residue chainID/ID/name consistency "
f"between PDB and PSSM files"
)
elif len(ctc_res_set.difference(pssm_res_set)) > 0:
ctc_res_wo_pssm = ctc_res_set.difference(pssm_res_set)
ctc_res_with_pssm = ctc_res_set - ctc_res_wo_pssm
warnings.warn(
f"The following interface residues have "
f"{self.mol_name}: The following interface residues have "
f" no pssm value:\n {ctc_res_wo_pssm}"
)
else:
Expand Down Expand Up @@ -267,12 +273,13 @@ def __compute_feature__(pdb_data, featgrp, featgrp_raw, out_type='pssmvalue'):
t0 = time()
base_path = os.path.dirname(os.path.dirname(os.path.dirname(
os.path.realpath(__file__))))
pdb_file = os.path.join(base_path, "test/1AK4/native/1AK4.pdb")
# pdb_file = os.path.join(base_path, "test/1AK4/native/1AK4.pdb")
pdb_file = os.path.join(base_path, "test/1AK4/decoys/1AK4_cm-itw_238w.pdb")
path = os.path.join(base_path, "test/1AK4/pssm_new")

# pssm = FullPSSM(mol_name='1AK4', pdb_file=pdb_file, pssm_path=path,
# pssm_format='new', out_type='pssmic')
pssm = FullPSSM(mol_name='1AK4', pdb_file=pdb_file, pssm_path=path,
pssm = FullPSSM(pdb_file=pdb_file, pssm_path=path,
pssm_format='new', out_type='pssmvalue')

# get the pssm smoothed sum score
Expand Down
24 changes: 9 additions & 15 deletions deeprank/learn/NeuralNet.py
Original file line number Diff line number Diff line change
Expand Up @@ -639,12 +639,18 @@ def _train(self, index_train, index_valid, index_test,
logger.info(f'\n: epoch {epoch:03d} / {nepoch:03d} {"-"*45}')
t0 = time.time()

# train the model
logger.info(f"\n\t=> train the model\n")
train_loss, self.data['train'] = self._epoch(
train_loader, train_model=True)
self.losses['train'].append(train_loss)
if self.save_classmetrics:
for i in self.metricnames:
self.classmetrics[i]['train'].append(self.data['train'][i])

# validate the model
if _valid_:

sys.stdout.flush()
logger.info(f"\n\t=> validate the model\n")

valid_loss, self.data['valid'] = self._epoch(
valid_loader, train_model=False)
self.losses['valid'].append(valid_loss)
Expand All @@ -655,9 +661,7 @@ def _train(self, index_train, index_valid, index_test,

# test the model
if _test_:
sys.stdout.flush()
logger.info(f"\n\t=> test the model\n")

test_loss, self.data['test'] = self._epoch(
test_loader, train_model=False)
self.losses['test'].append(test_loss)
Expand All @@ -666,16 +670,6 @@ def _train(self, index_train, index_valid, index_test,
self.classmetrics[i]['test'].append(
self.data['test'][i])

# train the model
sys.stdout.flush()
logger.info(f"\n\t=> train the model\n")
train_loss, self.data['train'] = self._epoch(
train_loader, train_model=True)
self.losses['train'].append(train_loss)
if self.save_classmetrics:
for i in self.metricnames:
self.classmetrics[i]['train'].append(self.data['train'][i])

# talk a bit about losses
logger.info(f'\n train loss : {train_loss:1.3e}')
if _valid_:
Expand Down
2 changes: 1 addition & 1 deletion deeprank/tools/pdb2sql.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,7 +655,7 @@ def get_contact_atoms(self,

# if no atoms were found
if len(index_contact_1) == 0:
raise ValueError(f"No contact atoms found with cutoff {cutoff}Å")
warnings.warn(f"No contact atoms found with cutoff {cutoff}Å")

# extend the list to entire residue
if extend_to_residue:
Expand Down
Loading