Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Commit

Permalink
Merge 1c35cb7 into 8ee54ac
Browse files Browse the repository at this point in the history
  • Loading branch information
NicoRenaud committed Apr 10, 2020
2 parents 8ee54ac + 1c35cb7 commit 95f4cbb
Show file tree
Hide file tree
Showing 6 changed files with 104 additions and 51 deletions.
10 changes: 3 additions & 7 deletions deeprank/features/BSA.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,13 +117,9 @@ def get_contact_residue_sasa(self, cutoff=5.5):
# define the xyz key : (chain,x,y,z)
chain = {'A': 0, 'B': 1}[res[0]]

atcenter = 'CB'
if res[2] == 'GLY':
atcenter = 'CA'
xyz = self.sql.get(
'x,y,z', resSeq=res[1], chainID=res[0], name=atcenter)[0]
# xyz = np.mean(self.sql.get('x,y,z',resSeq=r[1],chainID=r[0]),0)
xyzkey = tuple([chain] + xyz)
# get the center
_, xyz = self.get_residue_center(self.sql, res=res)
xyzkey = tuple([chain] + xyz[0])

# put the data in dict
self.bsa_data[res] = [bsa]
Expand Down
121 changes: 96 additions & 25 deletions deeprank/features/FeatureClass.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import numpy as np


class FeatureClass(object):

def __init__(self, feature_type):
"""Master class from which all the other feature classes should be
derived.
"""Master class from which all the other feature classes should be derived.
Arguments
feature_type(str): 'Atomic' or 'Residue'
Notes:
Each subclass must compute:
- self.feature_data: dictionary of features in
Expand All @@ -28,22 +30,25 @@ def __init__(self, feature_type):
{'coulomb': data_dict_clb, 'vdwaals': data_dict_vdw}
data_dict_clb = {xyz_info: [values]}
xyz_info = (chainNum, x, y, z)
Args:
feature_type(str): 'Atomic' or 'Residue'
"""

self.type = feature_type
self.feature_data = {}
self.feature_data_xyz = {}

def export_data_hdf5(self, featgrp):
"""Export the data in human readable format to HDF5's group.
- For atomic features, the format of the data must be:
{(chainID, resSeq, resName, name): [values]}
- For residue features, the format must be:
{(chainID, resSeq, resName): [values]}
"""Export the data in xyz-val format in an HDF5 file group.
Arguments:
featgrp {[hdf5_group]} -- The hdf5 group of the feature
Notes:
- For atomic features, the format of the data must be:
{(chainID, resSeq, resName, name): [values]}
- For residue features, the format must be:
{(chainID, resSeq, resName): [values]}
"""

# loop through the datadict and name
for name, data in self.feature_data.items():

Expand Down Expand Up @@ -84,22 +89,14 @@ def export_data_hdf5(self, featgrp):
else:
featgrp.create_dataset(name + '_raw', data=ds)

########################################
#
# export the data in an HDF5 file group
# the format of the data is here
# PRO : fast when mapping
# CON : only usefull for deeprank
#
########################################


def export_dataxyz_hdf5(self, featgrp):
"""Export the data in xyz-val format in an HDF5 file group.
For atomic and residue the format of the data must be:
{(chainNum(0 or 1), x, y, z): [values]}
Arguments:
featgrp {[hdf5_group]} -- The hdf5 group of the feature
"""

# loop through the datadict and name
for name, data in self.feature_data_xyz.items():

Expand All @@ -112,3 +109,77 @@ def export_dataxyz_hdf5(self, featgrp):
old[...] = ds
else:
featgrp.create_dataset(name, data=ds)

@staticmethod
def get_residue_center(sql, centers=('CB', 'CA', 'mean'), res=None):
    """Compute one center per residue by trying several options in order.

    For every residue the entries of ``centers`` are tried in order and
    the first one that yields exactly one position is kept.

    Arguments:
        sql {pdb2sql} -- The pdb2sql instance; only ``sql.get`` is used.

    Keyword Arguments:
        centers {list} -- Candidate centers, tried in order. ``'CB'`` and
            ``'CA'`` select the atom of that name, ``'mean'`` averages
            the positions of all atoms of the residue. A single string
            is accepted as well. (default: {('CB', 'CA', 'mean')})
        res {list} -- Residues to consider: either one residue
            ``[chainID, resSeq, resName]`` or a list of such residues.
            When None, every residue returned by ``sql`` is used, in
            order of first appearance. (default: {None})

    Raises:
        ValueError: If a center name is not recognized, if a named atom
            matches more than once inside a residue, or if none of the
            candidate centers exists for a residue.

    Returns:
        tuple -- (list(res), list(xyz)), aligned index by index.
    """
    # get all residues if None were provided
    # [chainID, resSeq, resName]
    if res is None:
        # dict.fromkeys drops duplicates while keeping first-seen order
        res = list(dict.fromkeys(
            tuple(x) for x in sql.get('chainID,resSeq,resName')))

    # make sure that we have a list of res even if only 1 res was provided
    # res=[chainID, resSeq, resName] -> res=[[chainID, resSeq, resName]]
    # (a residue may be a list or a tuple, so both mark "already a list")
    elif len(res) > 0 and not isinstance(res[0], (list, tuple)):
        res = [res]

    # make sure that we have a list of possible centers; a bare string
    # must be wrapped, list('CB') would split it into ['C', 'B']
    if isinstance(centers, str):
        centers = [centers]
    else:
        centers = list(centers)

    xyz = []

    for r in res:

        selection = dict(chainID=r[0], resSeq=r[1], resName=r[2])

        for ctr in centers:

            if ctr in ('CB', 'CA'):
                xyz_res = sql.get('x,y,z', name=ctr, **selection)

            elif ctr == 'mean':
                atoms = sql.get('x,y,z', **selection)
                # averaging an empty selection would give NaN:
                # treat it as "this center does not exist"
                if len(atoms) == 0:
                    xyz_res = []
                else:
                    xyz_res = [np.mean(atoms, axis=0).tolist()]

            else:
                raise ValueError('Center %s not recognized' % ctr)

            if len(xyz_res) == 1:
                xyz.append(xyz_res[0])
                break

            if len(xyz_res) > 1:
                # the named atom is ambiguous within this residue
                raise ValueError('Residue center not found')

        else:
            # no candidate matched: fail loudly instead of returning
            # res and xyz lists of different lengths
            raise ValueError(
                'Center not found for residue %s' % (tuple(r),))

    if len(xyz) == 0:
        raise ValueError('Center not found')

    return res, xyz

8 changes: 2 additions & 6 deletions deeprank/features/FullPSSM.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,7 @@ def get_feature_value(self, cutoff=5.5):
sql = pdb2sql.interface(self.pdb_file)

# set anchors for all residues and get their xyz
xyz_info = sql.get('chainID,resSeq,resName', name='CB')
xyz_info += sql.get('chainID,resSeq,resName', name='CA',
resName='GLY')

xyz = sql.get('x,y,z', name='CB')
xyz += sql.get('x,y,z', name='CA', resName='GLY')
xyz_info, xyz = self.get_residue_center(sql)

xyz_dict = {}
for pos, info in zip(xyz, xyz_info):
Expand Down Expand Up @@ -212,6 +207,7 @@ def get_feature_value(self, cutoff=5.5):
f"{self.mol_name}: The following interface residues have "
f" no pssm value:\n {ctc_res_wo_pssm}"
)

else:
ctc_res_with_pssm = ctc_res

Expand Down
13 changes: 3 additions & 10 deletions deeprank/features/ResidueDensity.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,17 +123,10 @@ def extract_features(self):
# total density in raw format
self.feature_data['RCD_total'][key] = [res.density['total']]

# get the type of the center
atcenter = 'CB'
if key[2] == 'GLY':
atcenter = 'CA'
# get the center
_, xyz = self.get_residue_center(self.sql, res=key)
xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz[0])

# get the xyz of the center atom
xyz = self.sql.get(
'x,y,z', resSeq=key[1], chainID=key[0], name=atcenter)[0]
#xyz = np.mean(self.sql.get('x,y,z',resSeq=key[1],chainID=key[0]),0).tolist()

xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz)
self.feature_data_xyz['RCD_total'][xyz_key] = [
res.density['total']]

Expand Down
1 change: 0 additions & 1 deletion deeprank/generate/DataGenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,6 @@ def create_database(

# names of the molecule
mol_name = os.path.splitext(os.path.basename(cplx))[0]
mol_name = mol_name.replace('-', '_')
mol_aug_name_list = []

try:
Expand Down
2 changes: 0 additions & 2 deletions test/test_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,6 @@ def test_7_realign(self):
database = DataGenerator(hdf5=copy_name)
database.realign_complexes(align={'axis':'z'})



if __name__ == "__main__":

# unittest.main()
Expand Down

0 comments on commit 95f4cbb

Please sign in to comment.