Skip to content
This repository has been archived by the owner on Nov 28, 2023. It is now read-only.

Commit

Permalink
introduced get_residue_center
Browse files Browse the repository at this point in the history
  • Loading branch information
NicoRenaud committed Apr 10, 2020
1 parent 79d4c15 commit 1564d11
Show file tree
Hide file tree
Showing 6 changed files with 152 additions and 72 deletions.
26 changes: 14 additions & 12 deletions deeprank/features/BSA.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,19 +117,21 @@ def get_contact_residue_sasa(self, cutoff=5.5):
# define the xyz key : (chain,x,y,z)
chain = {'A': 0, 'B': 1}[res[0]]

atcenter = 'CB'
if res[2] == 'GLY':
atcenter = 'CA'
# atcenter = 'CB'
# if res[2] == 'GLY':
# atcenter = 'CA'

try :
xyz = self.sql.get(
'x,y,z', resSeq=res[1], chainID=res[0], name=atcenter)[0]
except IndexError :
warnings .warn('Atom ', atcenter, ' not found for residue ', key[1], \
'. Use residue center as feature center')
xyz = np.mean(self.sql.get('x,y,z',resSeq=r[1],chainID=r[0]),0)

xyzkey = tuple([chain] + xyz)
# try :
# xyz = self.sql.get(
# 'x,y,z', resSeq=res[1], chainID=res[0], name=atcenter)[0]
# except IndexError :
# warnings.warn('Atom ', atcenter, ' not found for residue ', key[1], \
# '. Use residue center as feature center')
# xyz = np.mean(self.sql.get('x,y,z',resSeq=res[1],chainID=res[0]),0)
# xyzkey = tuple([chain] + xyz)

_, xyz = self.get_residue_center(self.sql, res=res)
xyzkey = tuple([chain] + xyz[0])

# put the data in dict
self.bsa_data[res] = [bsa]
Expand Down
122 changes: 97 additions & 25 deletions deeprank/features/FeatureClass.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import numpy as np

import warnings

class FeatureClass(object):

def __init__(self, feature_type):
"""Master class from which all the other feature classes should be
derived.
"""Master class from which all the other feature classes should be derived.
Arguments
feature_type(str): 'Atomic' or 'Residue'
Notes:
Each subclass must compute:
- self.feature_data: dictionary of features in
Expand All @@ -28,22 +31,25 @@ def __init__(self, feature_type):
{'coulomb': data_dict_clb, 'vdwaals': data_dict_vdw}
data_dict_clb = {xyz_info: [values]}
xyz_info = (chainNum, x, y, z)
Args:
feature_type(str): 'Atomic' or 'Residue'
"""

self.type = feature_type
self.feature_data = {}
self.feature_data_xyz = {}

def export_data_hdf5(self, featgrp):
"""Export the data in human readable format to HDF5's group.
- For atomic features, the format of the data must be:
{(chainID, resSeq, resName, name): [values]}
- For residue features, the format must be:
{(chainID, resSeq, resName): [values]}
"""Export the data in xyz-val format in an HDF5 file group.
Arguments:
featgrp {[hdf5_group]} -- The hdf5 group of the feature
Notes:
- For atomic features, the format of the data must be:
{(chainID, resSeq, resName, name): [values]}
- For residue features, the format must be:
{(chainID, resSeq, resName): [values]}
"""

# loop through the datadict and name
for name, data in self.feature_data.items():

Expand Down Expand Up @@ -84,22 +90,14 @@ def export_data_hdf5(self, featgrp):
else:
featgrp.create_dataset(name + '_raw', data=ds)

########################################
#
# export the data in an HDF5 file group
# the format of the data is here
# PRO : fast when mapping
# CON : only usefull for deeprank
#
########################################


def export_dataxyz_hdf5(self, featgrp):
"""Export the data in xyz-val format in an HDF5 file group.
For atomic and residue the format of the data must be:
{(chainNum(0 or 1), x, y, z): [values]}
Arguments:
featgrp {[hdf5_group]} -- The hdf5 group of the feature
"""

# loop through the datadict and name
for name, data in self.feature_data_xyz.items():

Expand All @@ -112,3 +110,77 @@ def export_dataxyz_hdf5(self, featgrp):
old[...] = ds
else:
featgrp.create_dataset(name, data=ds)

@staticmethod
def get_residue_center(sql, centers=['CB','CA','mean'], res=None):
"""Computes the center of each residue by trying different options
Arguments:
sql {pdb2sql} -- The pdb2sql instance
Keyword Arguments:
centers {list} -- list of strings (default: {['CB','CA','mean']})
res {list} -- list of residue to be considered ([[chainID, resSeq, resName]])
Raises:
ValueError: [description]
Returns:
[type] -- list(res), list(xyz)
"""

# get all residues if None were provided
# [chainID, resName, resSeq]
if res is None:
res = [tuple(x) for x in sql.get('chainID,resSeq,resName')]
res = sorted(set(res), key=res.index)


# make sure that we have a list of res
# even if ony 1 res was provided
# res=[chainID, resSeq, resName] -> res=[[chainID, resSeq, resName]]
elif not isinstance(res[0],list):
res = [res]

# make sure that we have a list of possible centers
if not isinstance(centers,list):
centers = list(centers)

xyz = []

for r in res:

for ctr in centers:

if ctr in ['CB','CA']:

xyz_res = sql.get('x,y,z',
chainID=r[0],
resSeq=r[1],
resName=r[2],
name=ctr)

elif ctr == 'mean':
xyz_res = [np.mean(sql.get('x,y,z',
chainID=r[0],
resSeq=r[1],
resName=r[2]),axis=0).tolist()]

else:
raise ValueError('Center %s not recognized' %c)

if len(xyz_res) == 0:
continue

elif len(xyz_res) == 1:
xyz.append(xyz_res[0])
break

else:
raise ValueError('Residue center not found')

if len(xyz) == 0:
raise ValueError('Center not found')

return res, xyz

29 changes: 16 additions & 13 deletions deeprank/features/FullPSSM.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,12 +166,13 @@ def get_feature_value(self, cutoff=5.5):
sql = pdb2sql.interface(self.pdb_file)

# set anchors for all residues and get their xyz
xyz_info = sql.get('chainID,resSeq,resName', name='CB')
xyz_info += sql.get('chainID,resSeq,resName', name='CA',
resName='GLY')
# xyz_info = sql.get('chainID,resSeq,resName', name='CB')
# xyz_info += sql.get('chainID,resSeq,resName', name='CA',
# resName='GLY')
# xyz = sql.get('x,y,z', name='CB')
# xyz += sql.get('x,y,z', name='CA', resName='GLY')

xyz = sql.get('x,y,z', name='CB')
xyz += sql.get('x,y,z', name='CA', resName='GLY')
xyz_info, xyz = self.get_residue_center(sql)

xyz_dict = {}
for pos, info in zip(xyz, xyz_info):
Expand Down Expand Up @@ -212,14 +213,16 @@ def get_feature_value(self, cutoff=5.5):
f"{self.mol_name}: The following interface residues have "
f" no pssm value:\n {ctc_res_wo_pssm}"
)
elif len(pssm_res_set.difference(ctc_res_set)) > 0:
# can happen if CA/CB is missing in the res
pssm_res_wo_ctc = pssm_res_set.difference(ctc_res_set)
ctc_res_with_pssm = pssm_res_set.intersection(ctc_res_set)
warnings.warn(
f"{self.mol_name}: The following interface residues have "
f" a CA or CB missing :\n {pssm_res_wo_ctc}"
)

# elif len(pssm_res_set.difference(ctc_res_set)) > 0:
# # can happen if CA/CB is missing in the res
# pssm_res_wo_ctc = pssm_res_set.difference(ctc_res_set)
# ctc_res_with_pssm = pssm_res_set.intersection(ctc_res_set)
# warnings.warn(
# f"{self.mol_name}: The following interface residues have "
# f" a CA or CB missing :\n {pssm_res_wo_ctc}"
# )

else:
ctc_res_with_pssm = ctc_res

Expand Down
33 changes: 18 additions & 15 deletions deeprank/features/ResidueDensity.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,21 +123,24 @@ def extract_features(self):
# total density in raw format
self.feature_data['RCD_total'][key] = [res.density['total']]

# get the type of the center
atcenter = 'CB'
if key[2] == 'GLY':
atcenter = 'CA'

# get the xyz of the center atom
try:
xyz = self.sql.get(
'x,y,z', resSeq=key[1], chainID=key[0], name=atcenter)[0]
except IndexError :
warnings .warn('Atom ', atcenter, ' not found for residue ', key[1], \
'. Use residue center as feature center')
xyz = np.mean(self.sql.get('x,y,z',resSeq=key[1],chainID=key[0]),0).tolist()

xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz)
# # get the type of the center
# atcenter = 'CB'
# if key[2] == 'GLY':
# atcenter = 'CA'

# # get the xyz of the center atom
# try:
# xyz = self.sql.get(
# 'x,y,z', resSeq=key[1], chainID=key[0], name=atcenter)[0]
# except IndexError :
# warnings.warn('Atom ', atcenter, ' not found for residue ', key[1], \
# '. Use residue center as feature center')
# xyz = np.mean(self.sql.get('x,y,z',resSeq=key[1],chainID=key[0]),0).tolist()
# xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz)

_, xyz = self.get_residue_center(self.sql, res=key)
xyz_key = tuple([{'A': 0, 'B': 1}[key[0]]] + xyz[0])

self.feature_data_xyz['RCD_total'][xyz_key] = [
res.density['total']]

Expand Down
2 changes: 1 addition & 1 deletion deeprank/generate/DataGenerator.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ def create_database(

# names of the molecule
mol_name = os.path.splitext(os.path.basename(cplx))[0]
mol_name = mol_name.replace('-', '_')
#mol_name = mol_name.replace('-', '_')
mol_aug_name_list = []

try:
Expand Down
12 changes: 6 additions & 6 deletions test/test_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,9 +262,9 @@ def test_7_realign(self):
# unittest.main()
inst = TestGenerateData()
inst.test_1_generate()
inst.test_1_generate_mapfly()
inst.test_3_add_unique_target()
inst.test_4_add_feature()
inst.test_5_align()
inst.test_6_align_interface()
inst.test_7_realign()
# inst.test_1_generate_mapfly()
# inst.test_3_add_unique_target()
# inst.test_4_add_feature()
# inst.test_5_align()
# inst.test_6_align_interface()
# inst.test_7_realign()

0 comments on commit 1564d11

Please sign in to comment.