Skip to content

Commit

Permalink
Merge pull request #272 from DeepRank/271_aminoacid_properties_dbodor
Browse files Browse the repository at this point in the history
feat: improve component features
  • Loading branch information
DaniBodor committed Dec 9, 2022
2 parents bc02f79 + 426ba74 commit 5229e9d
Show file tree
Hide file tree
Showing 15 changed files with 399 additions and 287 deletions.
Binary file added deeprankcore/domain/aminoacid_summary.xlsx
Binary file not shown.
424 changes: 255 additions & 169 deletions deeprankcore/domain/aminoacidlist.py

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions deeprankcore/domain/nodestorage.py
Expand Up @@ -8,16 +8,20 @@

# Names of the per-residue node features; used as keys into node.features.
## residue core features
RESTYPE = "res_type" # AminoAcid object; former FEATURENAME_AMINOACID
RESCHARGE = "res_charge" # float(<0); former FEATURENAME_CHARGE (was not assigned)
RESSIZE = "res_size" # int; former FEATURENAME_SIZE
RESCHARGE = "charge" # float(<0); former FEATURENAME_CHARGE (was not assigned)
POLARITY = "polarity" # Polarity object; former FEATURENAME_POLARITY
RESSIZE = "res_size" # int; former FEATURENAME_SIZE
RESMASS = "res_mass" # float; average residue mass in Daltons
RESPI = "res_pI" # float; isoelectric point of the amino acid
HBDONORS = "hb_donors" # int; former FEATURENAME_HYDROGENBONDDONORS
HBACCEPTORS = "hb_acceptors"# int; former FEATURENAME_HYDROGENBONDACCEPTORS

## variant residue features
VARIANTRES = "variant_res" # AminoAcid object; former FEATURENAME_VARIANTAMINOACID
DIFFCHARGE = "diff_charge" # float
DIFFSIZE = "diff_size" # int; former FEATURENAME_SIZEDIFFERENCE
DIFFMASS = "diff_mass" # float; variant mass minus wildtype mass (0 for non-variant residues)
DIFFPI = "diff_pI" # float; variant pI minus wildtype pI (0 for non-variant residues)
DIFFPOLARITY = "diff_polarity" # [type?]; former FEATURENAME_POLARITYDIFFERENCE
DIFFHBDONORS = "diff_hb_donors" # int; former FEATURENAME_HYDROGENBONDDONORSDIFFERENCE
DIFFHBACCEPTORS = "diff_hb_acceptors" # int; former FEATURENAME_HYDROGENBONDACCEPTORSDIFFERENCE
Expand Down
21 changes: 13 additions & 8 deletions deeprankcore/features/components.py
Expand Up @@ -31,27 +31,32 @@ def add_features( # pylint: disable=unused-argument

node.features[Nfeat.RESTYPE] = residue.amino_acid.onehot
node.features[Nfeat.RESCHARGE] = residue.amino_acid.charge
node.features[Nfeat.RESSIZE] = residue.amino_acid.size
node.features[Nfeat.POLARITY] = residue.amino_acid.polarity.onehot
node.features[Nfeat.HBDONORS] = residue.amino_acid.count_hydrogen_bond_donors
node.features[Nfeat.HBACCEPTORS] = residue.amino_acid.count_hydrogen_bond_acceptors
node.features[Nfeat.RESSIZE] = residue.amino_acid.size
node.features[Nfeat.RESMASS] = residue.amino_acid.mass
node.features[Nfeat.RESPI] = residue.amino_acid.pI
node.features[Nfeat.HBDONORS] = residue.amino_acid.hydrogen_bond_donors
node.features[Nfeat.HBACCEPTORS] = residue.amino_acid.hydrogen_bond_acceptors

if single_amino_acid_variant is not None:

wildtype = single_amino_acid_variant.wildtype_amino_acid
variant = single_amino_acid_variant.variant_amino_acid

if residue == single_amino_acid_variant.residue:
node.features[Nfeat.VARIANTRES] = variant.onehot
node.features[Nfeat.DIFFCHARGE] = variant.charge - wildtype.charge
node.features[Nfeat.DIFFSIZE] = variant.size - wildtype.size
node.features[Nfeat.DIFFPOLARITY] = variant.polarity.onehot - wildtype.polarity.onehot
node.features[Nfeat.DIFFHBDONORS] = variant.count_hydrogen_bond_donors - wildtype.count_hydrogen_bond_donors
node.features[Nfeat.DIFFHBACCEPTORS] = variant.count_hydrogen_bond_acceptors - wildtype.count_hydrogen_bond_acceptors
node.features[Nfeat.DIFFSIZE] = variant.size - wildtype.size
node.features[Nfeat.DIFFMASS] = variant.mass - wildtype.mass
node.features[Nfeat.DIFFPI] = variant.pI - wildtype.pI
node.features[Nfeat.DIFFHBDONORS] = variant.hydrogen_bond_donors - wildtype.hydrogen_bond_donors
node.features[Nfeat.DIFFHBACCEPTORS] = variant.hydrogen_bond_acceptors - wildtype.hydrogen_bond_acceptors
else:
node.features[Nfeat.VARIANTRES] = residue.amino_acid.onehot
node.features[Nfeat.DIFFCHARGE] = 0
node.features[Nfeat.DIFFSIZE] = 0
node.features[Nfeat.DIFFPOLARITY] = np.zeros(residue.amino_acid.polarity.onehot.shape)
node.features[Nfeat.DIFFSIZE] = 0
node.features[Nfeat.DIFFMASS] = 0
node.features[Nfeat.DIFFPI] = 0
node.features[Nfeat.DIFFHBDONORS] = 0
node.features[Nfeat.DIFFHBACCEPTORS] = 0
86 changes: 52 additions & 34 deletions deeprankcore/molstruct/aminoacid.py
Expand Up @@ -5,7 +5,7 @@
class Polarity(Enum):
"a value to express a residue's polarity"

APOLAR = 0
NONPOLAR = 0
POLAR = 1
NEGATIVE_CHARGE = 2
POSITIVE_CHARGE = 3
Expand All @@ -26,35 +26,45 @@ def __init__( # pylint: disable=too-many-arguments
name: str,
three_letter_code: str,
one_letter_code: str,
charge: float,
charge: int,
polarity: Polarity,
size: int,
count_hydrogen_bond_donors: int,
count_hydrogen_bond_acceptors: int,
mass: float,
pI: float,
hydrogen_bond_donors: int,
hydrogen_bond_acceptors: int,
index: int,
):
"""
Args:
name(str): unique name for the amino acid
three_letter_code(str): code of the amino acid, as in PDB
one_letter_code(str): letter of the amino acid, as in fasta
charge(float, optional): the charge property of the amino acid
polarity(deeprank polarity enum, optional): the polarity property of the amino acid
size(int, optional): the number of heavy atoms in the side chain
index(int, optional): the rank of the amino acid, used for computing one-hot encoding
name (str): full name of the amino acid
three_letter_code (str): three-letter code of the amino acid (as in PDB)
one_letter_code (str): one-letter code of the amino acid (as in fasta)
charge (int): charge of the amino acid
polarity (deeprank polarity enum): the polarity of the amino acid
size (int): the number of non-hydrogen atoms in the side chain
mass (float): average residue mass (i.e. mass of amino acid - H2O) in Daltons
pI (float): isoelectric point; pH at which the molecule has no net electric charge
hydrogen_bond_donors (int): number of hydrogen bond donors
hydrogen_bond_acceptors (int): number of hydrogen bond acceptors
index (int): the rank of the amino acid, used for computing one-hot encoding
"""

# amino acid nomenclature
self._name = name
self._three_letter_code = three_letter_code
self._one_letter_code = one_letter_code

# these settings apply to the side chain
self._size = size
# side chain properties
self._charge = charge
self._polarity = polarity
self._count_hydrogen_bond_donors = count_hydrogen_bond_donors
self._count_hydrogen_bond_acceptors = count_hydrogen_bond_acceptors
self._size = size
self._mass = mass
self._pI = pI
self._hydrogen_bond_donors = hydrogen_bond_donors
self._hydrogen_bond_acceptors = hydrogen_bond_acceptors

# one hot encoding
self._index = index

@property
Expand All @@ -70,37 +80,45 @@ def one_letter_code(self) -> str:
return self._one_letter_code

@property
def onehot(self) -> np.ndarray:
if self._index is None:
raise ValueError(
"amino acid {self._name} index is not set, thus no onehot can be computed"
)
def charge(self) -> int:
return self._charge

# assumed that there are only 20 different amino acids
a = np.zeros(20)
a[self._index] = 1.0
@property
def polarity(self) -> Polarity:
return self._polarity

return a
@property
def size(self) -> int:
return self._size

@property
def count_hydrogen_bond_donors(self) -> int:
return self._count_hydrogen_bond_donors
def mass(self) -> float:
return self._mass

@property
def count_hydrogen_bond_acceptors(self) -> int:
return self._count_hydrogen_bond_acceptors
def pI(self) -> float:
return self._pI

@property
def charge(self) -> float:
return self._charge
def hydrogen_bond_donors(self) -> int:
return self._hydrogen_bond_donors

@property
def polarity(self) -> Polarity:
return self._polarity
def hydrogen_bond_acceptors(self) -> int:
return self._hydrogen_bond_acceptors

@property
def size(self) -> int:
return self._size
def onehot(self) -> np.ndarray:
    """Return the one-hot encoding of this amino acid.

    Returns:
        np.ndarray: vector of length 20 that is 1.0 at position
            ``self._index`` and 0.0 everywhere else.

    Raises:
        ValueError: if the amino acid has no index set.
    """
    if self._index is None:
        # f-string so that the amino acid name is actually interpolated
        # into the message instead of printing the raw placeholder
        raise ValueError(
            f"amino acid {self._name} index is not set, thus no onehot can be computed"
        )
    # 20 canonical amino acids
    # selenocysteine and pyrrolysine are indexed as cysteine and lysine, respectively
    a = np.zeros(20)
    a[self._index] = 1.0

    return a

@property
def index(self) -> int:
Expand Down
Empty file.
Binary file modified tests/data/hdf5/1ATN_ppi.hdf5
Binary file not shown.

0 comments on commit 5229e9d

Please sign in to comment.