Merge bb6f5d5 into 66184e7

DeepRank · Mar 24, 2021 · 53f243a · 53f243a
2 parents 66184e7 + bb6f5d5
commit 53f243a
Show file tree

Hide file tree

Showing 9 changed files with 776 additions and 0 deletions.
diff --git a/deeprank/features/ResidueContacts.py b/deeprank/features/ResidueContacts.py
diff --git a/deeprank/models/patch.py b/deeprank/models/patch.py
@@ -0,0 +1,7 @@
+from enum import Enum
+
+
+class PatchActionType(Enum):
+    """Kinds of action that a patch line can request for a selected atom.
+
+    The member names double as the action keywords: PatchParser in
+    deeprank/parse/patch.py compares the action word of each patch line
+    with the ``name`` of these members.
+    """
+
+    # presumably: change an atom that already exists - TODO confirm with the code consuming patch actions
+    MODIFY = 1
+    # presumably: introduce an atom that was not there before - TODO confirm
+    ADD = 2
+
diff --git a/deeprank/parse/param.py b/deeprank/parse/param.py
@@ -0,0 +1,37 @@
+import re
+import logging
+
+
+_log = logging.getLogger(__name__)
+
+class VanderwaalsParam:
+    """Holds the two Van der Waals values kept for one atom type.
+
+    Attributes:
+        epsilon: the epsilon value, stored exactly as passed in.
+        sigma: the sigma value, stored exactly as passed in.
+    """
+
+    def __init__(self, epsilon, sigma):
+        # plain value holder: no validation or conversion is applied
+        self.epsilon, self.sigma = epsilon, sigma
+
+
+class ParamParser:
+    """Parser for force field parameter files consisting of ``NONBonded`` lines.
+
+    Every data line has the form::
+
+        NONBonded <atom type> <number> <number> <number> <number>
+
+    where each number is a decimal with a fractional part. Only the first
+    two numbers (epsilon and sigma) are kept.
+    """
+
+    # group 1: atom type (1-4 upper case letters/digits), group 2: the four numbers
+    LINE_PATTERN = re.compile(r"^NONBonded\s+([A-Z0-9]{1,4})((\s+\-?[0-9]+\.[0-9]+){4})\s*$")
+
+    @staticmethod
+    def parse(file_):
+        """Read the Van der Waals parameters per atom type.
+
+        Args:
+            file_: iterable of text lines, e.g. an open text file.
+
+        Returns:
+            dict mapping the atom type (str) to a VanderwaalsParam object.
+
+        Raises:
+            ValueError: if a data line does not match LINE_PATTERN or if an
+                atom type occurs more than once.
+        """
+        result = {}
+        for line in file_:
+            # Skip blank lines and comments. Both '#' and '!' mark a comment,
+            # which keeps this parser in agreement with the patch parser.
+            if line.startswith(('#', '!')) or not line.strip():
+                continue
+
+            m = ParamParser.LINE_PATTERN.match(line)
+            if not m:
+                raise ValueError("unmatched param line: {}".format(repr(line)))
+
+            atom_type = m.group(1)
+            if atom_type in result:
+                raise ValueError("duplicate atom type: {}".format(repr(atom_type)))
+
+            # the line holds four numbers, only the first two are used here
+            number_strings = m.group(2).split()
+            epsilon = float(number_strings[0])
+            sigma = float(number_strings[1])
+
+            result[atom_type] = VanderwaalsParam(epsilon, sigma)
+        return result
diff --git a/deeprank/parse/patch.py b/deeprank/parse/patch.py
@@ -0,0 +1,59 @@
+import re
+import logging
+from enum import Enum
+
+from deeprank.models.patch import PatchActionType
+
+
+_log = logging.getLogger(__name__)
+
+
+class PatchSelection:
+    """Identifies the atom a patch action applies to.
+
+    Attributes:
+        residue_type: residue type of the selection, stored as given.
+        atom_name: atom name of the selection, stored as given.
+    """
+
+    def __init__(self, residue_type, atom_name):
+        self.residue_type, self.atom_name = residue_type, atom_name
+
+
+class PatchAction:
+    """One parsed patch action.
+
+    Attributes:
+        type: the action type, taken from the ``type_`` argument.
+        selection: the selection the action applies to.
+        kwargs: the extra key/value settings of the action.
+    """
+
+    def __init__(self, type_, selection, kwargs):
+        # the trailing underscore only avoids shadowing the builtin 'type'
+        self.type, self.selection, self.kwargs = type_, selection, kwargs
+
+
+class PatchParser:
+    """Parser for patch files that hold one atom action per line.
+
+    A data line looks like::
+
+        <residue type> <action> ATOM <atom name> KEY=VALUE KEY=VALUE ...
+    """
+
+    # KEY=VALUE where the value is made of upper case letters and digits
+    STRING_VAR_PATTERN = re.compile(r"([A-Z]+)=([A-Z0-9]+)")
+    # KEY=VALUE where the value is a decimal number with a fractional part
+    NUMBER_VAR_PATTERN = re.compile(r"([A-Z]+)=(\-?[0-9]+\.[0-9]+)")
+    # groups: residue type, action word, atom name, remaining KEY=VALUE text
+    ACTION_PATTERN = re.compile(r"^([A-Z]{3,4})\s+([A-Z]+)\s+ATOM\s+([A-Z0-9]{1,3})\s+(.*)$")
+
+    @staticmethod
+    def _parse_action_type(s):
+        """Return the PatchActionType member whose name equals ``s``.
+
+        Raises:
+            ValueError: if no member carries that name.
+        """
+        found = next((member for member in PatchActionType if member.name == s), None)
+        if found is None:
+            raise ValueError("unmatched residue action: {}".format(repr(s)))
+        return found
+
+    @staticmethod
+    def parse(file_):
+        """Convert the lines of a patch file into PatchAction objects.
+
+        Args:
+            file_: iterable of text lines, e.g. an open text file.
+
+        Returns:
+            list of PatchAction objects, in file order.
+
+        Raises:
+            ValueError: if a data line does not match ACTION_PATTERN or
+                names an unknown action.
+        """
+        actions = []
+        for line in file_:
+            # comments start with '#' or '!', blank lines carry no data
+            if line.startswith(('#', '!')) or not line.strip():
+                continue
+
+            matched = PatchParser.ACTION_PATTERN.match(line)
+            if matched is None:
+                raise ValueError("Unmatched patch action: {}".format(repr(line)))
+
+            residue_type, action_word, atom_name, variables = matched.groups()
+            action_type = PatchParser._parse_action_type(action_word)
+
+            # Strings go in first: the string pattern also catches the leading
+            # digits of a number, the number pass then overwrites such entries.
+            kwargs = {var.group(1): var.group(2)
+                      for var in PatchParser.STRING_VAR_PATTERN.finditer(variables)}
+            kwargs.update((var.group(1), float(var.group(2)))
+                          for var in PatchParser.NUMBER_VAR_PATTERN.finditer(variables))
+
+            selection = PatchSelection(residue_type, atom_name)
+            actions.append(PatchAction(action_type, selection, kwargs))
+        return actions
diff --git a/deeprank/parse/top.py b/deeprank/parse/top.py
@@ -0,0 +1,46 @@
+import re
+
+
+class TopRowObject:
+    """One parsed atom row of a top file.
+
+    Attributes:
+        residue_name: name of the residue the row belongs to.
+        atom_name: name of the atom the row describes.
+        kwargs: the key/value settings found on the row.
+    """
+
+    def __init__(self, residue_name, atom_name, kwargs):
+        self.residue_name, self.atom_name, self.kwargs = residue_name, atom_name, kwargs
+
+
+class TopParser:
+    """Parser for top files that hold one atom definition per line.
+
+    A data line looks like::
+
+        <residue> atom <atom name> key=value key=value ... end [! comment]
+    """
+
+    # key=value, where the value is either a token without whitespace and
+    # parentheses or a parenthesised expression
+    VAR_PATTERN = re.compile(r"([^\s]+)\s*=\s*([^\s\(\)]+|\(.*\))")
+    # groups: residue name, atom name, key=value text, optional trailing comment
+    LINE_PATTERN = re.compile(
+        r"^([A-Z0-9]{3})\s+atom\s+([A-Z0-9]{1,4})\s+(.+)\s+end\s*(\s+\!\s+[ _A-Za-z0-9]+)?$")
+    NUMBER_PATTERN = re.compile(r"\-?[0-9]+(\.[0-9]+)?")
+
+    @staticmethod
+    def parse(file_):
+        """Convert the lines of a top file into TopRowObject objects.
+
+        Args:
+            file_: iterable of text lines, e.g. an open text file.
+
+        Returns:
+            list of TopRowObject objects, in file order. Residue and atom
+            names are upper case, setting keys are lower case.
+
+        Raises:
+            ValueError: if a data line does not match LINE_PATTERN.
+        """
+        result = []
+        for line in file_:
+            # Blank lines and '#'/'!' comment lines carry no data. Skipping them
+            # keeps this parser in agreement with the param and patch parsers
+            # and stops a trailing empty line from failing the whole file.
+            if line.startswith(('#', '!')) or not line.strip():
+                continue
+
+            # parse the line
+            m = TopParser.LINE_PATTERN.match(line)
+            if not m:
+                raise ValueError("Unmatched top line: {}".format(repr(line)))
+
+            residue_name = m.group(1).upper()
+            atom_name = m.group(2).upper()
+
+            kwargs = {}
+            for w in TopParser.VAR_PATTERN.finditer(m.group(3)):
+                kwargs[w.group(1).lower().strip()] = TopParser._parse_value(w.group(2).strip())
+
+            result.append(TopRowObject(residue_name, atom_name, kwargs))
+
+        return result
+
+    @staticmethod
+    def _parse_value(s):
+        """Convert the text of a setting value to a float or a string.
+
+        Enclosing parentheses are removed first. Text that begins like a
+        number and is accepted by float() becomes a float, everything else
+        is returned as a string.
+        """
+        # Remove parentheses. startswith/endswith are used instead of
+        # indexing, so that the empty string left over from "()" is safe.
+        if s.startswith('(') and s.endswith(')'):
+            return TopParser._parse_value(s[1:-1])
+
+        # NUMBER_PATTERN.match only inspects the start of the text, so a
+        # value like "3HB" gets here too: fall back to the string then.
+        if TopParser.NUMBER_PATTERN.match(s):
+            try:
+                return float(s)
+            except ValueError:
+                return s
+        return s
diff --git a/test/parse/test_param.py b/test/parse/test_param.py
@@ -0,0 +1,18 @@
+import os
+import pkg_resources
+
+from nose.tools import eq_, ok_
+
+from deeprank.parse.param import ParamParser
+
+
+
+# location of the parameter file that ships inside the deeprank.features package
+_param_path = os.path.join(
+    pkg_resources.resource_filename('deeprank.features', ''),
+    "forcefield/protein-allhdg5-4_new.param")
+
+def test_parse():
+    """The packaged param file parses to a non-empty dict with float epsilons."""
+    with open(_param_path, 'rt') as param_file:
+        params = ParamParser.parse(param_file)
+
+    ok_(len(params) > 0)
+
+    # one entry suffices to check the type of the parsed numbers
+    first_param = next(iter(params.values()))
+    eq_(type(first_param.epsilon), float)
diff --git a/test/parse/test_patch.py b/test/parse/test_patch.py
@@ -0,0 +1,22 @@
+import pkg_resources
+import os
+
+from nose.tools import eq_, ok_
+
+from deeprank.parse.patch import PatchParser
+
+
+# location of the patch file that ships inside the deeprank.features package
+_patch_path = os.path.join(
+    pkg_resources.resource_filename('deeprank.features', ''),
+    'forcefield/patch.top')
+
+
+def test_parse():
+    """The packaged patch file parses to actions that all have a float charge."""
+    with open(_patch_path, 'rt') as patch_file:
+        actions = PatchParser.parse(patch_file)
+
+    ok_(len(actions) > 0)
+    for action in actions:
+        eq_(type(action.kwargs['CHARGE']), float)
+
+    # the file must contain an action on atom HT1 of the NTER residue type
+    ok_(any(action.selection.residue_type == "NTER" and
+            action.selection.atom_name == "HT1"
+            for action in actions))
diff --git a/test/parse/test_top.py b/test/parse/test_top.py
@@ -0,0 +1,21 @@
+import pkg_resources
+import os
+
+from nose.tools import eq_, ok_
+
+from deeprank.parse.top import TopParser
+
+
+# location of the top file that ships inside the deeprank.features package
+_top_path = os.path.join(
+    pkg_resources.resource_filename('deeprank.features', ''),
+    'forcefield/protein-allhdg5-4_new.top')
+
+
+def test_parse():
+    """The packaged top file parses to 705 rows with a str type and a float charge."""
+    with open(_top_path, 'rt') as top_file:
+        rows = TopParser.parse(top_file)
+
+    eq_(len(rows), 705)
+
+    for row in rows:
+        settings = row.kwargs
+        eq_(type(settings['type']), str)
+        eq_(type(settings['charge']), float)
diff --git a/test/test_residue_contacts.py b/test/test_residue_contacts.py
@@ -0,0 +1,26 @@
+import os
+import h5py
+import tempfile
+import shutil
+
+from deeprank.features.ResidueContacts import __compute_feature__
+
+# directory that holds this test module and its data files
+test_path = os.path.dirname(os.path.realpath(__file__))
+
+def test_compute_feature():
+    """Run the residue contacts feature on a test structure; it must not raise."""
+    pdb_path = os.path.join(test_path, '1AK4/atomic_features/1AK4_100w.pdb')
+
+    # the hdf5 output goes to a scratch directory that is removed afterwards
+    work_dir = tempfile.mkdtemp()
+    try:
+        hdf5_path = os.path.join(work_dir, "test.hdf5")
+        with h5py.File(hdf5_path, 'w') as hdf5_file:
+            molgrp = hdf5_file.require_group('1AK4')
+
+            # create both groups before the feature function is called
+            for group_name in ('features', 'features_raw'):
+                molgrp.require_group(group_name)
+
+            __compute_feature__(pdb_path, molgrp['features'], molgrp['features_raw'], 'A', 25)
+    finally:
+        shutil.rmtree(work_dir)