Conversion to PyWLGK

Old-Shatterhand · May 12, 2024 · 5e0ff24 · 5e0ff24
1 parent 1a7ed22
commit 5e0ff24
Show file tree

Hide file tree

Showing 13 changed files with 140 additions and 140 deletions.
diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -35,7 +35,7 @@ jobs:
         shell: bash -l {0}
         run: |
           cd tests
-          pytest --cov=../pywlk/ --cov-report=xml
+          pytest --cov=../pywlgk/ --cov-report=xml
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v3

diff --git a/.gitignore b/.gitignore
@@ -1,6 +1,6 @@
-pywlk/dev/
+pywlgk/dev/
 docs/build/
 *.pclprof
 .idea/
-pywlk/__pycache__/
+pywlgk/__pycache__/
 tests/__pycache__/
diff --git a/README.md b/README.md
@@ -8,13 +8,13 @@ This package is an alternative to FastWLK, GraKel, and other implementations of
 By design, PyWLK is installable from PyPI and Anaconda. To install the package with `pip`, run the following command:
 
 ```bash
-pip install pywlk
+pip install pywlgk
 ```
 
 or with `conda` (alternatively with `mamba`, by replacing `conda` with `mamba`):
 
 ```bash
-conda install pywlk
+conda install pywlgk
 ```
 
 ## Usage
@@ -23,7 +23,7 @@ PyWLK is designed to be easy to use. The following example shows how to use PyWL
 two graphs.
 
 ```python
-from pywlk import wlk
+from pywlgk import wlk
 import numpy as np
 
 adjs = np.random.randint(0, 1, size=(2, 10, 10))

diff --git a/docs/conf.py b/docs/conf.py
@@ -7,7 +7,7 @@
 
 sys.path.insert(0, os.path.abspath("./.."))
 
-import pywlk
+import pywlgk
 
 extensions = [
     "sphinx.ext.autodoc",
@@ -53,7 +53,7 @@
     "navigation_depth": 2,
 }
 
-rst_context = {"pywlk": pywlk}
+rst_context = {"pywlgk": pywlgk}
 
 add_module_names = False
 fail_on_warning = True
diff --git a/docs/examples/usage.rst b/docs/examples/usage.rst
@@ -1,4 +1,4 @@
 Code Documentation
 ==================
 
-.. autofunction:: pywlk.wlk
+.. autofunction:: pywlgk.wlk
diff --git a/docs/index.rst b/docs/index.rst
@@ -12,13 +12,13 @@ PyWLK can be installed from PyPI using pip:
 
 .. code-block:: bash
 
-    pip install pywlk
+    pip install pywlgk
 
 or from Anaconda using conda:
 
 .. code-block:: bash
 
-    conda install -c conda-forge pywlk
+    conda install -c conda-forge pywlgk
 
 Usage
 -----
@@ -27,7 +27,7 @@ After installation, PyWLK can be used as follows:
 
 .. code-block:: python
 
-    from pywlk import wlk
+    from pywlgk import wlk
     import numpy as np
 
     adjs = np.random.randint(0, 1, size=(2, 10, 10))

diff --git a/examples/Random_Graph.ipynb b/examples/Random_Graph.ipynb
@@ -16,7 +16,7 @@
    "execution_count": 1,
    "outputs": [],
    "source": [
-    "from pywlk import wlk\n",
+    "from pywlgk import wlk\n",
     "import numpy as np\n",
     "import networkx as nx\n",
     "import matplotlib.pyplot as plt\n",

diff --git a/pywlk/__init__.py → pywlgk/__init__.py b/pywlk/__init__.py → pywlgk/__init__.py
@@ -1 +1 @@
-from .wlk import wlk
+from .wlk import wlk
diff --git a/pywlk/utils.py → pywlgk/utils.py b/pywlk/utils.py → pywlgk/utils.py
@@ -1,61 +1,61 @@
-from typing import List, Optional, Tuple
-
-import numpy as np
-
-
-class L:
-    def __init__(self, value: Optional[List[int]] = None):
-        self.value: List[int] = value or []
-
-    def __mul__(self, other):
-        if other == 0:
-            return L()
-        return L(self.value)
-
-    def __add__(self, other):
-        return L(self.value + other.value)
-
-    def __radd__(self, other):
-        if isinstance(other, int):
-            return L(self.value + [other])
-        return L(other.value + self.value)
-
-    def __lt__(self, other):
-        for a, b in zip(self.value, other.value):
-            if type(a) != type(b):
-                return str(type(a)) < str(type(b))
-            if a < b:
-                return True
-            elif a > b:
-                return False
-        return len(self.value) < len(other.value)
-
-    def __lshift__(self, other):
-        return L(other.value + sorted(self.value))
-
-    def __eq__(self, other):
-        return self.value == other.value
-
-    def __repr__(self):
-        return f"L({self.value})"
-
-    def __hash__(self):
-        return hash(str(self.value))
-
-
-def prep_labels(labels: List) -> Tuple[List[List[L]], int]:
-    output = []
-    mapping = {}
-    current_id = -1
-    for i, graph_labels in enumerate(labels):
-        output.append([])
-        for l in graph_labels:
-            if l not in mapping:
-                current_id += 1
-                mapping[l] = current_id
-            output[-1].append(L([mapping[l]]))
-    return output, current_id
-
-
-def adj_mat2list(adj):
-    return [set(np.where(neighbors == 1)[0]) for neighbors in adj]
+from typing import List, Optional, Tuple
+
+import numpy as np
+
+
+class L:
+    def __init__(self, value: Optional[List[int]] = None):
+        self.value: List[int] = value or []
+
+    def __mul__(self, other):
+        if other == 0:
+            return L()
+        return L(self.value)
+
+    def __add__(self, other):
+        return L(self.value + other.value)
+
+    def __radd__(self, other):
+        if isinstance(other, int):
+            return L(self.value + [other])
+        return L(other.value + self.value)
+
+    def __lt__(self, other):
+        for a, b in zip(self.value, other.value):
+            if type(a) != type(b):
+                return str(type(a)) < str(type(b))
+            if a < b:
+                return True
+            elif a > b:
+                return False
+        return len(self.value) < len(other.value)
+
+    def __lshift__(self, other):
+        return L(other.value + sorted(self.value))
+
+    def __eq__(self, other):
+        return self.value == other.value
+
+    def __repr__(self):
+        return f"L({self.value})"
+
+    def __hash__(self):
+        return hash(str(self.value))
+
+
+def prep_labels(labels: List) -> Tuple[List[List[L]], int]:
+    output = []
+    mapping = {}
+    current_id = -1
+    for i, graph_labels in enumerate(labels):
+        output.append([])
+        for l in graph_labels:
+            if l not in mapping:
+                current_id += 1
+                mapping[l] = current_id
+            output[-1].append(L([mapping[l]]))
+    return output, current_id
+
+
+def adj_mat2list(adj):
+    return [set(np.where(neighbors == 1)[0]) for neighbors in adj]
diff --git a/pywlk/wlk.py → pywlgk/wlk.py b/pywlk/wlk.py → pywlgk/wlk.py
@@ -1,52 +1,52 @@
-from typing import Callable, Optional, List
-
-import numpy as np
-
-from pywlk.utils import L, prep_labels, adj_mat2list
-
-
-def wlk(
-        adjs: List[np.ndarray],
-        labels: List[np.ndarray],
-        k: int = 4,
-        normalize: bool = False,
-        kernel_fn: Optional[Callable] = None,
-):
-    """
-    Main function implementing the logic for the WLK algorithm.
-
-    :param adjs: List of adjacency matrices
-    :param labels: List of labels which can have any type
-    :param k: number of iterations of the WLK algorithm, must be non-negative
-    :param normalize: whether to normalize the kernel matrix or not
-    :param kernel_fn: Kernel function to use. Default is dot product (if kernel_fn=None) as used in the original paper. Alternative functions can be used to compute the kernel matrix. These have to be provided as callables.
-    :return: A symmetric matrix storing the pairwise metric values between graphs. Depending on the kernel function it can be distances or similarities.
-    """
-    sizes = [len(graph_labels) for graph_labels in labels]
-    edges = [adj_mat2list(adj) for adj in adjs]
-    labels, current_id = prep_labels(labels)
-    classes = []
-
-    for _ in range(k):
-        tmp_labels = [[
-            L([graph_labels[i].value[0]] + list(sorted(graph_labels[-sizes[j] + n].value[0] for n in neighbors)))
-            for i, neighbors in enumerate(edges[j])
-        ] for j, graph_labels in enumerate(labels)]
-        mapping = {}
-        for graph_labels in tmp_labels:
-            for l in graph_labels:
-                if l not in mapping:
-                    current_id += 1
-                    x = L([current_id])
-                    mapping[l] = x
-        for i, tmp_graph_labels in enumerate(tmp_labels):
-            labels[i] += [mapping[label] for label in tmp_graph_labels]
-
-    classes += [L([c]) for c in range(current_id + 1)]
-    tmp = np.eye(len(classes), dtype=int)
-    mapping = {val: tmp[i] for i, val in enumerate(classes)}
-    subtree_k = np.array([np.array([mapping[label] for label in graph_labels]).sum(axis=0) for graph_labels in labels])
-    matrix = np.dot(subtree_k, subtree_k.T) if not kernel_fn else kernel_fn(subtree_k)
-    if not normalize:
-        return matrix
-    return np.nan_to_num(np.divide(matrix, np.sqrt(np.outer(np.diagonal(matrix), np.diagonal(matrix)))))
+from typing import Callable, Optional, List
+
+import numpy as np
+
+from pywlgk.utils import L, prep_labels, adj_mat2list
+
+
+def wlk(
+        adjs: List[np.ndarray],
+        labels: List[np.ndarray],
+        k: int = 4,
+        normalize: bool = False,
+        kernel_fn: Optional[Callable] = None,
+):
+    """
+    Main function implementing the logic for the WLK algorithm.
+
+    :param adjs: List of adjacency matrices
+    :param labels: List of labels which can have any type
+    :param k: number of iterations of the WLK algorithm, must be non-negative
+    :param normalize: whether to normalize the kernel matrix or not
+    :param kernel_fn: Kernel function to use. Default is dot product (if kernel_fn=None) as used in the original paper. Alternative functions can be used to compute the kernel matrix. These have to be provided as callables.
+    :return: A symmetric matrix storing the pairwise metric values between graphs. Depending on the kernel function it can be distances or similarities.
+    """
+    sizes = [len(graph_labels) for graph_labels in labels]
+    edges = [adj_mat2list(adj) for adj in adjs]
+    labels, current_id = prep_labels(labels)
+    classes = []
+
+    for _ in range(k):
+        tmp_labels = [[
+            L([graph_labels[i].value[0]] + list(sorted(graph_labels[-sizes[j] + n].value[0] for n in neighbors)))
+            for i, neighbors in enumerate(edges[j])
+        ] for j, graph_labels in enumerate(labels)]
+        mapping = {}
+        for graph_labels in tmp_labels:
+            for l in graph_labels:
+                if l not in mapping:
+                    current_id += 1
+                    x = L([current_id])
+                    mapping[l] = x
+        for i, tmp_graph_labels in enumerate(tmp_labels):
+            labels[i] += [mapping[label] for label in tmp_graph_labels]
+
+    classes += [L([c]) for c in range(current_id + 1)]
+    tmp = np.eye(len(classes), dtype=int)
+    mapping = {val: tmp[i] for i, val in enumerate(classes)}
+    subtree_k = np.array([np.array([mapping[label] for label in graph_labels]).sum(axis=0) for graph_labels in labels])
+    matrix = np.dot(subtree_k, subtree_k.T) if not kernel_fn else kernel_fn(subtree_k)
+    if not normalize:
+        return matrix
+    return np.nan_to_num(np.divide(matrix, np.sqrt(np.outer(np.diagonal(matrix), np.diagonal(matrix)))))
diff --git a/recipe/meta.yaml b/recipe/meta.yaml
@@ -1,5 +1,5 @@
 package:
-  name: "pywlk"
+  name: "pywlgk"
   version: "1.0.0"
 
 source:

diff --git a/setup.py b/setup.py
@@ -4,7 +4,7 @@
     long_description = desc_file.read()
 
 setup(
-    name="PyWLK",
+    name="PyWLGK",
     version="1.0.0",
     description="Python package computing Weisfeiler-Lehman graph kernels",
     long_description=long_description,
@@ -21,12 +21,12 @@
         "Programming Language :: Python :: 3.12",
         "Intended Audience :: Science/Research",
         "Natural Language :: English",
-        "Topic :: Scientific/Engineering :: Computer-Science",
+        "Topic :: Scientific/Engineering",
     ],
     packages=find_packages(),
     include_package_data=False,
     install_requires=[],
     package_data={},
     python_requires=">=3.8, <4.0.0",
-    keywords="bioinformatics, computer-science, graph-kernels, machine-learning, python, wlk, weisfeiler-lehman-kernel",
+    keywords="bioinformatics, computer-science, graph-kernels, machine-learning, python, wlgk, weisfeiler-lehman-graph-kernel",
 )