Skip to content

Commit

Permalink
Conversion to PyWLGK
Browse files Browse the repository at this point in the history
  • Loading branch information
Old-Shatterhand committed May 12, 2024
1 parent 1a7ed22 commit 5e0ff24
Show file tree
Hide file tree
Showing 13 changed files with 140 additions and 140 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ jobs:
shell: bash -l {0}
run: |
cd tests
pytest --cov=../pywlk/ --cov-report=xml
pytest --cov=../pywlgk/ --cov-report=xml
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v3
Expand Down
4 changes: 2 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
pywlk/dev/
pywlgk/dev/
docs/build/
*.pclprof
.idea/
pywlk/__pycache__/
pywlgk/__pycache__/
tests/__pycache__/
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,13 @@ This package is an alternative to FastWLK, GraKel, and other implementations of
By design, PyWLK is installable from PyPI and Anaconda. To install the package with `pip`, run the following command:

```bash
pip install pywlk
pip install pywlgk
```

or with `conda` (alternatively with `mamba`, by replacing `conda` with `mamba`):

```bash
conda install pywlk
conda install pywlgk
```

## Usage
Expand All @@ -23,7 +23,7 @@ PyWLK is designed to be easy to use. The following example shows how to use PyWL
two graphs.

```python
from pywlk import wlk
from pywlgk import wlk
import numpy as np

adjs = np.random.randint(0, 2, size=(2, 10, 10))
Expand Down
4 changes: 2 additions & 2 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

sys.path.insert(0, os.path.abspath("./.."))

import pywlk
import pywlgk

extensions = [
"sphinx.ext.autodoc",
Expand Down Expand Up @@ -53,7 +53,7 @@
"navigation_depth": 2,
}

rst_context = {"pywlk": pywlk}
rst_context = {"pywlgk": pywlgk}

add_module_names = False
fail_on_warning = True
2 changes: 1 addition & 1 deletion docs/examples/usage.rst
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Code Documentation
==================

.. autofunction:: pywlk.wlk
.. autofunction:: pywlgk.wlk
6 changes: 3 additions & 3 deletions docs/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,13 @@ PyWLK can be installed from PyPI using pip:

.. code-block:: bash
pip install pywlk
pip install pywlgk
or from Anaconda using conda:

.. code-block:: bash
conda install -c conda-forge pywlk
conda install -c conda-forge pywlgk
Usage
-----
Expand All @@ -27,7 +27,7 @@ After installation, PyWLK can be used as follows:

.. code-block:: python
from pywlk import wlk
from pywlgk import wlk
import numpy as np
adjs = np.random.randint(0, 2, size=(2, 10, 10))
Expand Down
2 changes: 1 addition & 1 deletion examples/Random_Graph.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
"execution_count": 1,
"outputs": [],
"source": [
"from pywlk import wlk\n",
"from pywlgk import wlk\n",
"import numpy as np\n",
"import networkx as nx\n",
"import matplotlib.pyplot as plt\n",
Expand Down
2 changes: 1 addition & 1 deletion pywlk/__init__.py → pywlgk/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from .wlk import wlk
from .wlk import wlk
122 changes: 61 additions & 61 deletions pywlk/utils.py → pywlgk/utils.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,61 @@
from typing import List, Optional, Tuple

import numpy as np


class L:
    """Hashable, orderable wrapper around a list of label values.

    Instances are used as dictionary keys, so equality and hashing are both
    derived from the wrapped ``value`` list.
    """

    def __init__(self, value: Optional[List[int]] = None):
        """Store ``value``; a falsy argument (``None`` or ``[]``) yields a fresh empty list."""
        self.value: List[int] = value or []

    def __mul__(self, other):
        """Return an empty label if ``other == 0``, otherwise a label over the same values.

        Note that the non-zero case wraps the very same list object (no copy).
        """
        if other == 0:
            return L()
        return L(self.value)

    def __add__(self, other):
        """Concatenate the values of two labels into a new label."""
        return L(self.value + other.value)

    def __radd__(self, other):
        """Support ``int + L`` (the int is appended) and ``L-like + L`` (concatenation)."""
        if isinstance(other, int):
            return L(self.value + [other])
        return L(other.value + self.value)

    def __lt__(self, other):
        """Order labels lexicographically; a strict prefix sorts first.

        Elements of different types are ordered by the string form of their
        types so that mixed-type labels never raise on comparison.
        """
        for a, b in zip(self.value, other.value):
            if type(a) is not type(b):
                return str(type(a)) < str(type(b))
            if a < b:
                return True
            if a > b:
                return False
        return len(self.value) < len(other.value)

    def __lshift__(self, other):
        """Return ``other``'s values followed by this label's values in sorted order."""
        return L(other.value + sorted(self.value))

    def __eq__(self, other):
        """Compare wrapped values; defer to Python for objects that are not ``L``."""
        if not isinstance(other, L):
            # Returning NotImplemented lets ``==`` fall back to False instead
            # of raising AttributeError on ``other.value``.
            return NotImplemented
        return self.value == other.value

    def __repr__(self):
        return f"L({self.value})"

    def __hash__(self):
        """Hash consistently with ``__eq__``.

        Hashing the tuple of values guarantees that labels comparing equal
        (e.g. ``L([1])`` and ``L([1.0])``) share a hash. The string form is
        kept as a fallback for values containing unhashable elements.
        """
        try:
            return hash(tuple(self.value))
        except TypeError:
            return hash(str(self.value))


def prep_labels(labels: List) -> Tuple[List[List[L]], int]:
    """Replace raw node labels by consecutive integer IDs wrapped in ``L``.

    IDs are handed out in order of first appearance, starting at 0, and are
    shared across all graphs.

    :param labels: one iterable of hashable node labels per graph
    :return: the encoded labels (one list of single-valued ``L`` objects per
        graph) and the highest ID assigned (``-1`` if no label was seen)
    """
    label_ids = {}
    encoded = []
    for graph_labels in labels:
        row = []
        for raw in graph_labels:
            # On first sight a label receives the next free ID, which equals
            # the number of distinct labels registered so far.
            row.append(L([label_ids.setdefault(raw, len(label_ids))]))
        encoded.append(row)
    return encoded, len(label_ids) - 1


def adj_mat2list(adj):
    """Convert an adjacency matrix into one set of neighbour indices per row.

    :param adj: adjacency matrix, given as a 2-D array or as nested sequences
    :return: list with one set per row holding the column indices whose entry
        equals 1
    """
    # Coercing each row to an array keeps ``== 1`` elementwise for plain
    # Python lists as well; for ndarray rows this is a no-op.
    return [set(np.where(np.asarray(neighbors) == 1)[0]) for neighbors in adj]
from typing import List, Optional, Tuple

import numpy as np


class L:
    """Hashable, orderable wrapper around a list of label values.

    Instances are used as dictionary keys, so equality and hashing are both
    derived from the wrapped ``value`` list.
    """

    def __init__(self, value: Optional[List[int]] = None):
        """Store ``value``; a falsy argument (``None`` or ``[]``) yields a fresh empty list."""
        self.value: List[int] = value or []

    def __mul__(self, other):
        """Return an empty label if ``other == 0``, otherwise a label over the same values.

        Note that the non-zero case wraps the very same list object (no copy).
        """
        if other == 0:
            return L()
        return L(self.value)

    def __add__(self, other):
        """Concatenate the values of two labels into a new label."""
        return L(self.value + other.value)

    def __radd__(self, other):
        """Support ``int + L`` (the int is appended) and ``L-like + L`` (concatenation)."""
        if isinstance(other, int):
            return L(self.value + [other])
        return L(other.value + self.value)

    def __lt__(self, other):
        """Order labels lexicographically; a strict prefix sorts first.

        Elements of different types are ordered by the string form of their
        types so that mixed-type labels never raise on comparison.
        """
        for a, b in zip(self.value, other.value):
            if type(a) is not type(b):
                return str(type(a)) < str(type(b))
            if a < b:
                return True
            if a > b:
                return False
        return len(self.value) < len(other.value)

    def __lshift__(self, other):
        """Return ``other``'s values followed by this label's values in sorted order."""
        return L(other.value + sorted(self.value))

    def __eq__(self, other):
        """Compare wrapped values; defer to Python for objects that are not ``L``."""
        if not isinstance(other, L):
            # Returning NotImplemented lets ``==`` fall back to False instead
            # of raising AttributeError on ``other.value``.
            return NotImplemented
        return self.value == other.value

    def __repr__(self):
        return f"L({self.value})"

    def __hash__(self):
        """Hash consistently with ``__eq__``.

        Hashing the tuple of values guarantees that labels comparing equal
        (e.g. ``L([1])`` and ``L([1.0])``) share a hash. The string form is
        kept as a fallback for values containing unhashable elements.
        """
        try:
            return hash(tuple(self.value))
        except TypeError:
            return hash(str(self.value))


def prep_labels(labels: List) -> Tuple[List[List[L]], int]:
    """Replace raw node labels by consecutive integer IDs wrapped in ``L``.

    IDs are handed out in order of first appearance, starting at 0, and are
    shared across all graphs.

    :param labels: one iterable of hashable node labels per graph
    :return: the encoded labels (one list of single-valued ``L`` objects per
        graph) and the highest ID assigned (``-1`` if no label was seen)
    """
    label_ids = {}
    encoded = []
    for graph_labels in labels:
        row = []
        for raw in graph_labels:
            # On first sight a label receives the next free ID, which equals
            # the number of distinct labels registered so far.
            row.append(L([label_ids.setdefault(raw, len(label_ids))]))
        encoded.append(row)
    return encoded, len(label_ids) - 1


def adj_mat2list(adj):
    """Convert an adjacency matrix into one set of neighbour indices per row.

    :param adj: adjacency matrix, given as a 2-D array or as nested sequences
    :return: list with one set per row holding the column indices whose entry
        equals 1
    """
    # Coercing each row to an array keeps ``== 1`` elementwise for plain
    # Python lists as well; for ndarray rows this is a no-op.
    return [set(np.where(np.asarray(neighbors) == 1)[0]) for neighbors in adj]
104 changes: 52 additions & 52 deletions pywlk/wlk.py → pywlgk/wlk.py
Original file line number Diff line number Diff line change
@@ -1,52 +1,52 @@
from typing import Callable, Optional, List

import numpy as np

from pywlk.utils import L, prep_labels, adj_mat2list


def wlk(
    adjs: List[np.ndarray],
    labels: List[np.ndarray],
    k: int = 4,
    normalize: bool = False,
    kernel_fn: Optional[Callable] = None,
):
    """
    Main function implementing the logic for the WLK algorithm.

    :param adjs: List of adjacency matrices
    :param labels: List of labels which can have any type
    :param k: number of iterations of the WLK algorithm, must be non-negative
    :param normalize: whether to normalize the kernel matrix or not
    :param kernel_fn: Kernel function to use. Default is dot product (if kernel_fn=None) as used in the original
        paper. Alternative functions can be used to compute the kernel matrix. These have to be provided as
        callables and receive the (graphs x label classes) count matrix.
    :return: A symmetric matrix storing the pairwise metric values between graphs. Depending on the kernel function
        it can be distances or similarities.
    """
    sizes = [len(graph_labels) for graph_labels in labels]
    edges = [adj_mat2list(adj) for adj in adjs]
    labels, current_id = prep_labels(labels)

    for _ in range(k):
        # Signature of a node: its own initial label followed by the sorted
        # labels its neighbours received in the most recent iteration. Each
        # iteration appends sizes[j] labels to graph j, so the negative
        # offset -sizes[j] + n addresses node n in the latest round.
        tmp_labels = [[
            L([graph_labels[i].value[0]] + sorted(graph_labels[-sizes[j] + n].value[0] for n in neighbors))
            for i, neighbors in enumerate(edges[j])
        ] for j, graph_labels in enumerate(labels)]

        # Compress every distinct signature into a fresh integer ID.
        mapping = {}
        for graph_signatures in tmp_labels:
            for signature in graph_signatures:
                if signature not in mapping:
                    current_id += 1
                    mapping[signature] = L([current_id])
        for i, graph_signatures in enumerate(tmp_labels):
            labels[i] += [mapping[signature] for signature in graph_signatures]

    # Count how often each label ID occurs per graph. Counting directly
    # avoids materialising a dense (classes x classes) one-hot table and
    # also yields a well-formed all-zero row for a graph without nodes.
    subtree_k = np.zeros((len(labels), current_id + 1), dtype=int)
    for g, graph_labels in enumerate(labels):
        for label in graph_labels:
            subtree_k[g, label.value[0]] += 1

    matrix = np.dot(subtree_k, subtree_k.T) if kernel_fn is None else kernel_fn(subtree_k)
    if not normalize:
        return matrix

    diagonal = np.diagonal(matrix)
    # Zero diagonal entries produce 0/0 or x/0; those are replaced by
    # nan_to_num below, so the accompanying warnings are only noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        normalized = np.divide(matrix, np.sqrt(np.outer(diagonal, diagonal)))
    return np.nan_to_num(normalized)
from typing import Callable, Optional, List

import numpy as np

from pywlgk.utils import L, prep_labels, adj_mat2list


def wlk(
    adjs: List[np.ndarray],
    labels: List[np.ndarray],
    k: int = 4,
    normalize: bool = False,
    kernel_fn: Optional[Callable] = None,
):
    """
    Main function implementing the logic for the WLK algorithm.

    :param adjs: List of adjacency matrices
    :param labels: List of labels which can have any type
    :param k: number of iterations of the WLK algorithm, must be non-negative
    :param normalize: whether to normalize the kernel matrix or not
    :param kernel_fn: Kernel function to use. Default is dot product (if kernel_fn=None) as used in the original
        paper. Alternative functions can be used to compute the kernel matrix. These have to be provided as
        callables and receive the (graphs x label classes) count matrix.
    :return: A symmetric matrix storing the pairwise metric values between graphs. Depending on the kernel function
        it can be distances or similarities.
    """
    sizes = [len(graph_labels) for graph_labels in labels]
    edges = [adj_mat2list(adj) for adj in adjs]
    labels, current_id = prep_labels(labels)

    for _ in range(k):
        # Signature of a node: its own initial label followed by the sorted
        # labels its neighbours received in the most recent iteration. Each
        # iteration appends sizes[j] labels to graph j, so the negative
        # offset -sizes[j] + n addresses node n in the latest round.
        tmp_labels = [[
            L([graph_labels[i].value[0]] + sorted(graph_labels[-sizes[j] + n].value[0] for n in neighbors))
            for i, neighbors in enumerate(edges[j])
        ] for j, graph_labels in enumerate(labels)]

        # Compress every distinct signature into a fresh integer ID.
        mapping = {}
        for graph_signatures in tmp_labels:
            for signature in graph_signatures:
                if signature not in mapping:
                    current_id += 1
                    mapping[signature] = L([current_id])
        for i, graph_signatures in enumerate(tmp_labels):
            labels[i] += [mapping[signature] for signature in graph_signatures]

    # Count how often each label ID occurs per graph. Counting directly
    # avoids materialising a dense (classes x classes) one-hot table and
    # also yields a well-formed all-zero row for a graph without nodes.
    subtree_k = np.zeros((len(labels), current_id + 1), dtype=int)
    for g, graph_labels in enumerate(labels):
        for label in graph_labels:
            subtree_k[g, label.value[0]] += 1

    matrix = np.dot(subtree_k, subtree_k.T) if kernel_fn is None else kernel_fn(subtree_k)
    if not normalize:
        return matrix

    diagonal = np.diagonal(matrix)
    # Zero diagonal entries produce 0/0 or x/0; those are replaced by
    # nan_to_num below, so the accompanying warnings are only noise.
    with np.errstate(divide="ignore", invalid="ignore"):
        normalized = np.divide(matrix, np.sqrt(np.outer(diagonal, diagonal)))
    return np.nan_to_num(normalized)
2 changes: 1 addition & 1 deletion recipe/meta.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
package:
name: "pywlk"
name: "pywlgk"
version: "1.0.0"

source:
Expand Down
6 changes: 3 additions & 3 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
long_description = desc_file.read()

setup(
name="PyWLK",
name="PyWLGK",
version="1.0.0",
description="Python package computing Weisfeiler-Lehman graph kernels",
long_description=long_description,
Expand All @@ -21,12 +21,12 @@
"Programming Language :: Python :: 3.12",
"Intended Audience :: Science/Research",
"Natural Language :: English",
"Topic :: Scientific/Engineering :: Computer-Science",
"Topic :: Scientific/Engineering",
],
packages=find_packages(),
include_package_data=False,
install_requires=[],
package_data={},
python_requires=">=3.8, <4.0.0",
keywords="bioinformatics, computer-science, graph-kernels, machine-learning, python, wlk, weisfeiler-lehman-kernel",
keywords="bioinformatics, computer-science, graph-kernels, machine-learning, python, wlgk, weisfeiler-lehman-graph-kernel",
)
Loading

0 comments on commit 5e0ff24

Please sign in to comment.