tests passing for dask backend, improved coverage
maniospas committed Jun 8, 2024
1 parent 7b71884 commit ce116ff
Showing 63 changed files with 368 additions and 319 deletions.
19 changes: 14 additions & 5 deletions .github/workflows/tests.yml
@@ -44,8 +44,17 @@ jobs:
pytest -x --cov=pygrank --cov-report=xml tests/test_autorefs.py tests/test_core.py tests/test_measures.py tests/test_filters.py tests/test_autotune.py tests/test_filter_optimization.py tests/test_gnn.py tests/test_postprocessing.py tests/test_benchmarks.py tests/test_fairness.py tests/test_preprocessor.py
- name: Generate coverage badge
run: coverage-badge -o coverage.svg -f
- name: Upload coverage badge
uses: actions/upload-artifact@v4
with:
name: coverage-badge
path: coverage.svg
- name: Commit coverage badge
if: ${{ matrix.python-version == '3.11' }}
run: |
git config --global user.name 'github-actions'
git config --global user.email 'github-actions@github.com'
git add coverage.svg
if [ -n "$(git status --porcelain)" ]; then
git commit -m 'Update coverage badge'
git push
else
echo "No changes to commit"
fi
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
25 changes: 15 additions & 10 deletions docgenerator.py
@@ -87,10 +87,10 @@ def combine_attributes(text, descriptions):
continue
if line.strip().startswith("Example"):
if hasexample:
line = "E"+line.strip()[1:]
line = "E" + line.strip()[1:]
else:
hasexample = True
ret += "\n<b class=\"parameters\">Example</b>\n"
ret += '\n<b class="parameters">Example</b>\n'
continue
if not line.startswith("* "):
if in_attributes:
@@ -101,12 +101,13 @@ def combine_attributes(text, descriptions):
in_attributes = False
if line == "Attributes: " or line == "Args: ":
in_attributes = True
line = "<br><b class=\"parameters\">Parameters</b>\n"
ret += line + "\n"
line = '<br><b class="parameters">Parameters</b>\n'
if line not in ret:
ret += line + "\n"
if in_attributes:
for desc in descriptions:
to_add = extract_attributes(desc)
if to_add not in ret: # handles case of inherited constructors
if to_add.strip() not in ret: # handles case of inherited constructors
ret += to_add

return ret
@@ -115,9 +116,13 @@ def combine_attributes(text, descriptions):
def base_description(obj, abstract):
extends = [cls.__name__ for cls in inspect.getmro(obj)][1]
class_text = (
f"\n## <span class=\"component\">{obj.__name__}</span>\n"
+ ("*This is an abstract class*" if abstract else f"<b class=\"parameters\">Extends</b><br> *{extends}*")
+ "<br><b class=\"parameters\">About</b><br>"
f'\n## <span class="component">{obj.__name__}</span>\n'
+ (
"*This is an abstract class*"
if abstract
else f'<b class="parameters">Extends</b><br> *{extends}*'
)
+ '<br><b class="parameters">About</b><br>'
+ format(obj.__doc__)[:-1]
)
for name, method in inspect.getmembers(obj):
@@ -139,8 +144,8 @@ def generate_filter_docs():
base_descriptions = dict()
abstract = dict()

base_descriptions[pygrank.algorithms.abstract.GraphFilter] = (
base_description(pygrank.algorithms.abstract.GraphFilter, True)
base_descriptions[pygrank.algorithms.abstract.GraphFilter] = base_description(
pygrank.algorithms.abstract.GraphFilter, True
)
abstract[pygrank.algorithms.abstract.GraphFilter] = True
for name, obj in inspect.getmembers(sys.modules["pygrank.algorithms"]):
7 changes: 4 additions & 3 deletions docs/advanced/backends.md
@@ -84,10 +84,11 @@ distributed works), and keyword arguments to pass to the instantiated dask client
Under development.


## <span class="component">mkl</span>
<b class="parameters">About</b><br>Running computations on the Intel® Distribution for Python.
## <span class="component">sparse_dot_mkl</span>
<b class="parameters">About</b><br>Running computations on parallelized scipy multiplications.
If you use Intel's Python distribution, this is only marginally faster than `"numpy"`.
<br>
<b class="parameters">Installation</b><br> --- <br>
<b class="parameters">Installation</b><br> `pip install sparse_dot_mkl` <br>
<b class="parameters">Links</b><br> [mkl](https://www.intel.com/content/www/us/en/developer/tools/oneapi/distribution-for-python.html)

!!! warning
34 changes: 32 additions & 2 deletions docs/advanced/fairness.md
@@ -1,4 +1,34 @@
# Fairness

Fairness-aware postprocessors always require an additional keyword argument
`sensitive=...` to be passed to their rank or transform methods.
Fairness-aware graph filters and
postprocessors are provided. Below are
two example node ranking algorithms
that make personalized PageRank fairer,
either by attaching a postprocessor or
by directly using a fair variation.

```python
import pygrank as pg

ppro = pg.PageRank(alpha=0.9, max_iters=1000, tol=1.E-9) >> pg.AdHocFairness("O")
lfpr = pg.LFPR(alpha=0.9, max_iters=1000, tol=1.E-9)
```

When calling fairness-aware methods, add a *sensitive* keyword
argument holding data that, together with a graph,
can be converted into a graph signal.

Several measures also provide fairness assessment.

```python
_, graph, groups = next(pg.load_datasets_multiple_communities(['citeseer'], max_group_number=2, directed=False))
measure = pg.pRule(groups[1])

pg.benchmark_print_line("ppro", ppro(graph, groups[0], sensitive=groups[1]) >> measure)
pg.benchmark_print_line("lfpr", lfpr(graph, groups[0], sensitive=groups[1]) >> measure)
```

!!! info
For multidimensional fairness evaluation of node
ranking algorithms, prefer using the
`fairbench` library.
26 changes: 15 additions & 11 deletions docs/advanced/graph_preprocessing.md
@@ -16,20 +16,24 @@ can have the following values:
| `"auto"` | The above-described default behavior. |
| `"col"` | Column-wise normalization. |
| `"symmetric"` | Symmetric normalization. |
| `"laplacian"` | Generates the Laplacian of the graph. | |
| `"salsa"` | The row-wise normalization employed by the salsa algorithm. |
| `"none"` | (A string with text "none".) Avoids any normalization, for example, because edge weights already hold the normalization. |
| callable | A callable applied to a `scipy` sparse adjacency matrix of the "numpy" backend (irrespective of the actually active backend). When applied, it ignores the preprocessor's *reduction* argument. |

Additionally, a *renormalization* argument may be provided
Additionally, a *transform_adjacency* method can be provided
to modify the final adjacency matrix after all computations conclude.
This method runs within the currently active backend.
By default this is a tautology `lambda x: x`.
To create smoother versions of adjacency matrices,
a *renormalization* argument may be provided
to add a multiple of the unit matrix to the adjacency matrix,
a concept called the renormalization trick.
This defaults to 0, but can help shrink the spectrum.
Furthermore, a *transform_adjacency* method can be provided
to modify the final adjacency matrix. For example,
you can use these arguments to use the Laplacian matrix
instead of the adjacency for an algorithm class:
This defaults to 0, but can help shrink the spectrum. For example,
you can create a strongly local version of the adjacency matrix like this:

```python
alg = Algorithm(transform_adjacency=lambda x:-x, renormalization=-1)
alg = Algorithm(renormalization=2)
```
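As a concrete illustration, the sketch below combines the two arguments on a specific filter; it assumes that filter constructors forward `renormalization` and `transform_adjacency` to their preprocessor in the same way as the generic `Algorithm(...)` examples above, so treat the parameter routing as an assumption rather than confirmed API.

```python
import networkx as nx
import pygrank as pg

# Minimal sketch, assuming filter constructors forward these keyword
# arguments to the graph preprocessor.
smoother = pg.PageRank(alpha=0.9, renormalization=2)  # adds 2*I to the adjacency matrix
laplacian_like = pg.PageRank(
    alpha=0.9,
    renormalization=-1,                # subtract the unit matrix ...
    transform_adjacency=lambda x: -x,  # ... and negate, mirroring the Laplacian example above
)

graph = nx.karate_club_graph()         # hypothetical example graph
ranks = smoother(graph, {0: 1})        # run the smoothed filter on one seed node
```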


@@ -51,13 +55,13 @@ normalization if it points at a different memory location.

!!! warning
Do not alter graph objects after passing them to
`rank(...)` methods of algorithms with
`assume_immutability=True` for the first time. If altering the
calls of node ranking algorithms for the first time
if you set `assume_immutability=True`. If altering the
graph is necessary midway through your code, create a copy
instance with one of *networkx*'s in-built methods and
instance, for example with one of *networkx*'s in-built methods and
edit that one.

For example, hashing the outcome of graph normalization to
Hashing the outcome of graph normalization to
speed up multiple calls to the same graph can be achieved
as per the following code:
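(The snippet below is a minimal sketch: it assumes a `pg.preprocessor(...)` helper with an `assume_immutability=True` flag and a `preprocessor=` constructor argument, so treat those exact names as assumptions rather than confirmed API.)

```python
import networkx as nx
import pygrank as pg

# Minimal sketch, assuming a pg.preprocessor(...) helper whose
# assume_immutability flag hashes and reuses the normalized adjacency matrix.
pre = pg.preprocessor(normalization="symmetric", assume_immutability=True)
algorithm = pg.PageRank(alpha=0.85, preprocessor=pre)

graph = nx.karate_club_graph()       # hypothetical example graph
ranks1 = algorithm(graph, {0: 1})    # normalization computed and cached here
ranks2 = algorithm(graph, {33: 1})   # second call reuses the cached normalization
```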

34 changes: 0 additions & 34 deletions docs/generated/graph_filters.md
@@ -31,14 +31,8 @@ from pygrank.algorithms import AbsorbingWalks
algorithm = AbsorbingWalks(1-1.E-6, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
Example (same outcome, explicit absorption rate definition):
```python
from pygrank.algorithms import AbsorbingWalks
algorithm = AbsorbingWalks(1-1.E-6, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes}, absorption={v: 1 for v in graph})
```
## <span class="component">DijkstraRank</span>
<b class="parameters">Extends</b><br> *RecursiveGraphFilter*<br><b class="parameters">About</b><br>
A ranking algorithm that assigns node ranks loosely increasing with the minimum distance from a seed.
@@ -70,7 +64,6 @@ Defines a graph filter via its hop weight parameters. The constructor initializes the graph filter.
```python
from pygrank import GenericGraphFilter
algorithm = GenericGraphFilter([0.5, 0.25, 0.125], tol=1.E-9) # tol passed to ConvergenceManager
```
## <span class="component">HeatKernel</span>
<b class="parameters">Extends</b><br> *ClosedFormGraphFilter*<br><b class="parameters">About</b><br>
Heat kernel filter. The constructor initializes filter parameters.
@@ -101,7 +94,6 @@ from pygrank.algorithms import HeatKernel
algorithm = HeatKernel(t=3, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
## <span class="component">ImpulseGraphFilter</span>
<b class="parameters">Extends</b><br> *GraphFilter*<br><b class="parameters">About</b><br>
Defines a graph filter with a specific vector of impulse response parameters. The constructor initializes the graph filter.
@@ -127,7 +119,6 @@ Defines a graph filter with a specific vector of impulse response parameters. The constructor initializes the graph filter.
```python
from pygrank import GenericGraphFilter
algorithm = ImpulseGraphFilter([0.5, 0.5, 0.5], tol=None) # tol=None runs all iterations
```
## <span class="component">LowPassRecursiveGraphFilter</span>
<b class="parameters">Extends</b><br> *GraphFilter*<br><b class="parameters">About</b><br>
Defines a low-pass graph filter with specific yet changing recursive terms. The constructor initializes the graph filter.
@@ -153,7 +144,6 @@ Defines a low-pass graph filter with specific yet changing recursive terms. The constructor initializes the graph filter.
```python
from pygrank import LowPassRecursiveGraphFilter
algorithm = LowPassRecursiveGraphFilter([0.9]*10, tol=None) # tol=None runs all iterations
```
## <span class="component">PageRank</span>
<b class="parameters">Extends</b><br> *RecursiveGraphFilter*<br><b class="parameters">About</b><br>
A Personalized PageRank power method algorithm. The constructor initializes the PageRank scheme parameters.
@@ -182,7 +172,6 @@ import pygrank as pg
algorithm = pg.PageRank(alpha=0.99, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
## <span class="component">PageRankClosed</span>
<b class="parameters">Extends</b><br> *ClosedFormGraphFilter*<br><b class="parameters">About</b><br>
PageRank closed filter. The constructor initializes the PageRank scheme parameters.
@@ -213,7 +202,6 @@ import pygrank as pg
algorithm = pg.PageRankClosed(alpha=0.99, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
## <span class="component">SymmetricAbsorbingRandomWalks</span>
<b class="parameters">Extends</b><br> *RecursiveGraphFilter*<br><b class="parameters">About</b><br>
Implementation of partial absorbing random walks for *Lambda = (1-alpha)/alpha diag(absorption vector)*. The constructor initializes the symmetric random walk strategy for appropriate parameter values.
@@ -242,14 +230,8 @@ from pygrank.algorithms import AbsorbingWalks
algorithm = AbsorbingWalks(1-1.E-6, tol=1.E-9)
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
Example (same outcome, explicit absorption rate definition):
```python
from pygrank.algorithms import AbsorbingWalks
algorithm = AbsorbingWalks(1-1.E-6, tol=1.E-9)
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes}, absorption={v: 1 for v in graph})
```
## <span class="component">GenericGraphFilter</span>
<b class="parameters">Extends</b><br> *ClosedFormGraphFilter*<br><b class="parameters">About</b><br>
Defines a graph filter via its hop weight parameters. The constructor initializes the graph filter.
@@ -278,7 +260,6 @@ Defines a graph filter via its hop weight parameters. The constructor initializes the graph filter.
```python
from pygrank import GenericGraphFilter
algorithm = GenericGraphFilter([0.5, 0.25, 0.125], tol=1.E-9) # tol passed to ConvergenceManager
```
## <span class="component">HeatKernel</span>
<b class="parameters">Extends</b><br> *ClosedFormGraphFilter*<br><b class="parameters">About</b><br>
Heat kernel filter. The constructor initializes filter parameters.
@@ -309,7 +290,6 @@ from pygrank.algorithms import HeatKernel
algorithm = HeatKernel(t=3, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
## <span class="component">PageRankClosed</span>
<b class="parameters">Extends</b><br> *ClosedFormGraphFilter*<br><b class="parameters">About</b><br>
PageRank closed filter. The constructor initializes the PageRank scheme parameters.
@@ -340,7 +320,6 @@ import pygrank as pg
algorithm = pg.PageRankClosed(alpha=0.99, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
## <span class="component">AbsorbingWalks</span>
<b class="parameters">Extends</b><br> *RecursiveGraphFilter*<br><b class="parameters">About</b><br>
Implementation of partial absorbing random walks for Lambda = (1-alpha)/alpha diag(absorption vector).
@@ -371,14 +350,8 @@ from pygrank.algorithms import AbsorbingWalks
algorithm = AbsorbingWalks(1-1.E-6, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
Example (same outcome, explicit absorption rate definition):
```python
from pygrank.algorithms import AbsorbingWalks
algorithm = AbsorbingWalks(1-1.E-6, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes}, absorption={v: 1 for v in graph})
```
## <span class="component">DijkstraRank</span>
<b class="parameters">Extends</b><br> *RecursiveGraphFilter*<br><b class="parameters">About</b><br>
A ranking algorithm that assigns node ranks loosely increasing with the minimum distance from a seed.
@@ -410,7 +383,6 @@ import pygrank as pg
algorithm = pg.PageRank(alpha=0.99, tol=1.E-9) # tol passed to the ConvergenceManager
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
## <span class="component">SymmetricAbsorbingRandomWalks</span>
<b class="parameters">Extends</b><br> *RecursiveGraphFilter*<br><b class="parameters">About</b><br>
Implementation of partial absorbing random walks for *Lambda = (1-alpha)/alpha diag(absorption vector)*. The constructor initializes the symmetric random walk strategy for appropriate parameter values.
@@ -439,11 +411,5 @@ from pygrank.algorithms import AbsorbingWalks
algorithm = AbsorbingWalks(1-1.E-6, tol=1.E-9)
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes})
```
Example (same outcome, explicit absorption rate definition):
```python
from pygrank.algorithms import AbsorbingWalks
algorithm = AbsorbingWalks(1-1.E-6, tol=1.E-9)
graph, seed_nodes = ...
ranks = algorithm(graph, {v: 1 for v in seed_nodes}, absorption={v: 1 for v in graph})
```