# NAC coloring search

In this notebook we provide utils to run benchmarks and experiment with our code.

The first section contains utility functions; the second section loads or generates the benchmark data. Afterwards, we run the individual benchmarks on selected graph classes with selected algorithms. The algorithms are described in that section.

```bash
tensorboard --logdir benchmarks/logs/nac
```

In [None]:
from typing import *
from dataclasses import dataclass
from collections import defaultdict
import random
from random import Random
from enum import Enum

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline
from matplotlib.backends import backend_agg
from matplotlib.figure import Figure

import numpy as np
import pandas as pd
import networkx as nx
import os
import time
import datetime
import signal
import itertools
import base64

from tqdm import tqdm

import nac as nac
import nac.util
from benchmarks import dataset as dataset

# Notebook-wide settings.
seed = 42
TEST = False

# Directory used as the default location of the stored benchmark results;
# it is created up front so later reads and writes do not fail on a missing path.
OUTPUT_DIR = os.path.join("benchmarks", "runs")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Preparation

In [None]:
# https://stackoverflow.com/a/75898999
from typing import Callable, TypeVar, ParamSpec

P = ParamSpec("P")
T = TypeVar("T")

def copy_doc(wrapper: Callable[P, T]):
    """An implementation of functools.wraps."""

    def decorator(func: Callable) -> Callable[P, T]:
        func.__doc__ = wrapper.__doc__
        return func

    return decorator

In [None]:
@copy_doc(plt.figure)
def figure(num: Any = 1, *args, **kwargs) -> Figure:
    """Create a figure that is independent of the global plt state.

    Note: at runtime this docstring is replaced by the one of ``plt.figure``
    through the ``copy_doc`` decorator.
    """
    fig = Figure(*args, **kwargs)

    def show():
        # A figure manager is created for every call and destroyed again
        # once the figure has been displayed.
        manager = backend_agg.new_figure_manager_given_figure(num, fig)
        rendered = manager.canvas.figure
        display(
            rendered,
            metadata=backend_inline._fetch_figure_metadata(rendered),
        )
        manager.destroy()

    # Replace the figure's own ``show`` with the notebook-aware variant.
    fig.show = show
    return fig

# Loading locally stored graphs

In [None]:
class Graphs:
    """
    Graph collections used by the benchmarks.

    Every attribute is evaluated once, when the class body is executed.
    """
    # The two Laman collections are materialized into lists.
    laman = list(dataset.load_laman_graphs())
    laman_deg_3_plus = list(dataset.load_laman_degree_3_plus())
    # NOTE(review): unlike the loaders above, the results below are not wrapped
    # in list() - if the helpers return one-shot iterators, each collection can
    # be iterated only once; confirm against benchmarks.dataset.
    no_3_nor_4_cycles = dataset.load_no_3_nor_4_cycle_graphs()
    # NOTE(review): the meaning of the arguments (30, 40) is defined by
    # dataset.generate_sparse_graphs - confirm there.
    sparse_graphs = dataset.generate_sparse_graphs(30, 40)

# Running benchmarks

Target columns are
- `graph` - graph6 encoded graph
- `dataset` - class of the graph, `laman`, `laman_deg_3_plus`, `no_3_nor_4_cycles`, `sparse`
- `mode` - search mode: [`single`, `all`]
- `vertex_no` - the number of vertices of the graph
- `edge_no` - the number of edges of the graph
- `triangle_components_no` - the number of triangle components of the graph
- `monochromatic_classes_no` - the number of monochromatic classes of the graph
- `relabel` - relabel strategy
- `split` - splitting strategy
- `merging` - merging strategy
- `subgraph_size` - the initial size of subgraphs in components
- `nac_coloring_no` - the number of NAC colorings of the graph
- `nac_mean_time` - the mean time required to find the colorings with the given strategy, in milliseconds
- `nac_rounds` - the number of rounds the benchmark was run

In [None]:
# Column names of the results table, in the order in which
# MeasurementResult.to_list() emits the values.
COLUMNS: List[str] = [
    # graph identification
    "graph",
    "dataset",
    "mode",
    # graph properties
    "vertex_no",
    "edge_no",
    "triangle_components_no",
    "monochromatic_classes_no",
    # strategy
    "relabel",
    "split",
    "merging",
    "subgraph_size",
    # measured values
    "nac_coloring_no",
    "nac_mean_time",
    "nac_rounds",
]

# Values of the `mode` column.
MODE_SINGLE = "single"
MODE_ALL = "all"

@dataclass
class MeasurementResult:
    """
    A single row of the benchmark results table.

    The fields are declared in the same order as the entries of `COLUMNS`.
    """

    # graph identification
    graph: str
    dataset: str
    mode: str
    # graph properties
    vertex_no: int
    edge_no: int
    triangle_components_no: int
    monochromatic_classes_no: int
    # strategy
    relabel: str
    split: str
    merging: str
    subgraph_size: int
    # measured values
    nac_coloring_no: int
    nac_mean_time: int
    nac_rounds: int

    def to_list(self) -> List:
        """Returns the field values as a list, in declaration order."""
        # __dataclass_fields__ keeps the fields in the order they are declared.
        return [getattr(self, name) for name in self.__dataclass_fields__]

In [None]:
class Promissing:
    """
    Strategy parameters that are benchmarked.

    `strategies_offline` and `strategies_online` hold every combination of
    (relabeling, splitting, merging, subgraph size); they differ only in the
    list of merging strategies used.
    """

    RELABELING = ["none", "random", "bfs"]
    SPLITTING = ["none", "neighbors", "neighbors_degree"]
    MERGING_OFFLINE = ["linear", "log", "score", "shared_vertices"]
    MERGING_ONLINE = ["linear", "log", "shared_vertices"]
    SIZES = [6]

    # The products are built directly in the class body, where the lists
    # above are still accessible by their bare names.
    strategies_offline = [
        *itertools.product(RELABELING, SPLITTING, MERGING_OFFLINE, SIZES)
    ]
    strategies_online = [
        *itertools.product(RELABELING, SPLITTING, MERGING_ONLINE, SIZES)
    ]

In [None]:

def graph_id(graph: nx.Graph) -> str:
    """Encodes the graph as base64 text of its header-less graph6 bytes."""
    graph6 = nx.graph6.to_graph6_bytes(graph, header=False).strip()
    return base64.standard_b64encode(graph6).decode()

def graph_from_id(id: str) -> nx.Graph:
    """Decodes base64 text holding graph6 bytes into a `nac.util.NiceGraph`."""
    graph6 = base64.standard_b64decode(id)
    decoded = nx.graph6.from_graph6_bytes(graph6)
    return nac.util.NiceGraph(decoded)

In [None]:
def new_DataFrame(data: List[MeasurementResult] | None = None) -> pd.DataFrame:
    """
    Creates a data frame with the `COLUMNS` columns, one row per measurement.
    An empty data frame (with the columns set) is returned when no data is given.
    """
    # None replaces the former shared mutable default `[]`
    rows = [] if data is None else [x.to_list() for x in data]
    return pd.DataFrame(rows, columns=COLUMNS)

In [None]:
_BENCH_FILE_START = "bench_res"
def load_records(file_name: str | None = None, dir = OUTPUT_DIR) -> pd.DataFrame:
    """
    Loads the results from the last run or the run specified by `file_name` in the `dir` given.

    When `file_name` is None, the result file with the lexicographically
    greatest name in `dir` is used. If there is no result file at all,
    a message is printed and an empty data frame is returned.
    """
    if file_name is None:
        candidates = [
            name
            for name in os.listdir(dir)
            if name.startswith(_BENCH_FILE_START) and name.endswith(".csv")
        ]
        if not candidates:
            print("No file with results found!")
            return new_DataFrame()

        # only the greatest name is needed, there is no reason to sort
        file_name = max(candidates)
        print(f"Found file: {file_name}")

    path = os.path.join(dir, file_name)
    return pd.read_csv(path)

def store_results(
    df: pd.DataFrame,
    file_name: str | None = None,
    dir = OUTPUT_DIR,
) -> str:
    """
    Writes the data frame as CSV (with header, without index) into `dir`.
    When no `file_name` is given, a name based on the current local time is used.
    Returns the name of the file written.
    """
    if file_name is None:
        # zero-padded timestamp, so the names sort chronologically
        stamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        file_name = f"{_BENCH_FILE_START}_{stamp}.csv"
    df.to_csv(os.path.join(dir, file_name), header=True, index=False)
    return file_name

def update_stored_data(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """
    Merges the given results into the stored ones and stores the outcome.

    The previously stored records are loaded (and displayed), the new data
    frames are appended and a new result file is written. When the same
    measurement (graph, dataset, mode and strategy) is present more than once,
    only the last one is kept, so new results replace the stored ones.
    Returns the merged data frame.
    """
    df = load_records()
    display(df)
    # a single concat also handles an empty `dfs` list
    df = pd.concat([df, *dfs])
    df = df.drop_duplicates(
        # The strategy columns are part of the key. Without them all the
        # strategies measured for a graph would collapse into a single row.
        subset=[
            "graph",
            "dataset",
            "mode",
            "relabel",
            "split",
            "merging",
            "subgraph_size",
        ],
        keep="last",
    )
    store_results(df)
    return df

In [None]:
def create_strategy(param: Tuple[str, str, str, int]) -> Tuple[str, str]:
    """
    Converts (relabel, split, merge, subgraph size) into
    the pair (relabel strategy, algorithm name).
    """
    relabel, split, merge, subgraph = param
    return relabel, f"subgraphs-{merge}-{split}-{subgraph}-smart"

In [None]:
def run_NAC_search_benchmark[T, R](
    function: Callable[[T, int], R],
    param: T,
    rounds: int,
    limit_sec:int|None = 3,
    seed: int|None = 42,
) -> Optional[Tuple[int, R]]:
    """
    Runs the given function multiple times for each parameter
    and measures the run time.
    Returns a dictionary mapping from the param set to mean runtime in miliseconds.
    """
    if seed is None:
        seed = random.randint(0, 2*32)
    assert rounds > 0

    def measure(param: T, seed: int) -> int:
        start = time.time()
        r = function(param, seed)
        end = time.time()
        return (int((end - start) * 1000), r)

    try:
        # signals are not exact, but generally work
        if (limit_sec):
            def timeout_handler(signum, frame):
                raise TimeoutError("Benchmark timeout")
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(limit_sec * rounds)

        rand = Random(seed)
        acu_mean, acu_other = measure(param, rand.randint(0, 2**32))
        for _ in range(1, rounds):
            r1, r2 = measure(param, rand.randint(0, 2**32))
            acu_mean += r1
        acu_mean  // rounds

        if (limit_sec):
            signal.alarm(0)
    except TimeoutError:
        # mean = limit_sec*1000
        return None

    return [acu_mean, acu_other]

In [None]:
def nac_benchmark_core(
    graph: nx.Graph,
    coloring_no_limit: int | None,
    rounds: int,
    strategy: Tuple[str, str],
    seed: int | None = 42,
) -> Tuple[int, int]:
    """
    Runs the benchmark of the NAC coloring search for a single graph and strategy.

    `strategy` is the pair (relabel strategy, algorithm name).
    At most `coloring_no_limit` colorings are searched for (2**30 if None).

    Returns the time reported by `run_NAC_search_benchmark`
    and the number of colorings found.
    Raises TimeoutError if the benchmark exceeds its time limit.
    """
    if coloring_no_limit is None:
        coloring_no_limit = 2**30

    def find_colorings(strategy: Tuple[str, str], seed: int) -> int:
        colorings = nac.NAC_colorings(
            graph=graph,
            algorithm=strategy[1],
            relabel_strategy=strategy[0],
            seed=seed,
        )
        # Counts the colorings, not the last index (which is one less).
        # islice stops at the limit without requesting another coloring.
        return sum(1 for _ in itertools.islice(colorings, coloring_no_limit))

    result = run_NAC_search_benchmark(
        function=find_colorings,
        param=strategy,
        rounds=rounds,
        seed=seed,
    )
    if result is None:
        # None signals a timeout, unpacking it would fail with an unrelated TypeError
        raise TimeoutError(
            f"NAC coloring search timed out (algorithm={strategy[1]!r}, relabel={strategy[0]!r})"
        )

    mean_time, coloring_no = result
    return (mean_time, coloring_no)

In [None]:
def create_measurement_result(
    graph: nx.Graph,
    dataset_name: str,
    mode: str,
    nac_mean_time: int,
    nac_coloring_no: int,
    nac_rounds: int,
    relabel_strategy: str,
    split_strategy: str,
    merge_strategy: str,
    subgraph_size: int,
) -> MeasurementResult:
    """
    Creates a MeasurementResult from the measured values and the graph.

    The vertex, edge and component counts are computed from the graph:
    `triangle_components_no` with `use_triangles_over_component=False`,
    `monochromatic_classes_no` with `use_triangles_over_component=True`.
    """
    def component_count(use_triangles_over_component: bool) -> int:
        found = nac.find_triangle_components(
            graph=graph,
            use_triangles_over_component=use_triangles_over_component,
        )
        # the item at index 1 of the result is the one that gets counted
        return len(found[1])

    vertices = nx.number_of_nodes(graph)
    edges = nx.number_of_edges(graph)
    triangle_components = component_count(False)
    monochromatic_classes = component_count(True)

    return MeasurementResult(
        graph=graph_id(graph),
        dataset=dataset_name,
        mode=mode,
        vertex_no=vertices,
        edge_no=edges,
        triangle_components_no=triangle_components,
        monochromatic_classes_no=monochromatic_classes,
        relabel=relabel_strategy,
        split=split_strategy,
        merging=merge_strategy,
        subgraph_size=subgraph_size,
        nac_coloring_no=nac_coloring_no,
        nac_mean_time=nac_mean_time,
        nac_rounds=nac_rounds,
    )

In [None]:

def measure_for_class(
    dataset_name: str,
    graphs: List[nx.Graph],
    coloring_all_range: Tuple[int, int],
    coloring_single_range: Tuple[int, int],
    rounds:int,
) -> pd.DataFrame:
    """
    Benchmarks every graph given with every strategy in both search modes.

    A graph is measured in a mode only if its number of vertices lies in
    the (inclusive) range of that mode: `coloring_all_range` for the search
    for all colorings, `coloring_single_range` for the search for a single one.

    Returns the measurements as a data frame sorted by `nac_mean_time`.
    """
    dataset_name = dataset_name.replace(" ", "_").lower()

    # (mode, strategies, vertex count range, coloring limit)
    configurations = (
        (MODE_ALL, Promissing.strategies_offline, coloring_all_range, None),
        (MODE_SINGLE, Promissing.strategies_online, coloring_single_range, 1),
    )

    results: List[MeasurementResult] = []
    for graph in tqdm(graphs):
        for mode_name, strategies, coloring_range, coloring_limit in configurations:
            low, high = coloring_range[0], coloring_range[1]
            if not low <= graph.number_of_nodes() <= high:
                continue

            for strategy in strategies:
                mean_time, coloring_no = nac_benchmark_core(
                    graph,
                    coloring_no_limit=coloring_limit,
                    rounds=rounds,
                    strategy=create_strategy(strategy),
                )
                relabel, split, merge, subgraph_size = strategy
                results.append(
                    create_measurement_result(
                        graph=graph,
                        dataset_name=dataset_name,
                        mode=mode_name,
                        nac_mean_time=mean_time,
                        nac_rounds=rounds,
                        nac_coloring_no=coloring_no,
                        relabel_strategy=relabel,
                        split_strategy=split,
                        merge_strategy=merge,
                        subgraph_size=subgraph_size,
                    )
                )

    return new_DataFrame(results).sort_values(by="nac_mean_time")

In [None]:
# Laman graphs: all colorings for 8-15 vertices, a single coloring for 8-100 vertices.
df_laman = measure_for_class(
    dataset_name="Laman",
    graphs=Graphs.laman,
    coloring_all_range=(8, 15),
    coloring_single_range=(8, 100),
    rounds=3,
)
update_stored_data([df_laman])

In [None]:
# Laman graphs of degree 3+: all colorings for 8-15 vertices, a single coloring for 8-100 vertices.
df_laman_deg_3_plus = measure_for_class(
    dataset_name="Laman deg 3+",
    graphs=Graphs.laman_deg_3_plus,
    coloring_all_range=(8, 15),
    coloring_single_range=(8, 100),
    rounds=3,
)
update_stored_data([df_laman_deg_3_plus])

In [None]:
# Graphs with no 3 nor 4 cycles: all colorings for 8-15 vertices, a single coloring for 8-100 vertices.
df_no_3_nor_4_cycles = measure_for_class(
    dataset_name="No 3 nor 4 cycles",
    graphs=Graphs.no_3_nor_4_cycles,
    coloring_all_range=(8, 15),
    coloring_single_range=(8, 100),
    rounds=3,
)
update_stored_data([df_no_3_nor_4_cycles])

In [None]:
# Sparse graphs: all colorings for 8-15 vertices, a single coloring for 8-100 vertices.
df_sparse = measure_for_class(
    dataset_name="Sparse",
    graphs=Graphs.sparse_graphs,
    coloring_all_range=(8, 15),
    coloring_single_range=(8, 100),
    rounds=3,
)
update_stored_data([df_sparse])