# NAC coloring search

In this notebook we provide utils to run benchmarks and experiment with our code.

The first section defines utility functions and the second loads or generates the benchmark data. After that, we run individual benchmarks on selected graph classes with selected algorithms. The algorithms are described in that section.

```bash
tensorboard --logdir benchmarks/logs/nac
```

In [None]:
from typing import *
from dataclasses import dataclass
from collections import defaultdict
import random
import importlib
from random import Random
from enum import Enum

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline
from matplotlib.backends import backend_agg
from matplotlib.figure import Figure
from matplotlib.ticker import MaxNLocator

import numpy as np
import pandas as pd
import networkx as nx
import os
import time
import datetime
import signal
import itertools
import base64

from tqdm import tqdm

import nac as nac
import nac.util
from benchmarks import dataset
importlib.reload(nac)
importlib.reload(nac.util)
importlib.reload(dataset)

# Default random seed.
# NOTE(review): no visible cell reads this global (functions take their own `seed` argument) — confirm it is still needed.
seed=42
# NOTE(review): no visible cell reads this flag — confirm it is still needed.
TEST=False

# Directory the benchmark result CSV files are written to and loaded from; created up front.
OUTPUT_DIR = os.path.join("benchmarks", "runs")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Preparation

In [None]:
# https://stackoverflow.com/a/75898999
from typing import Callable, TypeVar, ParamSpec

P = ParamSpec("P")
T = TypeVar("T")

def copy_doc(wrapper: Callable[P, T]):
    """An implementation of functools.wraps."""

    def decorator(func: Callable) -> Callable[P, T]:
        func.__doc__ = wrapper.__doc__
        return func

    return decorator

In [None]:
@copy_doc(plt.figure)
def figure(num: Any = 1, *args, **kwargs) -> Figure:
    """Create a figure that is independent of the global pyplot state.

    Note: the decorator replaces this text with ``plt.figure``'s docstring.
    """
    standalone = Figure(*args, **kwargs)

    def render_inline():
        # Attach an Agg figure manager just long enough to hand the figure to
        # the notebook's display machinery, then dispose of the manager again.
        manager = backend_agg.new_figure_manager_given_figure(num, standalone)
        rendered = manager.canvas.figure
        display(
            rendered,
            metadata=backend_inline._fetch_figure_metadata(rendered),
        )
        manager.destroy()

    # Replace the stock ``show`` so that ``fig.show()`` renders inline.
    standalone.show = render_inline
    return standalone

# Loading locally stored graphs

In [None]:
class Graphs:
    """Graph collections used by the benchmarks, loaded once when the class is defined.

    Every collection is materialised into a list so that it can be iterated
    more than once (for example counted first and benchmarked afterwards),
    regardless of whether the loader returns a list or a one-shot iterator.
    """

    laman = list(dataset.load_laman_graphs())
    laman_random = list(dataset.load_laman_random_graphs())
    laman_deg_3_plus = list(dataset.load_laman_degree_3_plus())
    no_3_nor_4_cycles = list(dataset.load_no_3_nor_4_cycle_graphs())
    # The two arguments are passed through unchanged; their meaning is defined
    # by ``dataset.generate_sparse_graphs``.
    sparse_graphs = list(dataset.generate_sparse_graphs(30, 40))

The cell below generates random Laman graphs and stores them as `./benchmarks/graphs-store/laman-random/laman_{n}.g6`.

In [164]:
# takes ~1h 30m on my laptop
def generate_random_laman_graphs(
    LAMAN_DIR: str = os.path.join("benchmarks", "graphs-store", "laman-random"),
) -> List[Tuple[int, List[nx.Graph]]]:
    """Generate batches of random Laman graphs and store them as graph6 files.

    For every vertex count ``n`` from 10 to 59 a batch is requested from
    ``dataset.generate_laman_graphs`` (always with seed 42) and written to
    ``LAMAN_DIR/laman_{n}.g6``, one header-less graph6 record per graph.
    Higher vertex counts get smaller batches.

    Args:
        LAMAN_DIR: Output directory; created when missing.

    Returns:
        ``(n, graphs)`` pairs in increasing order of ``n``.
    """
    os.makedirs(LAMAN_DIR, exist_ok=True)

    # (first vertex count, one past the last vertex count, graphs per vertex count)
    batch_plan = (
        (10, 20, 128),
        (20, 30, 64),
        (30, 40, 32),
        (40, 50, 16),
        (50, 60, 8),
    )
    jobs = [
        (vertex_no, batch_size)
        for low, high, batch_size in batch_plan
        for vertex_no in range(low, high)
    ]

    generated: List[Tuple[int, List[nx.Graph]]] = []
    for vertex_no, batch_size in tqdm(jobs):
        batch = dataset.generate_laman_graphs(
            nodes_l=vertex_no,
            nodes_h=vertex_no,
            count=batch_size,
            seed=42,
        )

        target = os.path.join(LAMAN_DIR, f"laman_{vertex_no}.g6")
        with open(target, "wb") as out:
            for laman_graph in batch:
                out.write(nx.graph6.to_graph6_bytes(laman_graph, header=False))

        generated.append((vertex_no, batch))
    return generated


# Flip to True to (re)generate the stored graphs.
if False:
    random_laman_graphs = generate_random_laman_graphs()

# Running benchmarks

Target columns are
- `graph` - graph6 encoded graph
- `dataset` - class of the graph: `laman`, `laman_random`, `laman_deg_3+`, `no_3_nor_4_cycles`, `sparse`
- `mode` - search mode: [`single`, `all`]
- `vertex_no` - the number of vertices of the graph
- `edge_no` - the number of edges of the graph
- `triangle_components_no` - the number of triangle components of the graph
- `monochromatic_classes_no` - the number of monochromatic classes of the graph
- `relabel` - relabel strategy
- `split` - splitting strategy
- `merging` - merging strategy
- `subgraph_size` - the initial size of subgraphs in components
- `nac_coloring_no` - the number of NAC colorings of the graph
- `nac_mean_time` - the time required to find all the colorings with the given strategy in milliseconds
- `nac_rounds` - number of rounds used to run the benchmark

In [None]:
# Column order of the benchmark table. It must list the fields of
# ``MeasurementResult`` in declaration order, because rows are built from it.
COLUMNS: List[str] = [
    # graph identification
    "graph",
    "dataset",
    # graph properties
    "vertex_no",
    "edge_no",
    "triangle_components_no",
    "monochromatic_classes_no",
    # strategy used
    "relabel",
    "split",
    "merging",
    "subgraph_size",
    # measurements
    "nac_any_finished",
    "nac_first_coloring_no",
    "nac_first_mean_time",
    "nac_first_rounds",
    "nac_first_checks",
    "nac_all_coloring_no",
    "nac_all_mean_time",
    "nac_all_rounds",
    "nac_all_checks",
]


@dataclass
class MeasurementResult:
    """One row of the benchmark table: a graph measured with one strategy."""

    # graph identification
    graph: str
    dataset: str
    # graph properties
    vertex_no: int
    edge_no: int
    triangle_components_no: int
    monochromatic_classes_no: int
    # strategy used
    relabel: str
    split: str
    merging: str
    subgraph_size: int
    # measurements; the ``first`` group covers the search for the first
    # coloring, the ``all`` group the search for all colorings
    nac_any_finished: bool
    nac_first_coloring_no: Optional[int]
    nac_first_mean_time: Optional[int]
    nac_first_rounds: Optional[int]
    nac_first_checks: Optional[int]
    nac_all_coloring_no: Optional[int]
    nac_all_mean_time: Optional[int]
    nac_all_rounds: Optional[int]
    nac_all_checks: Optional[int]

    def to_list(self) -> List:
        """Return the field values as a list ordered like ``COLUMNS``."""
        return [getattr(self, column) for column in COLUMNS]

In [None]:
class Promissing:
    """Strategy options selected for benchmarking.

    ``strategies_offline`` and ``strategies_online`` hold every
    ``(relabel, split, merging, subgraph_size)`` combination of the option
    lists below, with the subgraph size fixed to 4 and 6 respectively.
    """

    RELABELING = ["none", "random"]  # "bfs" is currently disabled
    SPLITTING = ["none", "neighbors", "neighbors_degree"]
    MERGING_OFFLINE = ["linear", "log", "score", "shared_vertices"]
    MERGING_ONLINE = ["linear", "log", "shared_vertices"]
    # NOTE(review): SIZES is not used by the products below, which hard-code 4 and 6.
    SIZES = [6]

    strategies_offline = [
        *itertools.product(RELABELING, SPLITTING, MERGING_OFFLINE, [4]),
    ]
    strategies_online = [
        *itertools.product(RELABELING, SPLITTING, MERGING_ONLINE, [6]),
    ]


print(f"Offline strategies: {len(Promissing.strategies_offline)}")
print(f"Online strategies:  {len(Promissing.strategies_online)}")

In [None]:

def graph_id(graph: nx.Graph) -> str:
    """Return a text identifier of ``graph``: base64 of its header-less graph6 bytes."""
    graph6 = nx.graph6.to_graph6_bytes(graph, header=False)
    # Surrounding whitespace (such as a line terminator) is dropped before encoding.
    return base64.standard_b64encode(graph6.strip()).decode()

def graph_from_id(id: str) -> nx.Graph:
    """Rebuild a graph from a base64-encoded graph6 identifier.

    The decoded bytes are parsed as graph6 and the resulting graph is wrapped
    in ``nac.util.NiceGraph``.
    """
    graph6 = base64.standard_b64decode(id)
    parsed = nx.graph6.from_graph6_bytes(graph6)
    return nac.util.NiceGraph(parsed)

In [None]:
def new_DataFrame(data: Optional[List[MeasurementResult]] = None) -> pd.DataFrame:
    """Build a benchmark table with the ``COLUMNS`` layout.

    Args:
        data: Measurement results to convert, one row each. ``None`` (the
            default) yields an empty table that still has all the columns.

    Returns:
        A new data frame with one row per measurement result.
    """
    # ``None`` replaces the former mutable ``[]`` default argument.
    rows = [] if data is None else [record.to_list() for record in data]
    return pd.DataFrame(rows, columns=COLUMNS)

In [None]:
_BENCH_FILE_START = "bench_res_v1"
def load_records(file_name: str | None = None, dir = OUTPUT_DIR) -> pd.DataFrame:
    """
    Loads the results from the last run or the run specified by `file_name` in the `dir` given.

    When `file_name` is None, the result file (prefix `_BENCH_FILE_START`,
    suffix ".csv") with the lexicographically greatest name in `dir` is used.
    If no such file exists, a message is printed and an empty table with the
    benchmark columns is returned.
    """
    if file_name is None:
        candidates = [
            name
            for name in os.listdir(dir)
            if name.startswith(_BENCH_FILE_START) and name.endswith(".csv")
        ]
        if not candidates:
            print("No file with results found!")
            return new_DataFrame()

        # Auto-generated names embed a "%Y-%m-%d_%H-%M-%S" timestamp, so the
        # greatest name belongs to the most recent run.
        file_name = max(candidates)
        print(f"Found file: {file_name}")

    return pd.read_csv(os.path.join(dir, file_name))

def store_results(
    df: pd.DataFrame,
    file_name: str | None = None,
    dir = OUTPUT_DIR,
) -> str:
    """
    Writes `df` as a CSV file (with header, without index) into `dir`.

    When `file_name` is None, a name made of `_BENCH_FILE_START` and the
    current local time is generated. Returns the file name that was used.
    """
    if file_name is None:
        current_time = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
        file_name = f"{_BENCH_FILE_START}_{current_time}.csv"

    df.to_csv(os.path.join(dir, file_name), header=True, index=False)
    return file_name

def update_stored_data(dfs: List[pd.DataFrame]) -> pd.DataFrame:
    """Merge new measurements into the latest stored results and save them as a new file.

    The previously stored table is loaded (and displayed), the tables in
    ``dfs`` are appended, and for every measured configuration only the most
    recent row is kept.

    Args:
        dfs: Newly measured tables; may be empty.

    Returns:
        The merged table that was written to disk.
    """
    stored = load_records()
    display(stored)

    # A single flat concat keeps the row order (old first, new last) and also
    # works when ``dfs`` is empty.
    merged = pd.concat([stored, *dfs])

    # The table holds one row per graph AND strategy, so the strategy columns
    # are part of the key. Keying on the graph alone would discard all but one
    # strategy per graph.
    merged = merged.drop_duplicates(
        subset=["graph", "dataset", "relabel", "split", "merging", "subgraph_size"],
        keep="last",
    )
    store_results(merged)
    return merged

In [None]:
def create_strategy(param: Tuple[str, str, str, int]) -> Tuple[str, str]:
    """Translate a strategy tuple into ``(relabel strategy, algorithm name)``.

    Args:
        param: ``(relabel, split, merge, subgraph size)``.

    Returns:
        The relabel strategy unchanged, together with an algorithm name of the
        form ``subgraphs-<merge>-<split>-<subgraph size>-smart``.
    """
    relabel_strategy, split_strategy, merge_strategy, subgraph_size = param
    algorithm = "subgraphs-{}-{}-{}-smart".format(
        merge_strategy, split_strategy, subgraph_size
    )
    return relabel_strategy, algorithm

In [None]:
class BenchmarkTimeoutException(Exception):
    def __init__(self, msg: str = "The benchmark timed out", *args, **kwargs):
        super().__init__(msg, *args, **kwargs)


def with_timeout[**T, R, D](function: Callable[T, R], time_limit: int | None, default: D) -> Callable[T, R|D]:
    if time_limit is None:
        return function

    def impl(*args: P.args, **kwargs: P.kwargs):
        try:
            # signals are not exact, but generally work
            def timeout_handler(signum, frame):
                raise BenchmarkTimeoutException()
            signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(time_limit)

            res = function(*args, **kwargs)

            signal.alarm(0)
            return res
        except BenchmarkTimeoutException:
            return default
    return impl

In [None]:
@dataclass
class MeasuredRecord:
    """Accumulated measurements of one search mode over several rounds."""

    # Sum of the elapsed times of all rounds. It is a float: the benchmark
    # adds up differences of clock readings.
    time_sum: float
    # Number of colorings found.
    coloring_no: int
    # Number of rounds contributing to ``time_sum``.
    rounds: int
    # Check counter reported by the search.
    checks_performed: int

    @property
    def mean_time(self) -> float:
        """Average elapsed time per round, or 0 when no round was recorded."""
        if self.rounds == 0:
            return 0
        return self.time_sum / self.rounds


@dataclass
class MeasuredData:
    """Measurements of one benchmark; a part stays ``None`` until it is recorded."""

    # Measurements of the search for the first coloring.
    first: Optional[MeasuredRecord]
    # Measurements of the search for all colorings.
    all: Optional[MeasuredRecord]

In [None]:
def nac_benchmark_core(
    graph: nx.Graph,
    rounds: int,
    first_only: bool,
    strategy: Tuple[str, str],
    time_limit: int,
    seed: int | None = 42,
) -> MeasuredData:
    """
    Runs the NAC coloring search on ``graph`` ``rounds`` times and measures it.

    Args:
        graph: The graph to search.
        rounds: Number of repetitions of the search.
        first_only: When true, each round stops after the first coloring.
        strategy: ``(relabel strategy, algorithm name)`` passed to ``nac.NAC_colorings``.
        time_limit: Seconds allowed per round; the whole run is cut off after
            ``time_limit * rounds`` seconds.
        seed: Seed of the generator that derives a fresh search seed per round.

    Returns:
        The accumulated measurements. ``first`` / ``all`` stay ``None`` when
        not a single round got that far, and the two may cover a different
        number of rounds if the run was cut off by the timeout.
    """

    result = MeasuredData(None, None)
    rand = random.Random(seed)

    def find_colorings() -> None:
        # perf_counter is monotonic and high resolution, unlike the wall clock.
        start_time = time.perf_counter()

        itr = iter(
            nac.NAC_colorings(
                graph=graph,
                algorithm=strategy[1],
                relabel_strategy=strategy[0],
                seed=rand.randint(0, 2**30),
            )
        )

        first_col = next(itr, None)
        first_time = time.perf_counter()
        found_first = 0 if first_col is None else 1

        if result.first is None:
            result.first = MeasuredRecord(0, 0, 0, 0)
        result.first = MeasuredRecord(
            time_sum=result.first.time_sum + (first_time - start_time),
            coloring_no=found_first,
            rounds=result.first.rounds + 1,
            # NOTE(review): presumably the number of is-NAC-coloring checks;
            # whether the counter is per search or cumulative is not visible here.
            checks_performed=nac.NAC_check_called()[1],
        )

        if first_only:
            return

        # Exhaust the search; the total is the first coloring (if there was
        # one) plus everything that followed it.
        remaining = sum(1 for _ in itr)
        end_time = time.perf_counter()

        if result.all is None:
            result.all = MeasuredRecord(0, 0, 0, 0)
        result.all = MeasuredRecord(
            time_sum=result.all.time_sum + (end_time - start_time),
            coloring_no=found_first + remaining,
            rounds=result.all.rounds + 1,
            checks_performed=nac.NAC_check_called()[1],
        )

    def run() -> None:
        for _ in range(rounds):
            find_colorings()

    # On timeout the rounds completed so far remain recorded in ``result``.
    with_timeout(
        run,
        time_limit=time_limit*rounds,
        default=None,
    )()

    return result


In [None]:
def create_measurement_result(
    graph: nx.Graph,
    dataset_name: str,
    trianlge_classes: int,
    monochromatic_classes: int,
    nac_first: Optional[MeasuredRecord],
    nac_all: Optional[MeasuredRecord],
    relabel_strategy: str,
    split_strategy: str,
    merge_strategy: str,
    subgraph_size: int,
) -> MeasurementResult:
    """Assemble one row of the benchmark table from raw measurements.

    A missing record (``None``) contributes zeros to its four columns. Mean
    times are multiplied by 1000 and truncated to an integer (milliseconds,
    assuming the records hold seconds).
    """

    def summarize(record: Optional[MeasuredRecord]) -> Tuple[int, int, int, int]:
        # (coloring_no, mean time * 1000, rounds, checks) of one record
        if record is None:
            return 0, 0, 0, 0
        return (
            record.coloring_no,
            int(record.mean_time*1000),
            record.rounds,
            record.checks_performed,
        )

    vertex_no = nx.number_of_nodes(graph)
    edge_no = nx.number_of_edges(graph)

    first_coloring_no, first_mean_time, first_rounds, first_checks = summarize(nac_first)
    all_coloring_no, all_mean_time, all_rounds, all_checks = summarize(nac_all)

    return MeasurementResult(
        graph=graph_id(graph),
        dataset=dataset_name,
        vertex_no=vertex_no,
        edge_no=edge_no,
        triangle_components_no=trianlge_classes,
        monochromatic_classes_no=monochromatic_classes,
        relabel=relabel_strategy,
        split=split_strategy,
        merging=merge_strategy,
        subgraph_size=subgraph_size,
        nac_any_finished=nac_first is not None or nac_all is not None,
        nac_first_coloring_no=first_coloring_no,
        nac_first_mean_time=first_mean_time,
        nac_first_rounds=first_rounds,
        nac_first_checks=first_checks,
        nac_all_coloring_no=all_coloring_no,
        nac_all_mean_time=all_mean_time,
        nac_all_rounds=all_rounds,
        nac_all_checks=all_checks,
    )

In [None]:

def measure_for_class(
    dataset_name: str,
    graphs: List[nx.Graph],
    all_max_vertex_no: int,
    rounds:int,
    graph_timeout: int,
) -> pd.DataFrame:
    """Benchmark every graph of a dataset with every applicable strategy.

    Graphs with at most ``all_max_vertex_no`` vertices are searched for all
    colorings using ``Promissing.strategies_offline``; larger graphs are only
    searched for the first coloring using ``Promissing.strategies_online``.

    Args:
        dataset_name: Human readable name; stored lower-cased with spaces
            replaced by underscores.
        graphs: The graphs to benchmark.
        all_max_vertex_no: Largest vertex count for which all colorings are searched.
        rounds: Repetitions per graph and strategy.
        graph_timeout: Time limit in seconds per round.

    Returns:
        One row per graph and strategy, sorted by the mean time columns.
        Combinations that raised an exception are reported and left out.
    """
    dataset_name = dataset_name.replace(" ", "_").lower()
    results: List[MeasurementResult] = []
    for graph in tqdm(graphs):
        all_colorings = all_max_vertex_no >= graph.number_of_nodes()
        trianlge_classes = len(nac.find_triangle_components(graph=graph, use_triangles_over_component=False)[1])
        monochromatic_classes = len(nac.find_triangle_components(graph=graph, use_triangles_over_component=True)[1])

        strategies = Promissing.strategies_offline if all_colorings else Promissing.strategies_online

        for strategy in strategies:
            relabel, split, merge, subgraph_size = strategy
            try:
                search_res = nac_benchmark_core(
                    graph,
                    rounds=rounds,
                    first_only=not all_colorings,
                    strategy=create_strategy(strategy),
                    time_limit=graph_timeout,
                )
                res = create_measurement_result(
                    graph=graph,
                    dataset_name=dataset_name,
                    trianlge_classes=trianlge_classes,
                    monochromatic_classes=monochromatic_classes,
                    nac_first=search_res.first,
                    nac_all=search_res.all,
                    relabel_strategy=relabel,
                    split_strategy=split,
                    merge_strategy=merge,
                    subgraph_size=subgraph_size,
                )
            except Exception as e:
                # Best effort: one failing combination must not abort the whole
                # run, but the report has to say what failed. str(e) alone is
                # often empty or ambiguous.
                print(
                    f"[{dataset_name}] strategy {strategy} failed on a graph "
                    f"with {graph.number_of_nodes()} vertices: {e!r}"
                )
                continue
            results.append(res)

    df = new_DataFrame(results)
    df = df.sort_values(by=["nac_all_mean_time", "nac_first_mean_time"])
    return df

In [None]:
# Deliberately raises ValueError so that "Run All" stops here and the benchmark cells below are only run by hand.
int("Fail here, don't run the remaining cells automatically")

In [None]:
# Quick trial on up to eight 13-vertex sparse graphs; the result is not stored. Disabled by default.
if False:
    df_test = measure_for_class(
        "test",
        # [g for g in Graphs.laman_deg_3_plus if g.number_of_nodes() == 8][:8],
        [g for g in Graphs.sparse_graphs if g.number_of_nodes() == 13][:8],
        all_max_vertex_no=10,
        rounds=3,
        graph_timeout=3,
    )

In [None]:
# Benchmark the Laman graphs and merge the rows into the stored results. Disabled by default.
if False:
    df_laman = measure_for_class(
        "Laman",
        Graphs.laman,
        all_max_vertex_no=15,
        rounds=3,
        graph_timeout=3,
    )
    update_stored_data([df_laman])

In [None]:
# Benchmark the random Laman graphs and merge the rows into the stored results. This cell is enabled.
if True:
    df_laman_random = measure_for_class(
        "Laman random",
        Graphs.laman_random,
        all_max_vertex_no=16,
        rounds=3,
        graph_timeout=3,
    )
    update_stored_data([df_laman_random])

In [None]:
# Benchmark the Laman graphs from the "degree 3+" collection and merge the rows into the stored results. Disabled by default.
if False:
    df_laman_deg_3_plus = measure_for_class(
        "Laman deg 3+",
        Graphs.laman_deg_3_plus,
        # Observed speed per graph by vertex count.
        # All colorings with 36 strategies, 3 rounds
        #  8 - 1s/it
        #  9 - 1s/it
        # 10 - 2s/it
        # 11 - 7s/it
        # 12 - 15s/it -> ~20 monochromatic classes
        # First coloring with 27 strategies, 3 rounds
        # 15 - 5s/it
        # 16 - 5s/it
        # 17 - 90s/it
        all_max_vertex_no=12,
        rounds=3,
        graph_timeout=3,
    )
    update_stored_data([df_laman_deg_3_plus])

In [None]:
# Show how many graphs there are per vertex count, then benchmark the graphs without 3- and 4-cycles
# and merge the rows into the stored results. Disabled by default.
if False:
    display(pd.Series([g.number_of_nodes() for g in Graphs.no_3_nor_4_cycles]).value_counts())
    df_no_3_nor_4_cycles = measure_for_class(
        "No 3 nor 4 cycles",
        Graphs.no_3_nor_4_cycles,
        # Observed speed per graph by vertex count.
        # 24 strategies
        # 10 - 5 s/it
        # 11 - 10 s/it
        # 12 - 28 s/it
        # 13 -
        all_max_vertex_no=13,
        rounds=3,
        graph_timeout=3,
    )
    update_stored_data([df_no_3_nor_4_cycles])

In [None]:
# Benchmark the generated sparse graphs and merge the rows into the stored results. Disabled by default.
if False:
    df_sparse = measure_for_class(
        "Sparse",
        Graphs.sparse_graphs,
        all_max_vertex_no=15,
        rounds=3,
        graph_timeout=3,
    )
    update_stored_data([df_sparse])

# Analytics

The base plots show the time required to find
the first/all NAC colorings depending on the number of vertices or monochromatic classes.
The plots are first drawn separately for each class of graphs and
at the end for all the classes combined.
Plots are drawn for each strategy category so that the strategies can be compared easily.
Plots show the mean, median and 1st quartile of the running times to reduce bias.

The second group of plots shows our contribution: the decrease in
the number of `is_NAC_coloring` checks called compared to
the naive approach without or with triangle/monochromatic classes.

In [None]:
# Load the most recent stored benchmark results and list the categories they contain.
df_benchmarks = load_records()
# info() prints its summary itself and returns None, so wrapping it in
# display() only added a stray "None" to the output.
df_benchmarks.info()
display(list(df_benchmarks["dataset"].unique()))
display(list(df_benchmarks["relabel"].unique()))
display(list(df_benchmarks["split"].unique()))
display(list(df_benchmarks["merging"].unique()))

In [None]:
def _group_and_plot(
    df: pd.DataFrame,
    ax: plt.Axes,
    x_column: Literal["vertex_no", "monochromatic_classes_no"],
    based_on: Literal["relabel", "split", "merging"],
    value_column: Literal["nac_first_mean_time", "nac_all_mean_time"],
    aggregation: Literal["mean", "median", "quartil"]
):
    df = df.loc[:, [x_column, based_on, value_column]]
    groupped = df.groupby([x_column, based_on])
    match aggregation:
        case "mean":
            aggregated = groupped.mean()
        case "median":
            aggregated = groupped.median()
        case "quartil":
            aggregated = groupped.quantile(.25)

    aggregated = aggregated.reorder_levels([based_on, x_column], axis=0)

    for name in aggregated.index.get_level_values(based_on).unique():
        data = aggregated.loc[name]
        ax.plot(data.index, data[value_column], label=name)

    ax.set_title(f"{x_column} {based_on} ({aggregation})")
    ax.set_yscale("log")
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.legend()

def plot_frame(
    title: str,
    df: pd.DataFrame,
) -> List[Figure]:
    """Create the timing overview figures for one benchmark table.

    One figure is produced per measured time column. Each figure has a row
    of subplots for every (x column, strategy category) pair and a column for
    every aggregation. Rows whose time value is 0 are ignored; if nothing
    remains, the figure is returned with its title only.
    """
    print(f"Plotting {df.shape[0]} records...")

    value_columns = ["nac_first_mean_time", "nac_all_mean_time"]
    x_columns = ["vertex_no", "monochromatic_classes_no"]
    # "relabel" is currently excluded from the comparison
    categories = ["split", "merging"]
    aggregations = ["mean", "median", "quartil"]

    nrows = len(x_columns) * len(categories)
    ncols = len(aggregations)

    figs = []
    for value_column in value_columns:
        fig = figure(nrows * ncols, (20, 3 * nrows), layout='constrained')
        fig.suptitle(f"{title} - time of NAC coloring search ({value_column})", fontsize=20)
        figs.append(fig)

        measured = df[df[value_column] != 0]
        if measured.shape[0] == 0:
            continue

        for row, (x_column, based_on) in enumerate(itertools.product(x_columns, categories)):
            # subplot numbers are 1-based and run row by row
            first_cell = ncols * row + 1
            axs = [fig.add_subplot(nrows, ncols, first_cell + i) for i in range(3)]
            for ax, aggregation in zip(axs, aggregations):
                _group_and_plot(measured, ax, x_column, based_on, value_column, aggregation)
    return figs

In [None]:
# A plain loop is used for the side effect; a list comprehension would echo a list of None as the cell result.
for fig in plot_frame("Laman", df_benchmarks.query("dataset == 'laman'")):
    display(fig)

In [None]:
# A plain loop is used for the side effect; a list comprehension would echo a list of None as the cell result.
for fig in plot_frame("Laman random", df_benchmarks.query("dataset == 'laman_random'")):
    display(fig)

In [None]:
# A plain loop is used for the side effect; a list comprehension would echo a list of None as the cell result.
for fig in plot_frame("Laman deg 3+", df_benchmarks.query("dataset == 'laman_deg_3+'")):
    display(fig)

In [None]:
# A plain loop is used for the side effect; a list comprehension would echo a list of None as the cell result.
for fig in plot_frame("No 3 nor 4 cycles", df_benchmarks.query("dataset == 'no_3_nor_4_cycles'")):
    display(fig)

In [None]:
# A plain loop is used for the side effect; a list comprehension would echo a list of None as the cell result.
for fig in plot_frame("Sparse", df_benchmarks.query("dataset == 'sparse'")):
    display(fig)

In [None]:
# TODO weighted average, maybe?
# display(plot_frame("General", df_benchmarks))