# NAC coloring search

In this notebook we provide utils to run benchmarks and experiment with our code.

In the first section we start with utility functions; in the second we load/generate benchmark data. Afterwards, we run individual benchmarks on selected graph classes with selected algorithms. The algorithms are described in that section.

If you are using VScode, add this option to your `.vscode/settings.json` file.
```json
{
    "jupyter.notebookFileRoot": "${workspaceFolder}"
}
```

In [None]:
from typing import *
from dataclasses import dataclass
from collections import defaultdict, deque
import random
import importlib
from random import Random
from enum import Enum

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline
from matplotlib.backends import backend_agg
from matplotlib.figure import Figure
from matplotlib.ticker import MaxNLocator

import numpy as np
import pandas as pd
import networkx as nx
import os
import time
import datetime
import signal
import itertools
import base64

from tqdm import tqdm

import nac as nac
import nac.util
from nac import MonochromaticClassType

import benchmarks
from benchmarks import dataset
from benchmarks import generators
import benchmarks.notebook_utils
from benchmarks.notebook_utils import *

# Seed value for randomized parts of the notebook.
# NOTE(review): not referenced in the visible cells — confirm where it is consumed.
seed=42
# Run flags: cells wrap their work in `if <FLAG>:` so the notebook can be
# executed top to bottom without starting every long-running job.
# NOTE(review): confirm which cells TEST and SEARCH are meant to gate.
TEST=False
# Gates the cells that run benchmarks (`if BENCHMARKS:`).
BENCHMARKS=False
# Gates the cells that draw plots (`if ANALYTICS:`).
ANALYTICS=False
SEARCH=False


In [None]:
# Reload the project modules so that source edits are picked up without
# restarting the kernel. The order of the calls is kept as written.
importlib.reload(nac)
importlib.reload(nac.util)
importlib.reload(dataset)
importlib.reload(generators)
importlib.reload(benchmarks)
importlib.reload(benchmarks.notebook_utils)

# File-name prefixes of the successive result-file versions; v2 and v3 are
# only read by the migration helpers, v4 is the one configured for output below.
_BENCH_FILE_START_V2 = "bench_res_v2"
_BENCH_FILE_START_V3 = "bench_res_v3"
_BENCH_FILE_START_V4 = "bench_res_v4"

# Directory for benchmark result files; created if it does not exist yet.
OUTPUT_DIR = os.path.join("benchmarks", "runs")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Configure the shared notebook helpers through their module-level settings.
benchmarks.notebook_utils.OUTPUT_DIR = OUTPUT_DIR
benchmarks.notebook_utils.OUTPUT_BENCH_FILE_START = _BENCH_FILE_START_V4
benchmarks.notebook_utils.OUTPUT_VERBOSE = False

# Loading locally stored graphs

In [None]:
class Graphs:
    """
    Named collections of benchmark graphs, grouped by graph class.

    Every attribute wraps its loader in a lambda handed to ``LazyList``, so the
    loader is not called while the class body executes (presumably it runs on
    first access — see ``LazyList``).
    """

    # Randomly generated Laman graphs of various sizes
    laman_random = LazyList(lambda: dataset.load_laman_random_graphs())

    # Graphs with no 3 nor 4 cycles up to 42 vertices
    no_3_nor_4_cycles = LazyList(lambda: dataset.load_no_3_nor_4_cycle_graphs())

    # Graphs generated according to a yet unpublished formula that guarantees
    # that these graphs have either none or a small number of NAC-colorings
    sparse_with_few_colorings = LazyList(lambda: dataset.load_sparse_with_few_colorings_graphs())

    # Globally rigid graphs
    globally_rigid = LazyList(lambda: dataset.load_globally_rigid_graphs())

    # Graphs gathered from other categories that have no NAC-coloring and
    # more than one triangle-connected component
    no_NAC_coloring_gathered = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_gathered())

    # Random (globally rigid) graphs that have no NAC-coloring and more than
    # 2*sqrt(n) triangle-connected components; the suffix is the argument
    # passed to the loader.
    no_NAC_coloring_generated_40 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(40))
    no_NAC_coloring_generated_50 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(50))
    no_NAC_coloring_generated_60 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(60))
    no_NAC_coloring_generated_70 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(70))
    no_NAC_coloring_generated_80 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(80))
    no_NAC_coloring_generated_90 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(90))
    no_NAC_coloring_generated_100 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(100))
    no_NAC_coloring_generated_110 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(110))
    no_NAC_coloring_generated_120 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(120))
    no_NAC_coloring_generated_130 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(130))

    laman_nauty = LazyList(lambda: dataset.load_laman_graphs())
    laman_deg_3_plus = LazyList(lambda: dataset.load_laman_degree_3_plus())
    # Sparse graphs generated in four batches; the batch size halves as the
    # size arguments grow.
    sparse_graphs = LazyList(lambda: (
        dataset.generate_sparse_graphs(30, 40, count=64) +
        dataset.generate_sparse_graphs(40, 50, count=32) +
        dataset.generate_sparse_graphs(50, 60, count=16) +
        dataset.generate_sparse_graphs(60, 70, count=8)
    ))

    @staticmethod
    def load_all_laman(vertex_no: int) -> Iterator[nx.Graph]:
        """
        Loads all the Laman graphs of the given size, pregenerated files allow the range of [5, 11].

        In case you want to use it in benchmarks, list all the graphs first.
        """
        return dataset.load_laman_all(vertices_no=vertex_no)

The cell below generates random Laman graphs and stores them as `./benchmarks/graph-store/laman-random/laman_{n}.g6`.

In [None]:
# mapping = defaultdict(list)
# for graph in Graphs.globally_rigid:
#     mapping[graph.number_of_nodes()].append(graph)
# display([(k, len(v)) for k, v in sorted(mapping.items())])
# for k, v in sorted(mapping.items()):
#     path = os.path.join(dataset.RANDOM_DIR, f"globally_rigid")
#     os.makedirs(path, exist_ok=True)
#     name = f"globally_rigid_{k}.g6"
#     path = os.path.join(path, name)
#     generators._write_graphs_to_file(path, v)


# Storing and loading benchmark results

Each row represents the performance of a given strategy on a single graph.
The difference between the first and the all variants is that
in the all variant we search for all NAC-colorings,
while in the first variant we search only for the first one.

The export CSV columns are:
- `timestamp` - date time of the test in UTC
- `graph` - base64 encoded bytes of graph6 encoded graph
- `dataset` - class of the graph, `minimally_ridig_random`, `no_3_nor_4_cycles`, `globally_rigid`, ...
- `vertex_no` - the number of vertices of the graph
- `edge_no` - the number of edges of the graph
- `triangle_components_no` - the number of triangle components of the graph
- `monochromatic_classes_no` - the number of monochromatic classes of the graph
- `relabel` - relabel strategy (relabels vertices before the main algorithm is run, here we have only `none` or `random`)
- `split` - splitting strategy
- `merging` - merging strategy
- `subgraph_size` - the target initial size of subgraphs in monochromatic components
- `used_monochromatic_classes` - if monochromatic classes were used to run the test, `False` means triangle components were used
- `nac_any_finished` - if any of the tests finished in time
- `nac_{first|all}_coloring_no` - the number of NAC-colorings of the graph, for the first variant limited to 1
- `nac_{first|all}_mean_time` - the time required to find first/all NAC-colorings in milliseconds
- `nac_{first|all}_rounds` - the number of rounds used to run the benchmarks
- `nac_{first|all}_check_cycle_mask` - the number of cycle mask checks performed
- `nac_{first|all}_check_is_NAC` - the number of `IsNACColoring` checks performed
- `nac_{first|all}_merge` - the number of merges performed
- `nac_{first|all}_merge_no_common_vertex` - the number of merges with no common vertex (these are simple to compute, but produce large no of colorings slowing down the algorithm)

In [None]:
class Promising:
    """
    Strategy options that are currently benchmarked.

    Each strategy is a tuple ``(relabel, split, merge, subgraph_size)``; the
    two ``strategies_*`` lists hold every combination of the enabled options.
    """

    # Switched off for now: "random", "bfs"
    RELABELING = ["none"]
    # Switched off for now: "cycles_match_chunks", "neighbors_degree", "beam_neighbors"
    SPLITTING = ["none", "neighbors"]
    # Switched off for now: "score", "shared_vertices", "sorted_size"
    MERGING_OFFLINE = ["linear"]
    # Switched off for now: "shared_vertices", "sorted_size"
    MERGING_ONLINE = ["linear"]
    # Other sizes that were considered: 5, 6, 7
    SIZES = [4]

    strategies_offline = [
        *itertools.product(RELABELING, SPLITTING, MERGING_OFFLINE, SIZES),
    ]
    strategies_online = [
        *itertools.product(RELABELING, SPLITTING, MERGING_ONLINE, SIZES),
    ]


print(f"Offline strategies: {len(Promising.strategies_offline)}")
print(f"Online strategies:  {len(Promising.strategies_online)}")

In [None]:
# display(COLUMNS)

### File storage management

In [None]:
def new_DataFrame(data: List[MeasurementResult] | None = None) -> pd.DataFrame:
    """
    Builds a results DataFrame with the ``COLUMNS`` layout.

    Parameters:
        data: Measurement results to convert, one row per result (via
            ``to_list()``). ``None`` (the default) yields an empty frame.
    """
    # ``None`` replaces the former shared mutable ``[]`` default.
    rows = [] if data is None else [x.to_list() for x in data]
    return pd.DataFrame(
        rows,
        columns=COLUMNS,
    )

def update_stored_data(dfs: List[pd.DataFrame] | None = None, head_loaded: bool = True) -> pd.DataFrame:
    """
    Merges new measurements into the stored records and writes them back.

    Parameters:
        dfs: Frames with new measurements to append to the stored records;
            ``None`` or an empty list only re-deduplicates the stored data.
        head_loaded: Whether to display the records as loaded, before merging.

    Returns:
        The merged, de-duplicated frame that was stored.
    """
    df = load_records()
    if head_loaded:
        display(df)
    if dfs:
        df = pd.concat((df, pd.concat(dfs)))
    # A measurement is identified by graph, dataset and the full strategy
    # configuration; when the same key occurs more than once the last row wins.
    df = df.drop_duplicates(
        subset=["graph", "dataset", "split", "relabel", "merging", "subgraph_size", "use_smart_split", "used_monochromatic_classes"],
        keep='last',
    )
    store_results(df)
    return df

def migrate_v2_to_v3(dir = OUTPUT_DIR) -> pd.DataFrame:
    """
    Converts the latest v2 results file in ``dir`` to the current column layout.

    Adds the ``use_smart_split`` and ``used_monochromatic_classes`` columns
    (both ``True`` by default) and folds the former
    ``laman_random_no_smart_split`` dataset into ``laman_random`` with
    ``use_smart_split`` set to ``False``.

    Returns:
        The migrated frame that was stored.
    """
    file_name_v2 = find_latest_record_file(_BENCH_FILE_START_V2, dir)
    path = os.path.join(dir, file_name_v2)
    df = pd.read_csv(path)
    df["use_smart_split"] = True
    df["used_monochromatic_classes"] = True
    # The flag must be cleared before the dataset name is rewritten, both
    # statements select rows by the old dataset name.
    df.loc[df["dataset"] == 'laman_random_no_smart_split', "use_smart_split"] = False
    df.loc[df["dataset"] == 'laman_random_no_smart_split', "dataset"] = 'laman_random'
    df = df[COLUMNS]
    store_results(df, None, dir)
    # The signature promises a DataFrame; previously nothing was returned.
    return df

def migrate_v3_to_v4(dir = OUTPUT_DIR) -> pd.DataFrame:
    """
    Converts the latest v3 results file in ``dir`` to the current column layout.

    Columns that did not exist in v3 are filled with placeholders: the
    timestamp is set to 1970-01-01 and the merge counters to ``-1``.

    Returns:
        The migrated frame that was stored.
    """
    file_name_v3 = find_latest_record_file(_BENCH_FILE_START_V3, dir)
    path = os.path.join(dir, file_name_v3)
    df = pd.read_csv(path)
    df["timestamp"] = datetime.datetime(1970, 1, 1)
    df["nac_first_merge"] = -1
    df["nac_first_merge_no_common_vertex"] = -1
    df["nac_all_merge"] = -1
    df["nac_all_merge_no_common_vertex"] = -1
    df = df[COLUMNS]
    store_results(df, None, dir)
    # The signature promises a DataFrame; previously nothing was returned.
    return df

### Running and recording benchmarks

In [None]:
def create_strategy(param: Tuple[str, str, str, int], use_smart_split: bool) -> Tuple[str, str]:
    """
    Translates a strategy tuple into the ``(relabel, algorithm_name)`` pair.

    The algorithm name has the form ``subgraphs-<merge>-<split>-<size>`` with a
    trailing ``-smart`` when ``use_smart_split`` is set.
    """
    relabel, split, merge, subgraph = param
    smart_suffix = "-smart" if use_smart_split else ""
    return relabel, f"subgraphs-{merge}-{split}-{subgraph}{smart_suffix}"

In [None]:
# Sentinel default for ``df_seen``: tells "argument omitted" (load the stored
# records when the function is called) apart from an explicit ``None``
# (treat nothing as already measured).
_LOAD_STORED_RECORDS: Any = object()


def measure_for_graph_class(
    dataset_name: str,
    graphs: Iterable[nx.Graph],
    all_max_vertex_no: int,
    rounds:int,
    graph_timeout: int,
    use_smart_split: bool = True,
    use_monochromatic_classes: bool = True,
    df_seen: pd.DataFrame | None = _LOAD_STORED_RECORDS,
    save_every: int | None = 5*60,
    cycles_all_max_vertices: int = 20,
    cycles_first_max_vertices: int = 42,
) -> pd.DataFrame:
    """
    Runs benchmarks for the given graph class.

    Parameters:
        dataset_name: Name of the dataset stored in the output csv
            (spaces are replaced by underscores and the name is lowercased)
        graphs: Iterable of graphs to benchmark
        all_max_vertex_no: Maximum vertex number to search for all NAC-colorings;
            for larger graphs only the first coloring is searched
        rounds: Number of rounds to run for each graph
        graph_timeout: Timeout for each graph in seconds
        use_smart_split: Whether the "-smart" variant of the subgraph algorithms is used
        use_monochromatic_classes: Whether to use monochromatic classes or triangle connected components
        df_seen: Dataframe with already measured data, so already tried graphs and strategies can be skipped.
            When omitted, the stored records are loaded at call time;
            ``None`` means that nothing is considered already measured.
        save_every: save progress every number of seconds, ``None`` saves only at the end
        cycles_all_max_vertices: the naive cycles strategy is added for graphs with fewer
            vertices than this when all colorings are searched
        cycles_first_max_vertices: the naive cycles strategy is added for graphs with fewer
            vertices than this when only the first coloring is searched

    Returns:
        Results measured by this call, sorted by ``nac_all_mean_time`` and ``nac_first_mean_time``.
    """
    dataset_name = dataset_name.replace(" ", "_").lower()
    if df_seen is _LOAD_STORED_RECORDS:
        # Loaded here rather than in the signature: a default expression is
        # evaluated only once, when the function is defined, and would miss
        # every record stored afterwards.
        df_seen = load_records()
    elif df_seen is None:
        df_seen = new_DataFrame()
    df_seen = df_seen.query(f"dataset == '{dataset_name}'")

    # ``results`` holds measurements not yet written to disk,
    # ``all_results`` everything measured by this call.
    results: List[MeasurementResult] = []
    all_results: List[MeasurementResult] = []

    last_save = time.time()

    for graph in tqdm(graphs):
        # Periodically flush the pending results so that an interrupted run
        # loses at most ``save_every`` seconds of work.
        if save_every is not None:
            now = time.time()
            if now - last_save > save_every:
                all_results.extend(results)
                update_stored_data([new_DataFrame(results)], head_loaded=False)
                results = []
                last_save = now

        vertex_no = graph.number_of_nodes()
        all_colorings = all_max_vertex_no >= vertex_no
        trianlge_classes = len(nac.find_monochromatic_classes(graph=graph, class_type=MonochromaticClassType.TRIANGLES)[1])
        monochromatic_classes = len(nac.find_monochromatic_classes(graph=graph, class_type=MonochromaticClassType.MONOCHROMATIC)[1])

        strategies = list(Promising.strategies_offline if all_colorings else Promising.strategies_online)

        # ``None`` stands for the naive cycles strategy, which is only tried
        # on graphs below the vertex limit of the current search mode.
        cycles_max_vertices = cycles_all_max_vertices if all_colorings else cycles_first_max_vertices
        if vertex_no < cycles_max_vertices:
            strategies.append(None)

        graph_id = graph_to_id(graph)
        df_graph = df_seen.query(f"graph == '{graph_id}'")

        for strategy in strategies:
            # Column values under which this strategy is recorded.
            relabel, split, merge, subgraph_size = strategy if strategy else ("none", "naive-cycles", "naive-cycles", 0)

            # skip tests that already ran
            prev_record = df_graph.query(
                f"relabel == '{relabel}'"
                + f" and split == '{split}'"
                + f" and merging == '{merge}'"
                + f" and subgraph_size == {subgraph_size}"
                + f" and use_smart_split == {use_smart_split}"
                + f" and used_monochromatic_classes == {use_monochromatic_classes}"
            )
            if len(prev_record) > 0:
                # ensures graphs are recomputed if all_max_vertex_no is increased:
                # a record is reused only when all colorings are not wanted now
                # or when it already contains an all-colorings time
                if not all_colorings or list(prev_record["nac_all_mean_time"])[-1] > 0:
                    continue

            try:
                search_res = nac_benchmark_core(
                    graph,
                    rounds=rounds,
                    first_only=not all_colorings,
                    strategy=create_strategy(strategy, use_smart_split=use_smart_split) if strategy else ("none", "cycles"),
                    use_monochromatic_classes=use_monochromatic_classes,
                    time_limit=graph_timeout,
                )

                res = create_measurement_result(
                    graph=graph,
                    dataset_name=dataset_name,
                    trianlge_classes=trianlge_classes,
                    monochromatic_classes=monochromatic_classes,
                    nac_first=search_res.first,
                    nac_all=search_res.all,
                    relabel_strategy=relabel,
                    split_strategy=split,
                    merge_strategy=merge,
                    subgraph_size=subgraph_size,
                    use_smart_split=use_smart_split,
                    used_monochromatic_classes=use_monochromatic_classes,
                )
                results.append(res)
            except Exception as e:
                # Report which strategy failed, then let the error propagate.
                print(f"Exception for strategy {strategy}: {e}")
                raise

    all_results.extend(results)
    update_stored_data([new_DataFrame(results)], head_loaded=False)

    df = new_DataFrame(all_results)
    df = df.sort_values(by=["nac_all_mean_time", "nac_first_mean_time"])
    return df

# Running benchmarks

### Testing

In [None]:
# Smoke run on a handful of small graphs, enabled through the TEST flag.
# The seen records are loaded right here so that the cell does not depend on
# variables assigned by other cells.
if TEST:
    df_test = measure_for_graph_class(
        "test",
        # [g for g in Graphs.laman if g.number_of_nodes() == 8][:8],
        [g for g in Graphs.laman_nauty if g.number_of_nodes() < 12][:32],
        # [g for g in Graphs.laman_deg_3_plus if g.number_of_nodes() == 8][:8],
        # [g for g in Graphs.sparse_graphs if g.number_of_nodes() == 13][:8],
        # Graphs.sparse_graphs,
        all_max_vertex_no=15,
        rounds=3,
        graph_timeout=3,
        df_seen=load_records(),
        save_every=None,
    )

### Laman Nauty

In [None]:
if BENCHMARKS:
    # The `laman_nauty` collection, recorded under the "laman" dataset name.
    df_laman = measure_for_graph_class(
        dataset_name="Laman",
        graphs=Graphs.laman_nauty,
        rounds=3,
        graph_timeout=3,
        all_max_vertex_no=15,
    )

### Laman Random

In [None]:
if BENCHMARKS:
    # The `laman_random` collection, recorded under "laman_random".
    df_laman_random = measure_for_graph_class(
        dataset_name="Laman random",
        graphs=Graphs.laman_random,
        rounds=2,
        graph_timeout=3,
        all_max_vertex_no=18,
    )

### Laman deg 3+

In [None]:
if BENCHMARKS:
    # Observed speed, used to pick `all_max_vertex_no`:
    #   All with 36 strategies, 3 rounds
    #      8 - 1s/it
    #      9 - 1s/it
    #     10 - 2s/it
    #     11 - 7s/it
    #     12 - 15s/it -> ~20 mon. classes
    #   First coloring with 27 strategies, 3 rounds
    #     15 - 5s/it
    #     16 - 5s/it
    #     17 - 90s/it
    df_laman_deg_3_plus = measure_for_graph_class(
        dataset_name="Laman deg 3+",
        graphs=Graphs.laman_deg_3_plus,
        rounds=3,
        graph_timeout=3,
        all_max_vertex_no=12,
    )

### No 3 nor 4 cycles

In [None]:
if BENCHMARKS:
    # Show how many graphs there are per vertex count before measuring.
    display(
        pd.Series(
            [graph.number_of_nodes() for graph in Graphs.no_3_nor_4_cycles]
        ).value_counts()
    )
    # Observed speed with 24 strategies:
    #   10 - 5 s/it
    #   11 - 10 s/it
    #   12 - 28 s/it
    #   13 -
    # `all_max_vertex_no=0` restricts the run to the first-coloring search.
    df_no_3_nor_4_cycles = measure_for_graph_class(
        dataset_name="No 3 nor 4 cycles",
        graphs=Graphs.no_3_nor_4_cycles,
        rounds=2,
        graph_timeout=3,
        all_max_vertex_no=0,
    )

### Line graphs of no 3 nor 4 cycles

In [None]:
if BENCHMARKS:
    # Line graphs of the source graphs whose maximum degree is at least 3,
    # with the nodes relabeled to consecutive integers.
    line_graphs = [
        nx.convert_node_labels_to_integers(nx.line_graph(source))
        for source in Graphs.no_3_nor_4_cycles
        if max(degree for _, degree in source.degree) >= 3
    ]

    df_line_graphs_of_no_3_nor_4_cycles = measure_for_graph_class(
        dataset_name="Line graph of no 3 nor 4 cycles",
        graphs=line_graphs,
        rounds=3,
        graph_timeout=3,
        all_max_vertex_no=13,
    )

### Sparse

In [None]:
if BENCHMARKS:
    # The generated `sparse_graphs` collection, recorded under "sparse".
    df_sparse = measure_for_graph_class(
        dataset_name="Sparse",
        graphs=Graphs.sparse_graphs,
        rounds=3,
        graph_timeout=3,
        all_max_vertex_no=15,
    )

### Few coloring (formula)

In [None]:
if BENCHMARKS:
    # Freshly generated graphs; `df_seen=None` means no earlier record is
    # consulted, so nothing is skipped.
    df_few_colorings = measure_for_graph_class(
        dataset_name="few_colorings",
        graphs=dataset.generate_NAC_critical_graphs(30, 50, seed=None),
        rounds=3,
        graph_timeout=3,
        all_max_vertex_no=15,
        df_seen=None,
    )

In [None]:
if BENCHMARKS:
    # Stored few-coloring graphs; `all_max_vertex_no=0` restricts the run to
    # the first-coloring search.
    measure_for_graph_class(
        dataset_name="few_colorings",
        graphs=Graphs.sparse_with_few_colorings,
        rounds=2,
        graph_timeout=3,
        all_max_vertex_no=0,
    )

### Globally rigid

In [None]:
if BENCHMARKS:
    # Freshly generated globally rigid graphs (generator arguments 30, 50);
    # `df_seen=None` means no earlier record is consulted.
    measure_for_graph_class(
        dataset_name="globally_rigid",
        graphs=dataset.generate_globally_rigid_graphs(30, 50, seed=None),
        rounds=3,
        graph_timeout=3,
        all_max_vertex_no=15,
        df_seen=None,
    )

In [None]:
if BENCHMARKS:
    # Freshly generated globally rigid graphs (generator arguments 10, 17);
    # `df_seen=None` means no earlier record is consulted.
    measure_for_graph_class(
        dataset_name="globally_rigid",
        graphs=dataset.generate_globally_rigid_graphs(10, 17, seed=None),
        rounds=3,
        graph_timeout=3,
        all_max_vertex_no=20,
        df_seen=None,
    )

In [None]:
if BENCHMARKS:
    # Re-measure the globally rigid graphs that already appear in the stored
    # records, decoding each distinct graph id back into a graph.
    df_benchmarks = load_records()
    globary_rigid_dataset = [
        graph_from_id(graph_id)
        for graph_id in df_benchmarks.query("dataset == 'globally_rigid'")['graph'].unique()
    ]

    measure_for_graph_class(
        dataset_name="globally_rigid",
        graphs=globary_rigid_dataset,
        rounds=3,
        graph_timeout=3,
        all_max_vertex_no=15,
    )

In [None]:
if BENCHMARKS:
    # Stored globally rigid graphs; the naive cycles strategy is allowed for
    # first-coloring searches on graphs with fewer than 100 vertices.
    measure_for_graph_class(
        dataset_name="globally_rigid",
        graphs=Graphs.globally_rigid,
        rounds=2,
        graph_timeout=5,
        all_max_vertex_no=17,
        cycles_first_max_vertices=100,
    )

### Triangle components

In [None]:
if BENCHMARKS:
    df_benchmarks = load_records()
    # Distinct (graph, dataset) pairs of finished runs that found no
    # NAC-coloring on graphs with more than one triangle component,
    # grouped by the dataset they were recorded under.
    groupped = (
        df_benchmarks
        .query("nac_any_finished == True and nac_first_coloring_no == 0 and triangle_components_no > 1")
        [['graph', 'dataset']]
        .drop_duplicates()
        .groupby('dataset')
    )

    for dataset_name in groupped.groups:
        group = groupped.get_group(dataset_name)

        print(dataset_name, len(group))
        measure_for_graph_class(
            dataset_name=dataset_name,
            graphs=[graph_from_id(graph_id) for graph_id in group["graph"]],
            rounds=1,
            graph_timeout=40,
            all_max_vertex_no=1,
            df_seen=df_benchmarks,
            # This is the difference to the earlier runs: triangle components
            # instead of monochromatic classes, and no smart split.
            use_monochromatic_classes=False,
            use_smart_split=False,
        )

### No NAC coloring

In [None]:
if BENCHMARKS or True:
    # Generated no-NAC-coloring collections for sizes 40, 50, ..., 110.
    # Sizes 120 and 130 also exist on `Graphs` but are left out for now.
    for name, data in {
        f"no_NAC_coloring_generated_{size}": getattr(Graphs, f"no_NAC_coloring_generated_{size}")
        for size in range(40, 120, 10)
    }.items():
        # Only the non-smart split is measured; use [False, True] to compare both.
        for smart_split in [False]:
            print(name, smart_split)
            measure_for_graph_class(
                dataset_name=name,
                graphs=data[:500],
                rounds=2,
                graph_timeout=15,
                all_max_vertex_no=0,
                cycles_first_max_vertices=0,
                # Most of the graphs have a single monochromatic class only -> it makes no sense for benchmarking
                use_monochromatic_classes=False,
                use_smart_split=smart_split,
            )

### Gather graphs with no NAC-coloring

In [None]:
def export_graph_class(
    dataset_name: str,
    name: str,
):
    """
    Writes the distinct graphs of one dataset to ``graphs_store/random/<name>.g6``.

    Only graphs from records with ``nac_any_finished == True`` are exported.
    The known dataset names are displayed first and the number of written
    graphs is printed at the end.
    """
    records = load_records()
    display(records["dataset"].unique())
    finished = records.query(f"nac_any_finished == True and dataset == '{dataset_name}'")
    graph_ids = finished['graph'].drop_duplicates()

    target_dir = "graphs_store/random"
    os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, f"{name}.g6"), "wb") as out:
        for graph_id in graph_ids:
            # Stored ids are decoded to a graph and re-encoded as header-less graph6.
            out.write(nx.graph6.to_graph6_bytes(graph_from_id(graph_id), header=False))
    print(len(graph_ids))

def export_sparse_graphs_with_few_colorings():
    """
    Generates up to 2000 NAC-critical graphs and writes them to
    ``graphs_store/random/test.g6`` as header-less graph6.

    The number of written graphs is printed at the end.
    """
    generated = dataset.generate_NAC_critical_graphs(30, 60, seed=None)
    # Pairing with a range caps the number of graphs taken from the generator.
    graphs = [graph for graph, _ in zip(generated, range(2000))]

    target_dir = "graphs_store/random"
    os.makedirs(target_dir, exist_ok=True)
    with open(os.path.join(target_dir, "test.g6"), "wb") as out:
        for graph in graphs:
            out.write(nx.graph6.to_graph6_bytes(graph, header=False))
    print(len(graphs))

def export_no_NAC_coloring():
    """
    Writes the distinct graphs without a NAC-coloring to
    ``graphs_store/extracted/no_nac_coloring.g6`` as header-less graph6.

    A graph qualifies when a finished record reports zero first colorings and
    more than one triangle component. The datasets the graphs come from and
    the number of written graphs are printed.
    """
    df_benchmarks = load_records()
    df = df_benchmarks.query("nac_any_finished == True and nac_first_coloring_no == 0 and triangle_components_no > 1")
    # Single quotes inside the f-string: reusing the enclosing quote character
    # is a syntax error before Python 3.12.
    print(f"Export from groups: {df['dataset'].unique()}")
    graphs = df['graph'].drop_duplicates()

    output_dir = "graphs_store/extracted"
    os.makedirs(output_dir, exist_ok=True)
    name = "no_nac_coloring"
    path = os.path.join(output_dir, f"{name}.g6")
    lines = 0
    with open(path, "wb") as f:
        for graph_id in graphs:
            lines += 1
            # Stored ids are decoded to a graph and re-encoded as graph6.
            graph = graph_from_id(graph_id)
            f.write(nx.graph6.to_graph6_bytes(graph, header=False))
    print(lines)

# export_no_NAC_coloring()

# Analytics

Base graphs show the time required to find
a first/all NAC coloring based on vertex no./monochromatic classes no.
First graphs are separated for each class of graphs and
in the end for all the classes combined.
Graphs are drawn for each strategy category to compare them easily.
Graphs show mean, median and 3rd quartile values of running times to lower bias.

The second group of graphs shows our contribution of decreasing
the number of `is_NAC_coloring` checks called compared to
the naive approach without or with triangle/monochromatic classes.

In [None]:
df_analytics = load_records()

print(
    "Graphs with no NAC-coloring and more monochromatic classes",
    df_analytics.query("nac_any_finished == True and nac_first_coloring_no == 0 and monochromatic_classes_no > 1")['graph'].nunique(),
)


# Drop test runs and strategies that are excluded from the analysis.
df_analytics = df_analytics.query("dataset != 'test'")
df_analytics = df_analytics.query("split != 'kernighan_lin' and split != 'cuts'")
df_analytics = df_analytics.query("merging != 'log' and merging != 'score' and merging != 'promising_cycles'")

# Combined labels used as grouping keys in the plots.
df_analytics = df_analytics.assign(split_merging=lambda x: (x["split"] + " & " + x["merging"]).str.replace("naive-cycles & naive-cycles", "naive cycles"))
df_analytics = df_analytics.assign(split_merging_smart=lambda x: x["split_merging"] + " & " + x["use_smart_split"].astype(str))

# One row per graph: True when every record of that graph finished.
graphs_where_all_strategies_finished = df_analytics[["graph", "nac_any_finished"]].groupby("graph").all()
# Count the True rows; the index alone holds every graph, so counting it
# would always report "all graphs finished". Single quotes inside the
# f-string keep the cell valid before Python 3.12.
print(f"{int(graphs_where_all_strategies_finished['nac_any_finished'].sum())}/{df_analytics['graph'].nunique()} graphs finished on all strategies.")
df_analytics = df_analytics.query("nac_any_finished == True")

# The no-NAC subset is taken before the monochromatic class filter below.
df_analytics_no_nac = df_analytics.query("nac_first_coloring_no == 0 and triangle_components_no > 1 and used_monochromatic_classes == False")
df_analytics = df_analytics.query("monochromatic_classes_no > 1")

# display(df_analytics.info())
print("Records:", df_analytics.shape[0], "graphs:", df_analytics["graph"].nunique())
display(df_analytics.columns)
display(list(df_analytics["dataset"].unique()))
display(list(df_analytics["relabel"].unique()))
display(list(df_analytics_no_nac["split"].unique()))
display(list(df_analytics["merging"].unique()))

# make sure this spelling mistake is forever fixed
assert "globaly_rigid" not in df_analytics["dataset"].unique()

In [None]:
def _group_and_plot(
    df: pd.DataFrame,
    axs: List[plt.Axes],
    x_column: Literal["vertex_no", "monochromatic_classes_no"],
    based_on: Literal["relabel", "split", "merging"],
    value_columns: List[Literal["nac_first_mean_time", "nac_all_mean_time"]],
):
    """
    Plots aggregated values of ``value_columns`` against ``x_column``, one line
    per distinct value of ``based_on``.

    Parameters:
        df: Records to plot; only the columns named by the other arguments are used
        axs: Axes to draw on, paired in order with the aggregations
            mean, median and 3rd quartile (0.75 quantile)
        x_column: Column shown on the x axis
        based_on: Column whose distinct values form the plotted lines
        value_columns: Columns with the plotted values; the first one decides the y label
    """
    df = df.loc[:, [x_column, based_on, *value_columns]]
    groupped = df.groupby([x_column, based_on])
    # Evaluated lazily, only the aggregations that get an axis are computed.
    aggregators = {
        "mean": groupped.mean,
        "median": groupped.median,
        "3rd quartile": lambda: groupped.quantile(.75),
    }

    for ax, (aggregation, aggregate) in zip(axs, aggregators.items()):
        # Put the grouping column first so each line can be selected with .loc
        aggregated = aggregate().reorder_levels([based_on, x_column], axis=0)

        for name in aggregated.index.get_level_values(based_on).unique():
            data = aggregated.loc[name]
            for value_column in value_columns:
                # Group keys are not always strings (e.g. use_smart_split,
                # subgraph_size), so convert before building the label.
                title = str(name)
                if len(value_columns) > 1:
                    title = f"{title},{value_column}"
                ax.plot(data.index, data[value_column], label=title)

        ax.set_title(f"{x_column} {based_on} ({aggregation})")
        if "time" in value_columns[0]:
            ax.set_ylabel("Time [ms]")
        if "check" in value_columns[0]:
            ax.set_ylabel("#Check calls")
        ax.set_yscale("log")
        ax.xaxis.set_major_locator(MaxNLocator(integer=True))
        ax.legend()

def plot_frame(
    title: str,
    df: pd.DataFrame,
    ops_value_columns_sets = [
        [ "nac_first_mean_time", ],
        [ "nac_first_check_cycle_mask", ],
        [ "nac_all_mean_time", ],
        [ "nac_all_check_cycle_mask", ],
    ],
    ops_x_column = ["vertex_no", "monochromatic_classes_no",],
    ops_based_on = [
        # "relabel",
        # "split",
        # "merging",
        "split_merging",
        "use_smart_split",
        "subgraph_size",
    ],
    ops_aggregation = ["mean", "median", "3rd quartile",]
) -> List[Figure]:
    """
    Creates one figure per set of value columns, with a grid of subplots.

    Each grid row is one combination of an x column and a grouping column,
    each grid column is one aggregation.

    Parameters:
        title: Prefix of every figure title
        df: Records to plot
        ops_value_columns_sets: Sets of value columns, one figure per set; rows with
            a zero in any column of the set are left out, and a set with no
            remaining rows produces no figure
        ops_x_column: Columns used for the x axis
        ops_based_on: Columns whose values form the plotted lines
        ops_aggregation: Determines the number of subplot columns.
            NOTE(review): the aggregations themselves are fixed inside
            ``_group_and_plot`` (mean, median, 3rd quartile), so only a prefix
            of that list is meaningful here.

    The list defaults are only read, never modified.

    Returns:
        The created figures.
    """
    print(f"Plotting {df.shape[0]} records...")
    figs = []

    nrows = len(ops_x_column) * len(ops_based_on)
    ncols = len(ops_aggregation)

    for value_columns in ops_value_columns_sets:
        local_df = df[(df[value_columns] != 0).all(axis=1)]
        if local_df.shape[0] == 0:
            continue

        # NOTE(review): `figure` is not defined in the visible cells, presumably
        # it comes from the star import of benchmarks.notebook_utils.
        fig = figure(nrows * ncols, (20, 6 * nrows), layout='constrained')
        fig.suptitle(f"{title} - time of NAC coloring search ({value_columns})", fontsize=20)
        figs.append(fig)

        for row, (x_column, based_on) in enumerate(itertools.product(ops_x_column, ops_based_on)):
            # One axis per grid column; the number of axes follows the grid
            # width instead of a hard-coded 3.
            axs = [
                fig.add_subplot(nrows, ncols, ncols * row + col + 1)
                for col in range(ncols)
            ]
            _group_and_plot(local_df, axs, x_column, based_on, value_columns)
    return figs

# [display(fig) for fig in plot_frame("Laman", df_analytics.query("dataset == 'laman'"))]

In [None]:
if ANALYTICS and False:
    # Plain loop: the figures are displayed for their side effect only.
    for _fig in plot_frame("Laman", df_analytics.query("dataset == 'laman'")):
        display(_fig)

In [None]:
if ANALYTICS:
    # Plain loop: the figures are displayed for their side effect only.
    for _fig in plot_frame("Laman random", df_analytics.query("dataset == 'laman_random'")):
        display(_fig)

In [None]:
if ANALYTICS and False:
    # Plain loop: the figures are displayed for their side effect only.
    for _fig in plot_frame("Laman deg 3+", df_analytics.query("dataset == 'laman_deg_3+'")):
        display(_fig)

In [None]:
if ANALYTICS:
    # Plain loop: the figures are displayed for their side effect only.
    for _fig in plot_frame("No 3 nor 4 cycles", df_analytics.query("dataset == 'no_3_nor_4_cycles'")):
        display(_fig)

In [None]:
if ANALYTICS and False:
    # Plain loop: the figures are displayed for their side effect only.
    for _fig in plot_frame("Line graphs of no 3 nor 4 cycles", df_analytics.query("dataset == 'line_graph_of_no_3_nor_4_cycles'")):
        display(_fig)

In [None]:
if ANALYTICS and False:
    # Plain loop: the figures are displayed for their side effect only.
    for _fig in plot_frame("Sparse", df_analytics.query("dataset == 'sparse'")):
        display(_fig)

In [None]:
if ANALYTICS and False:
    # Plain loop: the figures are displayed for their side effect only.
    for _fig in plot_frame("Few colorings - None", df_analytics.query("dataset == 'few_colorings'")):
        display(_fig)

In [None]:
if ANALYTICS:
    # Plain loop: the figures are displayed for their side effect only.
    for _fig in plot_frame("Globally rigid", df_analytics.query("dataset == 'globally_rigid'")):
        display(_fig)

In [None]:
if ANALYTICS:
    # The no-NAC records are plotted against triangle components instead of
    # monochromatic classes. Plain loop: display is a side effect only.
    for _fig in plot_frame(
        "No NAC-coloring gathered, ▵-components",
        df_analytics_no_nac,
        ops_x_column = ["vertex_no", "triangle_components_no",],
    ):
        display(_fig)

In [None]:
# Show which merging and split strategies occur in the no-NAC records.
display(df_analytics_no_nac["merging"].unique())
display(df_analytics_no_nac["split"].unique())

In [None]:
if ANALYTICS or True:
    # Leave out the strategies that are not part of this comparison.
    base = df_analytics_no_nac.query("merging!='naive-cycles' and merging!='min_max' and merging!='sorted_bits' and split!='neighbors_iterative' and split!='beam_neighbors'")
    # One frame per generated dataset size (40, 50, ..., 110).
    base_40, base_50, base_60, base_70, base_80, base_90, base_100, base_110 = (
        base.query(f"dataset == 'no_nac_coloring_generated_{size}'")
        for size in range(40, 120, 10)
    )
    df = pd.concat([base_40, base_50, base_60, base_70, base_80, base_90, base_100, base_110], ignore_index=True)
    # NOTE(review): the title mentions only size 40 although `df` combines
    # sizes 40-110 — confirm the intended title.
    # Further filters that were tried on `df`:
    #   merging=='shared_vertices', split=='cycles_match_chunks'
    for _fig in plot_frame(
        "No NAC-coloring generated 40, ▵-components",
        df,
        ops_x_column = ["vertex_no", "triangle_components_no",],
    ):
        display(_fig)

In [None]:
def _plot_is_NAC_coloring_calls_groups(
    title: str,
    df: pd.DataFrame,
    ax: plt.Axes,
    x_column: Literal["vertex_no", "monochromatic_classes_no"],
    value_columns: List[Literal["nac_first_mean_time", "nac_all_mean_time"]],
    aggregation: Literal["mean", "median", "3rd quartile"]
):
    df = df.loc[:, [x_column, *value_columns]]
    groupped = df.groupby([x_column])
    match aggregation:
        case "mean":
            aggregated = groupped.mean()
        case "median":
            aggregated = groupped.median()
        case "3rd quartile":
            aggregated = groupped.quantile(.75)

    # display(aggregated)
    aggregated.plot(ax=ax)
    ax.set_title(f"{title} {x_column} ({aggregation})")
    ax.set_yscale("log")
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.legend()

def plot_is_NAC_coloring_calls(
    df: pd.DataFrame,
) -> List[Figure]:
    """
    Plot the number of `is_NAC_coloring` checks next to exponential baselines.

    Records with `nac_all_coloring_no == 0` are dropped. For the remaining
    records several derived columns are computed (exponential baselines and
    ratios against the number of found colorings), and every enabled group of
    columns is plotted for each x column and each aggregation.

    Parameters
    ----------
    df:
        Benchmark records; must contain the columns listed in
        `related_columns` below.

    Returns
    -------
    A list holding the single figure that was created.
    """
    figs = []

    df = df.query("nac_all_coloring_no != 0")
    print(f"Plotting {df.shape[0]} records...")

    related_columns = ["vertex_no", "edge_no", "triangle_components_no", "monochromatic_classes_no", "nac_all_coloring_no", "nac_all_check_is_NAC", "nac_all_check_cycle_mask"]
    # Explicit copy: the derived columns below are added to our own frame
    # and never to (a view of) the caller's data.
    df = df.loc[:, related_columns].copy()
    # this does not help our algorithm to stand out, but the graphs can be drawn more easily

    # A float base is used on purpose: with int64 columns `2 ** k` silently
    # wraps around once k reaches 63, which corrupts the baselines.
    df["exp_edge_no"]                = 2.0 ** (df["edge_no"] - 1)
    df["exp_triangle_component_no"]  = 2.0 ** (df["triangle_components_no"] - 1)
    df["exp_monochromatic_class_no"] = 2.0 ** (df["monochromatic_classes_no"] - 1)

    df["scaled_edge_no"]                  = df["edge_no"]                  / df["nac_all_coloring_no"]
    df["scaled_triangle_component_no"]    = df["triangle_components_no"]   / df["nac_all_coloring_no"]
    df["scaled_monochromatic_class_no"]   = df["monochromatic_classes_no"] / df["nac_all_coloring_no"]
    df["scaled_nac_all_check_cycle_mask"] = df["nac_all_check_cycle_mask"] / df["nac_all_coloring_no"]

    df["inv_edge_no"]                  = df["nac_all_coloring_no"] / df["edge_no"]
    df["inv_triangle_component_no"]    = df["nac_all_coloring_no"] / df["triangle_components_no"]
    df["inv_monochromatic_class_no"]   = df["nac_all_coloring_no"] / df["monochromatic_classes_no"]
    df["inv_nac_all_check_is_NAC"]     = df["nac_all_coloring_no"] / df["nac_all_check_is_NAC"]
    df["inv_nac_all_check_cycle_mask"] = df["nac_all_coloring_no"] / df["nac_all_check_cycle_mask"]

    df["new_edge_no"]                  = df["edge_no"]                  / df["exp_monochromatic_class_no"]
    df["new_triangle_component_no"]    = df["triangle_components_no"]   / df["exp_monochromatic_class_no"]
    df["new_monochromatic_class_no"]   = df["monochromatic_classes_no"] / df["exp_monochromatic_class_no"]
    df["new_nac_all_check_cycle_mask"] = df["nac_all_check_cycle_mask"] / df["exp_monochromatic_class_no"]

    ops_x_column = ["vertex_no", "monochromatic_classes_no",]
    # Every group carries its own title, so disabling a group (by commenting
    # it out) can no longer shift the titles of the groups that follow it.
    ops_value_groups = [
        ("Base: #is_NAC_coloring",
         ["exp_edge_no", "exp_triangle_component_no", "exp_monochromatic_class_no", "nac_all_check_cycle_mask", "nac_all_check_is_NAC", "nac_all_coloring_no"]),
        # ("Scaled: #is_NAC_coloring() calls/#NAC(G)",
        #  ["scaled_edge_no", "scaled_triangle_component_no", "scaled_monochromatic_class_no", "scaled_nac_all_check_cycle_mask"]),
        ("Inverse: #NAC(G)/#is_NAC_coloring() calls",
         ["inv_edge_no", "inv_triangle_component_no", "inv_monochromatic_class_no", "inv_nac_all_check_cycle_mask", "inv_nac_all_check_is_NAC"]),
        # ("Count: metric / monochromatic classes no.",
        #  ["new_edge_no", "new_triangle_component_no", "new_monochromatic_class_no", "new_nac_all_check_cycle_mask"]),
    ]
    ops_aggregation = ["mean", "median", "3rd quartile",]

    # One row per (x column, value group) pair, one column per aggregation.
    nrows = len(ops_x_column) * len(ops_value_groups)
    ncols = len(ops_aggregation)
    fig = figure(nrows * ncols, (20, 4 * nrows), layout='constrained')
    fig.suptitle("Reduction of is_NAC_coloring checks against the naive algorithm", fontsize=20)
    figs.append(fig)

    row = 0
    for x_column in ops_x_column:
        for title, value_columns in ops_value_groups:
            for col, aggregation in enumerate(ops_aggregation):
                # Subplot indices are 1-based and run row by row.
                ax = fig.add_subplot(nrows, ncols, ncols * row + col + 1)
                _plot_is_NAC_coloring_calls_groups(title, df, ax, x_column, value_columns, aggregation)
            row += 1

    return figs

In [None]:
if ANALYTICS:
    # Records whose split strategy is 'naive-cycles' are left out of the plot.
    _without_naive_cycles = df_analytics.query("split != 'naive-cycles'")
    for _fig in plot_is_NAC_coloring_calls(_without_naive_cycles):
        display(_fig)

# Comparison with naive approach

In this section of benchmarks we run our algorithm on all the Laman graphs of specified size.
The goal is to show the performance improvement over the previous SOTA - naive approach.
For clarity and simplicity we use only a single strategy - `neighbors_degree` with `linear` merging.

In [None]:
def find_colorings_on_all_graphs(
    dataset_name: str,
    graphs: Callable[[], Iterable[nx.Graph]],
    vertex_no: int,
    algorithm: str = "subgraphs-linear-neighbors_degree-4-smart",
    DIR: str = os.path.join("benchmarks", "results", "iteration"),
    time_log: str | None =  None
):
    """
    Count the NAC-colorings of every graph and store a histogram of the counts.

    Parameters
    ----------
    dataset_name:
        Used only as a prefix of the name of the resulting CSV file.
    graphs:
        Factory returning the graphs to process. It is called before the
        timer starts, so whatever it does eagerly is not measured.
    vertex_no:
        Number of vertices of the graphs; used for logging and file naming.
    algorithm:
        Name of the algorithm passed to `nac.NAC_colorings`.
    DIR:
        Output directory (created if missing).
    time_log:
        CSV file a `vertex_no,algorithm,milliseconds` line is appended to;
        defaults to `log.csv` inside `DIR`.

    The histogram (colorings count -> number of graphs) is written to
    `<DIR>/<dataset_name>_<vertex_no>_<algorithm>.csv`.
    """
    os.makedirs(DIR, exist_ok=True)
    time_log = time_log or os.path.join(DIR, "log.csv")

    stats = defaultdict(int)
    rand = random.Random(42)

    print(f"Running {algorithm} on graphs with {vertex_no} vertices")
    graphs = graphs()
    # perf_counter is monotonic and meant for measuring intervals.
    start = time.perf_counter()
    for graph in tqdm(graphs):
        iterable = nac.NAC_colorings(
            graph,
            algorithm=algorithm,
            relabel_strategy="none",
            use_decompositions=False,
            use_has_coloring_check=False,
            seed=rand.randint(0, 2**32 - 1),
        )
        # Exhaust the iterator without storing anything: `zip` advances the
        # counter once per yielded coloring and `deque(maxlen=0)` discards
        # the pairs, so afterwards `next(counter)` equals the number of items.
        counter = itertools.count()
        deque(zip(iterable, counter), maxlen=0)
        # NOTE(review): halved presumably because every coloring is yielded
        # together with its colour-swapped twin - confirm in `nac`.
        coloring_no = next(counter) // 2
        stats[coloring_no] += 1
    elapsed = time.perf_counter() - start

    print(f"The operation took {elapsed:.3f}s")
    with open(time_log, "a") as f:
        print(f"{vertex_no},{algorithm},{int(1000*elapsed)}", file=f)

    # A list of pairs (unlike an empty numpy array) also yields a valid,
    # empty two-column frame when no graph was processed.
    df = pd.DataFrame(list(stats.items()), columns=["coloring_cnt", "graph_cnt"])
    df.sort_values(by="coloring_cnt", inplace=True)
    if df.empty:
        print("No graphs were processed")
    else:
        most_colorings = tuple(df.iloc[-1])
        most_common = tuple(df.loc[df["graph_cnt"].idxmax()])
        print(
            f"Most colorings: {most_colorings}, Most common: {most_common} (coloring_cnt, graph_cnt)"
        )

    df.to_csv(os.path.join(DIR, f"{dataset_name}_{vertex_no}_{algorithm}.csv"))

In [None]:

# dataset = list(dataset.load_laman_degree_3_plus_all(vertex_no))

if False:
    for n in range(7, 12 + 1):
        # True: the graphs are materialized into a list by the factory;
        # False: whatever `load_laman_all` returns is passed on unchanged.
        RUN_AS_BENCHMARK_NOT_JUST_FOR_THE_STATS = False
        if RUN_AS_BENCHMARK_NOT_JUST_FOR_THE_STATS:
            # TODO improve so the dataset is not loaded every time
            def graphs(n=n):
                return list(dataset.load_laman_all(n))
        else:
            def graphs(n=n):
                return dataset.load_laman_all(n)

        find_colorings_on_all_graphs("laman", graphs, n)
        # The naive algorithm is run only for graphs with at most 10 vertices.
        if n <= 10:
            find_colorings_on_all_graphs("laman", graphs, n, "naive")

# Search for big graphs with many monochromatic components but no NAC-coloring

In [None]:
def search_large_graph_no_NAC_coloring(
    nl: int,
    nh: int,
) -> None:
    """
    Endlessly sample random graphs and print those having no NAC-coloring.

    A candidate has between `nl` and `nh` vertices (both inclusive) and
    between `2n-2` and `10n-2` requested edges. It is reported only if it is
    connected, has more than 10 monochromatic classes and the search yields
    no NAC-coloring. Every hit is printed as
    `<number of monochromatic classes>: <graph6 string>`.

    The function never returns; interrupt the kernel to stop it.
    """
    rand = random.Random()
    # The strategy does not depend on the sampled graph, resolve it only once.
    algorithm = create_strategy(("", "neighbors_degree", "shared_vertices", 6))[1]

    while True:
        n = rand.randint(nl, nh)
        m = 2*n-2 + rand.randint(0, 8*n)
        graph = nx.gnm_random_graph(n, m, seed=rand.randint(0, 2**30))
        if not nx.is_connected(graph):
            continue
        classes = nac.find_monochromatic_classes(graph)[1]
        if len(classes) <= 10:
            continue
        coloring = next(iter(nac.NAC_colorings(graph, algorithm=algorithm)), None)
        if coloring is not None:
            continue
        # The graph has to be passed explicitly; graph6 is plain ASCII, so
        # decoding gives a string that can be copy-pasted as is.
        graph6 = nx.to_graph6_bytes(graph, header=False).strip().decode("ascii")
        print(f"{len(classes)}: {graph6}")
if SEARCH:
    # Sample graphs having between 40 and 50 vertices.
    search_large_graph_no_NAC_coloring(nl=40, nh=50)

In [None]:
if SEARCH:
    # Only the last top-level expression of a cell is rendered automatically;
    # an expression nested in `if` is silently discarded, so display it
    # explicitly.
    display(df_benchmarks.query("dataset == 'sparse' and nac_first_coloring_no == 0"))

In [None]:
def build_monter_3_trism_with_edge(prism_no: int) -> nx.Graph:
    """
    Build a graph made of a small core with `prism_no` 3-prisms attached.

    The core is the triangle ABC extended by the triangles ABD and BDE.
    Every prism adds a triangle A_i B_i C_i joined vertex-wise to ABC and one
    extra edge from A to a new vertex E_i. The vertices of the returned graph
    are relabelled to consecutive integers.
    """
    core_edges = [
        ('A', 'B'), ('B', 'C'), ('C', 'A'),
        ('A', 'D'), ('B', 'D'),
        ('D', 'E'), ('B', 'E'),
    ]
    graph = nac.util.NiceGraph()
    graph.add_edges_from(core_edges)

    for idx in range(1, prism_no + 1):
        a, b, c = f'A{idx}', f'B{idx}', f'C{idx}'
        prism_edges = [
            (a, b), (b, c), (c, a),
            ('A', a), ('B', b), ('C', c),
            ('A', f'E{idx}'),  # the problems causing edge
        ]
        graph.add_edges_from(prism_edges)

    return nx.convert_node_labels_to_integers(graph)
# For 0 to 4 attached prisms print the number of monochromatic classes and
# the first NAC-coloring found (None when there is none).
for i in range(5):
    graph = build_monter_3_trism_with_edge(i)
    monochromatic = nac.find_monochromatic_classes(
        graph=graph,
        class_type=MonochromaticClassType.MONOCHROMATIC,
    )
    print(len(monochromatic[1]))
    first_coloring = next(iter(nac.NAC_colorings(graph)), None)
    print(first_coloring)