# NAC coloring search

In this notebook we provide utils to run benchmarks and experiment with our code.

In the first section we start with utility functions; in the second part we load/generate the benchmark data. Afterwards, we run individual benchmarks on selected graph classes with selected algorithms. The algorithms are described in that section.

If you are using VScode, add this option to your `.vscode/settings.json` file.
```json
{
    "jupyter.notebookFileRoot": "${workspaceFolder}"
}
```

In [None]:
from typing import *
from dataclasses import dataclass
from collections import defaultdict, deque
import random
import importlib
from random import Random
from enum import Enum

import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline as backend_inline
from matplotlib.backends import backend_agg
from matplotlib.figure import Figure
from matplotlib.ticker import MaxNLocator

import numpy as np
import pandas as pd
import networkx as nx
import os
import time
import datetime
import signal
import itertools
import base64

from tqdm import tqdm

import nac as nac
from nac import MonochromaticClassType

import benchmarks
from benchmarks import dataset
from benchmarks import generators
import benchmarks.notebook_utils
from benchmarks.notebook_utils import *

# Notebook-wide switches; flip them here instead of editing individual cells.
# NOTE(review): `seed` and `SEARCH` are not referenced in the visible part of the notebook - confirm they are still used.
seed=42
# NOTE(review): the testing cell shown in this notebook is guarded by `if False`, not by this flag - confirm intent.
TEST=False
# Guards the (slow) benchmark cells.
BENCHMARKS=False
# Guards the plotting cells in the Analytics section.
ANALYTICS=True
SEARCH=False


In [None]:
# Re-import the local packages so that edits made to them are picked up
# without restarting the kernel.
# NOTE(review): names bound earlier with `from ... import ...` (e.g. `MonochromaticClassType`
# and everything from `benchmarks.notebook_utils`) keep pointing at the pre-reload objects.
importlib.reload(nac)
importlib.reload(benchmarks.dataset)
importlib.reload(benchmarks.generators)
importlib.reload(benchmarks)
importlib.reload(benchmarks.notebook_utils)

# File name prefixes of the individual result-file format versions.
_BENCH_FILE_START_V2 = "bench_res_v2"
_BENCH_FILE_START_V3 = "bench_res_v3"
_BENCH_FILE_START_V4 = "bench_res_v4"

# Directory where benchmark results are stored; created on first run.
OUTPUT_DIR = os.path.join("benchmarks", "runs")
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Configure the helper module; this must happen after the reload above,
# as reloading re-executes the module and resets its globals.
benchmarks.notebook_utils.OUTPUT_DIR = OUTPUT_DIR
benchmarks.notebook_utils.OUTPUT_BENCH_FILE_START = _BENCH_FILE_START_V4
benchmarks.notebook_utils.OUTPUT_VERBOSE = False

# Loading locally stored graphs

In [None]:
class Graphs:
    """
    Namespace of the graph collections used by the benchmarks.

    Each attribute wraps a loader from `benchmarks.dataset` in a `LazyList`
    (presumably deferring the actual load until the list is first used -
    see `benchmarks.notebook_utils.LazyList`).
    """
    # Randomly generated Laman graphs of various sizes
    laman_random = LazyList(lambda: dataset.load_laman_random_graphs())
    # Graphs with no 3 nor 4 cycles up to 42 vertices
    no_3_nor_4_cycles = LazyList(lambda: dataset.load_no_3_nor_4_cycle_graphs())
    # Graphs generated according to a yet unpublished formula that guarantees
    # that these graphs have either none or a small number of NAC-colorings
    sparse_with_few_colorings = LazyList(lambda: dataset.load_sparse_with_few_colorings_graphs())
    # Globally rigid graphs
    globally_rigid = LazyList(lambda: dataset.load_globally_rigid_graphs())
    # Graphs gathered from other categories that have no NAC-coloring
    # and more than one triangle-connected component
    no_NAC_coloring_gathered = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_gathered())
    # Random (globally rigid) graphs that have no NAC-coloring
    # and more than 2*sqrt(n) triangle-connected components
    no_NAC_coloring_generated_40 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(40))
    no_NAC_coloring_generated_50 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(50))
    no_NAC_coloring_generated_60 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(60))
    no_NAC_coloring_generated_70 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(70))
    no_NAC_coloring_generated_80 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(80))
    no_NAC_coloring_generated_90 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(90))
    no_NAC_coloring_generated_100 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(100))
    no_NAC_coloring_generated_110 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(110))
    no_NAC_coloring_generated_120 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(120))
    no_NAC_coloring_generated_130 = LazyList(lambda: dataset.load_no_NAC_coloring_graphs_generated(130))

    laman_nauty = LazyList(lambda: dataset.load_laman_graphs())
    laman_deg_3_plus = LazyList(lambda: dataset.load_laman_degree_3_plus())
    # Sparse graphs generated on demand; fewer graphs are requested for the larger size ranges
    sparse_graphs = LazyList(lambda: (
        dataset.generate_sparse_graphs(30, 40, count=64) +
        dataset.generate_sparse_graphs(40, 50, count=32) +
        dataset.generate_sparse_graphs(50, 60, count=16) +
        dataset.generate_sparse_graphs(60, 70, count=8)
    ))

    @staticmethod
    def load_all_laman(vertex_no: int) -> Iterator[nx.Graph]:
        """
        Loads all the Laman graphs of the given size, pregenerated files allow the range of [5, 11].
        In case you want to use it in benchmarks, list all the graphs first.

        Declared as a static method so that it can be called both as
        `Graphs.load_all_laman(n)` and on an instance.
        """
        return dataset.load_laman_all(vertices_no=vertex_no)

The cell below generates random Laman graphs and stores them as `./benchmarks/graph-store/laman-random/laman_{n}.g6`.

In [None]:
# mapping = defaultdict(list)
# for graph in Graphs.globally_rigid:
#     mapping[graph.number_of_nodes()].append(graph)
# display([(k, len(v)) for k, v in sorted(mapping.items())])
# for k, v in sorted(mapping.items()):
#     path = os.path.join(dataset.RANDOM_DIR, f"globally_rigid")
#     os.makedirs(path, exist_ok=True)
#     name = f"globally_rigid_{k}.g6"
#     path = os.path.join(path, name)
#     generators._write_graphs_to_file(path, v)


# Storing and loading benchmark results

Each row represents the performance of one strategy on one graph.
The difference between the `first` and `all` variants is that
in the `all` variant we search for all NAC-colorings,
whereas in the `first` variant we stop after the first NAC-coloring is found.

The export CSV columns are:
- `timestamp` - date time of the test in UTC
- `graph` - base64 encoded bytes of graph6 encoded graph
- `dataset` - class of the graph, `minimally_rigid_random`, `no_3_nor_4_cycles`, `globally_rigid`, ...
- `vertex_no` - the number of vertices of the graph
- `edge_no` - the number of edges of the graph
- `triangle_components_no` - the number of triangle components of the graph
- `monochromatic_classes_no` - the number of monochromatic classes of the graph
- `relabel` - relabel strategy (relabels vertices before the main algorithm is run, here we have only `none` or `random`)
- `split` - splitting strategy
- `merging` - merging strategy
- `subgraph_size` - the target initial size of subgraphs in monochromatic components
- `used_monochromatic_classes` - if monochromatic classes were used to run the test, `False` means triangle components were used
- `nac_any_finished` - if any of the tests finished in time
- `nac_{first|all}_coloring_no` - the number of NAC-colorings of the graph, for the first variant limited to 1
- `nac_{first|all}_mean_time` - the time required to find first/all NAC-colorings in milliseconds
- `nac_{first|all}_rounds` - the number of rounds used to run the benchmarks
- `nac_{first|all}_check_cycle_mask` - the number of cycle mask checks performed
- `nac_{first|all}_check_is_NAC` - the number of `IsNACColoring` checks performed
- `nac_{first|all}_merge` - the number of merges performed
- `nac_{first|all}_merge_no_common_vertex` - the number of merges with no common vertex (these are simple to compute, but produce large no of colorings slowing down the algorithm)

In [None]:
class Promising:
    """
    Strategy options that are currently benchmarked.

    Entries that are commented out are available options that are
    disabled for now. `strategies_offline` / `strategies_online` hold every
    (relabeling, splitting, merging, subgraph size) combination.
    """
    RELABELING = [
        "none",
        # "random",
        # "bfs",
    ]
    SPLITTING = [
        "none",
        # "cycles_match_chunks",
        "neighbors",
        # "neighbors_degree",
        # "beam_neighbors",
    ]
    MERGING_OFFLINE = [
        "linear",
        # "score",
        # "shared_vertices",
        # "sorted_size",
    ]
    MERGING_ONLINE = [
        "linear",
        # "shared_vertices",
        # "sorted_size",
    ]
    SIZES = [4, 5] # [4, 5, 6, 7]

    # Cartesian products; the rightmost option (size) varies the fastest.
    strategies_offline = [*itertools.product(RELABELING, SPLITTING, MERGING_OFFLINE, SIZES)]
    strategies_online = [*itertools.product(RELABELING, SPLITTING, MERGING_ONLINE, SIZES)]


for _label, _strategies in (
    ("Offline strategies:", Promising.strategies_offline),
    ("Online strategies: ", Promising.strategies_online),
):
    print(f"{_label} {len(_strategies)}")
del _label, _strategies

In [None]:
# display(COLUMNS)

### File storage management

In [None]:
def new_DataFrame(data: Optional[List[MeasurementResult]] = None) -> pd.DataFrame:
    """
    Builds a results data frame with the `COLUMNS` layout.

    Parameters:
        data: measurement results to convert, one row per result;
            `None` (the default) or an empty list yields an empty frame.
    """
    # `None` replaces the former mutable `[]` default, which was shared between calls.
    if data is None:
        data = []
    return pd.DataFrame(
        [x.to_list() for x in data],
        columns=COLUMNS,
    )

def update_stored_data(dfs: Optional[List[pd.DataFrame]] = None, head_loaded: bool = True) -> pd.DataFrame:
    """
    Appends new records to the stored benchmark results and saves them.

    Parameters:
        dfs: data frames with new records; `None` or an empty list only
            deduplicates and re-stores the already recorded data.
        head_loaded: display the loaded records before they are updated.

    Returns:
        The stored data frame after deduplication.
    """
    # `None` replaces the former mutable `[]` default, which was shared between calls.
    if dfs is None:
        dfs = []

    df = load_records()
    if head_loaded:
        display(df)
    if len(dfs) != 0:
        df = pd.concat((df, pd.concat(dfs)))
    # A record is identified by the graph and the full strategy description;
    # when a combination was measured repeatedly, the newest row wins.
    df = df.drop_duplicates(
        subset=["graph", "dataset", "split", "relabel", "merging", "subgraph_size", "use_smart_split", "used_monochromatic_classes"],
        keep='last',
    )
    store_results(df)
    return df

def migrate_v2_to_v3(dir = OUTPUT_DIR) -> pd.DataFrame:
    """
    Converts the latest v2 result file found in `dir` to the v3 layout and stores it.

    The v3 layout adds the `use_smart_split` and `used_monochromatic_classes`
    columns. Rows of the former `laman_random_no_smart_split` dataset are
    folded into `laman_random` with `use_smart_split` set to `False`.

    Returns:
        The migrated data frame (previously nothing was returned despite
        the return annotation).
    """
    file_name_v2 = find_latest_record_file(_BENCH_FILE_START_V2, dir)
    path = os.path.join(dir, file_name_v2)
    df = pd.read_csv(path)
    df["use_smart_split"] = True
    df["used_monochromatic_classes"] = True
    # The flag must be updated before the dataset is renamed,
    # otherwise the second selection would match no rows.
    df.loc[df["dataset"] == 'laman_random_no_smart_split', "use_smart_split"] = False
    df.loc[df["dataset"] == 'laman_random_no_smart_split', "dataset"] = 'laman_random'
    df = df[COLUMNS]
    store_results(df, None, dir)
    return df

def migrate_v3_to_v4(dir = OUTPUT_DIR) -> pd.DataFrame:
    """
    Converts the latest v3 result file found in `dir` to the v4 layout and stores it.

    The v4 layout adds `timestamp` and the merge counters. Old records get
    placeholder values: the Unix epoch as the timestamp and `-1` for the counters.

    Returns:
        The migrated data frame (previously nothing was returned despite
        the return annotation).
    """
    file_name_v3 = find_latest_record_file(_BENCH_FILE_START_V3, dir)
    path = os.path.join(dir, file_name_v3)
    df = pd.read_csv(path)
    df["timestamp"] = datetime.datetime(1970, 1, 1)
    df["nac_first_merge"] = -1
    df["nac_first_merge_no_common_vertex"] = -1
    df["nac_all_merge"] = -1
    df["nac_all_merge_no_common_vertex"] = -1
    df = df[COLUMNS]
    store_results(df, None, dir)
    return df

### Running and recording benchmarks

In [None]:
def create_strategy(param: Tuple[str, str, str, int], use_smart_split: bool) -> Tuple[str, str]:
    """
    Translates a strategy tuple into the (relabel, algorithm name) pair.

    Parameters:
        param: (relabel, split, merge, subgraph size) strategy description
        use_smart_split: appends the `-smart` suffix to the algorithm name

    Returns:
        The relabel strategy and the algorithm name of the form
        `subgraphs-{merge}-{split}-{size}[-smart]`.
    """
    relabel, split, merge, subgraph = param
    suffix = "-smart" if use_smart_split else ""
    return relabel, f"subgraphs-{merge}-{split}-{subgraph}{suffix}"

In [None]:
def measure_for_graph_class(
    dataset_name: str,
    graphs: Iterable[nx.Graph],
    all_max_vertex_no: int,
    rounds:int,
    graph_timeout: int,
    use_smart_split: bool = True,
    use_monochromatic_classes: bool = True,
    df_seen: pd.DataFrame | None | Callable[[], pd.DataFrame] = load_records,
    save_every: int | None = 5*60,
    cycles_all_max_vertices: int = 20,
    cycles_first_max_vertices: int = 42,
) -> pd.DataFrame:
    """
    Runs benchmarks for the given graph class.

    Every graph is measured with every strategy from `Promising`
    (offline strategies when all NAC-colorings are searched for, online
    strategies otherwise) and, for small enough graphs, with the naive cycles
    algorithm. New results are merged into the stored records using
    `update_stored_data`, periodically and once more at the end.
    An exception raised while measuring a strategy is printed and
    that strategy is skipped for the graph.

    Parameters:
        dataset_name: Name of the dataset stored in the output csv
            (lowercased, spaces replaced by underscores)
        graphs: Iterable of graphs to benchmark
        all_max_vertex_no: Maximum vertex number to search for all NAC-colorings,
            for larger graphs only the first NAC-coloring is searched for
        rounds: Number of rounds to run for each graph
        graph_timeout: Timeout for each graph in seconds
        use_smart_split: Whether the `-smart` variant of the algorithm is used,
            see `create_strategy`
        use_monochromatic_classes: Whether to use monochromatic classes or triangle connected components
        df_seen: Dataframe with already measured data (or a callable returning it),
            so already tried graphs and strategies can be skipped
        save_every: save progress every number of seconds, `None` disables periodic saving
        cycles_all_max_vertices: the naive cycles algorithm is run when searching for
            all NAC-colorings only for graphs with fewer vertices than this
        cycles_first_max_vertices: the naive cycles algorithm is run when searching for
            the first NAC-coloring only for graphs with fewer vertices than this

    Returns:
        Data frame with the results measured by this call, sorted by
        `nac_all_mean_time` and `nac_first_mean_time`.
    """
    if callable(df_seen):
        df_seen = df_seen()

    dataset_name = dataset_name.replace(" ", "_").lower()
    if df_seen is None:
        df_seen = new_DataFrame()
    # only records of this dataset are relevant for skipping
    df_seen = df_seen.query(f"dataset == '{dataset_name}'")

    # `results` holds records not yet stored, `all_results` the already stored ones
    results: List[MeasurementResult] = []
    all_results: List[MeasurementResult] = []

    last_save = time.time()

    # def safe_iterator(graphs: Iterable[nx.Graph])->Iterator[nx.Graph]:
    #     graphs = iter(graphs)
    #     failures = 0
    #     while failures < 128:
    #         try:
    #             graph = next(graphs, None)
    #         except nx.NetworkXError as e:
    #             print(e)
    #             failures += 1
    #             continue

    #         if graph is None:
    #             break
    #         yield graph
    # graphs = safe_iterator(graphs)

    for graph in tqdm(graphs):
        # periodic save of the progress;
        # this would be a function if python would not have broken scoping
        if save_every is not None and len(results) > 0:
            now = time.time()
            if now - last_save > save_every:
                all_results.extend(results)
                df = new_DataFrame(results)
                update_stored_data([df], head_loaded=False)
                results = []
                last_save = now


        # all NAC-colorings are searched for only on small enough graphs
        all_colorings = all_max_vertex_no >= graph.number_of_nodes()
        trianlge_classes = len(nac.find_monochromatic_classes(graph=graph, class_type=MonochromaticClassType.TRIANGLES)[1])
        monochromatic_classes = len(nac.find_monochromatic_classes(graph=graph, class_type=MonochromaticClassType.MONOCHROMATIC)[1])

        strategies = Promising.strategies_offline if all_colorings else Promising.strategies_online

        # add cycle strategy, it is represented by `None`
        if (all_colorings and graph.number_of_nodes() < cycles_all_max_vertices) or (not all_colorings and graph.number_of_nodes() < cycles_first_max_vertices):
            strategies = itertools.chain(strategies, [None])

        graph_id = graph_to_id(graph)
        df_graph = df_seen.query(f"graph == '{graph_id}'")

        for strategy in strategies:
            # skip test that already run
            if strategy is not None:
                prev_record = df_graph.query(
                    f"relabel == '{strategy[0]}'"
                    + f" and split == '{strategy[1]}'"
                    + f" and merging == '{strategy[2]}'"
                    + f" and subgraph_size == {strategy[3]}"
                    + f" and use_smart_split == {use_smart_split}"
                    + f" and used_monochromatic_classes == {use_monochromatic_classes}"
                )
            else:
                # the naive cycles algorithm is stored under these fixed values
                prev_record = df_graph.query(
                    f"relabel == 'none'"
                    + f" and split == 'naive-cycles'"
                    + f" and merging == 'naive-cycles'"
                    + f" and subgraph_size == 0"
                    + f" and use_smart_split == {use_smart_split}"
                    + f" and used_monochromatic_classes == {use_monochromatic_classes}"
                )
            if len(prev_record) > 0:
                # ensures graphs are recomputed if all_max_vertex_no is increased
                if graph.number_of_nodes() > all_max_vertex_no or list(prev_record["nac_all_mean_time"])[-1] > 0:
                    continue

            try:
                # print(strategy)
                search_res = nac_benchmark_core(
                    graph,
                    rounds=rounds,
                    first_only=not all_colorings,
                    strategy=create_strategy(strategy, use_smart_split=use_smart_split) if strategy else ("none", "cycles"),
                    use_monochromatic_classes=use_monochromatic_classes,
                    time_limit=graph_timeout,
                )

                relabel, split, merge, subgraph_size = strategy if strategy else ("none", "naive-cycles", "naive-cycles", 0)
                res = create_measurement_result(
                    graph=graph,
                    dataset_name=dataset_name,
                    trianlge_classes=trianlge_classes,
                    monochromatic_classes=monochromatic_classes,
                    nac_first=search_res.first,
                    nac_all=search_res.all,
                    relabel_strategy=relabel,
                    split_strategy=split,
                    merge_strategy=merge,
                    subgraph_size=subgraph_size,
                    use_smart_split=use_smart_split,
                    used_monochromatic_classes=use_monochromatic_classes,
                )
                results.append(res)
                # print(res.nac_first_mean_time)
            except Exception as e:
                # best effort - a failing strategy must not stop the whole benchmark
                print(f"Exception for strategy {strategy}: {e}")
                # raise e

    all_results.extend(results)
    if len(all_results) == 0:
        print("All runs skipped")

    # store what was measured since the last periodic save
    if len(results) > 0:
        df = new_DataFrame(results)
        update_stored_data([df], head_loaded=False)

    df = new_DataFrame(all_results)
    df = df.sort_values(by=["nac_all_mean_time", "nac_first_mean_time"])
    return df

# Running benchmarks

### Testing

In [None]:
# Quick smoke run on a few small graphs, enabled by the `TEST` flag
# from the configuration cell (the cell used to be hard-disabled by `if False`).
# Results are stored under the `test` dataset, which the analytics section filters out.
if TEST:
    df_test = measure_for_graph_class(
        "test",
        # [g for g in Graphs.laman if g.number_of_nodes() == 8][:8],
        [g for g in Graphs.laman_nauty if g.number_of_nodes() < 12][:32],
        # [g for g in Graphs.laman_deg_3_plus if g.number_of_nodes() == 8][:8],
        # [g for g in Graphs.sparse_graphs if g.number_of_nodes() == 13][:8],
        # Graphs.sparse_graphs,
        all_max_vertex_no=15,
        rounds=3,
        graph_timeout=3,
        # load the stored records on demand; `df_benchmarks` is not defined
        # before this cell when the notebook is run from a fresh kernel
        df_seen=load_records,
        save_every=None,
    )

### Laman Nauty

In [None]:
# Benchmarks `Graphs.laman_nauty`; the records are stored under the dataset name `laman`.
# All NAC-colorings are searched for on graphs with up to 15 vertices.
if BENCHMARKS:
    df_laman = measure_for_graph_class(
        "Laman",
        Graphs.laman_nauty,
        all_max_vertex_no=15,
        rounds=3,
        graph_timeout=3,
    )

### Laman Random

In [None]:
# Benchmarks `Graphs.laman_random`; the records are stored under the dataset name `laman_random`.
# All NAC-colorings are searched for on graphs with up to 18 vertices.
if BENCHMARKS:
    df_laman_random = measure_for_graph_class(
        "Laman random",
        Graphs.laman_random,
        all_max_vertex_no=18,
        rounds=2,
        graph_timeout=3,
    )

### Laman deg 3+

In [None]:
# Benchmarks `Graphs.laman_deg_3_plus`; the records are stored under the dataset name `laman_deg_3+`.
if BENCHMARKS:
    df_laman_deg_3_plus = measure_for_graph_class(
        "Laman deg 3+",
        Graphs.laman_deg_3_plus,
        # Observed speed per graph (vertex no. - time per iteration)
        # All with 36 strategies, 3 rounds
        #  8 - 1s/it
        #  9 - 1s/it
        # 10 - 2s/it
        # 11 - 7s/it
        # 12 - 15s/it -> ~20 mon. classes
        # First coloring with 27 strategies, 3 rounds
        # 15 - 5s/it
        # 16 - 5s/it
        # 17 - 90s/it
        all_max_vertex_no=12,
        rounds=3,
        graph_timeout=3,
    )

### No 3 nor 4 cycles

In [None]:
# Benchmarks `Graphs.no_3_nor_4_cycles`; the records are stored under the dataset name `no_3_nor_4_cycles`.
if BENCHMARKS:
    # overview of how many graphs there are for each vertex count
    display(pd.Series([g.number_of_nodes() for g in Graphs.no_3_nor_4_cycles]).value_counts())
    df_no_3_nor_4_cycles = measure_for_graph_class(
        "No 3 nor 4 cycles",
        Graphs.no_3_nor_4_cycles,
        # Observed speed per graph (vertex no. - time per iteration)
        # 24 strategies
        # 10 - 5 s/it
        # 11 - 10 s/it
        # 12 - 28 s/it
        # 13 -
        # 0 -> only the first NAC-coloring is searched for
        all_max_vertex_no=0,
        rounds=2,
        graph_timeout=3,
    )
# The largest vertex count in the dataset.
# NOTE(review): this line is outside of the `BENCHMARKS` guard, so the dataset
# is accessed even when benchmarks are disabled - confirm that is intended.
display(max(Graphs.no_3_nor_4_cycles, key=lambda g: g.number_of_nodes()).number_of_nodes())

### Line graphs of no 3 nor 4 cycles

In [None]:
# Benchmarks line graphs of `Graphs.no_3_nor_4_cycles`;
# the records are stored under the dataset name `line_graph_of_no_3_nor_4_cycles`.
if BENCHMARKS:
    # Only graphs with a vertex of degree at least 3 are used; nodes of a line
    # graph are edges of the original graph, so they are relabeled to integers.
    # NOTE(review): `max` raises ValueError for a graph without vertices - assumes none is in the dataset.
    line_graphs = [
        nx.convert_node_labels_to_integers(nx.line_graph(g))
        for g in Graphs.no_3_nor_4_cycles
        if max(deg for _, deg in g.degree) >= 3
    ]

    df_line_graphs_of_no_3_nor_4_cycles = measure_for_graph_class(
        "Line graph of no 3 nor 4 cycles",
        line_graphs,
        all_max_vertex_no=13,
        rounds=3,
        graph_timeout=3,
    )

### Globally rigid

In [None]:
# Benchmarks globally rigid graphs: the graphs that already have some record
# (so they are re-checked with newly enabled strategies) followed by the stored dataset.
if BENCHMARKS:
    df_benchmarks = load_records()
    recorded_graph_ids = df_benchmarks.query("dataset == 'globally_rigid'")['graph'].unique()
    globary_rigid_dataset = [graph_from_id(graph_id) for graph_id in recorded_graph_ids]
    # Both parts are plain sequences of graphs, so they are joined as lists;
    # `pd.concat` accepts only Series/DataFrame objects and raised TypeError here.
    globary_rigid_dataset = globary_rigid_dataset + list(Graphs.globally_rigid)

    measure_for_graph_class(
        "globally_rigid",
        globary_rigid_dataset,
        all_max_vertex_no=20,
        rounds=2,
        graph_timeout=5,
    )

### No NAC coloring

In [None]:
# Benchmarks the generated graphs with no NAC-coloring, one dataset per size 40, 50, ..., 130.
if BENCHMARKS:
    for size in range(40, 131, 10):
        name = f"no_NAC_coloring_generated_{size}"
        graphs = getattr(Graphs, name)
        for smart_split in [False]:
        # for smart_split in [False, True]:
            print(name, smart_split)
            measure_for_graph_class(
                name,
                graphs[:500],
                all_max_vertex_no=0,
                rounds=2,
                graph_timeout=15,
                cycles_first_max_vertices=0,
                use_monochromatic_classes=False, # Most of the graphs have a single monochromatic class only -> it makes no sense for benchmarking
                use_smart_split=smart_split,
            )

### Gather graphs with no NAC-coloring

In [None]:
def export_graph_class(
    dataset_name: str,
    name: str,
):
    """
    Writes the graphs of a dataset to `graphs_store/random/{name}.g6`.

    Only graphs with at least one finished run are exported, each of them once.
    The datasets present in the records are displayed and the number of
    exported graphs is printed.

    Parameters:
        dataset_name: dataset to export, as stored in the records
        name: name of the output file without the extension
    """
    df_benchmarks = load_records()
    display(df_benchmarks["dataset"].unique())
    finished = df_benchmarks.query(f"nac_any_finished == True and dataset == '{dataset_name}'")
    graph_ids = finished['graph'].drop_duplicates()

    output_dir = "graphs_store/random"
    os.makedirs(output_dir, exist_ok=True)
    path = os.path.join(output_dir, f"{name}.g6")

    lines = 0
    with open(path, "wb") as f:
        for lines, graph_id in enumerate(graph_ids, start=1):
            # records store graph ids, the file stores header-less graph6 lines
            f.write(nx.graph6.to_graph6_bytes(graph_from_id(graph_id), header=False))
    print(lines)

def export_sparse_graphs_with_few_colorings():
    """
    Generates up to 2000 graphs using `dataset.generate_NAC_critical_graphs(30, 60, seed=None)`
    and writes them to `graphs_store/random/test.g6`.

    The number of written graphs is printed.
    """
    generated = dataset.generate_NAC_critical_graphs(30, 60, seed=None)
    graphs = [g for g, _ in zip(generated, range(2000))]

    output_dir = "graphs_store/random"
    os.makedirs(output_dir, exist_ok=True)
    name = "test"
    path = os.path.join(output_dir, f"{name}.g6")

    lines = 0
    with open(path, "wb") as f:
        for lines, graph in enumerate(graphs, start=1):
            f.write(nx.graph6.to_graph6_bytes(graph, header=False))
    print(lines)

def export_no_NAC_coloring():
    """
    Gathers graphs that have no NAC-coloring and more than one triangle
    component from all the recorded datasets and writes them to
    `graphs_store/extracted/no_nac_coloring.g6`.

    Only graphs with at least one finished run are considered, each graph is
    written once. The source datasets and the number of written graphs are printed.
    """
    df_benchmarks = load_records()
    df = df_benchmarks.query("nac_any_finished == True and nac_first_coloring_no == 0 and triangle_components_no > 1")
    # Single quotes inside the f-string: reusing the outer quote type
    # is a syntax error on Python older than 3.12.
    print(f"Export from groups: {df['dataset'].unique()}")
    graphs = df['graph'].drop_duplicates()

    output_dir = "graphs_store/extracted"
    os.makedirs(output_dir, exist_ok=True)
    name = "no_nac_coloring"
    path = os.path.join(output_dir, f"{name}.g6")
    lines = 0
    with open(path, "wb") as f:
        for graph in graphs:
            lines += 1
            # records store graph ids, the file stores header-less graph6 lines
            graph = graph_from_id(graph)
            graph = nx.graph6.to_graph6_bytes(graph, header=False)
            f.write(graph)
    print(lines)

# export_no_NAC_coloring()

# Analytics

The base plots show the time required to find
the first/all NAC-colorings based on the number of vertices/monochromatic classes.
The plots are first shown separately for each class of graphs and
at the end for all the classes combined.
Plots are drawn for each strategy category so that they can be compared easily.
The plots show the mean, median and 1st quartile of the running times to lower the bias.

The second group of plots shows our contribution: the decrease in
the number of `is_NAC_coloring` checks performed compared to
the naive approach, without or with triangle/monochromatic classes.

In [None]:
# Helper from `benchmarks.notebook_utils`; presumably switches the plotting output to LaTeX - TODO confirm.
enable_latex_output()

In [None]:
# Load the stored records indexed by graph id, without the `test` dataset.
# `set_index` returns a new frame, so the object returned by `load_records()` is not modified.
df_analytics_loaded = load_records().set_index("graph")
df_analytics_loaded = df_analytics_loaded.query("dataset != 'test'")
display(df_analytics_loaded.columns)
display(list(df_analytics_loaded["dataset"].unique()))
display(list(df_analytics_loaded["relabel"].unique()))
display(list(df_analytics_loaded["split"].unique()))
display(list(df_analytics_loaded["merging"].unique()))

# Transform
# `split_merging` is a LaTeX-safe label combining the split and merging strategy.
# All the replacements are literal; `regex=False` is passed explicitly as the
# default of `str.replace` differs between pandas versions.
df_analytics_loaded = df_analytics_loaded.assign(
    split_merging=lambda x: (
        (x["split"] + " & " + x["merging"])
        .str.replace("naive-cycles & naive-cycles", "naive cycles", regex=False)
        .str.replace("&", r"\&", regex=False)
        .str.replace("_", r"\_", regex=False)
    )
)
df_analytics_loaded = df_analytics_loaded.assign(split_merging_smart=lambda x: x["split_merging"] + " & " + x["use_smart_split"].astype(str))
# Two stable sorts -> ordered by merging, ties by split; makes graph colors more regular
df_analytics_loaded = (
    df_analytics_loaded
    .sort_values(by="split", kind="stable")
    .sort_values(by="merging", kind="stable")
)

In [None]:
# Preserve the original data - the loaded frame stays available
# as `df_analytics_loaded`, the queries below create new frames
df_analytics = df_analytics_loaded

# Filter out trivial graphs, i.e. graphs with a single class/component
# of the kind that was used to run the test
df_analytics = df_analytics.query("(monochromatic_classes_no > 1 and used_monochromatic_classes == True) or (triangle_components_no > 1 and used_monochromatic_classes == False)")

# Filter bad strategies, `df_with_failing` keeps the data from before this filter
df_with_failing = df_analytics
df_analytics = df_analytics.query("split == 'naive-cycles' or split == 'none' or split == 'neighbors' or split == 'neighbors_degree' or split=='cycles_match_chunks'")
df_analytics = df_analytics.query("merging == 'naive-cycles' or merging == 'linear' or merging == 'shared_vertices'") #  or merging== 'score'
# df_analytics = df_analytics.query("split != 'naive-cycles' and split != 'neighbors_degree' and split!='cycles_match_chunks'")
# df_analytics = df_analytics.query("merging != 'min_max' and merging != 'sorted_bits'")

# Graphs with no NAC coloring and more triangle connected components
# (only runs that used triangle components)
df_analytics_no_nac = df_analytics.query("nac_first_coloring_no == 0 and triangle_components_no > 1 and used_monochromatic_classes == False")

# Statistics
def analyze_general(df: pd.DataFrame) -> None:
    """
    Prints how many runs and graphs are recorded and how many of them finished.

    Parameters:
        df: benchmark records indexed by graph id with the `nac_any_finished` column;
            an empty frame is reported as 0.0% of unfinished runs.
    """
    print(f"Total runs: {len(df)}", )
    print(f"Total graphs: {len(df.index.unique())}")
    df_finished = df.query("nac_any_finished == True")
    df_failed = df.query("nac_any_finished == False")
    # guard the empty frame, the ratio would otherwise raise ZeroDivisionError
    failed_percent = np.round(len(df_failed)/len(df)*100, 1) if len(df) > 0 else 0.0
    print(f"Runs that did/not/finish: {len(df_finished)}/{len(df_failed)}/{len(df)} ({failed_percent}% did not finish)")
    # a graph is counted in both groups when only some of its runs finished
    print(f"Graphs where some runs did/not/finish: {df_finished.index.nunique()}/{df_failed.index.nunique()}/{df.index.nunique()}")

def analyze_colorings(df: pd.DataFrame) -> None:
    """
    Prints the number of graphs with and without a NAC-coloring.

    Only records with `nac_any_finished == True` are considered,
    every graph (index value) is counted once per printed line.
    """
    finished = df.query("nac_any_finished == True")

    def count_graphs(condition: str) -> int:
        # number of distinct graphs among the finished records matching the condition
        return finished.query(condition).index.nunique()

    with_coloring = count_graphs("nac_first_coloring_no  > 0")
    without_coloring = count_graphs("nac_first_coloring_no == 0")
    without_coloring_nontrivial = count_graphs("nac_first_coloring_no == 0 and monochromatic_classes_no > 1")

    print("Graphs with  a NAC-coloring:", with_coloring)
    print("Graphs with no NAC-coloring:", without_coloring)
    print("Graphs with no NAC-coloring and more monochromatic classes:", without_coloring_nontrivial)

def analyze_finished(df: pd.DataFrame) -> None:
    """
    Prints how many graphs finished on all the tested strategies,
    once for all the strategies and once with naive cycles excluded,
    followed by the number of records that belong to these graphs.

    Parameters:
        df: benchmark records indexed by graph id
    """
    total_graphs = len(df.index.unique())
    without_naive = df.query("split != 'naive-cycles'")

    graphs_all_finished = filter_graphs_that_finished_for_all_strategies(df)
    graphs_nonnaive_finished = filter_graphs_that_finished_for_all_strategies(without_naive)

    print(f"{len(graphs_all_finished)}/{total_graphs} graphs finished on all tested strategies.")
    print(f"{len(graphs_nonnaive_finished)}/{total_graphs} graphs finished on all tested strategies excluding naive cycles.")

    # the records are selected from the whole frame in both cases
    records_all_finished = len(df.loc[graphs_all_finished])
    records_nonnaive_finished = len(df.loc[graphs_nonnaive_finished])
    print(f"Records corresponding to graph, that finished on all tested strategies: {records_all_finished}")
    print(f"Records corresponding to graph, that finished on all tested strategies excluding naive cycles: {records_nonnaive_finished}")


# Statistics of the non-trivial graphs for all the recorded strategies
print("All together:")
analyze_general(df_with_failing)
print()

# Statistics restricted to the strategies selected by the filter above
print("Just preffered strategies:")
analyze_general(df_analytics)
analyze_colorings(df_analytics)
print()

analyze_finished(df_analytics)

# NOTE(review): helper from `benchmarks.notebook_utils`; presumably substitutes values
# of the runs that did not finish so they can be plotted - TODO confirm.
# It is applied after the statistics above were printed.
df_analytics = replace_failed_results(df_analytics)

## Graph classes

In [None]:
# Compare runs with monochromatic classes and triangle components for selected datasets.
# The loop variable must not be called `dataset`: that name is the imported
# `benchmarks.dataset` module, which the lazy loaders in `Graphs` look up when they run.
for dataset_name in ["globally_rigid", "minimally_rigid_random", "no_3_nor_4_cycles", "no_nac_coloring_generated_40"]:
    fig = plot_monochromatic_vs_triangle(df_analytics, dataset=dataset_name)
    display(fig)
    export_monochromatic_vs_triangle(fig, dataset_name)

### Random minimally rigid graphs

In [None]:
if ANALYTICS and False:
    title = 'Laman random'
    dataset_name = 'laman_random'
    # not named `dataset`, that would shadow the imported `benchmarks.dataset` module
    df_dataset = finished_graphs(df_analytics.query(f"dataset == '{dataset_name}'"))
    figs = list(plot_frame(title, df_dataset))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

In [None]:
if ANALYTICS:
    title = 'Minimally rigid'
    dataset_name = 'minimally_rigid_random'

    # First NAC-coloring.
    # The frame is not named `dataset`, that would shadow the imported `benchmarks.dataset` module.
    df_dataset = drop_outliers(finished_graphs(df_analytics.query(f"dataset == '{dataset_name}'")))
    figs = list(plot_frame(title, df_dataset, ops_value_columns_sets=[["nac_first_mean_time"], ["nac_first_check_cycle_mask"]]))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

    # All NAC-colorings, the `cycles_match_chunks` split is left out
    df_dataset = drop_outliers(finished_graphs(df_analytics.query(f"dataset == '{dataset_name}' and split != 'cycles_match_chunks'")))
    figs = list(plot_frame(title, df_dataset, ops_value_columns_sets=[["nac_all_mean_time"], ["nac_all_check_cycle_mask"]]))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

In [None]:
if ANALYTICS and False: # Not enough data yet
    for fig in plot_frame("Laman deg 3+", df_analytics.query("dataset == 'laman_deg_3+'")):
        display(fig)

### No 3 nor 4 cycles

In [None]:
if ANALYTICS:
    title = 'No 3 nor 4 cycles'
    dataset_name = 'no_3_nor_4_cycles'

    # First NAC-coloring, without the `none` split & `shared_vertices` merging runs on graphs with 50+ vertices.
    # The frame is not named `dataset`, that would shadow the imported `benchmarks.dataset` module.
    df_dataset = drop_outliers(finished_graphs_no_naive(df_analytics.query(f"dataset == '{dataset_name}' and not (split == 'none' and merging=='shared_vertices' and vertex_no >= 50)")))
    figs = list(plot_frame(title, df_dataset, Columns.first))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

    # All NAC-colorings
    df_dataset = drop_outliers(finished_graphs_no_naive(df_analytics.query(f"dataset == '{dataset_name}'")))
    figs = list(plot_frame(title, df_dataset, Columns.all))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

In [None]:
if ANALYTICS and False: # Not enough data yet
    for fig in plot_frame("Line graphs of no 3 nor 4 cycles", df_analytics.query("dataset == 'line_graph_of_no_3_nor_4_cycles'")):
        display(fig)

In [None]:
if ANALYTICS and False:
    for fig in plot_frame("Sparse", df_analytics.query("dataset == 'sparse'")):
        display(fig)

In [None]:
# Disabled: the data are noisy and change in unpredictable ways
if ANALYTICS and False:
    for fig in plot_frame("Few colorings - None", df_analytics.query("dataset == 'few_colorings'")):
        display(fig)

### Globally Rigid

In [None]:
if ANALYTICS:
    title = 'Globally rigid'
    dataset_name = 'globally_rigid'

    # First NAC-coloring, runs with subgraph size 4 are left out.
    # The frame is not named `dataset`, that would shadow the imported `benchmarks.dataset` module.
    df_dataset = drop_outliers(finished_graphs_no_naive(df_analytics.query(f"dataset == '{dataset_name}'  and subgraph_size != 4")))
    figs = list(plot_frame(title, df_dataset,
        ops_value_columns_sets=Columns.first,
        # ops_based_on=["subgraph_size"],
    ))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

    # All NAC-colorings, plotted against the number of monochromatic classes only
    df_dataset = drop_outliers(finished_graphs_no_naive(df_analytics.query(f"dataset == '{dataset_name}'")))
    figs = list(plot_frame(title, df_dataset,
        ops_value_columns_sets=Columns.all,
        ops_x_column=[Columns.MONOCHROMATIC_CLASSES_NO],
    ))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

In [None]:
if ANALYTICS and False:
    for fig in plot_frame(
        "No NAC-coloring gathered, ▵-components",
        df_analytics_no_nac,
        ops_x_column=["vertex_no", "triangle_components_no"],
    ):
        display(fig)

### No NAC-coloring - generated

In [None]:
def query_no_nac_coloring_generated(base: pd.DataFrame) -> pd.DataFrame:
    """
    Select the rows of the generated "no NAC-coloring" datasets.

    The index of ``base`` is first moved back into ordinary columns and rows
    with ``split == 'naive-cycles'`` are dropped. Rows whose ``dataset`` is
    ``no_nac_coloring_generated_<k>`` for k = 40, 50, ..., 130 are then
    gathered, grouped by increasing k (original row order is kept inside each
    group), and the result is indexed by the ``graph`` column.

    The input frame is not modified.
    """
    without_naive = base.reset_index(inplace=False).query("split != 'naive-cycles'")

    # One slice per generated dataset size, in increasing size order.
    per_size = [
        without_naive.query(f"dataset == 'no_nac_coloring_generated_{size}'")
        for size in range(40, 130 + 1, 10)
    ]

    return pd.concat(per_size, ignore_index=True).set_index("graph")

In [None]:
# Guarded by ANALYTICS like the other analytics cells. The previous
# `ANALYTICS or True` condition was always true, so the cell ran (and exported
# figures) even with analytics switched off.
if ANALYTICS:
    base = df_analytics_no_nac
    df = query_no_nac_coloring_generated(base)
    df = drop_outliers(finished_graphs_no_naive(df))
    title = 'No NAC-coloring, ▵-connected components'
    dataset_name = 'no_nac_coloring_generated'

    # Compare split/merging strategies, restricted to subgraph sizes 4 and 5.
    figs = list(plot_frame(
        title,
        df.query("subgraph_size==4 or subgraph_size==5"),
        ops_x_column=["vertex_no", "triangle_components_no"],
        ops_based_on=["split_merging"],
        filter_out_exhaustive_mergin_strategies_for_first=False,
    ))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

    # Compare subgraph sizes, restricted to graphs with fewer than 60 vertices.
    figs = list(plot_frame(
        title,
        df.query("vertex_no<60"),
        ops_x_column=["vertex_no", "triangle_components_no"],
        ops_based_on=["subgraph_size"],
        filter_out_exhaustive_mergin_strategies_for_first=False,
    ))
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)

### Failing 1

In [None]:
if ANALYTICS:
    title = 'Minimally rigid'
    # dataset_name = 'laman_random'
    dataset_name = 'minimally_rigid_random'

    # Merging strategies under the 'neighbors' split ('score' merging excluded).
    dataset = drop_outliers(
        finished_graphs_no_naive(
            df_with_failing.query(f"dataset == '{dataset_name}' and split == 'neighbors' and merging != 'score'")
        )
    )
    figs = list(plot_frame(title, dataset))
    for fig in figs:
        display(fig)
    export_standard_figure_list(f"{dataset_name}_failing_merging", figs)

    # Split strategies under 'linear' merging.
    dataset = drop_outliers(
        finished_graphs_no_naive(
            df_with_failing.query(f"dataset == '{dataset_name}' and merging == 'linear'")
        )
    )
    figs = list(plot_frame(title, dataset))
    for fig in figs:
        display(fig)
    export_standard_figure_list(f"{dataset_name}_failing_split", figs)

### Failing 2

In [None]:
if ANALYTICS:
    base = df_with_failing.query("subgraph_size==4 or subgraph_size==5")
    df = query_no_nac_coloring_generated(base)
    title = 'No NAC-coloring'
    cut_at = 70  # only graphs with fewer vertices than this are plotted

    # Merging strategies under the 'neighbors' split.
    dataset = drop_outliers(
        finished_graphs_no_naive(
            df.query(f"split == 'neighbors' and vertex_no < {cut_at}")
        )
    )
    figs = list(plot_frame(
        title,
        dataset,
        ops_x_column=["triangle_components_no"],  # "vertex_no"
        filter_out_exhaustive_mergin_strategies_for_first=False,
    ))
    for fig in figs:
        display(fig)
    export_standard_figure_list("no_nac_coloring_generated_failing_merging", figs)

    # Split strategies under 'linear' merging.
    dataset = drop_outliers(
        finished_graphs_no_naive(
            df.query(f"merging == 'linear' and vertex_no < {cut_at}")
        )
    )
    figs = list(plot_frame(
        title,
        dataset,
        ops_x_column=["triangle_components_no"],  # "vertex_no"
        filter_out_exhaustive_mergin_strategies_for_first=False,
    ))
    for fig in figs:
        display(fig)
    export_standard_figure_list("no_nac_coloring_generated_failing_split", figs)

## Relative number of checks performed

In [None]:
if ANALYTICS:
    # Naive cycle splitting and runs without monochromatic classes are left out.
    figs = list(plot_is_NAC_coloring_calls(
        df_analytics.query("(split != 'naive-cycles') and (used_monochromatic_classes==True)")
    ))
    title = 'All datasets'
    dataset_name = 'check-comparision'
    for fig in figs:
        display(fig)
    export_standard_figure_list(dataset_name, figs)
    # [export_figure_impl(fig, dataset_name, "first", group_by, "reduction", "checks", "mean+median") for fig, group_by in zip(figs, ["vertices", "monochromatic"])]

# Comparison with the naive approach

In this section we run our algorithm on all Laman graphs of a given size.
The goal is to show the performance improvement over the previous state of the art, the naive approach.
For clarity and simplicity, we use only a single strategy: `neighbors_degree` with `linear` merging.

In [None]:
def find_colorings_on_all_graphs(
    dataset_name: str,
    graphs: Callable[[], Iterable[nx.Graph]],
    vertex_no: int,
    algorithm: str = "subgraphs-linear-neighbors_degree-4-smart",
    DIR: str = os.path.join("benchmarks", "results", "iteration"),
    time_log: str | None = None,
) -> None:
    """
    Enumerate NAC-colorings of every graph and record a histogram of the counts.

    Parameters
    ----------
    dataset_name:
        Prefix of the name of the CSV file the histogram is written to.
    graphs:
        Factory returning the graphs to process. It is invoked before the
        timer starts, so a factory returning a materialized list keeps the
        loading cost out of the measurement, while a lazy iterable is
        consumed inside the timed loop.
    vertex_no:
        Number of vertices of the graphs; used only for logging and file names.
    algorithm:
        Algorithm identifier passed to ``nac.NAC_colorings``.
    DIR:
        Output directory; created when missing.
    time_log:
        CSV file that a ``vertex_no,algorithm,elapsed_ms`` line is appended to.
        Defaults to ``log.csv`` inside ``DIR``.

    Side effects
    ------------
    Prints progress and a summary, appends to ``time_log`` and writes
    ``{dataset_name}_{vertex_no}_{algorithm}.csv`` into ``DIR``.
    """
    os.makedirs(DIR, exist_ok=True)
    time_log = time_log or os.path.join(DIR, "log.csv")

    # coloring count -> number of graphs having that many colorings
    stats: defaultdict[int, int] = defaultdict(int)
    rand = random.Random(42)

    print(f"Running {algorithm} on graphs with {vertex_no} vertices")
    graph_source = graphs()
    # perf_counter is monotonic, so the interval cannot be skewed by clock changes.
    start = time.perf_counter()
    for graph in tqdm(graph_source):
        iterable = nac.NAC_colorings(
            graph,
            algorithm=algorithm,
            relabel_strategy="none",
            use_decompositions=False,
            use_has_coloring_check=False,
            seed=rand.randint(0, 2**32 - 1),
        )
        # Exhaust the iterator without storing anything; zip stops as soon as
        # `iterable` is exhausted, so the counter ends at the number of items.
        counter = itertools.count()
        deque(zip(iterable, counter), maxlen=0)
        # NOTE(review): the halving presumably accounts for every coloring
        # being yielded together with its colour-swapped twin - confirm.
        coloring_no = next(counter) // 2
        stats[coloring_no] += 1
    elapsed = time.perf_counter() - start

    # Fixed: the value used to be scaled to microseconds but labelled "s".
    print(f"The operation took {elapsed:.3f}s")
    with open(time_log, "a") as f:
        print(f"{vertex_no},{algorithm},{int(1000 * elapsed)}", file=f, flush=True)

    # Built from the item list directly so that an empty histogram still
    # produces a valid (empty) two-column frame.
    df = pd.DataFrame(list(stats.items()), columns=["coloring_cnt", "graph_cnt"])
    df = df.sort_values(by="coloring_cnt")

    if df.empty:
        print("No graphs were processed, there is nothing to summarize")
    else:
        # Computed outside the f-string: reusing the enclosing quote character
        # inside an f-string is a syntax error before Python 3.12.
        most_colorings = tuple(df.iloc[-1])
        most_common = tuple(df.loc[df["graph_cnt"].idxmax()])
        print(
            f"Most colorings: {most_colorings}, Most common: {most_common} (coloring_cnt, graph_cnt)"
        )

    df.to_csv(os.path.join(DIR, f"{dataset_name}_{vertex_no}_{algorithm}.csv"))

In [None]:

# dataset = list(benchmarks.dataset.load_laman_degree_3_plus_all(vertex_no))

# Disabled by default; flip the guard to run the comparison.
if False:
    # True  -> graphs are materialized into a list by the factory, i.e. before
    #          find_colorings_on_all_graphs starts its timer.
    # False -> graphs are streamed lazily and loaded inside the timed loop.
    RUN_AS_BENCHMARK_NOT_JUST_FOR_THE_STATS = False

    for n in range(7, 12 + 1):
        # The module is addressed as `benchmarks.dataset` because the analytics
        # cells rebind the bare name `dataset` to a DataFrame.
        # `n=n` pins the current size to each factory.
        if RUN_AS_BENCHMARK_NOT_JUST_FOR_THE_STATS:
            # TODO improve so the dataset is not loaded every time
            graphs = lambda n=n: list(benchmarks.dataset.load_laman_all(n))
        else:
            graphs = lambda n=n: benchmarks.dataset.load_laman_all(n)

        find_colorings_on_all_graphs("laman", graphs, n)
        # The naive algorithm is only run up to 10 vertices.
        if n <= 10:
            find_colorings_on_all_graphs("laman", graphs, n, "naive")