# Imports

In [32]:
from __future__ import annotations
import contextlib
import json
import re
from collections import namedtuple
from functools import reduce
from pathlib import Path
from typing import Dict, Iterable, List, Optional, Tuple, Union

import matplotlib as mpl
import matplotlib.ticker as plticker
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from tqdm.notebook import tqdm

# Config

In [2]:
# Instances are considered "hard" if solving them takes >= MIN_HARD_SECS seconds
MIN_HARD_SECS = 1

# Timeout for all runs, in seconds
EXPERIMENT_TIMEOUT = 24 * 60**2

random_instance_regex = re.compile(r'p\d+_\d+\.dat')

# Consolidate results

Generates combined files that make executing this notebook a 2nd/3rd/... time much faster

## Combine findminhs JSON files

In [3]:
result_dirs = [d for d in Path('results').glob('*') if d.is_dir()]
for results_dir in tqdm(result_dirs, desc='Combining experiment json files'):
    data = [json.loads(f.read_text()) for f in results_dir.glob('**/*.json')]
    with Path(f'results/{results_dir.name}.json').open('w') as f:
        json.dump(data, f)

Combining experiment json files:   0%|          | 0/21 [00:00<?, ?it/s]

## Generate `instance-sizes.json`

In [4]:
def get_instance_size(p: Path) -> int:
    with p.open() as f:
        # Skip initial line containing node and edge count
        next(f)
        return sum(int(line.split(maxsplit=1)[0]) for line in f)


instances = list(Path('instances').glob('*.dat'))
instance_sizes = {
    f.name: get_instance_size(f)
    for f in tqdm(instances, desc='Determining instances size')
}
with open('instance-sizes.json', 'w') as f:
    json.dump(instance_sizes, f)

Determining instances size:   0%|          | 0/4256 [00:00<?, ?it/s]

# Load data

## Instance info

In [5]:
all_instance_names = [f.name for f in Path('instances').glob('*.dat')]
with open('instance-sizes.json') as f:
    instance_size = json.load(f)

## Findminhs results

In [6]:
def load_combined_json(path: Path) -> pd.DataFrame:
    with path.open() as f:
        data = json.load(f)
    df = pd.json_normalize(data, sep='__').rename(
        columns=lambda col: col[2:] if col.startswith('__') else col)
    df.set_index('file_name', inplace=True)
    return df.reindex(all_instance_names)


json_files = list(Path('results').glob('*.json'))
dfs_by_experiment = {
    f.stem: load_combined_json(f)
    for f in tqdm(json_files, desc='Loading experiment data')
}

Loading experiment data:   0%|          | 0/21 [00:00<?, ?it/s]

## Gurobi results

In [7]:
# Gurobi log parsing
GUROBI_REGEXES = [
    re.compile(regex, re.MULTILINE) for regex in (
        r'^Presolve time: (.*)s$',
        r'^Root relaxation: .*?, .*? iterations, (.*?) seconds$',
        r'^Explored .*? nodes \(.*? simplex iterations\) in (.*?) seconds$',
    )
]


def gurobi_parse_time(log_file: Path) -> Optional[float]:
    text = Path(log_file).read_text()
    matches = [regex.search(text) for regex in GUROBI_REGEXES]
    # Solving finished if the last message matched by the last regex appears
    if matches[-1] is None:
        return None
    return sum(float(m[1]) for m in matches if m is not None)


def load_gurobi_runtimes(directory: Path) -> pd.Series:
    logs = list(directory.glob('*.log'))
    runtimes = {(f.stem + '.dat'): gurobi_parse_time(f)
                for f in tqdm(logs, desc='Analysing gurobi logs')}
    series = pd.Series(runtimes, dtype=np.float64)
    return series.reindex(all_instance_names)


gurobi_df = load_gurobi_runtimes(Path('gurobi-logs'))

Analysing gurobi logs:   0%|          | 0/4256 [00:00<?, ?it/s]

## Reduced Gurobi results

In [8]:
def load_log(p: Path) -> dict:
    with p.open() as f:
        data = json.load(f)
    data['file_name'] = p.with_suffix('.dat').name
    return data


reduction_logs = list(Path('reduction-logs').glob('*.json'))
logs = [
    load_log(p) for p in tqdm(reduction_logs, desc='Loading reduction logs')
]
gurobi_reduced_df = pd.DataFrame(logs)
gurobi_reduced_df.set_index('file_name', inplace=True)
gurobi_reduced_df = gurobi_reduced_df.reindex(all_instance_names)
gurobi_reduced_df.runtime += load_gurobi_runtimes(Path('gurobi-reduced-logs'))

vs_nonreduced = gurobi_reduced_df.runtime / gurobi_df
vs_nonreduced = vs_nonreduced[vs_nonreduced.index.map(
    lambda name: gurobi_df.loc[name] >= MIN_HARD_SECS)]
improved = len(vs_nonreduced[vs_nonreduced < 0.5])
deteriorated = len(vs_nonreduced[vs_nonreduced > 2.0])
print(f'{improved} better by at least 2x, {deteriorated} worse by at least 2x')

Loading reduction logs:   0%|          | 0/4256 [00:00<?, ?it/s]

Analysing gurobi logs:   0%|          | 0/4256 [00:00<?, ?it/s]

40 better by at least 2x, 10 worse by at least 2x


## Instance classification (easy/hard)

In [9]:
# Hard instance for solver: required >= MIN_HARD_SECS seconds to finish
# Easy: not hard

df = dfs_by_experiment['default']
easy_instances_findminhs = set(df[df.runtimes__total < MIN_HARD_SECS].index)
easy_instances_gurobi = set(gurobi_df[gurobi_df < MIN_HARD_SECS].index)
easy_instances_both = easy_instances_findminhs & easy_instances_gurobi
hard_instances_findminhs = set(all_instance_names) - easy_instances_findminhs

num_hard_finished_findminhs = len(df[df.runtimes__total.notna()
                                     & (df.runtimes__total >= MIN_HARD_SECS)])
num_hard_finished_gurobi = len(gurobi_df[gurobi_df.notna()
                                         & (gurobi_df >= MIN_HARD_SECS)])
unfinished_findminhs = set(df[df.runtimes__total.isna()].index)
unfinished_gurobi = set(gurobi_df[gurobi_df.isna()].index)

print(f'Hard for findminhs: {len(hard_instances_findminhs)}')
print(f'Hard & finished findminhs: {num_hard_finished_findminhs}')
print('Hard for gurobi: '
      f'{len(all_instance_names) - len(easy_instances_gurobi)}')
print(f'Hard & finished gurobi: {num_hard_finished_gurobi}')
print()
print('Hard only for findminhs: '
      f'{len(easy_instances_gurobi - easy_instances_findminhs)}')
print('Hard only for gurobi: '
      f'{len(easy_instances_findminhs - easy_instances_gurobi)}')
print()
print('Finished only for findminhs: '
      f'{len(unfinished_gurobi - unfinished_findminhs)}')
print('Finished only for gurobi: '
      f'{len(unfinished_findminhs - unfinished_gurobi)}')


def get_df_hard_finished(name: str) -> pd.DataFrame:
    """
    Returns a data frame for an experiment, containing only the instances
    that were hard for the default setting and finished in this experiment.
    """
    df = dfs_by_experiment[name]
    return df[df.index.isin(hard_instances_findminhs)].dropna()


def get_dfs_hard_finished(names: Iterable[str]) -> Dict[pd.DataFrame]:
    """
    Returns data frames for multiple experiments, containing only the
    instances that were hard for the default setting and finished in
    all of the given experiments.

    A message is printed listing how many instances were removed since
    they didn't finish in all of the given experiments.
    """
    dfs = {name: get_df_hard_finished(name) for name in names}
    indices = [df.index for df in dfs.values()]
    index_intersection = reduce(lambda i1, i2: i1.intersection(i2), indices)
    index_union = reduce(lambda i1, i2: i1.union(i2), indices)
    dropped = set(index_union) - set(index_intersection)
    if dropped:
        print(
            f'Dropping {len(dropped)} instances since they did not finish in '
            'all experiments')
        print(f'Dropped instances: {dropped}')
    else:
        print('No instances were dropped (all finished in all experiments)')
    return {
        name: df[df.index.isin(index_intersection)].copy()
        for name, df in dfs.items()
    }


Hard for findminhs: 142
Hard & finished findminhs: 136
Hard for gurobi: 300
Hard & finished gurobi: 288

Hard only for findminhs: 0
Hard only for gurobi: 158

Finished only for findminhs: 8
Finished only for gurobi: 2


# Statistics

In [10]:
df = dfs_by_experiment['default'].copy()
df = df[df.runtimes__total.fillna(EXPERIMENT_TIMEOUT) >= 1]
df = df.join(gurobi_reduced_df)
df['runtime_gurobi'] = df.runtime
df = df[df.runtime_gurobi.notna() | df.runtimes__total.notna()]
df = df.fillna(EXPERIMENT_TIMEOUT)
df['speedup'] = df.runtime_gurobi / df.runtimes__total
df['slowdown'] = 1 / df.speedup

is_random_instance = df.index.str.match(random_instance_regex)

print(f'{df[is_random_instance & (df.runtime_gurobi >= 30 * 60)].speedup.min()=}')
print(f'{df[is_random_instance & (df.runtime_gurobi < 30 * 60)].speedup.mean()=}')
print(f'{np.mean([x - 1 if x > 1 else -(1/x - 1) for x in df[is_random_instance & (df.runtime_gurobi < 30 * 60)].speedup])=}')

print(f'{len(df[df.speedup > 1])=}')
print(f'{len(df[df.speedup < 1])=}')

print(f'{len(df[~is_random_instance & (df.speedup > 1)])=}')
print(f'{len(df[~is_random_instance & (df.speedup < 1)])=}')

df[is_random_instance & (df.runtime_gurobi >= 30 * 60)].speedup.min()=1.3440892894899874
df[is_random_instance & (df.runtime_gurobi < 30 * 60)].speedup.mean()=0.8999855183603095
np.mean([x - 1 if x > 1 else -(1/x - 1) for x in df[is_random_instance & (df.runtime_gurobi < 30 * 60)].speedup])=-0.5700824057200232
len(df[df.speedup > 1])=95
len(df[df.speedup < 1])=43
len(df[~is_random_instance & (df.speedup > 1)])=68
len(df[~is_random_instance & (df.speedup < 1)])=12


In [11]:
df = dfs_by_experiment['default'].copy()
df = df.join(gurobi_reduced_df.runtime.rename('runtime_gurobi'))
df = df.fillna(EXPERIMENT_TIMEOUT)
df['speedup'] = df.runtime_gurobi / df.runtimes__total

is_random_instance = df.index.str.match(random_instance_regex)

print(f'{len(df[(df.runtimes__total > 0.01) & (df.speedup > 1)])=}')
print(f'{len(df[(df.runtimes__total > 0.01) & (df.speedup < 1)])=}')

print(f'{len(df[~is_random_instance & (df.runtimes__total > 0.01) & (df.speedup > 1)])=}')
print(f'{len(df[~is_random_instance & (df.runtimes__total > 0.01) & (df.speedup < 1)])=}')

len(df[(df.runtimes__total > 0.01) & (df.speedup > 1)])=332
len(df[(df.runtimes__total > 0.01) & (df.speedup < 1)])=56
len(df[~is_random_instance & (df.runtimes__total > 0.01) & (df.speedup > 1)])=297
len(df[~is_random_instance & (df.runtimes__total > 0.01) & (df.speedup < 1)])=24


# Plotting general

## Config

In [12]:
WIDTH_1COL = 3.335
WIDTH_2COL = 6.808


# Settings for separating random and non-random instances
class RndSettings(namedtuple('RndSettings', ['label', 'color', 'marker'])):
    @property
    def as_scatter(self) -> dict:
        return {'c': self.color, 'label': self.label, 'marker': self.marker}


RND_NONRND_SETTINGS = {
    False: RndSettings(r'\texttt{appl}', 'C0', 'o'),
    True: RndSettings(r'\texttt{rnd}', 'C1', 's'),
}

# Three different styles, by level of highlighting
BACKGROUND_LINE_SETTINGS = [{
    'color': 'black',
    'linewidth': 0.75,
    'zorder': -1,
}, {
    'color': 'gray',
    'linewidth': 0.75,
    'zorder': -1,
}, {
    'color': 'gray',
    'linewidth': 0.5,
    'zorder': -1,
    'linestyle': (0, (5, 10)),
}]

mpl.rc('font', family='serif', serif='Computer Modern')
mpl.rc('text', usetex=True)
plt.rcParams.update({
    'text.latex.preamble': r'''
        \usepackage{amsmath}
        \usepackage{xfrac}
    ''',
    'legend.fontsize': 'small',
})
plt.style.use('seaborn-colorblind')

## Utilities

In [33]:
random_instance_regex = re.compile(r'p\d+_\d+\.dat')


@contextlib.contextmanager
def make_plot(
    name: str, figsize: Tuple[int, int], **subplot_args
) -> Iterable[Tuple[plt.Figure, Union[plt.Axes, np.ndarray[plt.Axes]]]]:
    fig = plt.figure(figsize=figsize)
    ax = fig.subplots(**subplot_args)
    yield fig, ax
    fig.tight_layout()
    Path('plots').mkdir(exist_ok=True)
    fig.savefig(f'plots/{name}.pdf')
    plt.close('all')


def root_lower_bounds(df: pd.DataFrame) -> pd.Series:
    lower_bound_names = [
        'max_degree', 'sum_degree', 'efficiency', 'packing', 'sum_over_packing'
    ]
    return reduce(np.maximum,
                  (df[f'root_bounds__{name}'] for name in lower_bound_names))


def rnd_and_nonrnd(
        index: pd.Index) -> Iterable[Tuple[np.ndarray, RndSettings]]:
    is_random_instance = index.str.match(random_instance_regex)
    yield ~is_random_instance, RND_NONRND_SETTINGS[False]
    yield is_random_instance, RND_NONRND_SETTINGS[True]

# Scatter plots

## Config

In [14]:
def scatter_settings(hollow: bool = False) -> dict:
    settings = {
        # Roughly account for size increase when keeping outlines
        's': (14 if hollow else 20),
        'alpha': 0.7
    }
    if not hollow:
        settings['linewidths'] = 0
    return settings

## Runtime comparison with Gurobi

In [15]:
def runtime_vs_runtime_plot(name: str, df: pd.DataFrame, xcol: str, ycol: str,
                            xlabel: str, ylabel: str) -> None:
    # Gurobis log entries only have two digits of precision.
    # Therefore (and since the low values are not that interesting anyways)
    # clip all values below 0.01 to 0.01.
    x = df[xcol].fillna(EXPERIMENT_TIMEOUT).clip(lower=0.01)
    y = df[ycol].fillna(EXPERIMENT_TIMEOUT).clip(lower=0.01)

    with make_plot(name, (WIDTH_1COL, WIDTH_1COL)) as (_, ax):
        is_timeout = df[xcol].isna() | df[ycol].isna()
        for rnd_filt, settings in rnd_and_nonrnd(df.index):
            for tle in [False, True]:
                filt = rnd_filt & (is_timeout if tle else ~is_timeout)
                label = settings.label
                if tle:
                    label += ' (timeout)'
                ax.scatter(x[filt],
                           y[filt],
                           label=label,
                           facecolors=('none' if tle else settings.color),
                           edgecolors=settings.color,
                           marker=settings.marker,
                           **scatter_settings(hollow=tle))

        ax.legend()
        ax.set_xscale('log')
        ax.set_yscale('log')
        ax.set_aspect('equal')

        mn = 0.01
        mx = max(ax.get_xlim()[1], ax.get_ylim()[1])
        ax.set_xlim((mn, mx))
        ax.set_ylim((mn, mx))

        ax.yaxis.set_major_locator(ax.xaxis.get_major_locator())
        ax.yaxis.set_minor_locator(ax.xaxis.get_minor_locator())

        ax.axline((mn, mn), (mx, mx), **BACKGROUND_LINE_SETTINGS[0])
        for i in range(1, 10):
            ax.axline((mn, mn * 10**i), (mx, mx * 10**i),
                      **BACKGROUND_LINE_SETTINGS[2])
            ax.axline((mn * 10**i, mn), (mx * 10**i, mx),
                      **BACKGROUND_LINE_SETTINGS[2])

        ax.set_xlabel(xlabel)
        ax.set_ylabel(ylabel)


df = dfs_by_experiment['default'].copy()
runtime_vs_runtime_plot('runtime-vs-gurobi',
                        df.join(gurobi_df.rename('runtime_gurobi')),
                        'runtime_gurobi', 'runtimes__total',
                        'Runtime Gurobi (s)', 'Runtime (s)')

runtime_vs_runtime_plot(
    'runtime-vs-gurobi-reduced',
    df.join(gurobi_reduced_df.runtime.rename('runtime_gurobi')),
    'runtime_gurobi', 'runtimes__total', 'Runtime Gurobi reduced (s)',
    'Runtime (s)')

df_gurobi = gurobi_reduced_df.join(gurobi_df.rename('runtime'),
                                   lsuffix='_reduced')
runtime_vs_runtime_plot('gurobi-vs-gurobi-reduced', df_gurobi, 'runtime',
                        'runtime_reduced', 'Runtime Gurobi (s)',
                        'Runtime Gurobi reduced (s)')

## Search space by bound

In [16]:
df = get_df_hard_finished('default')
lower = root_lower_bounds(df)
gap = df.root_bounds__greedy_upper - lower

with make_plot('search-space-bound-gap', (WIDTH_1COL, WIDTH_1COL)) as (_, ax):
    for filt, settings in rnd_and_nonrnd(df.index):
        ax.scatter(gap[filt], df[filt].branching_steps, **settings.as_scatter,
                   **scatter_settings())

    ax.legend()
    ax.set_yscale('symlog')

    ax.set_xlabel(r'Bound gap ($\text{upper} - \text{lower}$)')
    ax.set_ylabel(r'Search space (\#branching steps)')

## Search space by instance size

In [17]:
df = get_df_hard_finished('default')
sizes = df.index.map(instance_size)

with make_plot('search-space-instance-size',
               (WIDTH_1COL, WIDTH_1COL)) as (_, ax):
    for filt, settings in rnd_and_nonrnd(df.index):
        ax.scatter(sizes[filt], df[filt].branching_steps,
                   **settings.as_scatter, **scatter_settings())

    ax.legend()
    ax.set_yscale('symlog')
    ax.set_xscale('log')

    ax.set_xlabel(r'$\lVert \mathcal{F} \rVert$')
    ax.set_ylabel(r'Search space (\#branching steps)')

## Search space (combined plot)

In [46]:
df = get_df_hard_finished('default')
lower = root_lower_bounds(df)
gap = df.root_bounds__greedy_upper - lower
sizes = df.index.map(instance_size)

with make_plot('search-space-combined', (WIDTH_2COL, WIDTH_1COL), nrows=1, ncols=2, sharey=True) as (fig, (ax0, ax1)):
    for filt, settings in rnd_and_nonrnd(df.index):
        ax0.scatter(sizes[filt], df[filt].branching_steps,
                   **settings.as_scatter, **scatter_settings())
    ax0.legend()
    ax0.set_xscale('log')
    ax0.set_yscale('symlog')
    ax0.set_xlabel(r'$\lVert \mathcal{F} \rVert$')
    ax0.set_ylabel(r'Search space (\#branching steps)')

    for filt, settings in rnd_and_nonrnd(df.index):
        ax1.scatter(gap[filt], df[filt].branching_steps, **settings.as_scatter,
                   **scatter_settings())
    ax1.legend()
    ax1.set_xlabel(r'Bound gap ($\text{upper} - \text{lower}$)')

## Bound gaps

In [18]:
df = get_df_hard_finished('default')

lower = root_lower_bounds(df)
opt = df.opt
upper = df.root_bounds__greedy_upper

x = opt / lower
y = upper / opt

with make_plot('bound-gaps', (WIDTH_1COL, 1.6)) as (fig, ax):
    for filt, settings in rnd_and_nonrnd(x.index):
        ax.scatter(x[filt], y[filt], **settings.as_scatter,
                   **scatter_settings())

    ax.legend()

    ax.set_aspect('equal')
    ax.xaxis.set_major_locator(ax.yaxis.get_major_locator())

    ax.set_xlabel(r'$\sfrac{\text{opt}}{\text{lower}}$')
    ax.set_ylabel(r'$\sfrac{\text{upper}}{\text{opt}}$')

## Greedy modes

In [19]:
names = [
    'default', 'greedy-always-before-bounds',
    'greedy-always-before-expensive-reductions'
]
dfs = get_dfs_hard_finished(names)

df = dfs['default']
always_early_df = dfs['greedy-always-before-bounds']
always_late_df = dfs['greedy-always-before-expensive-reductions']

x = always_late_df.runtimes__total / df.runtimes__total
y = always_early_df.runtimes__total / df.runtimes__total
mx = round(1.1 * max(x.max(), y.max()), ndigits=1)

with make_plot('where-greedy', (WIDTH_1COL, WIDTH_1COL)) as (fig, ax):
    for filt, settings in rnd_and_nonrnd(x.index):
        ax.scatter(x[filt], y[filt], **settings.as_scatter,
                   **scatter_settings())

    ax.legend()
    ax.set_xlabel('Every loop, before expensive reductions')
    ax.set_ylabel('Every loop, before bounds')
    ax.set_xlim((0, mx))
    ax.set_ylim((0, mx))

    for p1, p2 in [((0, 1), (mx, 1)), ((1, 0), (1, mx)), ((0, 0), (mx, mx))]:
        ax.axline(p1, p2, **BACKGROUND_LINE_SETTINGS[1])

No instances were dropped (all finished in all experiments)


# Box plots

## Config

In [20]:
# Fill boxes in white by default so that background lines don't
# appear to go through the boxes
def boxplot_settings(facecolor: str = 'white') -> dict:
    return {
        'patch_artist': True,
        'boxprops': {
            'facecolor': facecolor,
        },
        'medianprops': {
            'color': 'C4'
        },
        'flierprops': {
            'marker': '.',
            'markerfacecolor': 'black',
            'markersize': 4,
        },
    }

## Utilities

In [21]:
def boxplot_rnd_split(ax: pd.Axes,
                      data: list[pd.Series],
                      legend_props: Optional[dict] = None) -> None:
    pos = np.arange(len(data)) + 1
    width = 0.2
    offset = 0.175

    legend_handles = []
    legend_labels = []
    for i, (filt, settings) in enumerate(rnd_and_nonrnd(data[0].index)):
        data_filt = [col[filt] for col in data]
        bp = ax.boxplot(data_filt,
                        widths=width,
                        positions=pos + (offset if i else -offset),
                        **boxplot_settings(settings.color))
        legend_handles.append(bp['boxes'][0])
        legend_labels.append(settings.label)

    ax.legend(legend_handles, legend_labels, **(legend_props or {}))

## Operation runtimes

In [22]:
parts = {
    'greedy': 'Greedy',
    'max_degree_bound': 'Max\ndeg.\nbound',
    'efficiency_bound': 'Eff.\nbound',
    'packing_bound': 'Packing\nbound',
    'sum_over_packing_bound': 'Sum\nover\npacking\nbound',
    'forced_vertex': 'Unit\nedge',
    'costly_discard_packing_update': 'Costly\ndiscard\npacking\nupdate',
    'costly_discard_packing_from_scratch': 'Costly\ndiscard\nrepack',
    'vertex_domination': 'Vertex\ndom.',
    'edge_domination': 'Edge\ndom.',
    'other': 'Other',
}
df = get_df_hard_finished('default')

non_other_runtimes = sum(
    df[col] for col in df.columns
    if col.startswith('runtimes__') and col not in (
        'runtimes__total', 'runtimes__applying_reductions'))
runtime_other = df.runtimes__total - non_other_runtimes

data = [(runtime_other if col == 'other' else df[f'runtimes__{col}']) /
        df.runtimes__total for col in parts.keys()]

with make_plot('operations', (WIDTH_2COL, 2.5)) as (_, ax):
    boxplot_rnd_split(ax,
                      data,
                      legend_props={
                          'loc': 'upper left',
                          'bbox_to_anchor': (0.08, 0, 1, 1)
                      })

    font_settings = dict(fontsize='x-small')
    ticklabels = [fr'{num}\%' for num in [0, 20, 40, 60, 80, 100]]
    ax.set_xticks(np.arange(len(parts)) + 1)
    ax.set_xticklabels(parts.values(), fontdict=font_settings)
    ax.set_yticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
    ax.set_yticklabels(ticklabels, fontdict=font_settings)

## Search space by lower bound

In [23]:
bounds = {
    'Max degree': 'max-degree-only',
    'Sum degree': 'sum-degree-only',
    'Efficiency': 'efficiency-only',
    'Packing': 'packing-only',
    'Packing\n(local search)': 'packing-local-search-only',
    'Sum\nover packing': 'sum-over-packing-only',
    'Sum\nover packing\n(local search)': 'packing-local-search-only',
}
dfs = get_dfs_hard_finished(list(bounds.values()) + ['default'])
df = dfs['default']

idx_nonzero = df[df.branching_steps > 0].index
num_dropped = len(df) - len(idx_nonzero)
print(f'Dropping {num_dropped} instances since they didnt branch when '
      'using default settings')
dfs = {name: df[df.index.isin(idx_nonzero)] for name, df in dfs.items()}
df = dfs['default']
print(f'Remaining: {len(df)} instances')

data = [
    dfs[name].branching_steps / df.branching_steps for name in bounds.values()
]

with make_plot('search-space-by-bound', (WIDTH_2COL, 2.1)) as (_, ax):
    boxplot_rnd_split(ax,
                      data,
                      legend_props={
                          'loc': 'upper left',
                          'bbox_to_anchor': (0.73, 0, 0.8, 1)
                      })

    ax.set_yscale('symlog')
    ax.set_xticks(np.arange(len(bounds)) + 1)
    ax.set_xticklabels(bounds.keys())
    ax.set_ylabel('Relative search space')

    ax.hlines(1, *ax.get_xlim(), **BACKGROUND_LINE_SETTINGS[2])

Dropping 3 instances since they did not finish in all experiments
Dropped instances: {'isolet_r7798_c618_r7798_c200_diff_sets.hg.dat', 'cost_matrix_component_nr_52_size_885_cutoff_10.0.cm.dat', 'p7_256000.dat'}
Dropping 12 instances since they didnt branch when using default settings
Remaining: 121 instances


## Bound effectiveness

In [24]:
cols = [
    'reductions__max_degree_bound_breaks',
    'reductions__efficiency_degree_bound_breaks',
    'reductions__packing_bound_breaks',
    'reductions__sum_over_packing_bound_breaks',
]
df = get_df_hard_finished('default')
total = sum(df[col] for col in cols)

idx_nonzero = total[total != 0].index
print(
    f'Discarding {len(total) - len(idx_nonzero)} instances which didnt have any bound breaks'
)
df = df.reindex(idx_nonzero)
total = total.reindex(idx_nonzero)

data = [df[col] / total for col in cols]
labels = ['Max\ndegree', 'Efficiency', 'Packing', 'Sum over\npacking']

with make_plot('bound-comparison', (WIDTH_1COL, 2)) as (_, ax):
    boxplot_rnd_split(ax, data)

    ax.set_xticks(np.arange(len(labels)) + 1)
    ax.set_xticklabels(labels)
    ax.set_yticks([0.0, 0.2, 0.4, 0.6, 0.8, 1.0])
    ticklabels = [fr'{num}\%' for num in [0, 20, 40, 60, 80, 100]]
    ax.set_yticklabels(ticklabels)

Discarding 2 instances which didnt have any bound breaks


## Reduction effectiveness

In [25]:
df = get_df_hard_finished('default')
len_before = len(df)
df = df[df.reductions__forced_vertex_runs > 0]
print(
    f'Discarding {len_before - len(df)} instances that never used reductions')

times_reductions_reached = df.reductions__forced_vertex_runs
later_reductions = {
    'costly_discard_efficiency_runs': 'Costly\ndiscard\nefficiency',
    'costly_discard_packing_update_runs': 'Costly\ndiscard\npacking\nupdate',
    'costly_discard_packing_from_scratch_runs': 'Costly\ndiscard\nrepack',
    'vertex_dominations_runs': 'Vertex\ndom.',
    'edge_dominations_runs': 'Edge\ndom.',
}
data = [
    df[f'reductions__{col}'] / times_reductions_reached
    for col in later_reductions.keys()
]

with make_plot('reduction-runs', (WIDTH_1COL, 2.5)) as (_, ax):
    boxplot_rnd_split(ax, data)

    ax.tick_params('both', labelsize='x-small')
    ax.set_xticks(np.arange(len(later_reductions)) + 1)
    ax.set_xticklabels(later_reductions.values())
    ax.yaxis.set_major_formatter(plticker.PercentFormatter(xmax=1))

Discarding 9 instances that never used reductions


## Forced vertex effectiveness

In [26]:
df = get_df_hard_finished('default')
df['reductions__costly_discard_packing_from_scratch_vertices_found'] = df.reductions__costly_discard_packing_from_scratch_steps_per_run.map(
    lambda l: sum(l[:-1]))

reductions = {
    'forced': 'Unit\nedge',
    'costly_discard_efficiency': 'Costly\ndiscard\nefficiency',
    'costly_discard_packing_update': 'Costly\ndiscard\npacking\nupdate',
    'costly_discard_packing_from_scratch': 'Costly\ndiscard\nrepack',
}
reductions = {
    f'reductions__{col}_vertices_found': label
    for col, label in reductions.items()
}
df['reductions__total_vertices_found'] = sum(df[col]
                                             for col in reductions.keys())

len_before = len(df)
df = df[df['reductions__total_vertices_found'] > 0]
print(
    f'Removed {len_before - len(df)} instances for which no forced vertices were found'
)

data = [
    df[col] / df.reductions__total_vertices_found for col in reductions.keys()
]

with make_plot('forced-vertices', (WIDTH_1COL, 2.5)) as (_, ax):
    boxplot_rnd_split(ax, data)

    ax.tick_params('both', labelsize='x-small')
    ax.set_xticks(np.arange(len(data)) + 1)
    ax.set_xticklabels(reductions.values())
    ax.yaxis.set_major_formatter(plticker.PercentFormatter(xmax=1))

Removed 10 instances for which no forced vertices were found


## From scratch packing parameter

In [27]:
names = [f'from-scratch-{i}' for i in [0] + list(range(1, 20, 2)) if i != 3]
names.append('default')
dfs = get_dfs_hard_finished(names)

base_df = dfs['from-scratch-0']
num_dfs = {
    num: dfs['default' if num == 3 else f'from-scratch-{num}']
    for num in range(1, 20, 2)
}

data = [
    df.runtimes__total / base_df.runtimes__total for df in num_dfs.values()
]

with make_plot('from-scratch', (WIDTH_2COL, 2.5)) as (_, ax):
    boxplot_rnd_split(ax, data)

    ax.set_xlabel(r'\#Nodes checked')
    ax.set_ylabel('Runtime (rel. to w/o rule)')
    ax.set_xticks(range(1, len(num_dfs) + 1))
    ax.set_xticklabels([str(num) for num in num_dfs.keys()])

    ax.hlines(1, *ax.get_xlim(), **BACKGROUND_LINE_SETTINGS[2])

No instances were dropped (all finished in all experiments)


## Greedy modes vs disabled

In [28]:
names = [
    'default', 'greedy-never', 'greedy-always-before-bounds',
    'greedy-always-before-expensive-reductions'
]
dfs = get_dfs_hard_finished(names)

base_df = dfs['greedy-never']
data = [
    dfs[name].runtimes__total / base_df.runtimes__total for name in [
        'default',
        'greedy-always-before-expensive-reductions',
        'greedy-always-before-bounds',
    ]
]
labels = [
    'Once,\nin the\nbeginning', 'Every loop,\nbefore expensive\nreductions',
    'Every loop,\nbefore\nbounds'
]

with make_plot('greedy-vs-off', (WIDTH_1COL, 2)) as (_, ax):
    boxplot_rnd_split(ax, data, legend_props={'loc': 'lower right'})

    ax.set_xlim(ax.get_xlim()[0], 1.1 * ax.get_xlim()[1])
    ax.set_xticks(np.arange(len(labels)) + 1)
    ax.set_xticklabels(labels)
    ax.tick_params(axis='both', which='major', labelsize='x-small')

    ax.hlines(1, *ax.get_xlim(), **BACKGROUND_LINE_SETTINGS[2])

No instances were dropped (all finished in all experiments)


# Contour plots

## Upper bound progress

In [29]:


def bound_improvement_curve(row) -> List[Tuple[float, float]]:
    bound_gap = row.root_bounds__greedy_upper - row.opt
    if bound_gap == 0:
        return [(0, 1), (1, 1)]
    curve = [(0, 0)]
    for impr in row.upper_bound_improvements:
        rel_time = impr["runtime"] / row.runtimes__total
        rel_bound = 1 - (impr["new_bound"] - row.opt) / bound_gap
        curve.append((rel_time, rel_bound))
    return curve + [(1, 1)]


df_full = get_df_hard_finished("default")
is_random_instance = df_full.index.str.match(random_instance_regex)
settings = [('-rnd', is_random_instance), ('-nonrnd', ~is_random_instance),
            ('-both', is_random_instance | ~is_random_instance)]

for suffix, filt in settings:
    df = df_full[filt].copy()

    curves = [bound_improvement_curve(row) for row in df.itertuples()]
    x = np.array(sorted({x for curve in curves for x, _y in curve}))

    # Add slightly larger than 0 and slightly lower than 1 values, so that both 0 and 1
    # are only used for the very edges of the plot
    y = np.array(
        sorted({y
                for curve in curves for _x, y in curve} | {1e-12, 1 - 1e-12}))

    z = np.zeros((len(x), len(y)))
    rel = 1 / len(curves)
    for curve in curves:
        for i in range(len(curve)):
            x_left, curve_y = curve[i]
            x_right = 2 if i + 1 == len(curve) else curve[i + 1][0]
            x_l = np.searchsorted(x, x_left, side="left")
            x_r = np.searchsorted(x, x_right, side="left")
            y_r = np.searchsorted(y, curve_y, side="right")
            z[x_l:x_r, :y_r] += rel

    with make_plot(f'bound-updates{suffix}',
                   (WIDTH_1COL, 0.85 * WIDTH_1COL)) as (fig, ax):
        contour = ax.contourf(
            x,
            y,
            z.transpose(),
            cmap="Greys",
            levels=np.linspace(0, 1, 11),
            vmin=0,
            vmax=1,
        )

        # For some reason, the black polygon bugged out and leaves some white part
        # in the bottom right undrawn. This just lays a black background behind the
        # contour plot.
        ax.fill([0, 0, 1, 1], [0, 1, 1, 0], facecolor="black", zorder=-1)

        ax.set_aspect("equal")
        ax.set_xlabel("Runtime")
        ax.set_ylabel("Upper bound progress")

        formatter = plticker.PercentFormatter(xmax=1)
        ticks = np.linspace(0, 1, 6)
        ax.set_xticks(ticks)
        ax.tick_params("both", labelsize="x-small")
        ax.xaxis.set_major_formatter(formatter)
        ax.yaxis.set_major_formatter(formatter)

        cbar = fig.colorbar(contour,
                            ticks=ticks,
                            format=formatter,
                            fraction=0.046,
                            pad=0.04)
        cbar.ax.tick_params(labelsize="x-small")

        # Matplotlib by default cuts of the y-label, this mostly fixes the issue
        fig.subplots_adjust(left=-0.8)