In [None]:
import numpy as np
import polars as pl
import matplotlib.pyplot as plt
import matplotlib as mpl
from pathlib import Path

# Loading

In [None]:
# Select the experiment to analyse; exactly one `task` assignment should be active.
task = "imagenet (a)"
# task = "imagenet (b)"
# task = "segmentation"

# Column names of the two objectives. c0 is the objective the fronts are sorted
# and thresholded on; c1 is the second objective.
c0 = "accuracy"
c1 = "multiply-adds"

In [None]:
# General 'knowledge'
# Direction in which each criterion improves: +1 = larger is better,
# -1 = smaller is better.
improvement_direction = {
    "accuracy": 1,
    "loss": -1,
    "loss-clip": -1,
    "total bytes": -1,
    "total_memory_bytes": -1, # dict name
    "multiply-adds": -1,
    "total_mult_adds": -1, # dict name
    # "genotype": 0, # -- not a criterion
}
# Ideal value per criterion; individual tasks may override entries below.
best_possible_value = {
    "accuracy": 1.0,
    "loss": 0.0,
    "loss-clip": 0.0,
    "total bytes": 0.0,
    "total_memory_bytes": 0.0,
    "multiply-adds": 0.0,
    "total_mult_adds": 0.0,
}

# Task specific settings: each branch only names the locations and thresholds,
# the shared checks and the file listing follow after the branches.
if task == "imagenet (a)":
    # imagenet-a
    folder = Path("./2024-01-02-results/imagenet_a/")
    run_folder = folder / "exp-imagenet-a"
    reference_file = folder / "stitched-imagenet-a-reference.arrow"
    # some task-specific tidbits
    min_accuracy = 0.7
    best_possible_value["accuracy"] = 0.8
elif task == "imagenet (b)":
    # imagenet-b
    folder = Path("./2024-01-04")
    run_folder = folder / "exp-imagenet-b"
    reference_file = folder / "stitched-imagenet-b-a-resnet50-b-resnext50_32x4d-reference.arrow"
    # some task-specific tidbits
    min_accuracy = 0.7
    best_possible_value["accuracy"] = 0.8
elif task == "segmentation":
    folder = Path("./2024-01-02-results/segmentation/")
    run_folder = folder / "exp-voc"
    reference_file = folder / "stitched-voc-reference.arrow"
    # some task-specific tidbits
    min_accuracy = 0.90
else:
    raise ValueError("Unknown task")

# Fail early if the expected result folders are missing.
assert folder.exists()
assert run_folder.exists()
# Sorted so that the order in which runs are loaded does not depend on the
# file system's listing order.
files = sorted(run_folder.glob("*.arrow"))

# Threshold on the first objective. It is always bound (None = no threshold)
# because later cells test `min_performance is not None`.
min_performance = min_accuracy if c0 == "accuracy" else None

In [None]:
# Reference front: read the reference file, harmonise the column names and tag
# every row with the set it belongs to and the file it came from.
reference_column_names = {
    "total_memory_bytes": "total bytes",
    "total_mult_adds": "multiply-adds",
}
# The last genotype entry is translated into a set label.
reference_set_labels = {
    0: "a",
    1: "b",
    2: "ensemble",
}
reference_set_expr = pl.col("genotype").list.last().map_dict(reference_set_labels).alias("set")
reference_file_expr = pl.lit(reference_file.name).alias("file")
reference_samples = (
    pl.read_ipc(reference_file)
    .rename(reference_column_names)
    .with_columns(reference_set_expr, reference_file_expr)
)

# Accumulators that load_run fills in while the run files are being read.
common_columns = None
run_info = []

def load_run(filepath):
    """Load the evaluations of a single run and align them with the reference.

    Side effects: appends one statistics record to the global ``run_info`` list
    and rebinds the global ``common_columns`` to the columns shared between
    this run and ``reference_samples``.

    Parameters
    ----------
    filepath : pathlib.Path or str
        Arrow/IPC file holding the evaluations of one run.

    Returns
    -------
    polars.LazyFrame
        The run's rows with a non-null ``loss``, restricted to the common
        columns and, when ``min_performance`` is set, to rows whose ``c0``
        is strictly better than that threshold.
    """
    global run_info
    global common_columns
    filepath = Path(filepath)
    # As grouping label - use the file name - which identifies the run.
    file_ref = filepath.name

    # Determine legend label - i.e. the approach used. It is taken from the
    # second dash-separated part of the file name, when there are more than two.
    set_name = "unk"
    name_parts = file_ref.split("-")
    if len(name_parts) > 2:
        set_name = name_parts[1]

    # Load data - note: here we always assume the files have already been converted to arrow
    # e.g. using jsonl-to-arrow.py.
    approach_samples = pl.read_ipc(filepath).lazy().with_row_count(name="#eval")
    num_samples_evaluated = approach_samples.select(pl.count()).collect().item()

    # Clean-up data: rows without a loss are dropped, columns are harmonised
    # with the reference front and every row is tagged with approach and file.
    approach_samples = approach_samples.drop_nulls("loss").rename({
        "total_memory_bytes": "total bytes",
        "total_mult_adds": "multiply-adds",
    }).with_columns([
        pl.lit(set_name).alias("set"),
        pl.lit(file_ref).alias("file"),
    ])
    num_samples_evaluated_active = approach_samples.select(pl.count()).collect().item()
    num_samples_evaluated_inactive = num_samples_evaluated - num_samples_evaluated_active

    # Collect data on how many evaluations were short-circuited.
    run_info.append({
        "file_ref": file_ref,
        "set_name": set_name,
        "num_samples_evaluated": num_samples_evaluated,
        "num_samples_evaluated_active": num_samples_evaluated_active,
        "num_samples_evaluated_inactive": num_samples_evaluated_inactive,
    })

    # Keep the reference's column order instead of iterating over a set: set
    # iteration order is not stable, while every run must yield the same
    # column order to be stacked with the others.
    available_columns = set(approach_samples.columns)
    common_columns = [name for name in reference_samples.columns if name in available_columns]
    approach_samples = approach_samples.select(common_columns)

    if min_performance is not None:
        # Multiplying both sides by the direction turns "better than" into ">".
        approach_samples = approach_samples.filter(improvement_direction[c0] * pl.col(c0) > improvement_direction[c0] * min_performance)

    return approach_samples

# Load every run file and stack the per-run frames into a single frame.
runs_data = pl.concat([load_run(run_file) for run_file in files])


In [None]:
# Turn the per-run statistics records collected by load_run into a DataFrame.
# Note: this rebinds `run_info` from a list of dicts to a DataFrame.
run_info = pl.DataFrame(run_info)

# Generic data

In [None]:
# Compute some preliminary statistics:
# - How much of the budget is used?
# - How often is the inactive variable short-circuit activated?
# File-name friendly task label: spaces become underscores, parentheses are dropped.
task_ = task.replace(" ", "_").replace("(", "").replace(")", "")
run_summary = run_info.lazy().with_columns(
    # Share of evaluations that were actually carried out (non-null loss).
    (pl.col("num_samples_evaluated_active") / pl.col("num_samples_evaluated")).alias("num_samples_evaluated_active_ratio")
).group_by("set_name").agg([
    # Median and 20% / 80% quantiles of every num_samples* column, per approach.
    pl.col("^num_samples.*$").median().map_alias(lambda x: x + "_median"),
    pl.col("^num_samples.*$").quantile(0.20).map_alias(lambda x: x + "_q20"),
    pl.col("^num_samples.*$").quantile(0.80).map_alias(lambda x: x + "_q80"),
]).sort("set_name").collect()
# Use the sanitised label (not the raw task name) so that the file name
# contains no spaces or parentheses.
run_summary.write_csv(f"{task_}_activity_stats.csv")
run_summary

# Prepare for front computation

In [None]:
# Ensure reference has common samples, too.
# `common_columns` is set by load_run, so at least one run must have been loaded.
reference_samples = reference_samples.select(common_columns)

In [None]:
# Tools for computing fronts
def maybe_over(a, o):
    """Return ``a.over(o)`` when ``o`` is non-empty, otherwise ``a`` unchanged.

    Lets the same expression be used with or without grouping columns.
    """
    return a.over(o) if len(o) > 0 else a

def compute_pareto(df, group_vars):
    """Keep only the non-dominated rows with respect to objectives c0 and c1.

    Parameters
    ----------
    df : polars DataFrame or LazyFrame
        Must contain the columns named by the globals ``c0`` and ``c1`` and
        every column in ``group_vars``.
    group_vars : list of str
        Columns identifying independent fronts; an empty list computes a
        single front over all rows.

    Returns
    -------
    Same frame type as ``df``, ordered best-c0-first, with the helper columns
    ``c1-min``, ``mv`` and ``is pareto`` appended.
    """
    c0_best_first = improvement_direction[c0] > 0
    return (df
        # Express c1 as a quantity to minimise, whatever its direction.
        .with_columns((pl.col(c1) * -improvement_direction[c1]).alias("c1-min"))
        # Best c0 first. Ties in c0 are ordered best-c1-first so that a row with
        # the same c0 but a worse c1 is compared against the better row and
        # rejected, instead of slipping in when it happens to come first.
        .sort([c0, "c1-min"], descending=[c0_best_first, False])
        # Best c1 seen so far among the rows with an equal or better c0.
        .with_columns(maybe_over((pl.col("c1-min")).cummin(), group_vars).alias("mv"))
        # A row is kept when it strictly improves c1 over all preceding rows;
        # the first row of each front has no predecessor and is always kept.
        .with_columns(maybe_over(pl.col("c1-min") < pl.col("mv").shift(1), group_vars).fill_null(True).alias("is pareto"))
        .filter(pl.col("is pareto"))
    )

def compute_2d_hv(df_pareto, ref, axis_scale, group_vars):
    """Sum the scaled slice areas of 2-D fronts, one result row per group.

    Parameters
    ----------
    df_pareto : frame created using compute_pareto
    ref : pair (c0, c1) used as the reference point
    axis_scale : pair (c0, c1) of divisors applied to the slice width / height
    group_vars : list of str, columns identifying one front

    Returns
    -------
    Frame with the ``group_vars`` columns and the summed ``hv_contrib``.
    Rows worse than the reference point are not filtered out here.
    """
    dir_c0 = improvement_direction[c0]
    dir_c1 = improvement_direction[c1]

    # Width of a slice: step in c0 from the preceding row of the same front;
    # the first row of a front is measured from the reference point.
    preceding_c0 = pl.col(c0).shift(1).fill_null(ref[0])
    width = maybe_over(dir_c0 * (pl.col(c0) - preceding_c0) / axis_scale[0], group_vars)
    # Height of a slice: distance in c1 between the row and the reference point.
    height = maybe_over(dir_c1 * (pl.col(c1) - ref[1]) / axis_scale[1], group_vars)

    # Worst c0 first, so that each row's predecessor is the next-worse row.
    ordered = df_pareto.sort(c0, descending=dir_c0 < 0)
    slices = ordered.with_columns([
        width.alias("slice_width"),
        height.alias("slice_height"),
    ])
    contributions = slices.select([
        pl.col(group_vars),
        (pl.col("slice_width") * pl.col("slice_height")).alias("hv_contrib"),
    ])
    return contributions.group_by(group_vars).agg(pl.col("hv_contrib").sum())

# Compute front over all evaluated solutions

In [None]:
# Pool the samples of every run with the reference points, then compute one
# front over the pooled set (no grouping columns).
pooled_frames = [runs_data, reference_samples.lazy()]
all_data = pl.concat(pooled_frames, how="diagonal_relaxed")
no_grouping = []
total_front = compute_pareto(all_data, no_grouping).collect()

In [None]:
# sidenote - only used for imagenet a for training experiment.
# total_front.write_ipc("imagenet-a-all-evals-front.arrow")

In [None]:
# Draw the overall front as a step line with its points, plus the reference points.
ax = plt.gca()
front_c0 = total_front[c0]
front_c1 = total_front[c1]
ax.step(front_c0, front_c1, where="post", alpha=0.5)

ax.scatter(front_c0, front_c1, label="best", alpha=0.5, s=8.0)
ax.scatter(reference_samples[c0], reference_samples[c1], label="reference", alpha=0.5, s=18.0, marker='x')

# Complete the bounding box for the front: the end points of the front are
# converted from data to axes coordinates, which is what axhline / axvline
# expect for their extents.
display_to_axes = ax.transAxes.inverted()
hline_p = display_to_axes.transform(ax.transData.transform((total_front[-1, c0], 0)))[0]
vline_p = display_to_axes.transform(ax.transData.transform((0, total_front[0, c1])))[1]
hline_start = 0 if improvement_direction[c0] > 0 else 1
vline_start = 0 if improvement_direction[c1] > 0 else 1
ax.axhline(total_front[-1, c1], xmin=hline_start, xmax=hline_p, alpha=0.5)
ax.axvline(total_front[0, c0], ymin=vline_start, ymax=vline_p, alpha=0.5)

ax.set_title(f"Approximation front over all runs - {task}")
ax.legend()

# Compute front per run
The hypervolume reference point used for these fronts is derived from all evaluated solutions and, where available, from predetermined bounds.

In [None]:
# Reference point & scale for the hypervolume.
# The reference point is the nadir: the worst value of each objective taken
# independently. Note that this is computed /after/ the minimal fitness filter.
dir_c0 = improvement_direction[c0]
dir_c1 = improvement_direction[c1]
# Worst value of a criterion = direction * min(direction * value).
nadir_exprs = [
    (dir_c0 * (dir_c0 * pl.col(c0)).min()).alias(c0),
    (dir_c1 * (dir_c1 * pl.col(c1)).min()).alias(c1),
]
worst_perf_c0, worst_perf_c1 = all_data.select(nadir_exprs).collect().row(0)
# A filter threshold, when present, replaces the observed worst c0.
if min_performance is not None:
    worst_perf_c0 = min_performance

# Scale of each axis: distance between the reference and the best possible value.
scale_c0 = -dir_c0 * (worst_perf_c0 - best_possible_value[c0])
scale_c1 = -dir_c1 * (worst_perf_c1 - best_possible_value[c1])

hv_scale = (scale_c0, scale_c1)
hv_reference_point = (worst_perf_c0, worst_perf_c1)

print(f"Reference point: ({worst_perf_c0}, {worst_perf_c1}) - Scale: ({scale_c0}, {scale_c1})")

In [None]:
# One front per run: rows are grouped by "file", which load_run sets to the run's file name.
per_run_front = compute_pareto(runs_data.lazy(), ["file"]).collect()

In [None]:
# Inspect which columns the per-run fronts carry (includes compute_pareto's helper columns).
per_run_front.columns

In [None]:
# Qualitative palette; the plots below take one colour per approach from `colormap.colors`.
colormap = mpl.colormaps["Set1"]

In [None]:
per_run_alpha = 0.3
palette = colormap.colors
# Group the per-run fronts by approach once; both passes below walk the same groups.
fronts_by_approach = list(per_run_front.sort("file").group_by("set", maintain_order=True))

# First pass: draw the step line and the points of every run.
for approach_idx, (approach, multifront) in enumerate(fronts_by_approach):
    # Wrap around the palette so that approaches beyond its length are still
    # drawn (zipping with the palette would silently drop them).
    c = palette[approach_idx % len(palette)]
    is_first = True
    for _file_path, front in multifront.group_by("file"):
        plt.step(front[c0], front[c1], where="post", alpha=per_run_alpha, color=c) # label=sn 
        # Only the first run of an approach contributes a legend entry.
        plt.scatter(front[c0], front[c1], label=approach if is_first else None, alpha=per_run_alpha, s=8.0, color=c)
        is_first = False

# Complete the bounding box for each front
ax = plt.gca()
for approach_idx, (approach, multifront) in enumerate(fronts_by_approach):
    c = palette[approach_idx % len(palette)]
    for _file_path, front in multifront.group_by("file"):
        # End points of the front converted from data to axes coordinates,
        # which is what axhline / axvline expect for their extents.
        hline_p = ax.transAxes.inverted().transform(ax.transData.transform((front[-1, c0], 0)))[0]
        vline_p = ax.transAxes.inverted().transform(ax.transData.transform((0, front[0, c1])))[1]
        plt.axhline(front[-1, c1], xmin=0 if improvement_direction[c0] > 0 else 1, xmax=hline_p, alpha=per_run_alpha, color=c)
        plt.axvline(front[0, c0], ymin=0 if improvement_direction[c1] > 0 else 1, ymax=vline_p, alpha=per_run_alpha, color=c)

plt.scatter(reference_samples[c0], reference_samples[c1], label="reference", alpha=1.0, s=32.0, color="orange", marker='x')
plt.title(f"Individual approximation fronts - {task}")
plt.legend()

In [None]:
# Total number of rows across all per-run fronts.
len(per_run_front)

In [None]:
# Write the solutions on the per-run fronts, together with the reference
# points, to a single file so they can be evaluated as part of a 'front'.
# File-name friendly task label: spaces become underscores, parentheses are dropped.
task_ = task.translate(str.maketrans({" ": "_", "(": None, ")": None}))
per_run_out = f"{task_}-front.arrow"
# Restrict the fronts to the reference's columns so both parts share one schema.
front_rows = per_run_front.lazy().select(reference_samples.columns)
# Reference rows are labelled "reference" in both the file and the set column.
reference_rows = reference_samples.lazy().with_columns([
    pl.lit("reference").alias("file"),
    pl.lit("reference").alias("set"),
])
pl.concat([front_rows, reference_rows]).collect().write_ipc(per_run_out)
per_run_out

## Compute hypervolume per run

In [None]:
# Hypervolume of every run's front, plus that of the reference points (which
# are treated as one extra front labelled "reference").
# The per-run fronts carry helper columns added by compute_pareto that the
# reference frame does not have. Both inputs are therefore reduced to the
# reference's columns, in the same order, before they are stacked vertically.
hv_front_rows = per_run_front.lazy().select(reference_samples.columns)
hv_reference_rows = reference_samples.lazy().with_columns([
    pl.lit("reference").alias("file"),
    pl.lit("reference").alias("set"),
])
hv_per_run = (compute_2d_hv(
                pl.concat([hv_front_rows, hv_reference_rows]),
                hv_reference_point,
                hv_scale, ["file", "set"]).sort("hv_contrib")
              # Present the "SGA" approach under the label "GA".
              .with_columns(pl.col("set").replace({"SGA": "GA"}))
              .collect())

In [None]:
# Persist the per-run hypervolumes; `task_` is the file-name friendly task label
# defined in an earlier cell. The frame is also displayed as the cell output.
hv_per_run.write_csv(f"./2024-01-02-hypervolumes-{task_}.csv")
hv_per_run

In [None]:
# Summary statistics of the per-run hypervolume for every approach.
hv_column = pl.col("hv_contrib")
hv_aggregations = [
    hv_column.mean().alias("hv_mean"),
    hv_column.std().alias("hv_std"),
    hv_column.quantile(0.1).alias("hv_q10"),
    hv_column.median().alias("hv_median"),
    hv_column.quantile(0.9).alias("hv_q90"),
]
hv_summary = (
    hv_per_run.lazy()
    .group_by("set")
    .agg(hv_aggregations)
    .sort("set")
    .collect()
)
hv_summary.write_csv(f"./2024-01-02-hypervolumes-summary-{task_}.csv")
hv_summary

## Plot front with median hypervolume per approach
This showcases a representative front for each approach.

In [None]:
# Determine the run closest to the median hypervolume for each configuration.
# The 0-based rank of each run's hypervolume within its approach is compared
# with the middle position (n - 1) / 2. With an even number of runs both
# neighbours of the middle are flagged. A rank is used (rather than the indices
# returned by arg_sort) so the result does not depend on the row order.
hv_rank = pl.col("hv_contrib").rank("ordinal") - 1
middle_position = (pl.count() - 1) / 2
middle_runs = (hv_per_run.lazy()
    .filter(pl.col("file") != "reference")
    .with_columns(((hv_rank - middle_position).abs() <= 0.5).over("set").alias("is_middle"))
    .collect())
middle_runs

In [None]:
per_run_alpha = 0.3

# Fronts of the runs flagged as middle, grouped by approach.
middle_run_fronts = middle_runs.lazy().filter("is_middle").join(per_run_front.lazy(), on="file").sort("file").collect().group_by("set", maintain_order=True)
# Materialise the groups once; both passes below walk the same groups.
middle_run_groups = list(middle_run_fronts)
palette = colormap.colors

# First pass: draw the step line and the points of every selected run.
for approach_idx, (approach, multifront) in enumerate(middle_run_groups):
    # Wrap around the palette so that approaches beyond its length are still
    # drawn (zipping with the palette would silently drop them).
    c = palette[approach_idx % len(palette)]
    is_first = True
    for _file_path, front in multifront.group_by("file"):
        plt.step(front[c0], front[c1], where="post", alpha=per_run_alpha, color=c) # label=sn 
        # Only the first run of an approach contributes a legend entry.
        plt.scatter(front[c0], front[c1], label=approach if is_first else None, alpha=per_run_alpha, s=8.0, color=c)
        is_first = False

# Complete the bounding box for each front
ax = plt.gca()
for approach_idx, (approach, multifront) in enumerate(middle_run_groups):
    c = palette[approach_idx % len(palette)]
    for _file_path, front in multifront.group_by("file"):
        # End points of the front converted from data to axes coordinates,
        # which is what axhline / axvline expect for their extents.
        hline_p = ax.transAxes.inverted().transform(ax.transData.transform((front[-1, c0], 0)))[0]
        vline_p = ax.transAxes.inverted().transform(ax.transData.transform((0, front[0, c1])))[1]
        plt.axhline(front[-1, c1], xmin=0 if improvement_direction[c0] > 0 else 1, xmax=hline_p, alpha=per_run_alpha, color=c)
        plt.axvline(front[0, c0], ymin=0 if improvement_direction[c1] > 0 else 1, ymax=vline_p, alpha=per_run_alpha, color=c)


plt.scatter(reference_samples[c0], reference_samples[c1], label="reference", alpha=1.0, s=32.0, color="orange", marker='x')
plt.title(f"fronts of run with Median HV - {task}")
plt.legend()

In [None]:
# Pick, for each configuration, the run with the largest hypervolume: the run
# whose 1-based rank within its approach equals the number of runs. A rank is
# used (rather than the indices returned by arg_sort) so the selection does not
# depend on the row order, and comparing rank with count avoids subtracting
# unsigned integers.
picked_runs = (hv_per_run.lazy()
    .filter(pl.col("file") != "reference")
    .with_columns((pl.col("hv_contrib").rank("ordinal") == pl.count()).over("set").alias("is_chosen"))
    .collect())
per_run_alpha = 0.3

# Fronts of the picked runs, grouped by approach.
picked_runs_fronts = picked_runs.lazy().filter("is_chosen").join(per_run_front.lazy(), on="file").sort("file").collect().group_by("set", maintain_order=True)
# Materialise the groups once; both passes below walk the same groups.
picked_run_groups = list(picked_runs_fronts)
palette = colormap.colors

# First pass: draw the step line and the points of every selected run.
for approach_idx, (approach, multifront) in enumerate(picked_run_groups):
    # Wrap around the palette so that approaches beyond its length are still
    # drawn (zipping with the palette would silently drop them).
    c = palette[approach_idx % len(palette)]
    is_first = True
    for _file_path, front in multifront.group_by("file"):
        plt.step(front[c0], front[c1], where="post", alpha=per_run_alpha, color=c) # label=sn 
        # Only the first run of an approach contributes a legend entry.
        plt.scatter(front[c0], front[c1], label=approach if is_first else None, alpha=per_run_alpha, s=8.0, color=c)
        is_first = False

# Complete the bounding box for each front
ax = plt.gca()
for approach_idx, (approach, multifront) in enumerate(picked_run_groups):
    c = palette[approach_idx % len(palette)]
    for _file_path, front in multifront.group_by("file"):
        # End points of the front converted from data to axes coordinates,
        # which is what axhline / axvline expect for their extents.
        hline_p = ax.transAxes.inverted().transform(ax.transData.transform((front[-1, c0], 0)))[0]
        vline_p = ax.transAxes.inverted().transform(ax.transData.transform((0, front[0, c1])))[1]
        plt.axhline(front[-1, c1], xmin=0 if improvement_direction[c0] > 0 else 1, xmax=hline_p, alpha=per_run_alpha, color=c)
        plt.axvline(front[0, c0], ymin=0 if improvement_direction[c1] > 0 else 1, ymax=vline_p, alpha=per_run_alpha, color=c)


plt.scatter(reference_samples[c0], reference_samples[c1], label="reference", alpha=1.0, s=32.0, color="orange", marker='x')
plt.title(f"fronts of run - selected based on HV, somehow - {task}")
plt.legend()

# Nicer graphs with R

In [None]:
import rpy2
# Configure for notebook use.
import rpy2.ipython.html
rpy2.ipython.html.init_printing()
%load_ext rpy2.ipython

In [None]:
%%R
library(ggplot2)
library(dplyr)

In [None]:
# Create a dataframe for plotting in R
# np.inf is used for the infinite coordinates: the capitalised alias np.Inf is
# no longer available in NumPy 2.0, while np.inf exists in every version.
# NOTE(review): merge_sorted expects its inputs to be sorted by the key ("file");
# per_run_front comes out of compute_pareto ordered by c0 - confirm that the
# resulting row order is what the R plot needs.
pd_per_run_front = (per_run_front.lazy()
    # Add a tag so that we can track which samples were original - and which ones were added for sake
    # of continuing the lines.
    .with_columns(pl.lit(1.0).alias("is_original"))
    # For each run include an additional two rows:
    # Repeat best per objective, but replace the other objective with -Inf - as to plot towards the axes.
    # First extra row: the run's best c0, with c1 pushed to infinity in its "worse" direction.
    .merge_sorted(per_run_front.lazy()
                  .with_columns([(pl.col(c0) * improvement_direction[c0]).alias("c0-n"),
                                 pl.lit(-np.inf * improvement_direction[c1]).alias(c1),
                                 pl.lit(0.0).alias("is_original")])
                  .group_by("file", maintain_order=True)
                  .agg(pl.all().sort_by("c0-n").last())
                  .select(per_run_front.columns + ["is_original"]), "file")
    # Second extra row: the run's best c1, with c0 pushed to infinity in its "worse" direction.
    .merge_sorted(per_run_front.lazy()
                  .with_columns([(pl.col(c1) * improvement_direction[c1]).alias("c1-n"),
                                 pl.lit(-np.inf * improvement_direction[c0]).alias(c0),
                                 pl.lit(0.0).alias("is_original")])
                  .group_by("file", maintain_order=True)
                  .agg(pl.all().sort_by("c1-n").last())
                  .select(per_run_front.columns + ["is_original"]), "file")
    # Add c0 and c1 as a named column
    .with_columns([pl.col(c0).alias("c0"), pl.col(c1).alias("c1")])
    # Collect & convert to pandas in order to transfer.
    .collect().to_pandas())

# Convert reference points
pd_reference_samples = (reference_samples.lazy()
    # Add c0 and c1 as a named column
    .with_columns([pl.col(c0).alias("c0"), pl.col(c1).alias("c1")])
    # Collect & convert to pandas in order to transfer.
    .collect().to_pandas())

In [None]:
%%R -i pd_per_run_front -i pd_reference_samples -i c0 -i c1

# Remove additional samples added to continue the lines to the axis edge
# (those rows carry is_original == 0, original samples carry 1).
pd_per_run_front_excl_edges <- pd_per_run_front |> filter(`is_original` > 0.5)

# One step line per run (grouped by `file`), coloured by approach (`set`).
# Points are drawn for the original samples only; the reference points are
# overlaid in orange. Axis labels use the objective names passed in as c0 / c1.
ggplot(pd_per_run_front, aes(x = `c0`, y=`c1`, color=`set`, group=`file`)) +
    geom_step(alpha=0.3, direction = "vh") +
    geom_point(data = pd_per_run_front_excl_edges, alpha=0.3) +
    geom_point(data = pd_reference_samples, color="orange", group="reference", shape=4, size=2, stroke=2) +
    labs(x = c0, y = c1, color = "approach") +
    theme_bw() +
    theme(
      legend.position="bottom",
      axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
      plot.background = element_rect(fill='transparent', color=NA),
      strip.background = element_blank())