In [None]:
# When the kernel is started inside a directory whose path ends in "notebooks",
# step up one level so that the local `src.*` imports below resolve.
import os

if os.getcwd().endswith("notebooks"):
    os.chdir(os.pardir)

# package imports
from dataclasses import asdict
from datetime import datetime
import time
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import glob
import torch
from tqdm.notebook import tqdm
import numpy as np

# local imports
from src.evaluator import Evaluator
from src.args import Args
import src.graphs as graphs

# start autoreload: mode 2 re-imports modules before each cell is executed, so
# edits to imported code are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

# Select the device for machine learning: CUDA if available, otherwise
# Apple's MPS backend if available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"using device: {device}")

# Global plot appearance (seaborn/matplotlib): colour palette name, "paper"
# scaling context and a dark grid with light-grey grid lines.
palette = "Dark2"
sns.set_context("paper")
sns.set_style("darkgrid", rc={"grid.color": ".8"})

### Load dataframe

In [None]:
# Read every zipped CSV matching the pattern and stack them into one frame
# with a fresh 0..n-1 index.
# NOTE(review): the pattern is absolute ("/out/load/..."), while the grid-search
# cell writes its results to the relative "./out/" directory - confirm intended.
result = pd.concat(
    map(pd.read_csv, glob.glob("/out/load/*.csv.zip")),
    ignore_index=True,
)

# Run grid search for RJS

In [None]:
# setup experiment arguments: full grid over graph size, subgraph size and
# subgraph alpha, with every combination repeated several times
graph_sizes = [500, 1000, 2500]
subgraph_sizes = [50, 100, 250]
subgraph_alphas = [0.0, 0.15, 1.0]
repetitions = 5  # repetitions for representative results

args_list = [
    Args(
        graph_type="random",  # specify graph type in question 'rgg'/'girg'/'random'
        graph_size=graph_size,
        subgraph_size=subgraph_size,
        subgraph_alpha=subgraph_alpha,
    )
    for graph_size in graph_sizes
    for subgraph_size in subgraph_sizes
    for subgraph_alpha in subgraph_alphas
    for _ in range(repetitions)
]

# the key is used below for the log directory and the result file name;
# the timestamp (day-month--hour-minute) separates repeated runs
timestamp = datetime.now().strftime('%d-%m--%H-%M')
experiment_key = f"gridsearch-random-rjs--{timestamp}"

In [None]:
# generate data: train and test one model per entry of `args_list` and collect
# run time, test metrics and the run's arguments in `result` (one row per run)

# make sure the output directory exists, otherwise the first intermediate save
# would fail only after a complete training run
os.makedirs("./out", exist_ok=True)

# Rows are collected as plain dicts and the frame is rebuilt from the list.
# Building each row via `pd.Series(...).to_frame().T` forces every column to
# object dtype (the dict mixes floats with the string `graph_type`), and
# growing a frame with `pd.concat` inside a loop is a known anti-pattern.
records = []
result = pd.DataFrame(records)
for r, args in enumerate(tqdm(args_list)):
    graph = graphs.gen_graph(args)
    evaluator = Evaluator(
        graph=graph,
        args=args,
        writer_log_dir=f"runs/{experiment_key}/{args.graph_type}--{args.__hash__()}--{r}",
        device=device
    )
    # train the model; only the training call is timed
    start_time = time.perf_counter()
    evaluator.train(
        optimizer=torch.optim.Adam(evaluator.net.parameters(), lr=1e-3),
        pbar=False
    )
    end_time = time.perf_counter()
    # test the model (the returned threshold is not stored in the results)
    test_loss, test_ap, test_f1, test_threshold = evaluator.test(
        epoch=args.epochs
    )
    records.append({
        "run_time": end_time - start_time,
        "loss": test_loss,
        "ap": test_ap,
        "f1": test_f1,
        **asdict(args),
    })
    result = pd.DataFrame(records)
    # save after every iteration in case the experiment is interrupted
    result.to_csv(f"./out/{experiment_key}.csv.zip", index=False, compression=dict(method='zip', archive_name="data.csv"))

In [None]:
# plot heatmaps: one panel per subgraph alpha, each showing the mean average
# precision for every (graph size, subgraph size) combination
alphas = np.unique([args.subgraph_alpha for args in args_list])
# NOTE(review): the grid-search cell in this notebook generates runs with
# graph_type="random"; filtering on "girg" only matches rows if `result`
# contains girg runs (e.g. loaded from disk) - confirm this is intended.
graph_type = "girg"

figsize = 15
fig, axs = plt.subplots(
    ncols=len(alphas),
    figsize=(figsize, figsize / len(alphas)),
    sharey='all',
    squeeze=False,  # always return an array, also when there is a single alpha
)
axs = axs.ravel()
# all panels share one colour bar, attached to the right of the last panel
cbar_ax = axs[-1].inset_axes([1.04, 0.0, 0.05, 1.0])
for i, a in enumerate(alphas):
    # combine both conditions into one mask instead of chaining `.loc` calls,
    # which applied a mask of the full frame to an already filtered frame
    selected = (result["subgraph_alpha"] == a) & (result["graph_type"] == graph_type)
    df_hm = result\
        .loc[selected]\
        .groupby(["graph_size", "subgraph_size"], as_index=False)["ap"].mean()
    g = sns.heatmap(
        vmin=0.0,
        vmax=1.0,
        data=df_hm.pivot(index="graph_size", columns="subgraph_size", values="ap"),
        ax=axs[i],
        cbar_ax=cbar_ax,
        cbar_kws={ "label": "Mean Average Precision" },
        cmap="flare",
        square=True,
        annot=True,
    )
    axs[i].set_title(f"Subgraph Alpha = {a}")
    g.set(xlabel="Subgraph Size", ylabel="Graph Size")

plt.tight_layout(pad=2)
plt.savefig(f'./out/gridsearch_{graph_type}.pdf')