In [None]:
# the local imports below only work from the main directory, so step one
# level up when the working directory is the notebooks folder
import os
working_dir = os.getcwd()
if working_dir.endswith("notebooks"):
    os.chdir(os.pardir)

# package imports
from dataclasses import asdict
from datetime import datetime
import time
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import glob
import torch
from tqdm.notebook import tqdm

# local imports
from src.evaluator import Evaluator
from src.args import Args
import src.graphs as graphs

# start autoreload: load IPython's autoreload extension and select mode 2,
# so edits to imported modules (e.g. the local src.* imports above) are
# picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

# select device for machine learning: prefer CUDA, then MPS, otherwise CPU
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"using device: {device}")

# global look of all seaborn/matplotlib figures in this notebook
sns.set_context(context="paper")
sns.set_style(style="darkgrid", rc={"grid.color": ".8"})
# colour palette name handed to the plotting cells
palette = "Dark2"

### Load dataframe

In [None]:
# Load previously saved result files and stack them into one dataframe.
# The pattern is relative to the project root (like the "./out/" paths used
# for saving) instead of pointing at the filesystem root.
result_files = sorted(glob.glob("./out/load/*.csv.zip"))  # sorted: glob order is arbitrary
if not result_files:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate"
    raise FileNotFoundError("no result files found matching ./out/load/*.csv.zip")
result = pd.concat((pd.read_csv(f) for f in result_files), ignore_index=True)

# Test the effect of sorting the dataset on GIRG

In [None]:
# setup experiment arguments: one Args per (sort_dataset, repetition) pair
sort_options = [True, False]
repetitions = 5  # repetitions for representative results
args_list = [
    Args(graph_type="girg", subgraph_alpha=0.15, sort_dataset=sort_flag)
    for sort_flag in sort_options
    for _ in range(repetitions)
]
# the key names the log directory and the result file of this experiment run
timestamp = datetime.now().strftime('%d-%m--%H-%M')
experiment_key = f"sorted-girg-rjs--{timestamp}"

In [None]:
# generate data: train and test one model per entry of args_list and collect
# run time, test metrics and the arguments of every run in `result`

# make sure the output directory exists before any training time is spent
os.makedirs("./out", exist_ok=True)
records = []  # one plain dict per finished run
result = pd.DataFrame({})
for r, args in enumerate(tqdm(args_list)):
    graph = graphs.gen_graph(args)
    evaluator = Evaluator(
        graph=graph,
        args=args,
        writer_log_dir=f"runs/{experiment_key}/{args.graph_type}--{args.__hash__()}--{r}",
        device=device
    )
    # train the model; only the training call is timed
    start_time = time.perf_counter()
    evaluator.train(
        optimizer=torch.optim.Adam(evaluator.net.parameters(), lr=1e-3),
        pbar=False
    )
    end_time = time.perf_counter()
    # test the model
    test_loss, test_ap, test_f1, test_threshold = evaluator.test(
        epoch=args.epochs
    )
    records.append({
        "run_time": end_time - start_time,
        "loss": test_loss,
        "ap": test_ap,
        "f1": test_f1,
        **asdict(args),
    })
    # build the frame from the records so columns get inferred dtypes; the
    # previous Series -> to_frame().T round trip made every column object dtype
    result = pd.DataFrame(records)
    # save after every iteration in case the experiment is interrupted
    result.to_csv(f"./out/{experiment_key}.csv.zip", index=False, compression=dict(method='zip', archive_name="data.csv"))

In [None]:
# box plot of the average precision, one box per sort_dataset value
ap_axes = sns.boxplot(data=result, x="sort_dataset", y="ap", palette=palette)
ap_axes.set_xlabel('Sorted Dataset')
ap_axes.set_ylabel('Average Precision')
# pad the layout, then write the figure to disk
plt.tight_layout(pad=2)
plt.savefig("./out/sorted_box_girg.pdf")

In [None]:
# box plot of the measured training run time, one box per sort_dataset value
runtime_axes = sns.boxplot(data=result, x="sort_dataset", y="run_time", palette=palette)
runtime_axes.set_xlabel('Sorted Dataset')
runtime_axes.set_ylabel('Runtime [sec]')
# pad the layout, then write the figure to disk
plt.tight_layout(pad=2)
plt.savefig("./out/sorted_box_runtime_girg.pdf")