# Initialization

In [None]:
import os

# If the kernel was started from a directory whose path ends in "notebooks",
# move one level up so the rest of the notebook runs from the parent directory.
current_dir = os.getcwd()
if current_dir.endswith("notebooks"):
    os.chdir("..")
    print("using project root as working dir")

In [None]:
# Load IPython's autoreload extension and enable mode 2, which reloads
# imported modules before each cell runs, so edits to project code are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
# Load the TensorBoard notebook extension and show it inline, reading logs
# from the `runs` directory (relative to the current working directory).
%load_ext tensorboard
%tensorboard --logdir runs

In [None]:
import torch

# Pick the compute backend in order of preference: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"using {device} device")

# Run

In [None]:
from src.graph import gen_graph
from dataclasses import asdict
from src.args import gridsearch_args, Args
from datetime import datetime
import time
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import glob

from src.evaluator import Evaluator

In [None]:
# Grid-search experiment: every argument set returned by `gridsearch_args()`
# is trained and tested `reps` times, and one result row is recorded per run.
experiment = dict(
    key=datetime.now().strftime("%d-%m--%H-%M"),  # timestamp naming the log dirs and the result file
    reps=3,  # repetitions per argument set
    skip_to=0,  # run index to resume from (rounded down to the start of its repetition block)
    args=gridsearch_args(),
)

# Result rows are collected as plain dicts and the frame is rebuilt from that
# list. Appending a transposed Series with `pd.concat` leaves every column
# with dtype `object`; building from records lets pandas infer a dtype per column.
result_rows = []
df_result = pd.DataFrame(result_rows)

run_len = len(experiment['args']) * experiment['reps']
# index of the first argument set that is actually run
first_block = int(experiment['skip_to'] / experiment['reps'])
for i, args in enumerate(experiment["args"]):
    # override args for specific test
    args.note = "no epoch subsampling"

    # skip to beginning of specified repetition block
    if i < first_block:
        print(f"skipping {i * experiment['reps']} to {(i + 1) * experiment['reps'] - 1}")
        continue

    # run with repetitions
    for rep in range(experiment["reps"]):
        run_index = i * experiment['reps'] + rep
        print(f"running evaluator {run_index}/{run_len}")

        graph, dim = gen_graph(args)
        # NOTE(review): the log dir encodes only graph size and repetition, so
        # argument sets sharing a graph size would write to the same directory
        # -- confirm this is intended.
        evaluator = Evaluator(
            graph=graph,
            dim=dim,
            args=args,
            writer_log_dir=f"runs/{experiment['key']}/s{args.graph_size}--all-train--{rep}",
            device=device
        )
        # perf_counter is monotonic, so the measured training time cannot be
        # distorted by system clock adjustments.
        start_time = time.perf_counter()
        evaluator.train(
            optimizer=torch.optim.Adam(evaluator.net.parameters(), lr=1e-3),
            save_fig=False
        )
        end_time = time.perf_counter()
        test_loss, test_ap, test_f1 = evaluator.test(
            epoch=args.epochs,
            save_fig=True
        )

        result_rows.append({
            # run meta
            "run_index": run_index,
            "run_time": end_time - start_time,
            # run results
            "loss": test_loss,
            "ap": test_ap,
            "f1": test_f1,
            # run args
            **asdict(args),
        })
        # keep `df_result` in sync after every run so it stays usable if the
        # loop is interrupted
        df_result = pd.DataFrame(result_rows)

    # save results just in case the run fails mid run
    df_result.to_csv(f"{experiment['key']}.csv.zip", index=False, compression=dict(method='zip', archive_name=f"{experiment['key']}.csv"))

# Print Results

In [None]:
# Mean `ap` for each (graph_size, type) pair, sorted ascending by that mean,
# drawn as bars with one panel per `type`.
df_result_mean_ap = (
    df_result
    .groupby(["graph_size", "type"], as_index=False)["ap"]
    .mean()
    .sort_values(by=['ap'])
)

sns.catplot(
    kind="bar",
    data=df_result_mean_ap,
    x="graph_size",
    y="ap",
    col="type",
    palette="Spectral",
)

In [None]:
# `ap` plotted against `epoch_graph_alpha`, one line per `epoch_graph_size`;
# the error band is the standard deviation (errorbar="sd").
sns.relplot(
    kind="line",
    data=df_result,
    x="epoch_graph_alpha",
    y="ap",
    hue="epoch_graph_size",
    errorbar="sd",
    palette="Spectral",
    aspect=2,
)

In [None]:
# Run time against graph size, one line per `type`; the error band is the
# standard deviation (errorbar="sd"). The figure is written to run_time.pdf.
g = sns.relplot(
    data=df_result,
    kind="line",
    x="graph_size",
    y="run_time",
    hue="type",
    errorbar="sd",
    palette="Spectral",
    aspect=2,
)
g.set_axis_labels("Graph Size", "Run Time (in seconds)")
# Use the grid's public `legend` / `figure` accessors instead of the private
# `_legend` attribute and pyplot's implicit "current figure".
g.legend.set_title("Variant")
g.figure.savefig('run_time.pdf')

In [None]:
# Average precision against graph size, one line per `type`; the error band
# is the standard deviation (errorbar="sd"). The figure is written to ap.pdf.
g = sns.relplot(
    data=df_result,
    kind="line",
    x="graph_size",
    y="ap",
    hue="type",
    errorbar="sd",
    palette="Spectral",
    aspect=2,
)
g.set_axis_labels("Graph Size", "Average Precision")
# Use the grid's public `legend` / `figure` accessors instead of the private
# `_legend` attribute and pyplot's implicit "current figure".
g.legend.set_title("Variant")
g.figure.savefig('ap.pdf')

In [None]:
# One point per run: run time against graph size, coloured by `type`.
sns.scatterplot(
    data=df_result,
    x="graph_size",
    y="run_time",
    hue="type",
    palette="Spectral",
)

In [None]:
# Average precision per second of run time, against graph size, one line per
# `type`; the error band is the standard deviation (errorbar="sd").
# The ratio is stored on `df_result` as the new column `rel_ap`, and the
# figure is written to rel_ap.pdf.
df_result["rel_ap"] = df_result['ap'] / df_result['run_time']
g = sns.relplot(
    data=df_result,
    kind="line",
    x="graph_size",
    y="rel_ap",
    hue="type",
    errorbar="sd",
    palette="Spectral",
    aspect=2,
)
g.set_axis_labels("Graph Size", "Average Precision / Run Time (in seconds)")
# Use the grid's public `legend` / `figure` accessors instead of the private
# `_legend` attribute and pyplot's implicit "current figure".
g.legend.set_title("Variant")
g.figure.savefig('rel_ap.pdf')

# Load data frames

In [None]:
# load data frame from folder
path = "./results/graph-size/with_sub"
# glob returns matches in arbitrary order; sort so the row order of the
# concatenated frame is reproducible.
all_files = sorted(glob.glob(os.path.join(path, "*.csv.zip")))
if not all_files:
    # Without this guard pd.concat fails with an opaque
    # "No objects to concatenate" error.
    raise FileNotFoundError(f"no '*.csv.zip' result files found in {path!r}")
df = pd.concat((pd.read_csv(f) for f in all_files), ignore_index=True)

# number of loaded rows per run index, shown as a bar chart
df_result_count = df.groupby(["run_index"], as_index=False)["run_index"].size()
sns.catplot(
    data=df_result_count,
    x="run_index",
    y="size",
    palette="Spectral",
    kind="bar"
)

In [None]:
# use data frame
# Work on a copy so that columns added to `df_result` afterwards (e.g.
# `rel_ap`) do not also appear on the loaded frame `df`.
df_result = df.copy()

In [None]:
# Write the active result frame into the results folder as a zip archive
# containing a single CSV file.
out_file = os.path.join(path, "out.csv.zip")
df_result.to_csv(
    out_file,
    index=False,
    compression=dict(method='zip', archive_name="out.csv"),
)