In [None]:
# When the kernel starts inside the notebooks folder, hop one level up so that
# the `src.*` imports below resolve against the project root.
import os

_start_dir = os.getcwd()
if _start_dir.endswith("notebooks"):
    os.chdir(os.pardir)
    print("using project root as working dir")

In [None]:
from dataclasses import asdict
from datetime import datetime
import time
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import glob
import torch
import numpy as np
import networkx as nx
import math
from torch.utils.data import TensorDataset, DataLoader

from src.toroid import on_toroid
from src.evaluator import Evaluator
from src.args import Args, gridsearch_args
import src.graph_visualization as graph_visualization
import src.subgraphs as subgraphs
import src.graphs as graphs

In [None]:
# start autoreload
%load_ext autoreload
%autoreload 2

# select device
device = "cuda" if torch.cuda.is_available() else ("mps" if torch.backends.mps.is_available() else "cpu")
print(f"using {device} device")

# global seaborn settings
sns.set_context("paper")
sns.set_style("darkgrid", {"grid.color": ".8"})
palette = "Dark2"

%load_ext tensorboard
%tensorboard --logdir runs

# Load data frames

In [None]:
# load data frame from folder: every zipped CSV in `path` is read and stacked
path = "./out/load"
# glob returns matches in arbitrary order; sorting keeps the row order reproducible
files = sorted(glob.glob(os.path.join(path, "*.csv.zip")))
if not files:
    # pd.concat would otherwise fail with an opaque "No objects to concatenate"
    raise FileNotFoundError(f"no '*.csv.zip' result files found in {path!r}")
df = pd.concat((pd.read_csv(f) for f in files), ignore_index=True)

# either replace the current results or append to them (swap the two lines below)
df_result = df
#df_result = pd.concat((df_result, df), ignore_index=True)

In [None]:
# save data frame as a zipped CSV (the archive holds a single member, data.csv)
# The target is written directly below ./out: joining it onto the load folder
# produced "./out/load/./out/saved.csv.zip", a nested path that is not the
# output folder used by the rest of the notebook.
os.makedirs("./out", exist_ok=True)
df_result.to_csv(
    "./out/saved.csv.zip",
    index=False,
    compression=dict(method="zip", archive_name="data.csv"),
)

# Run experiment

In [None]:
# experiment settings
# key used for the TensorBoard run folder and the result file name
# (day-month--hour-minute of the moment this cell is executed)
experiment_key = f"{datetime.now():%d-%m--%H-%M}"

# number of times every argument combination is trained and evaluated
repetitions = 3
# argument combinations to evaluate
args_list = gridsearch_args()

In [None]:
# run the experiment
# One record (plain dict) is collected per (args, repetition) run and the frame
# is built from the list of records. Compared with concatenating a transposed
# Series per run this keeps numeric/boolean column dtypes (the Series route
# yields object columns only) and avoids re-concatenating a growing frame.
records = []
df_result = pd.DataFrame(records)

# make sure the output folder exists before the first intermediate save
os.makedirs("./out", exist_ok=True)

for run_i, args in enumerate(args_list):
    # override args for specific test
    args.graph_type = "girg"
    # args.sort_dataset = True

    # run with repetitions
    for rep_i in range(repetitions):
        # 1-based counters so the last run reads "n/n"
        print(f"running evaluator: rep={rep_i + 1}/{repetitions} args={run_i + 1}/{len(args_list)}")

        graph = graphs.gen_graph(args)
        evaluator = Evaluator(
            graph=graph,
            args=args,
            writer_log_dir=f"runs/{experiment_key}/{args.__hash__()}--{rep_i}",
            device=device
        )

        # train the model; perf_counter is monotonic, so the measured duration
        # is not affected by system clock adjustments
        start_time = time.perf_counter()
        evaluator.train(
            optimizer=torch.optim.Adam(evaluator.net.parameters(), lr=1e-3),
            save_fig=False
        )
        end_time = time.perf_counter()
        # test the model
        test_loss, test_ap, test_f1 = evaluator.test(
            epoch=args.epochs,
            save_fig=False
        )

        records.append({
            # run meta
            "run_index": run_i,
            "rep_index": rep_i,
            "run_time": end_time - start_time,
            # run results
            "loss": test_loss,
            "ap": test_ap,
            "f1": test_f1,
            # run args
            **asdict(args),
        })
        df_result = pd.DataFrame(records)

        # save results just in case the run fails mid run
        df_result.to_csv(
            f"./out/{experiment_key}.csv.zip",
            index=False,
            compression=dict(method="zip", archive_name="data.csv"),
        )

# Plot experiment results

In [None]:
# mean average precision (bars) with/without sorted dataset
df_result_mean_ap = df_result.groupby(["sort_dataset"], as_index=False)["ap"].mean().sort_values(by=['ap'])

# `palette` without `hue` is deprecated in seaborn; colouring by the x variable
# with the legend switched off gives the same one-colour-per-bar figure.
# dodge=False keeps the bars centred on their category.
sns.catplot(
    data=df_result_mean_ap,
    x="sort_dataset",
    y="ap",
    hue="sort_dataset",
    palette=palette,
    legend=False,
    dodge=False,
    kind="bar"
)

In [None]:
# mean run time (bars) with/without sorted dataset
# NOTE: despite its name this frame holds the mean run time, not the mean AP
df_result_mean_ap = df_result.groupby(["sort_dataset"], as_index=False)["run_time"].mean().sort_values(by=['run_time'])

# `palette` without `hue` is deprecated in seaborn; colouring by the x variable
# with the legend switched off gives the same one-colour-per-bar figure.
# dodge=False keeps the bars centred on their category.
sns.catplot(
    data=df_result_mean_ap,
    x="sort_dataset",
    y="run_time",
    hue="sort_dataset",
    palette=palette,
    legend=False,
    dodge=False,
    kind="bar"
)

In [None]:
# mean average precision (bars) per epoch graph alpha, one panel per epoch graph size
df_result_mean_ap = df_result.groupby(["epoch_graph_alpha", "epoch_graph_size"], as_index=False)["ap"].mean().sort_values(by=['ap'])

# `palette` without `hue` is deprecated in seaborn; colouring by the x variable
# with the legend switched off gives the same one-colour-per-bar figure.
# dodge=False keeps the bars centred on their category.
sns.catplot(
    data=df_result_mean_ap,
    col="epoch_graph_size",
    x="epoch_graph_alpha",
    y="ap",
    hue="epoch_graph_alpha",
    palette=palette,
    legend=False,
    dodge=False,
    kind="bar"
)

In [None]:
# average precision over epoch graph alpha: one line per epoch graph size,
# with the standard deviation across runs drawn as the error band
sns.relplot(
    kind="line",
    data=df_result,
    x="epoch_graph_alpha",
    y="ap",
    hue="epoch_graph_size",
    palette=palette,
    errorbar="sd",
    aspect=2,
)

In [None]:
# run time over graph size, one line per variant (`note` column), std as error band
g = sns.relplot(
    data=df_result, kind="line",
    hue="note",
    x="graph_size",
    y="run_time",
    errorbar="sd",
    palette=palette,  # shared notebook palette instead of a repeated literal
    aspect=2,
)
g.set_axis_labels("Graph Size", "Run Time (in seconds)")
# use the public `legend` property rather than the private `_legend` attribute
g.legend.set_title("Variant")
g.figure.savefig("./out/run_time.pdf")

In [None]:
# average precision over epoch graph alpha, one line per variant (`note` column),
# std as error band
g = sns.relplot(
    data=df_result, kind="line",
    hue="note",
    x="epoch_graph_alpha",
    y="ap",
    errorbar="sd",
    palette=palette,
    aspect=2,
)
g.set_axis_labels("Epoch Alpha", "Average Precision")
# use the public `legend` property rather than the private `_legend` attribute
g.legend.set_title("Variant")
# save the grid's own figure instead of whatever pyplot considers "current"
g.figure.savefig('./out/ap.pdf')

In [None]:
# score per time: average precision divided by the training run time
# (the column is added to df_result itself; re-running the cell recomputes it)
df_result["rel_ap"] = df_result['ap'] / df_result['run_time']
g = sns.relplot(
    data=df_result, kind="line",
    hue="note",
    x="graph_size",
    y="rel_ap",
    errorbar="sd",
    palette=palette,
    aspect=2,
)
g.set_axis_labels("Graph Size", "Average Precision / Run Time (in seconds)")
# use the public `legend` property rather than the private `_legend` attribute
g.legend.set_title("Variant")
# save the grid's own figure instead of whatever pyplot considers "current"
g.figure.savefig('./out/rel_ap.pdf')

In [None]:
# plot grid search result: one heatmap of the mean AP per epoch graph alpha,
# with graph size on the rows and epoch graph size on the columns
alphas = [0.0, 0.5, 1.0]
fig, axs = plt.subplots(ncols=len(alphas), figsize=(10 * len(alphas), 10), sharey='all')
for i, a in enumerate(alphas):
    df_hm = (
        df_result
        .loc[df_result["epoch_graph_alpha"] == a]
        .groupby(["graph_size", "epoch_graph_size"], as_index=False)["ap"].mean()
    )
    g = sns.heatmap(
        vmin=0.0,
        vmax=1.0,
        # DataFrame.pivot takes keyword-only arguments in pandas 2.x
        data=df_hm.pivot(index="graph_size", columns="epoch_graph_size", values="ap"),
        ax=axs[i],
        # a single colour bar on the last panel is enough: all panels share vmin/vmax
        cbar=(i == len(alphas) - 1),
        cmap="flare",
    )
    axs[i].set_title(f"Epoch Graph Alpha = {a}")
    g.set(xlabel="Epoch Graph Size", ylabel="Graph Size")
fig.tight_layout()
fig.savefig('./out/grid_search_t_rgg.pdf')

# Predict RGG distance threshold

In [None]:
# 1. train model
# Build an evaluator with the default arguments on a freshly generated graph,
# train it with Adam and report the test metrics.
args = Args()
graph = graphs.gen_graph(args)
evaluator = Evaluator(graph=graph, args=args, writer_log_dir="runs/test", device=device)

print("training...")
adam = torch.optim.Adam(evaluator.net.parameters(), lr=1e-3)
evaluator.train(optimizer=adam, save_fig=False)

print("testing...")
test_loss, test_ap, test_f1 = evaluator.test(epoch=args.epochs, save_fig=True)
print(f"evaluator trained! ap: {test_ap}, f1: {test_f1}")

In [None]:
# 2. generate edges of known distances
# 1000 evenly spaced distances in [0, 0.2], each sampled 100 times; every entry
# of `edges` is a (distance, edge) pair as returned by graphs.gen_edge
edges = []
for d in np.linspace(0.0, 0.2, num=1000):  # distances
    for _ in range(100):  # duplicates
        edges.append((d, graphs.gen_edge(d)))

# model input: both endpoints of an edge flattened into one row;
# every generated edge is labelled 1
values = tuple([*u, *v] for _, (u, v) in edges)
labels = (1,) * len(edges)

ds_values = torch.FloatTensor(values).to(device)
ds_labels = torch.IntTensor(labels).to(device)
dataset = TensorDataset(ds_values, ds_labels)
# shuffle=False keeps the batches in generation order
dataloader = DataLoader(dataset, shuffle=False, batch_size=10)

In [None]:
# 3. predict edges of known distances
_, preds = evaluator.score(dataloader)
f1 = evaluator.f1_score_fn(preds, ds_labels).item()
ap = evaluator.ap_score_fn(preds, ds_labels)

# one row per prediction, joined by position with the generated edge
# (its distance and its two endpoints)
rows = []
for i, p in enumerate(preds.cpu().numpy()):
    dist, endpoints = edges[i]
    rows.append({
        "dist": dist,
        "u": endpoints[0],
        "v": endpoints[1],
        "i": i,
        "pred": p,
    })
df_edges = pd.DataFrame(rows)

print(f"ap: {ap}, f1: {f1}, max_pred: {df_edges['pred'].max()}")

In [None]:
# 4. plot results: prediction over edge distance, std across duplicates as band
# (no `palette` here: without a `hue` variable seaborn ignores it with a warning)
g = sns.relplot(
    data=df_edges, kind="line",
    x="dist",
    y="pred",
    errorbar="sd",
    aspect=2,
)
g.set_axis_labels("Edge Distance", "Prediction")

# distance threshold computed from the graph arguments
radius = math.sqrt(args.rg_avg_degree / (args.graph_size * math.pi))
print(f"radius: {radius}")

# draw on the grid's own axes/figure instead of the pyplot "current" state
ax = g.ax
ax.axvline(x=radius, color="r")
# NOTE(review): `f1` is a score, yet it is drawn as a position on the distance
# axis - confirm this marker is intended
ax.axvline(x=f1, color="g")

ax.set_ylim(bottom=0)
g.figure.savefig('./out/edge_pred.pdf')