In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from gpn.data.dataset_manager import DatasetManager
from gpn.data.dataset_provider import InMemoryDatasetProvider


# Name of the dataset shared by the loading, prediction and export cells below.
dataset = "ogbn-arxiv"

def load_dataset(dataset):
    """Build an ``InMemoryDatasetProvider`` for the dataset called ``dataset``.

    The provider wraps a ``DatasetManager`` configured with fixed settings:
    split number 1, data root ``./data``, OOD handling switched off and
    train/val/test sample settings of 0.05 / 0.15 / 0.8.

    Args:
        dataset: Dataset name forwarded to ``DatasetManager``.

    Returns:
        The constructed ``InMemoryDatasetProvider``.
    """
    # "ogbn-arxiv" is requested with the "public" split; everything else
    # falls back to a "random" split.
    if dataset == "ogbn-arxiv":
        split_kind = "public"
    else:
        split_kind = "random"

    manager_options = dict(
        dataset=dataset,
        split_no=1,
        root="./data",
        ood_flag=False,
        train_samples_per_class=0.05,
        val_samples_per_class=0.15,
        test_samples_per_class=0.8,
        split=split_kind,
        # OOD options intentionally left disabled:
        # ood_setting="poisoning",
        # ood_type="leave_out_classes",
        # ood_num_left_out_classes=-1,
        # ood_leave_out_last_classes=True,
    )

    manager = DatasetManager(**manager_options)
    return InMemoryDatasetProvider(manager)

# Load the dataset and keep the first entry of the provider's `data_list`.
data = load_dataset(dataset).data_list[0]

2024-02-05 03:34:26.645157: I tensorflow/core/util/port.cc:111] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-02-05 03:34:26.836610: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-05 03:34:26.836640: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-05 03:34:26.837779: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-02-05 03:34:26.931911: I tensorflow/core/platform/cpu_feature_g

In [3]:
import gc
import torch
import train_and_eval as tae

def get_config_name(model):
    """Return the path of the YAML config file used for ``model``.

    The keys "gpn", "gpn_rw" and "gpn_lop" all share one GPN config file;
    any other key resolves to ``configs/reference/classification_<model>.yaml``.
    """
    gpn_variants = ("gpn", "gpn_rw", "gpn_lop")
    uses_gpn_config = model in gpn_variants
    return (
        "configs/gpn/classification_gpn_16.yaml"
        if uses_gpn_config
        else f"configs/reference/classification_{model}.yaml"
    )

def get_config_updates(model, dataset):
    """Assemble the flat config-override dict for one model/dataset pair.

    Args:
        model: Model key. "gpn", "gpn_rw" and "gpn_lop" get dedicated
            handling; any other key is upper-cased to form the model name.
        dataset: Dataset name; some datasets switch on extra overrides.

    Returns:
        A dict of dotted config keys. The fixed run settings (single init,
        single split, logging off, job "predict") come first, followed by
        the model- and dataset-specific overrides.
    """
    overrides = {}

    # Model-specific part: resolve the model name and any extra model keys.
    if model == "gpn":
        resolved_name = "GPN"
    elif model == "gpn_rw":
        resolved_name = "GPN"
        overrides["model.adj_normalization"] = "rw"
    elif model == "gpn_lop":
        resolved_name = "GPN_LOP"
        overrides["model.sparse_x_prune_threshold"] = 0.01
    else:
        resolved_name = model.upper()

    # Dataset-specific part.
    sparse_datasets = ("AmazonPhotos", "AmazonComputers", "PubMedFull")
    if dataset in sparse_datasets:
        overrides.update({"model.sparse_propagation": True})

    if dataset == "ogbn-arxiv":
        overrides.update(
            {
                "model.sparse_propagation": True,
                "data.split": "public",
                "model.entropy_num_samples": 100,
            }
        )

    config = {
        "model.model_name": resolved_name,
        "data.dataset": dataset,
        "run.num_inits": 1,
        "run.num_splits": 1,
        "run.log": False,
        "run.job": "predict",
    }
    # Overrides are applied last so they win over the fixed settings above.
    config.update(overrides)
    return config

def get_prediction(model, dataset):
    """Run the ``train_and_eval`` experiment for ``model`` on ``dataset``.

    The run uses the named config from ``get_config_name`` and the overrides
    from ``get_config_updates``, and is started with the ``--force`` option.

    Args:
        model: Model key understood by ``get_config_name`` /
            ``get_config_updates``.
        dataset: Dataset name.

    Returns:
        The single element of the run's result list.

    Raises:
        AssertionError: If the run result is not a list of exactly one item.
    """
    config_files = [get_config_name(model)]

    # Ad-hoc overrides can be merged in here when experimenting, e.g.
    # "run.reduced_training_metrics": True, "training.eval_every": 10,
    # "training.stopping_patience": 5, "data.split": "public",
    # "run.num_splits": 1, "model.model_name": "GPN_LOP",
    # "model.sparse_x_prune_threshold": 0.01, "run.reeval": True.
    overrides = dict(get_config_updates(model, dataset))

    run = tae.ex.run(
        named_configs=config_files,
        config_updates=overrides,
        options={"--force": True},
    )

    outcome = run.result
    # Exactly one prediction is expected back from the run.
    assert isinstance(outcome, list) and len(outcome) == 1
    return outcome[0]

# Predict with the "gpn_rw" variant first.
gpn_rw_pred = get_prediction("gpn_rw", dataset)
# Release Python garbage and PyTorch's cached CUDA memory before the second run.
gc.collect()
torch.cuda.empty_cache()
# Predict with the "gpn_lop" variant.
gpn_lop_pred = get_prediction("gpn_lop", dataset)

# Display the second prediction as the cell output.
gpn_lop_pred

INFO - train_and_eval - Running command 'run_experiment'
INFO - train_and_eval - Started
INFO - root - Received the following configuration:
INFO - root - RUN
INFO - root - {'experiment_name': 'classification', 'experiment_directory': './saved_experiments', 'reduced_training_metrics': False, 'eval_mode': 'default', 'job': 'predict', 'save_model': True, 'gpu': 0, 'num_inits': 1, 'num_splits': 1, 'log': False, 'debug': True, 'ex_type': 'transductive', 'ood_loc': True, 'ood_loc_only': False, 'ood_edge_perturbations': True, 'ood_isolated_perturbations': False}
INFO - root - -----------------------------------------
INFO - root - DATA
INFO - root - {'to_sparse': False, 'split_no': 1, 'dataset': 'ogbn-arxiv', 'root': './data', 'split': 'public', 'train_samples_per_class': 0.05, 'val_samples_per_class': 0.15, 'test_samples_per_class': 0.8, 'ood_flag': False}
INFO - root - -----------------------------------------
INFO - root - MODEL
INFO - root - {'model_name': 'GPN', 'seed': 42, 'init_no': 1

Starting experiment (model=GPN, dataset=ogbn-arxiv, ood_type=None, split=1, init=1, results=./saved_experiments/classification/GPN/158/results_1.json, trained=False, evaluated=False).
Completed experiment (model=GPN, dataset=ogbn-arxiv, ood_type=None, split=1, init=1, results=./saved_experiments/classification/GPN/158/results_1.json, trained=False, evaluated=False).


INFO - train_and_eval - Result: [Prediction(soft=tensor([[1.3872e-05, 3.7093e-03, 1.0660e-03,  ..., 2.1653e-05, 1.0974e-05,
         3.6581e-03],
        [2.1655e-04, 3.7456e-03, 1.2107e-03,  ..., 2.2383e-04, 2.1380e-04,
         3.6561e-03],
        [1.0802e-04, 3.8549e-03, 1.1764e-03,  ..., 1.1589e-04, 1.0515e-04,
         3.7669e-03],
        ...,
        [1.3598e-04, 3.9237e-03, 1.2146e-03,  ..., 1.4381e-04, 1.3314e-04,
         3.8045e-03],
        [1.4829e-04, 3.8305e-03, 1.2069e-03,  ..., 1.5613e-04, 1.4538e-04,
         3.7804e-03],
        [2.5477e-04, 3.7782e-03, 1.2481e-03,  ..., 2.6288e-04, 2.5188e-04,
         3.8722e-03]]), log_soft=tensor([[-11.1857,  -5.5969,  -6.8439,  ..., -10.7404, -11.4200,  -5.6108],
        [ -8.4377,  -5.5872,  -6.7165,  ...,  -8.4046,  -8.4505,  -5.6114],
        [ -9.1332,  -5.5584,  -6.7453,  ...,  -9.0629,  -9.1601,  -5.5815],
        ...,
        [ -8.9030,  -5.5407,  -6.7133,  ...,  -8.8470,  -8.9241,  -5.5716],
        [ -8.8163,  -5.5648,

Starting experiment (model=GPN_LOP, dataset=ogbn-arxiv, ood_type=None, split=1, init=1, results=./saved_experiments/classification/GPN_LOP/51/results_1.json, trained=True, evaluated=False).
Completed experiment (model=GPN_LOP, dataset=ogbn-arxiv, ood_type=None, split=1, init=1, results=./saved_experiments/classification/GPN_LOP/51/results_1.json, trained=True, evaluated=False).


INFO - train_and_eval - Result: [Prediction(soft=tensor([[0.0015, 0.0034, 0.0158,  ..., 0.0225, 0.0006, 0.0031],
        [0.0064, 0.0013, 0.0212,  ..., 0.0080, 0.0006, 0.0353],
        [0.0133, 0.0056, 0.0083,  ..., 0.0115, 0.0013, 0.0146],
        ...,
        [0.0021, 0.0024, 0.0389,  ..., 0.0144, 0.0030, 0.0036],
        [0.0048, 0.0058, 0.0143,  ..., 0.0132, 0.0014, 0.0049],
        [0.0004, 0.0200, 0.0027,  ..., 0.0138, 0.0024, 0.0030]]), log_soft=tensor([[-6.5122, -5.6720, -4.1480,  ..., -3.7955, -7.4374, -5.7905],
        [-5.0439, -6.6765, -3.8548,  ..., -4.8302, -7.4524, -3.3450],
        [-4.3188, -5.1791, -4.7956,  ..., -4.4613, -6.6256, -4.2238],
        ...,
        [-6.1626, -6.0326, -3.2457,  ..., -4.2374, -5.7937, -5.6163],
        [-5.3321, -5.1470, -4.2494,  ..., -4.3265, -6.5636, -5.3211],
        [-7.7787, -3.9105, -5.9155,  ..., -4.2805, -6.0250, -5.8014]]), hard=tensor([ 4, 28, 28,  ..., 30, 16,  8]), alpha=tensor([[ 12.0600,  41.1330, 144.2998,  ..., 207.7222,   

Prediction(soft=tensor([[0.0015, 0.0034, 0.0158,  ..., 0.0225, 0.0006, 0.0031],
        [0.0064, 0.0013, 0.0212,  ..., 0.0080, 0.0006, 0.0353],
        [0.0133, 0.0056, 0.0083,  ..., 0.0115, 0.0013, 0.0146],
        ...,
        [0.0021, 0.0024, 0.0389,  ..., 0.0144, 0.0030, 0.0036],
        [0.0048, 0.0058, 0.0143,  ..., 0.0132, 0.0014, 0.0049],
        [0.0004, 0.0200, 0.0027,  ..., 0.0138, 0.0024, 0.0030]]), log_soft=tensor([[-6.5122, -5.6720, -4.1480,  ..., -3.7955, -7.4374, -5.7905],
        [-5.0439, -6.6765, -3.8548,  ..., -4.8302, -7.4524, -3.3450],
        [-4.3188, -5.1791, -4.7956,  ..., -4.4613, -6.6256, -4.2238],
        ...,
        [-6.1626, -6.0326, -3.2457,  ..., -4.2374, -5.7937, -5.6163],
        [-5.3321, -5.1470, -4.2494,  ..., -4.3265, -6.5636, -5.3211],
        [-7.7787, -3.9105, -5.9155,  ..., -4.2805, -6.0250, -5.8014]]), hard=tensor([ 4, 28, 28,  ..., 30, 16,  8]), alpha=tensor([[ 12.0600,  41.1330, 144.2998,  ..., 207.7222,   5.1618,  26.7567],
        [ 64.9

In [4]:
import pandas as pd
import networkx as nx


def generate_graph(data, preds, name):
    """Export a graph with labels, split masks and model predictions to GraphML.

    Every node gets the true label, the three split masks (as 0/1 integers)
    and, for each entry of ``preds``, the predicted label, an error flag and
    the confidence scores listed in ``confidence_fields`` below. Attribute
    names are prefixed with the model label, e.g. ``gpn_rw_sc_total``.

    Args:
        data: Graph data object exposing ``edge_index``, ``y``, ``train_mask``,
            ``val_mask`` and ``test_mask``, each convertible with ``.numpy()``.
        preds: Mapping from a model label to a prediction object exposing
            ``hard`` and the confidence attributes listed below.
        name: Output path without extension; ``.graphml`` is appended. The
            parent directory is created when it does not exist yet.

    Returns:
        The undirected ``networkx.Graph`` that was written to disk.
    """
    import os  # local import: only needed to prepare the output directory

    # (attribute-name suffix on the node, attribute name on the prediction)
    confidence_fields = (
        ("pc_aleatoric", "prediction_confidence_aleatoric"),
        ("pc_epistemic", "prediction_confidence_epistemic"),
        ("sc_total", "sample_confidence_total"),
        ("sc_total_entropy", "sample_confidence_total_entropy"),
        ("sc_aleatoric", "sample_confidence_aleatoric"),
        ("sc_aleatoric_entropy", "sample_confidence_aleatoric_entropy"),
        ("sc_epistemic", "sample_confidence_epistemic"),
        ("sc_epistemic_entropy", "sample_confidence_epistemic_entropy"),
        ("sc_epistemic_entropy_diff", "sample_confidence_epistemic_entropy_diff"),
    )

    # One row per edge after the transpose.
    edges = data.edge_index.T.numpy()
    y = data.y.numpy()

    # Column-oriented node attributes: attribute name -> per-node array.
    nodes = {
        "true_y": y,
        "train_mask": data.train_mask.numpy().astype(int),
        "val_mask": data.val_mask.numpy().astype(int),
        "test_mask": data.test_mask.numpy().astype(int),
    }

    for model, pred in preds.items():
        y_pred = pred.hard.numpy()
        nodes[f"{model}_y"] = y_pred
        # 1 where the prediction differs from the true label, else 0.
        nodes[f"{model}_err"] = (y != y_pred).astype(int)
        for suffix, attribute in confidence_fields:
            nodes[f"{model}_{suffix}"] = getattr(pred, attribute).numpy()

    g = nx.Graph()
    # Create all nodes together with their attributes in a single pass.
    g.add_nodes_from(
        (i, {key: values[i] for key, values in nodes.items()})
        for i in range(len(y))
    )
    g.add_edges_from(edges)

    out_path = f"{name}.graphml"
    # write_graphml does not create missing directories; do it up front so a
    # fresh checkout without e.g. "graphs/" does not fail after all the work.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    nx.write_graphml(g, out_path)

    return g

# Export both GPN variants' predictions on the loaded data to
# graphs/<dataset>.graphml and keep the resulting graph in `g`.
g = generate_graph(data, {
    "gpn_rw": gpn_rw_pred,
    "gpn_lop": gpn_lop_pred
}, f"graphs/{dataset}")

In [13]:
import torch

def simplex_samples(alpha: torch.Tensor, num_samples=1000):
    """Estimate the expected categorical entropy under ``Dirichlet(alpha)``.

    Draws ``num_samples`` probability vectors from the Dirichlet, computes the
    entropy of each one as a categorical distribution and averages over the
    sample dimension.

    Args:
        alpha: Dirichlet concentration parameters; the last dimension indexes
            the categories.
        num_samples: Number of Dirichlet draws to average over.

    Returns:
        Tensor of mean entropies with shape ``alpha.shape[:-1]``.
    """
    prob_draws = torch.distributions.Dirichlet(alpha).sample((num_samples,))
    per_draw_entropy = torch.distributions.Categorical(probs=prob_draws).entropy()
    # Dimension 0 is the sample dimension added by `.sample` above.
    return per_draw_entropy.mean(dim=0)

# Smoke test: 10 rows of an all-ones concentration over 3 categories, created
# on CUDA device 0 (so this cell needs a GPU).
# NOTE(review): `_` is also IPython's "last output" name; a descriptive
# variable name would be safer — confirm nothing else relies on it.
_ = simplex_samples(torch.tensor([[1.0]], device=0).repeat(10, 3))

_

tensor([0.8253, 0.8322, 0.8284, 0.8317, 0.8232, 0.8375, 0.8340, 0.8314, 0.8300,
        0.8359], device='cuda:0')

In [7]:
import torch
import gc


# Collect unreachable Python objects, then release PyTorch's cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()