In [1]:
import torch
import matplotlib.pyplot as plt
import numpy as np
from torch_geometric.utils import k_hop_subgraph
import os
from utils import pearsonr
from collections import defaultdict as ddict
from torch_geometric.utils import to_undirected
from tqdm import tqdm

In [2]:
@torch.no_grad()
def MAD(embeddings, targets=None):
    """Mean average cosine distance of `embeddings` over the pairs weighted by `targets`.

    For every row ``i`` the cosine distances ``1 - cos(e_i, e_j)`` are multiplied
    by ``targets[i, j]``, summed over ``j`` and divided by the row sum of
    ``targets``; the per-row values are then averaged into one number.

    Args:
        embeddings: 2-D tensor with one embedding per row.
        targets: (n, n) mask / weight tensor selecting which pairs count, on the
            same device as `embeddings`. ``None`` (the default) weights every
            pair equally, self-pairs included.

    Returns:
        The averaged distance as a Python float. A row whose targets sum to
        zero evaluates to 0/0 = NaN, which makes the overall result NaN.
    """
    # Unit-normalise once so a single matmul yields every pairwise cosine
    # similarity; eps matches the default of torch.cosine_similarity.
    unit = torch.nn.functional.normalize(embeddings, p=2, dim=-1, eps=1e-8)
    dij = 1 - unit @ unit.t()
    if targets is None:
        # The signature advertises None as a default, so honour it instead of
        # failing inside torch.mul.
        targets = torch.ones_like(dij)
    dtgt = torch.mul(dij, targets)
    dtgt = dtgt.sum(dim=-1) / targets.sum(dim=-1)
    return dtgt.mean().item()

In [3]:
@torch.no_grad()
def I2NR(edges, labels, hops=2):
    """Score how much of each node's `hops`-hop neighbourhood shares its label.

    For every node that appears in `edges` (after symmetrising them), the
    `hops`-hop subgraph around the node is extracted and the fraction of its
    edges whose two endpoints both carry the node's own label is recorded.

    Args:
        edges: (2, E) edge index tensor.
        labels: 1-D tensor of node labels, indexed by node id.
        hops: neighbourhood radius handed to `k_hop_subgraph`.

    Returns:
        ``(mean_i2nr, weighted_mean)`` as Python floats: the mean of the
        per-node fractions, and that mean scaled by the share of labelled
        nodes that appear in at least one edge. An edge set with no edges
        yields ``(0.0, 0.0)``.
    """
    # Without edges no node has a neighbourhood, and the mean below would
    # divide by zero; report "no information" instead of raising.
    if edges.numel() == 0:
        return 0.0, 0.0
    edges = to_undirected(edges)
    i2nr = []
    nodes = set(edges.reshape(-1).cpu().tolist())
    for node in nodes:
        node_label = labels[node]
        # Only the edge index of the extracted subgraph is needed here.
        _, k_edges, _, _ = k_hop_subgraph(node, hops, edges)
        source_labels = labels[k_edges[0, :]]
        target_labels = labels[k_edges[1, :]]
        # An edge counts when both endpoints agree with each other...
        like_edges = source_labels.eq(target_labels)
        # ...and that shared label is the centre node's label.
        tgt = source_labels.eq(node_label)
        information = torch.mul(like_edges, tgt).sum()
        i2nr.append(information.div(k_edges.size(1)).item())
    mean_i2nr = sum(i2nr) / len(i2nr)
    # Fraction of all labelled nodes that are covered by at least one edge.
    covered_node_ratio = len(nodes) / labels.size(0)
    weighted_mean = mean_i2nr * covered_node_ratio
    return mean_i2nr, weighted_mean

In [4]:
def load_pkg(path):
    """Load the object stored at `path` with `torch.load` and return it as-is."""
    # NOTE(review): torch.load is pickle-based, so only point this at files
    # that come from a trusted source.
    return torch.load(path)

In [5]:
def get_info(path):
    """Collect MAD, I2NR and accuracy statistics for every run stored in one package.

    The package loaded from `path` is a mapping holding ``'labels'`` and
    ``'edge_index'`` plus one entry per run. Each run entry is read for
    ``'prompt'``, ``'static_embs'``, ``'learned_embs'``, ``'train_edges'`` and
    ``'test_acc'``.

    Args:
        path: location of the saved package, passed to `load_pkg`.

    Returns:
        A ``defaultdict(list)`` whose per-run series (``'static_prompt_mads'``,
        ``'learned_prompt_mads'``, ``'static_mads'``, ``'learned_mads'``,
        ``'train_i2nr'``, ``'train_weighted_i2nr'``, ``'node_acc'``) are
        tensors with one value per run, plus ``'global_i2nr'``: the unweighted
        I2NR of the full graph as a Python float.
    """
    pkg = load_pkg(path)
    labels = pkg['labels']
    edge_index = pkg['edge_index']
    n = labels.size(0)
    # Use the GPU when there is one, but keep the analysis runnable on
    # CPU-only machines instead of failing on a hard-coded .cuda().
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Named `info` rather than `vars` so the builtin is not shadowed.
    info = ddict(list)
    # Mask selecting every node pair.
    all_pairs = torch.ones(n, n, device=device)
    for key, run in pkg.items():
        if key in ('labels', 'edge_index'):
            continue
        # Mask with ones at the column indices listed in each row of
        # run['prompt'] (presumably the prompt nodes of that row's node —
        # TODO confirm). Built on `device` so it works wherever the index
        # tensor was saved.
        prompt_pairs = torch.scatter(
            torch.zeros(n, n, device=device), 1, run['prompt'].to(device), 1
        )
        static_embs = run['static_embs'].to(device)
        learned_embs = run['learned_embs'].to(device)
        info['static_prompt_mads'].append(MAD(static_embs, prompt_pairs))
        info['learned_prompt_mads'].append(MAD(learned_embs, prompt_pairs))
        info['static_mads'].append(MAD(static_embs, all_pairs))
        info['learned_mads'].append(MAD(learned_embs, all_pairs))
        i2nr, weighted_i2nr = I2NR(run['train_edges'], labels)
        info['train_i2nr'].append(i2nr)
        info['train_weighted_i2nr'].append(weighted_i2nr)
        info['node_acc'].append(run['test_acc'])
    # Turn each per-run list into a tensor; iterate over a snapshot of the
    # keys because the values are replaced in place.
    for key in list(info):
        info[key] = torch.tensor(info[key])
    # Added after the conversion above, so it stays a plain float.
    info['global_i2nr'] = I2NR(edge_index, labels)[0]
    return info

In [6]:
def AutoCorrelation(info):
    """Correlate every per-run metric in `info` with the node accuracy.

    Each entry of `info` other than ``'node_acc'`` and ``'global_i2nr'`` is
    passed, together with ``info['node_acc']``, to `pearsonr`.

    Returns:
        A dict mapping each metric name to whatever `pearsonr` returned for it.
    """
    accuracy = info['node_acc']
    # 'node_acc' is the reference series itself and 'global_i2nr' is not
    # paired with it, so neither is correlated.
    skipped = ('global_i2nr', 'node_acc')
    return {
        name: pearsonr(series, accuracy)
        for name, series in info.items()
        if name not in skipped
    }

In [7]:
root = './embeddings'
root2 = './processed_embeddings'
# exist_ok replaces the isdir() + mkdir() pair and its check-then-create race.
os.makedirs(root2, exist_ok=True)

# Process every saved package in `root` and store its statistics under the
# same file name in `root2`. Sorting gives a reproducible processing order.
for p in tqdm(sorted(os.listdir(root))):
    path = os.path.join(root, p)
    # Skip entries that are not regular files (e.g. sub-directories), which
    # cannot be loaded as a package.
    if not os.path.isfile(path):
        continue
    # Bound to `info`, not `vars`, so the builtin vars() stays usable in the
    # notebook namespace after this cell has run.
    info = get_info(path)
    pcoor = AutoCorrelation(info)
    torch.save({'info': info, 'correlation': pcoor}, os.path.join(root2, p))


100%|██████████| 8/8 [1:01:20<00:00, 460.01s/it]
