In [None]:
# Diffusion-aware importance score.
import pickle

# Notebook-level configuration for the diffusion-aware score.
INPUT = "Graph1"                     # pickled adjacency dict to read
OUTPUT = "diffusion_importance.pkl"  # where the normalized scores are written


# ---------- Read the pickled graph ----------
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(INPUT, "rb") as f:
    graph = pickle.load(f)


# ---------- Gather every node id (sources and targets) ----------
all_nodes = set(graph)
for targets in graph.values():
    all_nodes.update(targets)   # nodes that only show up as neighbors


# ---------- Raw diffusion score: total outgoing probability ----------
# Every node starts at 0.0; nodes that never appear as a source key keep it.
raw_scores = dict.fromkeys(all_nodes, 0.0)

for source, targets in graph.items():
    outgoing = 0.0

    for tag_prob_list in targets.values():
        # each entry unpacks as (tag, prob); add up the probs of this edge
        outgoing += sum(pair_prob for _tag, pair_prob in tag_prob_list)

    raw_scores[source] = outgoing


# ---------- Rescale raw scores to [0, 1] ----------
values = list(raw_scores.values())
min_v, max_v = min(values), max(values)
span = max_v - min_v   # only used when the scores are not all identical

# When every raw score is the same there is nothing to spread out,
# so each node is assigned 0.0 instead of dividing by zero.
diffusion_scores = {
    node: 0.0 if max_v == min_v else (score - min_v) / span
    for node, score in raw_scores.items()
}


# ---------- Persist the result ----------
with open(OUTPUT, "wb") as f:
    pickle.dump(diffusion_scores, f)

print(f"Saved diffusion-aware importance scores to {OUTPUT}")
print("Example values:")
# Show at most the first 100 entries in dict order.
for k, score in list(diffusion_scores.items())[:100]:
    print(k, ":", score)


Saved diffusion-aware importance scores to diffusion_importance.pkl
Example values:
41041922 : 0.0
12087298 : 0.0
25255942 : 0.014763966325989577
185589766 : 0.03976082113006317
29208582 : 0.06505625505383789
14200843 : 0.0
83496972 : 0.017743537694603597
110370832 : 0.030092695299419867
65437712 : 0.0
43567122 : 0.011951379433336245
106709012 : 0.0
34181142 : 0.003180562303523121
113317912 : 0.010186518723253206
86319132 : 0.04080636418983117
83503132 : 0.0
40681502 : 0.14420363729153665
96673822 : 0.0
9234469 : 0.02035375268046522
187238439 : 0.0
118192172 : 0.0
25798702 : 0.004872161374213769
183703598 : 0.14986012556235212
5054510 : 0.0
9646127 : 0.0
3334190 : 0.0
47390772 : 0.05738470853164886
85608502 : 0.0
12017724 : 0.01224714219187126
9711676 : 0.0
3252286 : 0.03254722699655936
9001024 : 0.03588711972231772
149659722 : 0.0
11647053 : 0.0
5677134 : 0.0056710492400417235
79644752 : 0.0
88066132 : 0.026794984460799157
152639572 : 0.008490991258736385
1435736 : 0.0
8820827 : 0.018

In [None]:
# Structural importance score
import pickle
import networkx as nx

# Notebook-level configuration for the structural score.
INPUT = "Graph1"                            # pickled adjacency dict to read
OUTPUT = "structural_importance.pkl"        # where the normalized scores are written


# -------- Read the pickled graph ----------
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(INPUT, "rb") as f:
    graph = pickle.load(f)


# -------- Gather every node id (sources and targets) ----------
all_nodes = set(graph)
for targets in graph.values():
    all_nodes.update(targets)


# -------- Assemble the weighted directed graph ----------
G = nx.DiGraph()
G.add_nodes_from(all_nodes)   # register nodes first so edge-less ones are kept

for source, targets in graph.items():
    for target, tag_prob_list in targets.items():
        # edge weight = sum of the per-tag probabilities for source -> target
        edge_weight = sum(pair_prob for _tag, pair_prob in tag_prob_list)

        if not G.has_edge(source, target):
            G.add_edge(source, target, weight=edge_weight)
        else:
            # edge already present: accumulate onto its existing weight
            G[source][target]["weight"] += edge_weight


# -------- Out-degree (number of outgoing neighbors) ----------
outdeg = {v: G.out_degree(v) for v in all_nodes}


# -------- PageRank (weighted) ----------
pagerank = nx.pagerank(G, weight="weight")


# -------- k-core (on undirected graph, unweighted) ----------
# to_undirected() returns a copy, so editing UG leaves G untouched.
UG = G.to_undirected()
# core_number() refuses graphs containing self-loops, so drop any v -> v
# edges from the undirected copy first. The list() snapshot avoids mutating
# the graph while its self-loop iterator is still being consumed.
UG.remove_edges_from(list(nx.selfloop_edges(UG)))
core_num = nx.core_number(UG)   # isolated nodes automatically get 0


# -------- Combine into structural score ----------
# NOTE(review): out-degree and core number are integer counts, while PageRank
# values sum to 1 over all nodes, so this unweighted sum is dominated by the
# first and last terms. Confirm that this is the intended combination.
raw_scores = {
    v: outdeg[v] + pagerank[v] + core_num[v]
    for v in all_nodes
}


# -------- Rescale raw scores to [0, 1] ----------
values = list(raw_scores.values())
min_v, max_v = min(values), max(values)

structural_scores = {}
if max_v != min_v:
    spread = max_v - min_v
    for node, raw in raw_scores.items():
        structural_scores[node] = (raw - min_v) / spread
else:
    # every node has the same raw score: assign 0.0 rather than divide by zero
    for node in raw_scores:
        structural_scores[node] = 0.0


# -------- Persist the result ----------
with open(OUTPUT, "wb") as f:
    pickle.dump(structural_scores, f)

print(f"Saved structural importance scores to {OUTPUT}")
print("Example entries:")
# Show at most the first 100 entries in dict order.
for k, score in list(structural_scores.items())[:100]:
    print(k, ":", score)


Saved structural importance scores to structural_importance.pkl
Example entries:
41041922 : 4.798027143008087e-06
12087298 : 6.005244282106694e-06
25255942 : 0.03704893429213173
185589766 : 0.12963066854383037
29208582 : 0.16666760086961985
14200843 : 1.893959988224785e-06
83496972 : 0.11114037632941547
110370832 : 0.12963588641876633
65437712 : 1.882707292615204e-07
43567122 : 0.037050140801977945
106709012 : 6.005244282106694e-06
34181142 : 0.03703801603905792
113317912 : 0.03703681684105522
86319132 : 0.07409312004806116
83503132 : 9.85490450221561e-07
40681502 : 0.2592875982136335
96673822 : 1.468837354423981e-06
9234469 : 0.07409342353389026
187238439 : 0.018529495392775
118192172 : 0.03705104624171226
25798702 : 0.037042875566593476
183703598 : 0.314819067673868
5054510 : 3.6457298131799404e-06
9646127 : 0.01852369665915692
3334190 : 6.005244282106694e-06
47390772 : 0.09259477214392968
85608502 : 2.2248528747524698e-06
12017724 : 0.018518381679899536
9711676 : 0.01852408550353157

In [None]:
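# Sanity check on structural_scores from the previous cell (currently disabled).
# Uncomment to print the score range and the number of scored nodes.
# print("min =", min(structural_scores.values()))
# print("max =", max(structural_scores.values()))
# print("count =", len(structural_scores))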


min = 0.0
max = 1.0
count = 1095


In [None]:
# Reconstruction-sensitivity score.

import pickle
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch_geometric.nn import GATConv
from torch_geometric.data import Data

GRAPH_FILE = "Graph1"
FEATURE_FILE = "node_features.pkl"
OUTPUT_FILE = "reconstruction_importance.pkl"


# ------------ Load Graph ------------
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(GRAPH_FILE, "rb") as f:
    graph = pickle.load(f)

# ------------ Load Features ------------
with open(FEATURE_FILE, "rb") as f:
    feat_dict = pickle.load(f)


# ------------ Build Node Index Map ------------
# Nodes are the source keys plus every neighbor key; sorting gives each node
# a stable row index.
all_nodes = set(graph.keys())
for nbrs in graph.values():
    all_nodes.update(nbrs.keys())

nodes = sorted(all_nodes)
node2idx = {n: i for i, n in enumerate(nodes)}
idx2node = {i: n for n, i in node2idx.items()}

N = len(nodes)
F_DIM = 100   # feature dimension


# ------------ Build Feature Matrix ------------
# Fail up front with a readable message instead of a bare KeyError part-way
# through the fill loop when some node has no feature vector.
missing_feats = [n for n in nodes if n not in feat_dict]
if missing_feats:
    raise KeyError(
        f"{len(missing_feats)} node(s) have no entry in {FEATURE_FILE}; "
        f"first few: {missing_feats[:5]}"
    )

X = np.zeros((N, F_DIM), dtype=np.float32)

for n in nodes:
    X[node2idx[n]] = feat_dict[n]   # each vector must broadcast to F_DIM values

x = torch.tensor(X)


# ------------ Build Edge Index ------------
edge_list = [
    [node2idx[u], node2idx[v]]
    for u, nbrs in graph.items()
    for v in nbrs.keys()
]

# reshape(-1, 2) keeps a well-formed (2, 0) result when there are no edges;
# .contiguous() materializes the transpose, which is the layout PyG expects
# for edge_index.
edge_index = (
    torch.tensor(edge_list, dtype=torch.long)
    .reshape(-1, 2)
    .t()
    .contiguous()
)


# ------------ GAT Autoencoder ------------
class GATAE(nn.Module):
    """Graph-attention autoencoder over node features.

    Two GATConv layers encode F_DIM -> 64 -> 32, and two linear layers
    decode 32 -> 64 -> F_DIM. ``forward`` returns the reconstructed
    feature matrix.
    """

    def __init__(self):
        super().__init__()
        # encoder: graph attention layers
        self.enc1 = GATConv(F_DIM, 64)
        self.enc2 = GATConv(64, 32)

        # decoder: plain per-node linear layers (no graph structure used)
        self.dec1 = nn.Linear(32, 64)
        self.dec2 = nn.Linear(64, F_DIM)

    def forward(self, x, edge_index):
        """Encode ``x`` along ``edge_index`` and decode it back to F_DIM."""
        hidden = self.enc1(x, edge_index).relu()
        latent = self.enc2(hidden, edge_index)   # no activation on the code
        decoded = self.dec1(latent).relu()
        return self.dec2(decoded)


# Seed torch before the model is built so weight initialization (and hence
# the saved sensitivity scores) is repeatable across Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

model = GATAE()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

data = Data(x=x, edge_index=edge_index)


# ------------ Train Autoencoder ------------
EPOCHS = 200

model.train()   # make the training mode explicit
for epoch in range(EPOCHS):
    optimizer.zero_grad()
    recon = model(data.x, data.edge_index)
    # full-batch objective: reconstruct every node's input features
    loss = F.mse_loss(recon, data.x)
    loss.backward()
    optimizer.step()


# ------------ Baseline Reconstruction Error (E) ------------
# Switch to inference mode so any train-only layer behavior is disabled for
# every forward pass below.
model.eval()

with torch.no_grad():
    recon = model(data.x, data.edge_index)
    E = F.mse_loss(recon, data.x).item()


# ------------ Compute E(-v) and i_rec(v) ------------
irec = {}

# The two rows of edge_index do not change inside the loop; look them up once.
edge_src = data.edge_index[0]
edge_dst = data.edge_index[1]

with torch.no_grad():
    for i in range(N):

        # mask features (zero vector)
        x_mask = data.x.clone()
        x_mask[i] = 0.0

        # remove all edges touching node i
        keep = (edge_src != i) & (edge_dst != i)
        ei_masked = data.edge_index[:, keep]

        # error is measured against the ORIGINAL features of all nodes
        recon_mask = model(x_mask, ei_masked)
        E_minus = F.mse_loss(recon_mask, data.x).item()

        # i_rec = (E(-v) - E) / E
        node_id = idx2node[i]
        irec[node_id] = (E_minus - E) / E


# ------------ Save Output ------------
with open(OUTPUT_FILE, "wb") as f:
    pickle.dump(irec, f)

print(f"Saved reconstruction-sensitivity scores to: {OUTPUT_FILE}")
print("Example entries:")
for k in list(irec.keys())[:5]:
    print(k, ":", irec[k])


Saved reconstruction-sensitivity scores to: reconstruction_importance.pkl
Example entries:
39029 : 0.0008956363493262511
80674 : -0.010644155099980132
189951 : 0.0004975490878306716
299259 : -0.010262024922806106
321158 : 0.05721814510052723


In [8]:
# importance score
import pickle
import numpy as np

# Inputs: the three per-node score dictionaries written by the earlier cells.
REC_FILE = "reconstruction_importance.pkl"
STRUCT_FILE = "structural_importance.pkl"
DIFF_FILE = "diffusion_importance.pkl"

# Output: the combined score dictionary.
OUTPUT_FILE = "importance_score.pkl"


# -------- Read the three component dictionaries --------
# NOTE: pickle.load can execute arbitrary code; only open trusted files.
with open(REC_FILE, "rb") as f:
    rec = pickle.load(f)

with open(STRUCT_FILE, "rb") as f:
    struct_ = pickle.load(f)

with open(DIFF_FILE, "rb") as f:
    diff = pickle.load(f)


# -------- Node list: sorted union of the keys of all three dicts --------
nodes = sorted(set(rec) | set(struct_) | set(diff))


def minmax_normalize(score_dict, node_ids=None):
    """Min-max scale the scores of ``node_ids`` into the range [0, 1].

    Parameters
    ----------
    score_dict : dict
        Maps node id -> raw numeric score.
    node_ids : iterable, optional
        Node ids to emit, in output order. When omitted, the notebook-level
        ``nodes`` list is used (the original behavior).

    Returns
    -------
    dict
        Maps every id in ``node_ids`` to a Python float in [0, 1].
        The minimum and maximum are taken over the ids that are actually
        present in ``score_dict``; ids missing from it receive 0.0 instead of
        raising KeyError. If all present scores are equal, or none are
        present, every id maps to 0.0. An empty ``node_ids`` yields ``{}``.
    """
    if node_ids is None:
        node_ids = nodes   # notebook-level sorted node list
    node_ids = list(node_ids)

    # Start everyone at 0.0 so missing nodes and degenerate cases need no
    # special handling further down.
    result = {v: 0.0 for v in node_ids}

    present = [v for v in node_ids if v in score_dict]
    if not present:
        return result

    vals = np.array([score_dict[v] for v in present], dtype=float)
    mn, mx = vals.min(), vals.max()

    if mx == mn:
        # avoid divide-by-zero: all become 0
        return result

    # .tolist() converts to plain Python floats so the pickled result does
    # not contain numpy scalar objects.
    norm = ((vals - mn) / (mx - mn)).tolist()
    result.update(zip(present, norm))
    return result


# -------- Rescale each component on its own --------
rec_norm = minmax_normalize(rec)
struct_norm = minmax_normalize(struct_)
diff_norm = minmax_normalize(diff)


# -------- Final score: plain sum of the three normalized components --------
final_score = {
    v: rec_norm[v] + struct_norm[v] + diff_norm[v]
    for v in nodes
}


# -------- Persist the result --------
with open(OUTPUT_FILE, "wb") as f:
    pickle.dump(final_score, f)

print(f"Saved final node importance scores to: {OUTPUT_FILE}")
print("Example entries:")
# Preview the five smallest node ids (nodes is sorted).
for k in nodes[:5]:
    print(k, ":", final_score[k])


Saved final node importance scores to: importance_score.pkl
Example entries:
39029 : 0.29640230284611774
80674 : 0.24343295795453437
189951 : 0.28063709633630457
299259 : 0.4057857889337039
321158 : 1.0915127868652192
