-
Notifications
You must be signed in to change notification settings - Fork 2
/
homophilic_graph.py
189 lines (150 loc) · 5.29 KB
/
homophilic_graph.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import torch
import torch.nn.functional as F
import numpy as np
import numba
import random
import torch.nn as nn
from tqdm import tqdm
from HeterophilicGraph import normalize_matrix
from sklearn.metrics.pairwise import cosine_similarity
#* this is core code for construct S, part 6
def A_final(S, nbs=50.0) :
S[S < (1 / nbs)] = 0
S[S >= (1 / nbs)] = 1
S[S > 0] = 1
S = S.T + S
S[S >= 1] = 1
S[S < 1] = 0
return S
#* this is core code for construct S, part 1
@numba.jit
def op_tmp(A, K) :
L = A.shape[0]
A_2 = A.dot(A)
N, M = np.zeros_like(A), np.zeros_like(A)
for i in range(L) :
for j in range(L) :
sum_s_jf_2 = 0
sum_s_jf_Cf = 0
for f in range(L) :
C_f = A_2[i, f] - A[j, f] * A[i, j] - A[i, f]
if f != j :
sum_s_jf_2 += A[j][f] ** 2
sum_s_jf_Cf += A[j][f] * C_f
s_ij_N = 2 * A_2[i, j] - K[i, j] - 2 * sum_s_jf_Cf
s_ij_M = 4 + 2 * sum_s_jf_2
N[i, j] = s_ij_N
M[i, j] = s_ij_M
return N, M
#* this is core code for construct S, part 2
@numba.jit
def op_S(lb2, N, M):
S = (N.T + lb2 ) / M.T
S[S<0] = 0
return S.T
@numba.jit
def create_W(X, scale=16) :
N = X.shape[0]
S = np.zeros((N, N))
for i in range(N) :
for j in range(N) :
S[i, j] += np.linalg.norm(X[i] - X[j], ord=2) / scale
return S
#* this is core code for construct S, part 3
def compute_NM(dataname="Texas", X=None, A=None) :
I = np.eye(A.shape[0])
A = A + I
A = normalize_matrix(A)
K = create_W(X)
N, M = op_tmp(A=A, K=K)
# np.save('./N_{}.npy'.format(dataname), N)
# np.save('./M_{}.npy'.format(dataname), M)
# print(N, M)
return N, M
#* this is core code for construct S, part 4
def compute_A_bar(N, M, dataname="Texas") :
# S = np.zeros_like(N)
lb2 = []
for i in tqdm(range(N.shape[0])) :
lb = optimize_lbd2(N[i], M[i])
lb2.append(lb)
lb2 = np.array(lb2)
S = op_S(lb2, N, M)
# np.save("./S_{}.npy".format(dataname), S)
return S
#* this is core code for construct S, part 5
#* This is for compute \lambda_2 via gradient descent
#* In fact, this is a approximate solution
#* You can also use the exact solution, but it is too slow
class lambda_2(nn.Module):
def __init__(self):
super(lambda_2, self).__init__()
self.lbd = nn.Parameter(torch.Tensor(1))
self.reset_parameter()
def reset_parameter(self):
self.lbd.data.fill_(1e-3)
def forward(self, N_i, M_i):
Lbd = F.relu(self.lbd)
obj = F.relu((N_i + Lbd)/M_i)
return obj.sum()
def optimize_lbd2(N_i, M_i, epochs=1000, learning_rate=1e-4, convengence=1e-16):
N_ii = torch.from_numpy(N_i)
seed = 666666
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
np.random.seed(seed)
random.seed(seed)
N_ii = N_ii.cuda()
M_ii = torch.from_numpy(M_i).cuda()
model = lambda_2()
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
# model.reset_parameter()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
loss_last = torch.zeros(1).cuda()
for epoch in range(epochs):
optimizer.zero_grad()
obj = model(N_ii, M_ii)
# print(obj.sum())
loss = 10 * torch.square(obj - 1)
if torch.abs_(loss_last - loss) < convengence * loss_last:
break
else:
loss_last = loss.clone()
loss.backward()
optimizer.step()
return F.relu(model.lbd).item()
#* Compute the homophily of the graph
def homophily_v2(A, labels, ignore_negative=False) :
""" gives edge homophily, i.e. proportion of edges that are intra-class
compute homophily of classes in labels vector
See Zhu et al. 2020 "Beyond Homophily ..."
if ignore_negative = True, then only compute for edges where nodes both have
nonnegative class labels (negative class labels are treated as missing
"""
src_node, targ_node = A.nonzero()
matching = labels[src_node] == labels[targ_node]
labeled_mask = (labels[src_node] >= 0) * (labels[targ_node] >= 0)
if ignore_negative :
edge_hom = np.mean(matching[labeled_mask])
else :
edge_hom = np.mean(matching)
return edge_hom
if __name__ == '__main__':
#! A simple example on Cora dataset
from load_data import Swither
from HeterophilicGraph import remove_self_loop, construct_heterophilic_graph
import matplotlib.pyplot as plt
X, A, gnd = Swither[2](dataname="Cora")
N, M = compute_NM(dataname="Cora", X=X, A=A)
lb2 = []
for i in tqdm(range(N.shape[0])):
lb = optimize_lbd2(N[i], M[i])
lb2.append(lb)
lb2 = np.array(lb2)
S = op_S(lb2, N, M)
S = A_final(S)
print("homophily", homophily_v2(A, gnd)) #! 0.8137
print("homophily", homophily_v2(S, gnd)) #! 0.8691 higher than the reported result
print("edges of A", np.count_nonzero(A)) #! 5429
print("edges of S", np.count_nonzero(S)) #! 9153