In [782]:
import dgl
import torch
import numpy as np
import networkx as nx
from node2vec import Node2Vec
import matplotlib.pyplot as plt
from operator import itemgetter
import scipy.sparse as sp
import torch.nn as nn
import torch.nn.functional as F
from sklearn.neighbors import NearestNeighbors
import dgl.function as fn
import dgl.nn.pytorch as dglnn
import itertools
from dgl.nn import GraphConv
from dgl.nn import SumPooling
from dgl.nn import DenseGraphConv
from dgl.nn import SAGEConv
import dgl.data

In [783]:
class GraphSAGE(nn.Module):
    def __init__(self, in_feats, h_feats):
        super().__init__()
        self.conv1 = SAGEConv(in_feats, h_feats, 'mean')
        self.conv2 = SAGEConv(h_feats, h_feats, 'mean')

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        return h

In [784]:
class MLPPredictor(nn.Module):
    def __init__(self, h_feats):
        super().__init__()
        self.W1 = nn.Linear(h_feats * 2, h_feats)
        self.W2 = nn.Linear(h_feats, 1)

    def apply_edges(self, edges):
        h = torch.cat([edges.src['h'], edges.dst['h']], 1)
        return {'score': self.W2(F.relu(self.W1(h))).squeeze(1)}

    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(self.apply_edges)
            return g.edata['score']

In [785]:
class FC(nn.Module):
    def __init__(self, h_feats):
        super().__init__()
        self.W1 = nn.Linear(h_feats*2, h_feats*2)
        self.W2 = nn.Linear(h_feats*2, h_feats)

    def apply_edges(self, edges):
        h = torch.cat([edges.src['h'], edges.dst['h']], 1)
        return {'score': self.W2(F.relu(self.W1(h))).squeeze(1)}

    def forward(self, g, h):
        with g.local_scope():
            g.ndata['h'] = h
            g.apply_edges(self.apply_edges)
            return g.edata['score']

In [786]:
def positive_sample(graph, test_size=0.1):
    u, v = graph.edges()
    eids = np.random.permutation(np.arange(graph.number_of_edges())) #random index edges
    test_size_idx = int(len(eids) * test_size) #size positive sample by index

    test_pos_u, test_pos_v = u[eids[:test_size_idx]], v[eids[:test_size_idx]]
    train_pos_u, train_pos_v = u[eids[test_size_idx:]], v[eids[test_size_idx:]] 
    
    train_pos_g = dgl.graph((train_pos_u, train_pos_v), num_nodes=graph.number_of_nodes())
    test_pos_g = dgl.graph((test_pos_u, test_pos_v), num_nodes=graph.number_of_nodes())
    
    return train_pos_g, test_pos_g

In [787]:
# dataset = dgl.data.CoraGraphDataset()
# g = dataset[0]
# G = g.to_networkx()

# edges = [
#         ['7', '6'],
#         ['6', '5'],
#         ['5', '4'],
#         ['5', '2'],
#         ['4', '3'],
#         ['2', '1'],
#         ['2', '3'],
#         ['3', '1'],
#         ]
# G = nx.Graph()
# G.add_edges_from(edges)
# g = dgl.from_networkx(G)

u = torch.tensor([6,5,4,4,3,1,1,2])
v = torch.tensor([5,4,3,1,2,0,2,0])
g = dgl.graph((u, v))
G = g.to_networkx()
print(len(list(G.nodes())))
print(len(list(G.edges())))

7
8


In [788]:
print(list(G.nodes()))
print(list(g.nodes()))

[0, 1, 2, 3, 4, 5, 6]
[tensor(0), tensor(1), tensor(2), tensor(3), tensor(4), tensor(5), tensor(6)]


In [789]:
print(list(G.edges()))
print(list(g.edges()))

[(1, 0), (1, 2), (2, 0), (3, 2), (4, 3), (4, 1), (5, 4), (6, 5)]
[tensor([6, 5, 4, 4, 3, 1, 1, 2]), tensor([5, 4, 3, 1, 2, 0, 2, 0])]


In [790]:
def alternate_list(a,b):
    c = list()
    for x in range(len(a)):
        c.extend([a[x], b[x]])
    return c

In [791]:
node2vec = Node2Vec(G, dimensions=3, walk_length=3)#, workers=6)
model_n2v = node2vec.fit(window=3, min_count=1)
embeddings = np.array([model_n2v.wv[x] for x in list(G.nodes())])
embeddings = torch.from_numpy(embeddings)
g.ndata['feat'] = embeddings                       # f_u

Computing transition probabilities:   0%|          | 0/7 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [00:00<00:00, 5872.73it/s]


In [792]:
print(list(G.nodes()))
print(list(g.nodes()))

[0, 1, 2, 3, 4, 5, 6]
[tensor(0), tensor(1), tensor(2), tensor(3), tensor(4), tensor(5), tensor(6)]


In [793]:
embeddings

tensor([[-0.0178,  0.0078,  0.1701],
        [ 0.3004, -0.3102, -0.2373],
        [ 0.2153,  0.2991, -0.1672],
        [-0.1254,  0.2460, -0.0512],
        [-0.1512,  0.2184, -0.1620],
        [-0.0604,  0.0958,  0.0330],
        [-0.2761, -0.3150,  0.2437]])

In [794]:
g.ndata['feat']

tensor([[-0.0178,  0.0078,  0.1701],
        [ 0.3004, -0.3102, -0.2373],
        [ 0.2153,  0.2991, -0.1672],
        [-0.1254,  0.2460, -0.0512],
        [-0.1512,  0.2184, -0.1620],
        [-0.0604,  0.0958,  0.0330],
        [-0.2761, -0.3150,  0.2437]])

In [795]:
model_n2v.wv[0]

array([-0.01777828,  0.00778672,  0.17005034], dtype=float32)

In [796]:
eids = np.random.permutation(np.arange(g.number_of_edges()))
train_g = g#dgl.remove_edges(g, eids[:int(len(eids) * 0.1)]) #subgraph
print(train_g)

Graph(num_nodes=7, num_edges=8,
      ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={})


### line graph

In [797]:
temp_G = nx.Graph()
temp_G.add_edges_from(list(G.edges()))
LnxG = nx.line_graph(temp_G)
# #lg = g.line_graph(backtracking=False)
# #lg = dgl.from_networkx(G).line_graph(backtracking=False)
# lg = dgl.from_networkx(LnxG)
# LG = lg.to_networkx()

# dual_nodes_dict = {}
# for idx, val in enumerate(list(LnxG.nodes())):
#     dual_nodes_dict[val] = idx


In [798]:
print(sorted(list(LnxG.nodes()),reverse=False))
LnxG.edges()

[(0, 1), (0, 2), (1, 2), (1, 4), (2, 3), (3, 4), (4, 5), (5, 6)]


EdgeView([((5, 6), (4, 5)), ((1, 4), (3, 4)), ((1, 4), (1, 2)), ((1, 4), (0, 1)), ((1, 4), (4, 5)), ((3, 4), (2, 3)), ((3, 4), (4, 5)), ((0, 2), (1, 2)), ((0, 2), (2, 3)), ((0, 2), (0, 1)), ((1, 2), (2, 3)), ((1, 2), (0, 1))])

In [799]:
def create_dgl_nx_dual_graph(line_nx_graph):
    nodes = sorted(list(line_nx_graph.nodes()),reverse=False)
    edges = list(line_nx_graph.edges())
    nodes_dict = {}
    new_u, new_v = [], []
    
    for idx, val in enumerate(nodes):
        nodes_dict[val] = idx
    
    for edge in edges:
        new_u.append(nodes_dict[edge[0]])
        new_v.append(nodes_dict[edge[1]])
    
    u = torch.tensor(new_u)
    v = torch.tensor(new_v)
    g = dgl.graph((u, v))
    G = g.to_networkx()
    return g, G
    
    
lg, LG = create_dgl_nx_dual_graph(LnxG)
dual_edges_dict = {edge: num for num, edge in enumerate(list(LG.edges()))}
dual_nodes_dict = {node: num for num, node in enumerate(sorted(list(LnxG.nodes()),reverse=False))}

In [800]:
print(dual_nodes_dict)
print(dual_edges_dict)
print(len(list(LG.nodes)), list(LG.nodes))
print(len(list(LG.edges)))
print(list(LG.edges()))

{(0, 1): 0, (0, 2): 1, (1, 2): 2, (1, 4): 3, (2, 3): 4, (3, 4): 5, (4, 5): 6, (5, 6): 7}
{(1, 2): 0, (1, 4): 1, (1, 0): 2, (2, 4): 3, (2, 0): 4, (3, 5): 5, (3, 2): 6, (3, 0): 7, (3, 6): 8, (5, 4): 9, (5, 6): 10, (7, 6): 11}
8 [0, 1, 2, 3, 4, 5, 6, 7]
12
[(1, 2), (1, 4), (1, 0), (2, 4), (2, 0), (3, 5), (3, 2), (3, 0), (3, 6), (5, 4), (5, 6), (7, 6)]


In [801]:
print(len(list(lg.nodes())), list(lg.nodes()))
print(len(list(lg.edges())[0]))
list(lg.edges())

8 [tensor(0), tensor(1), tensor(2), tensor(3), tensor(4), tensor(5), tensor(6), tensor(7)]
12


[tensor([7, 3, 3, 3, 3, 5, 5, 1, 1, 1, 2, 2]),
 tensor([6, 5, 2, 0, 6, 4, 6, 2, 4, 0, 4, 0])]

In [802]:
model_n2v_dual.wv[1]

array([ 0.3003091 , -0.31009832, -0.23722696], dtype=float32)

In [803]:
m = nn.AvgPool1d(2, stride=2)
node2vec = Node2Vec(LnxG, dimensions=3, walk_length=3)
model_n2v_dual = node2vec.fit(window=3, min_count=1)
#embeddings_dual = [[alternate_list(model_n2v_dual.wv[x][0],model_n2v_dual.wv[x][1]) for x in list(LnxG.nodes)]]
embeddings_dual = [model_n2v_dual.wv[x] for x in list(LG.nodes)]
#embeddings_dual = m(torch.tensor(embeddings_dual))[0]
#embeddings_dual = (torch.tensor(embeddings_dual))[0]
embeddings_dual = torch.tensor(embeddings_dual)
lg.ndata['feat'] = embeddings_dual                #f_uv^*

Computing transition probabilities:   0%|          | 0/8 [00:00<?, ?it/s]

Generating walks (CPU: 1): 100%|██████████| 10/10 [00:00<00:00, 12706.16it/s]


In [804]:
lg.ndata['feat']

tensor([[-0.0178,  0.0084,  0.1703],
        [ 0.3004, -0.3099, -0.2372],
        [ 0.2159,  0.3000, -0.1677],
        [-0.1253,  0.2462, -0.0513],
        [-0.1512,  0.2187, -0.1620],
        [-0.0604,  0.0961,  0.0330],
        [-0.2759, -0.3143,  0.2436],
        [ 0.1691,  0.2252,  0.0254]])

In [805]:
embeddings_dual

tensor([[-0.0178,  0.0084,  0.1703],
        [ 0.3004, -0.3099, -0.2372],
        [ 0.2159,  0.3000, -0.1677],
        [-0.1253,  0.2462, -0.0513],
        [-0.1512,  0.2187, -0.1620],
        [-0.0604,  0.0961,  0.0330],
        [-0.2759, -0.3143,  0.2436],
        [ 0.1691,  0.2252,  0.0254]])

In [806]:
lg.nodes()

tensor([0, 1, 2, 3, 4, 5, 6, 7])

In [807]:
dual_eids = np.random.permutation(np.arange(dual_g.number_of_edges()))
train_dual_g = lg#dgl.remove_edges(dual_g, eids[:int(len(dual_eids) * 0.1)]) #subgraph
print(train_dual_g)

Graph(num_nodes=8, num_edges=12,
      ndata_schemes={'feat': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={})


In [808]:
# print(list(train_dual_g.nodes))
# list(train_dual_g.edges)
#list(train_dual_g.nodes())

In [809]:
model = GraphSAGE(train_g.ndata['feat'].shape[1], 3)
#pred = MLPPredictor(20)
FC_net = FC(3)
###############################################################
model_dual = GraphSAGE(train_dual_g.ndata['feat'].shape[1], 3)
#pred_dual = MLPPredictor(20)
FC_net_dual = FC(3)
#model = SAGE(train_g.ndata['feat'].shape[1], 20, 4, F.relu, 0.25)
#pred = DotPredictor()

In [810]:
train_pos_g, test_pos_g = positive_sample(g)
#train_neg_g, test_neg_g = negative_sample(g, 'dgl_example')
###########################################################
train_pos_dual_g, test_pos_dual_g = positive_sample(dual_g)
#train_neg_dual_g, test_neg_dual_g = negative_sample(dual_g, 'dgl_example')

In [811]:
print(dual_nodes_dict)
print(dual_edges_dict)
list(LnxG.edges())

{(0, 1): 0, (0, 2): 1, (1, 2): 2, (1, 4): 3, (2, 3): 4, (3, 4): 5, (4, 5): 6, (5, 6): 7}
{(1, 2): 0, (1, 4): 1, (1, 0): 2, (2, 4): 3, (2, 0): 4, (3, 5): 5, (3, 2): 6, (3, 0): 7, (3, 6): 8, (5, 4): 9, (5, 6): 10, (7, 6): 11}


[((5, 6), (4, 5)),
 ((1, 4), (3, 4)),
 ((1, 4), (1, 2)),
 ((1, 4), (0, 1)),
 ((1, 4), (4, 5)),
 ((3, 4), (2, 3)),
 ((3, 4), (4, 5)),
 ((0, 2), (1, 2)),
 ((0, 2), (2, 3)),
 ((0, 2), (0, 1)),
 ((1, 2), (2, 3)),
 ((1, 2), (0, 1))]

In [812]:
def g_u_star(G, LnxG, pos_score_dual):
    node_features = np.zeros((G.number_of_nodes(), 3))
    counts = np.zeros((G.number_of_nodes(), 1))

    for node in list(G.nodes()):
        for i, edge in enumerate(list(LnxG.edges())):
            if (node in edge[0])or(node in edge[1]):
                n1, n2 = dual_nodes_dict[edge[0]], dual_nodes_dict[edge[1]]
                #print(edge, n1, n2)
                try:
                    num_embd_edge_G_star = dual_edges_dict[(n1, n2)]
                    node_features[node] += pos_score_dual[num_embd_edge_G_star].detach().numpy()
                    counts[node] += 1
                    #print(pos_score_dual[num_embd_edge_G_star])
                except: print('NUN', edge)
        #print(counts)
        node_features[node]/=counts[node]
        
    return torch.from_numpy(node_features)

In [813]:
def compute_loss_1(z_u, g_u_star, g_uv, z_uv_star):
    alfa, beta = 0.5, 1.
    return alfa*((z_u - g_u_star)**2).mean()+ beta*((g_uv - z_uv_star)**2).mean()
    #return alfa*F.binary_cross_entropy_with_logits(z_u, g_u_star)

In [815]:
optimizer = torch.optim.Adam(itertools.chain(model.parameters(), FC_net.parameters()), lr=0.01)
optimizer_d = torch.optim.Adam(itertools.chain(model_dual.parameters(), FC_net_dual.parameters()), lr=0.01)

all_logits = []
for e in range(200):
    h = model(train_g, embeddings)                              #z_u
    h_dual = model_dual(train_dual_g, embeddings_dual)          #z_uv^*
    
    pos_score = FC_net(train_g, h)                              #g_uv
    #neg_score = pred(train_neg_g, h)                           #g_uv -
    pos_score_dual = FC_net_dual(train_dual_g, h_dual)          #g_u^*
    #neg_score_dual = pred(train_neg_dual_g, h_dual)            #g_u^* -
    g_u_s = g_u_star(G, LnxG, pos_score_dual)
    #loss = compute_loss(pos_score, neg_score) 
    loss = compute_loss_1(h, g_u_s, pos_score, h_dual)
    
    # print(h.shape)
    # print(h_dual.shape)
    # print(pos_score.shape)
    # print(pos_score_dual.shape)
    # print(g_u_s.shape)
    # print(loss, F.mse_loss(h, g_u_s), F.mse_loss(pos_score, h_dual))
    
    optimizer.zero_grad()
    optimizer_d.zero_grad()
    loss.backward()
    optimizer.step()
    optimizer_d.step()

    if e % 5 == 0:
        print('In epoch {}, loss: {}'.format(e, loss))


from sklearn.metrics import roc_auc_score
#with torch.no_grad():
    #pos_score = pred(test_pos_g, h)
    #neg_score = pred(test_neg_g, h)
    #print('AUC', compute_auc(pos_score, neg_score))
    
# torch.Size([7, 3])
# torch.Size([8, 3])
# torch.Size([8, 3])
# torch.Size([12, 3])


In epoch 0, loss: 0.01277326616859916
In epoch 5, loss: 0.01681630470207198
In epoch 10, loss: 0.010242130856540651
In epoch 15, loss: 0.008550427610994438
In epoch 20, loss: 0.007389001131371489
In epoch 25, loss: 0.00577109339677789
In epoch 30, loss: 0.004620463219397593
In epoch 35, loss: 0.003951856506024801
In epoch 40, loss: 0.0033986713514497155
In epoch 45, loss: 0.0029529082138656086
In epoch 50, loss: 0.002652940439605037
In epoch 55, loss: 0.002427889413865857
In epoch 60, loss: 0.0022473620735191245
In epoch 65, loss: 0.0020936153244302718
In epoch 70, loss: 0.001961116767713322
In epoch 75, loss: 0.0018484154059261464
In epoch 80, loss: 0.0017473560971392364
In epoch 85, loss: 0.0016541980818947403
In epoch 90, loss: 0.0015641118044984817
In epoch 95, loss: 0.0014805688572663653
In epoch 100, loss: 0.0014033087895842958
In epoch 105, loss: 0.0013311204580587922
In epoch 110, loss: 0.0012619693389666693
In epoch 115, loss: 0.0011965469031552667
In epoch 120, loss: 0.001135

In [19]:
##################################################################################################################################################################

In [480]:
h

tensor([[ 0.1895,  0.2487, -0.6351],
        [-0.4397, -0.1313, -0.2609],
        [-0.0283,  0.1321, -0.2620],
        [-0.1150, -0.0431, -0.2199],
        [ 0.0915,  0.4219, -0.5364],
        [-0.4543,  0.4381, -1.2338],
        [-0.2357, -0.0734, -0.5800]], grad_fn=<AddBackward0>)

In [481]:
h_dual

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 4.4662e-01,  2.7341e-01,  1.8396e-02],
        [ 1.6291e+00,  4.9753e-01, -6.8852e-02],
        [ 1.1003e+00,  2.7436e-04,  6.5087e-02],
        [-8.0129e-02, -4.1977e-01, -4.7048e-01],
        [-4.8734e-02, -3.3388e-01, -2.9173e-01],
        [ 9.1707e-02, -5.8726e-01, -5.1457e-01],
        [-3.4101e-01,  6.5786e-01,  3.7019e-01]], grad_fn=<AddBackward0>)

In [483]:
pos_score

tensor([[-0.0212,  0.0677, -0.1386],
        [-0.0866,  0.1284, -0.1042],
        [ 0.1637,  0.1365, -0.0231],
        [ 0.1685,  0.1659,  0.0049],
        [ 0.0629,  0.0803, -0.0989],
        [-0.0652,  0.0914, -0.1452],
        [-0.0118,  0.0758, -0.1214],
        [ 0.0554,  0.0850, -0.0968]], grad_fn=<SqueezeBackward1>)

In [482]:
embeddings_dual

tensor([[-0.0179,  0.0079,  0.1701],
        [ 0.3003, -0.3101, -0.2372],
        [ 0.2153,  0.2991, -0.1672],
        [-0.1254,  0.2460, -0.0511],
        [-0.1512,  0.2185, -0.1620],
        [-0.0605,  0.0959,  0.0331],
        [-0.2762, -0.3150,  0.2437],
        [ 0.1690,  0.2253,  0.0254]])

In [40]:
optimizer = torch.optim.Adam(itertools.chain(model.parameters(), pred.parameters()), lr=0.01)

all_logits = []
for e in range(100):
    h = model(train_dual_g, embeddings)  #train_g.ndata['feat'])
    pos_score = pred(train_pos_g, h)
    neg_score = pred(train_neg_g, h)
    loss = compute_loss(pos_score, neg_score)
    
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if e % 5 == 0:
        print('In epoch {}, loss: {}'.format(e, loss))


from sklearn.metrics import roc_auc_score
with torch.no_grad():
    pos_score = pred(test_pos_g, h)
    neg_score = pred(test_neg_g, h)
    print('AUC', compute_auc(pos_score, neg_score))

In epoch 0, loss: 0.706829845905304
In epoch 5, loss: 0.6571868062019348
In epoch 10, loss: 0.6407327055931091
In epoch 15, loss: 0.6243399381637573
In epoch 20, loss: 0.6006797552108765
In epoch 25, loss: 0.5711714625358582
In epoch 30, loss: 0.546612560749054
In epoch 35, loss: 0.5299765467643738
In epoch 40, loss: 0.5167433023452759
In epoch 45, loss: 0.5066409111022949
In epoch 50, loss: 0.4984222650527954
In epoch 55, loss: 0.49076011776924133
In epoch 60, loss: 0.48473072052001953
In epoch 65, loss: 0.48049843311309814
In epoch 70, loss: 0.4720494747161865
In epoch 75, loss: 0.4649830758571625
In epoch 80, loss: 0.4580879807472229
In epoch 85, loss: 0.44973331689834595
In epoch 90, loss: 0.44106853008270264
In epoch 95, loss: 0.4309082329273224
AUC 0.8564272029271526


In [None]:
train_g = dgl.remove_edges(g, eids[:1000], ) #subgraph
train_g = dgl.remove_nodes(train_g, range(2000))
print(train_g)

label2 = train_g.ndata['label']
nx_G2 = train_g.to_networkx()

#visualize(label2, nx_G2)

# TRASH

In [None]:
'''
data_dir = os.path.expanduser("~/data/cora")

data_dir
#edgelist = pd.read_csv(os.path.join(data_dir, "cora.cites"), sep='\t', header=None, names=["target", "source"])

edgelist = pd.read_csv(f"./data/cora/cora.cites", sep='\t', header=None, names=["target", "source"])
edgelist["label"] = "cites"

edgelist.sample(frac=1).head(5)

Gnx = nx.from_pandas_edgelist(edgelist, edge_attr="label")
#nx.set_node_attributes(Gnx, "paper", "label")

In [572]:
nodelist = sorted(G.nodes())
adj_mat = sp.csr_matrix(nx.to_numpy_matrix(G, nodelist))
args_d = LnxG.number_of_nodes()
edgelist = list(LnxG.nodes())

edge_embs = h_dual
enum = {node: num for num, node in enumerate(list(G.nodes()))}
print(enum)
num_nodes = adj_mat.shape[0]
node_features = np.zeros((num_nodes, args_d))
counts = np.ones(num_nodes)
for i, edge in enumerate(edgelist):
    # средние по эмбеддингам рёбер, связанных с данной вершиной
    print(i, edge)
    u = enum[edge[0]]; v = enum[edge[1]]
    print(u, v)
    node_features[u, :] += edge_embs[i, :]
    node_features[v, :] -= edge_embs[i, :]
    counts[u] += 1; counts[v] += 1
node_features /= counts[:, np.newaxis]

{0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 6}
0 (5, 6)
5 6


TypeError: Concatenation operation is not implemented for NumPy arrays, use np.concatenate() instead. Please do not rely on this error; it may not be given on all Python implementations.