<a href="https://colab.research.google.com/github/AnasAito/Geom-viz/blob/master/geom_viz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [100]:
## libraries 
!pip install dgl
!pip install pyvis
! pip install node2vec

Collecting node2vec
  Downloading https://files.pythonhosted.org/packages/87/c4/8e859a1099d78dbb00b25c6832b8ee9fe11110cc7f2f3a6a4bd37ada3185/node2vec-0.4.3.tar.gz
Building wheels for collected packages: node2vec
  Building wheel for node2vec (setup.py) ... [?25l[?25hdone
  Created wheel for node2vec: filename=node2vec-0.4.3-cp37-none-any.whl size=5980 sha256=8fe776547178d905360d3a31d84c3ac5c708e0f9916196744007ee7e72ec5767
  Stored in directory: /root/.cache/pip/wheels/34/f4/60/55708b5e797260310451d919c8f898bbd4acd1901367001f20
Successfully built node2vec
Installing collected packages: node2vec
Successfully installed node2vec-0.4.3


In [15]:
## vanilla GCN network
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

## message-passing primitives
# Message preparation: every edge copies the 'h' feature of its source node
# into the message field 'm'. `fn.copy_u` is the current name of this builtin;
# `fn.copy_src` is its deprecated alias.
gcn_msg = fn.copy_u('h', 'm')
# Message aggregation: every node sums its incoming 'm' messages into 'h'.
gcn_reduce = fn.sum(msg='m', out='h')

class GCNLayer(nn.Module):
    """Single graph-convolution step: aggregate over the graph, then project.

    The aggregation is delegated to ``g.update_all(gcn_msg, gcn_reduce)``;
    the result is passed through a learnable ``nn.Linear`` layer.
    """

    def __init__(self, in_feats, out_feats):
        super().__init__()
        # Learnable projection applied after the aggregation step.
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, feature):
        """Aggregate ``feature`` over ``g`` and return the linear projection.

        ``g`` must provide ``local_scope()``, ``ndata`` and ``update_all``;
        ``feature`` holds one row per node.
        """
        # Work inside a local scope so the temporary 'h' node field written
        # below is discarded again once the block is left.
        with g.local_scope():
            # Seed the node field with the layer input.
            g.ndata['h'] = feature
            # Run one round of message passing.
            g.update_all(gcn_msg, gcn_reduce)
            # Grab the aggregated features before the scope is closed.
            aggregated = g.ndata['h']
        return self.linear(aggregated)

class Net(nn.Module):
    """Two-layer GCN classifier built from two ``GCNLayer`` modules.

    Parameters
    ----------
    in_feats : int, default 1433
        Size of the input node-feature vectors.
    hidden_feats : int, default 16
        Width of the hidden representation between the two layers.
    num_classes : int, default 7
        Number of output logits per node.

    The defaults reproduce the sizes that were previously hard-coded, so
    ``Net()`` builds exactly the same architecture as before.
    """

    def __init__(self, in_feats=1433, hidden_feats=16, num_classes=7):
        super(Net, self).__init__()
        self.layer1 = GCNLayer(in_feats, hidden_feats)
        self.layer2 = GCNLayer(hidden_feats, num_classes)

    def forward(self, g, features):
        """Return per-node class logits (no softmax applied) for graph ``g``."""
        hidden = F.relu(self.layer1(g, features))
        return self.layer2(g, hidden)
# Build a network instance and print its module tree as a quick sanity check.
net = Net()
print(net)

Net(
  (layer1): GCNLayer(
    (linear): Linear(in_features=1433, out_features=16, bias=True)
  )
  (layer2): GCNLayer(
    (linear): Linear(in_features=16, out_features=7, bias=True)
  )
)


In [5]:
## simple train and evaluate loops 
from dgl.data import citation_graph as citegrh
import networkx as nx
from dgl.data import citation_graph as citegrh
import networkx as nx
def load_cora_data():
    """Load the Cora citation dataset through DGL.

    Returns
    -------
    tuple
        ``(g, features, labels, train_mask, test_mask)`` where ``g`` is a
        ``DGLGraph`` and the remaining items are torch tensors
        (float features, long labels, boolean masks).
    """
    cora = citegrh.load_cora()
    # NOTE(review): `.features`, `.labels`, `.train_mask`, `.test_mask` and
    # `.graph` look like the legacy DGL dataset accessors -- confirm they are
    # still available in the installed DGL version.
    tensors = (
        th.FloatTensor(cora.features),
        th.LongTensor(cora.labels),
        th.BoolTensor(cora.train_mask),
        th.BoolTensor(cora.test_mask),
    )
    return (DGLGraph(cora.graph),) + tensors

def evaluate(model, g, features, labels, mask):
    """Return the accuracy of ``model`` on the nodes selected by ``mask``.

    The model is switched to eval mode and run without gradient tracking.
    The result is a Python float: correct predictions / number of masked nodes.
    """
    model.eval()
    with th.no_grad():
        masked_logits = model(g, features)[mask]
        targets = labels[mask]
        # Predicted class = index of the largest logit in each row.
        predicted = th.max(masked_logits, dim=1)[1]
        n_correct = th.sum(predicted == targets).item()
        return n_correct * 1.0 / len(targets)



In [74]:
import time
import numpy as np
# Load the data, build a fresh model and train it with Adam.
g, features, labels, train_mask, test_mask = load_cora_data()
net = Net()
optimizer = th.optim.Adam(net.parameters(), lr=1e-2)

# Wall-clock duration of each timed training step. The first three epochs are
# treated as warm-up and are not timed.
dur = []
for epoch in range(50):
    if epoch >= 3:
        t0 = time.time()

    # One full-batch optimisation step on the training nodes.
    net.train()
    logits = net(g, features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= 3:
        dur.append(time.time() - t0)

    acc = evaluate(net, g, features, labels, test_mask)
    acc_train = evaluate(net, g, features, labels, train_mask)
    # np.mean([]) emits a "Mean of empty slice" RuntimeWarning during the
    # warm-up epochs; report NaN explicitly instead (the printed text is the same).
    mean_dur = np.mean(dur) if dur else float('nan')
    print("Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} |Train Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, acc_train, mean_dur))

  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done loading data from cached files.
Epoch 00000 | Loss 1.9606 | Test Acc 0.1400 |Train Acc 0.1857 | Time(s) nan


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch 00001 | Loss 1.8606 | Test Acc 0.2440 |Train Acc 0.2857 | Time(s) nan
Epoch 00002 | Loss 1.7491 | Test Acc 0.2790 |Train Acc 0.3429 | Time(s) nan
Epoch 00003 | Loss 1.6479 | Test Acc 0.3210 |Train Acc 0.4071 | Time(s) 0.0309
Epoch 00004 | Loss 1.5571 | Test Acc 0.3680 |Train Acc 0.5000 | Time(s) 0.0319
Epoch 00005 | Loss 1.4727 | Test Acc 0.4320 |Train Acc 0.5714 | Time(s) 0.0316
Epoch 00006 | Loss 1.3959 | Test Acc 0.5010 |Train Acc 0.6714 | Time(s) 0.0316
Epoch 00007 | Loss 1.3252 | Test Acc 0.5590 |Train Acc 0.7357 | Time(s) 0.0321
Epoch 00008 | Loss 1.2581 | Test Acc 0.6020 |Train Acc 0.7857 | Time(s) 0.0323
Epoch 00009 | Loss 1.1927 | Test Acc 0.6210 |Train Acc 0.7929 | Time(s) 0.0323
Epoch 00010 | Loss 1.1290 | Test Acc 0.6320 |Train Acc 0.8071 | Time(s) 0.0326
Epoch 00011 | Loss 1.0661 | Test Acc 0.6340 |Train Acc 0.8214 | Time(s) 0.0324
Epoch 00012 | Loss 1.0030 | Test Acc 0.6440 |Train Acc 0.8429 | Time(s) 0.0322
Epoch 00013 | Loss 0.9412 | Test Acc 0.6610 |Train Acc 0.8

In [60]:

from pyvis.network import Network
import networkx as nx


# pyvis canvas created in notebook mode; the two positional arguments are
# presumably (height, width) -- confirm against the pyvis `Network` signature.
g_render = Network('500px', '800px',notebook=True)

# filter edge_data 
def get_edges(g,labels , mask , with_mask = False):
    """Return the edges of ``g`` as an iterator of ``(src, dst)`` node-id pairs.

    Parameters
    ----------
    g : graph exposing ``all_edges()`` and ``nodes()`` (both returning tensors)
    labels : unused; kept so existing callers keep working
    mask : boolean node mask, only read when ``with_mask`` is True
    with_mask : bool, default False
        When True, keep only the edges whose source OR destination node is
        selected by ``mask``. Edge order is preserved.

    Returns
    -------
    zip
        A single-use iterator over ``(src, dst)`` pairs.
    """
    src, dst = g.all_edges()
    src, dst = src.detach().numpy(), dst.detach().numpy()

    if with_mask:
        filtred_nodes = g.nodes()[mask].detach().numpy()
        # Vectorised membership test; replaces a per-edge `x in ndarray` scan,
        # which cost O(num_edges * num_nodes) in a Python loop.
        keep = np.isin(src, filtred_nodes) | np.isin(dst, filtred_nodes)
        src, dst = src[keep], dst[keep]

    return zip(src, dst)

def get_eval_label(model, g, features, labels, mask):
    """Return a boolean tensor flagging, per node, whether the prediction is correct.

    The model is run in eval mode without gradient tracking on the whole
    graph. ``mask`` is accepted but not used: every node is scored.
    """
    model.eval()
    with th.no_grad():
        # Predicted class = index of the largest logit in each row.
        predicted = th.max(model(g, features), dim=1)[1]
        return predicted == labels
# Edges that touch at least one training node.
edge_data = get_edges(g, labels, mask=train_mask, with_mask=True)
# Per-node flag: True where the trained network predicts the right class.
group_list = get_eval_label(net, g, features, labels, test_mask).detach().numpy()
# populates the nodes and edges data structures
for e in edge_data:
    src = str(e[0])
    dst = str(e[1])
    # The pyvis group is the string 'True' / 'False' of the correctness flag.
    label_src = str(group_list[e[0]])
    label_dst = str(group_list[e[1]])
    g_render.add_node(src, title=src, group=label_src)
    g_render.add_node(dst, title=dst, group=label_dst)
    g_render.add_edge(src, dst)

g_render.show_buttons(filter_=['physics'])
# Write and read the HTML through the same relative path, so the cell does not
# depend on the Colab-specific '/content' working directory.
html_path = 'nx.html'
g_render.show(html_path)
import IPython
IPython.display.HTML(filename=html_path)

In [64]:
# Display the label tensor that was returned by load_cora_data().
labels

tensor([3, 4, 4,  ..., 3, 3, 3])

In [98]:
# Scatter plot + prediction errors on the test set
## Build a subgraph from a subset of the edges
### Edge mask: keep an edge if its src OR its dst node is selected

def get_edges_mask(g,nodes_mask):
    """Return one boolean per edge of ``g``, in ``g.all_edges()`` order.

    An edge is flagged True when its source OR its destination node is
    selected by the boolean node mask ``nodes_mask``.

    Returns
    -------
    list of bool
    """
    src, dst = g.all_edges()
    src, dst = src.detach().numpy(), dst.detach().numpy()
    filtred_nodes = g.nodes()[nodes_mask].detach().numpy()
    # Vectorised membership test; replaces a per-edge `x in ndarray` scan,
    # which cost O(num_edges * num_nodes) in a Python loop.
    keep = np.isin(src, filtred_nodes) | np.isin(dst, filtred_nodes)
    return keep.tolist()
mask = test_mask
# Keep every edge that touches at least one node selected by `mask`.
edges_mask = th.tensor(get_edges_mask(g, mask))
sub_g = dgl.edge_subgraph(g, edges_mask)
# to nx
sub_g_nx = sub_g.to_networkx().to_undirected()

## add node attrs
# Colour of every masked node, keyed by its id in the ORIGINAL graph:
# green = predicted correctly, red = misclassified.
sub_g_labels = get_eval_label(net, g, features, labels, mask)[mask].detach().numpy()
sub_g_labels = [('green' if lab else 'red') for lab in sub_g_labels]
filtred_nodes = g.nodes()[mask].detach().numpy()
color_by_parent_id = dict(zip(filtred_nodes.tolist(), sub_g_labels))

# dgl.edge_subgraph relabels the kept nodes to consecutive ids and stores the
# original ids in ndata[dgl.NID]. The colours therefore have to be translated
# to subgraph ids; keying them by original id would colour the wrong nodes.
parent_ids = sub_g.ndata[dgl.NID].detach().numpy().tolist()
group_attr = {
    sub_id: color_by_parent_id[parent_id]
    for sub_id, parent_id in enumerate(parent_ids)
    if parent_id in color_by_parent_id  # nodes outside `mask` get no colour
}
nx.set_node_attributes(sub_g_nx, group_attr, "color")

# relabel: the graph is rendered with string node ids
int2label = {i: str(i) for i in sub_g_nx.nodes()}
sub_g_nx = nx.relabel_nodes(sub_g_nx, int2label)


# render
g_render = Network('500px', '800px', notebook=True)
g_render.from_nx(sub_g_nx)
# Write and read the HTML through the same relative path, so the cell does not
# depend on the Colab-specific '/content' working directory.
html_path = 't.html'
g_render.show(html_path)
import IPython
IPython.display.HTML(filename=html_path)

In [102]:
# node embedding
import networkx as nx
from node2vec import Node2Vec

# Graph to embed: the string-labelled networkx subgraph built in the previous cell.
graph = sub_g_nx

# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
# Settings used here: 64-dimensional embeddings, walk_length=10, num_walks=50, 4 workers.
node2vec = Node2Vec(graph, dimensions=64, walk_length=10, num_walks=50, workers=4)  # Use temp_folder for big graphs

# Embed nodes. The fitted model exposes the vectors through `model.wv` (read in the next cell).
# NOTE(review): `window=3` is presumably forwarded to the underlying word2vec trainer -- confirm.
model = node2vec.fit(window=3,) 

HBox(children=(FloatProgress(value=0.0, description='Computing transition probabilities', max=2190.0, style=Pr…




In [107]:
import numpy as np
from sklearn.manifold import TSNE
# Project the node-embedding matrix down to 2-D for plotting.
X = model.wv.vectors
# t-SNE is stochastic: fix random_state so that re-running this cell on the
# same embeddings yields the same layout.
X_embedded = TSNE(n_components=2, random_state=0).fit_transform(X)

In [108]:
import plotly.express as px
# Scatter plot of the 2-D t-SNE projection; title and axis labels are set so
# the figure can be read on its own.
fig = px.scatter(
    x=X_embedded[:,0],
    y=X_embedded[:,1],
    title="t-SNE projection of the node2vec embeddings",
    labels={"x": "t-SNE dimension 1", "y": "t-SNE dimension 2"},
)
fig.show()

# dgl to nx code 