<a href="https://colab.research.google.com/github/AnasAito/Geom-viz/blob/master/geom_viz.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
## libraries 
!pip install dgl
!pip install pyvis
! pip install node2vec

Collecting dgl
[?25l  Downloading https://files.pythonhosted.org/packages/71/c4/ce24841375cf4393787dbf9a645e271c19a03d2d9a0e5770b08ba76bcfde/dgl-0.6.1-cp37-cp37m-manylinux1_x86_64.whl (4.4MB)
[K     |████████████████████████████████| 4.4MB 13.6MB/s 
Installing collected packages: dgl
Successfully installed dgl-0.6.1
Collecting pyvis
  Downloading https://files.pythonhosted.org/packages/07/d1/e87844ec86e96df7364f21af2263ad6030c0d727660ae89935c7af56a540/pyvis-0.1.9-py3-none-any.whl
Collecting jsonpickle>=1.4.1
  Downloading https://files.pythonhosted.org/packages/bb/1a/f2db026d4d682303793559f1c2bb425ba3ec0d6fd7ac63397790443f2461/jsonpickle-2.0.0-py2.py3-none-any.whl
Installing collected packages: jsonpickle, pyvis
Successfully installed jsonpickle-2.0.0 pyvis-0.1.9
Collecting node2vec
  Downloading https://files.pythonhosted.org/packages/87/c4/8e859a1099d78dbb00b25c6832b8ee9fe11110cc7f2f3a6a4bd37ada3185/node2vec-0.4.3.tar.gz
Building wheels for collected packages: node2vec
  Building w

In [2]:
## vanilla GCN network
import dgl
import dgl.function as fn
import torch as th
import torch.nn as nn
import torch.nn.functional as F
from dgl import DGLGraph

## Message-passing functions (DGL built-ins) shared by every GCN layer below.
# Message preparation: each edge carries the source node's 'h' feature as message 'm'.
gcn_msg = fn.copy_src(src='h', out='m')
# Message aggregation: each node sums its incoming 'm' messages into its 'h' feature.
gcn_reduce = fn.sum(msg='m', out='h')

class GCNLayer(nn.Module):
    """One graph-convolution step: aggregate neighbour features, then project.

    Aggregation relies on the module-level ``gcn_msg`` / ``gcn_reduce``
    message-passing functions; the projection is a single ``nn.Linear``.

    Args:
        in_feats: size of each input node feature vector.
        out_feats: size of each output node feature vector.
    """

    def __init__(self, in_feats, out_feats):
        super().__init__()
        self.linear = nn.Linear(in_feats, out_feats)

    def forward(self, g, feature):
        """Return the projected, neighbour-aggregated features for graph ``g``.

        Args:
            g: graph object exposing ``local_scope``, ``ndata`` and ``update_all``.
            feature: node features written to ``g.ndata['h']`` before aggregation.
        """
        # Anything stored in ndata/edata inside the local scope (here 'h')
        # is discarded when the scope exits, so `g` is left untouched.
        with g.local_scope():
            g.ndata['h'] = feature
            # Run one round of message passing; the result lands back in 'h'.
            g.update_all(gcn_msg, gcn_reduce)
            aggregated = g.ndata['h']
        # Linear projection of the aggregated features.
        return self.linear(aggregated)

class Net(nn.Module):
    """Two-layer GCN classifier: GCNLayer -> ReLU -> GCNLayer.

    The layer sizes used to be hard-coded; they are now constructor
    parameters whose defaults reproduce the original network, so ``Net()``
    behaves exactly as before.

    Args:
        in_feats: input feature size per node (default 1433).
        hidden_feats: width of the hidden layer (default 16).
        num_classes: number of output logits per node (default 7).
    """

    def __init__(self, in_feats=1433, hidden_feats=16, num_classes=7):
        super(Net, self).__init__()
        self.layer1 = GCNLayer(in_feats, hidden_feats)
        self.layer2 = GCNLayer(hidden_feats, num_classes)

    def forward(self, g, features):
        """Return per-node class logits (no softmax applied)."""
        x = F.relu(self.layer1(g, features))
        x = self.layer2(g, x)
        return x
# Instantiate the model once and print its layer structure as a sanity check.
net = Net()
print(net)

Setting the default backend to "pytorch". You can change it in the ~/.dgl/config.json file or export the DGLBACKEND environment variable.  Valid options are: pytorch, mxnet, tensorflow (all lowercase)


DGL backend not selected or invalid.  Assuming PyTorch for now.
Using backend: pytorch


Net(
  (layer1): GCNLayer(
    (linear): Linear(in_features=1433, out_features=16, bias=True)
  )
  (layer2): GCNLayer(
    (linear): Linear(in_features=16, out_features=7, bias=True)
  )
)


In [3]:
## simple train and evaluate loops 
from dgl.data import citation_graph as citegrh
import networkx as nx
from dgl.data import citation_graph as citegrh
import networkx as nx
def load_cora_data():
    """Load the Cora citation dataset through ``citegrh.load_cora()``.

    Returns:
        A 5-tuple ``(g, features, labels, train_mask, test_mask)`` where ``g``
        is a ``DGLGraph`` built from the dataset's graph, ``features`` is a
        float tensor, ``labels`` a long tensor and both masks bool tensors.
    """
    dataset = citegrh.load_cora()
    # Convert the dataset fields to torch tensors of the expected dtypes.
    node_tensors = (
        th.FloatTensor(dataset.features),
        th.LongTensor(dataset.labels),
        th.BoolTensor(dataset.train_mask),
        th.BoolTensor(dataset.test_mask),
    )
    return (DGLGraph(dataset.graph),) + node_tensors

def evaluate(model, g, features, labels, mask):
    """Return the accuracy of ``model`` on the nodes selected by ``mask``.

    The model is switched to eval mode and run without gradient tracking.
    A node counts as correct when the index of its largest logit equals its
    label; the result is ``correct / number_of_selected_nodes`` as a float.
    """
    model.eval()
    with th.no_grad():
        masked_logits = model(g, features)[mask]
        masked_labels = labels[mask]
        # Index of the largest logit per row = predicted class.
        predicted = th.max(masked_logits, dim=1).indices
        n_correct = th.sum(predicted == masked_labels).item()
        return float(n_correct) / len(masked_labels)



In [4]:
import time
import numpy as np
# Training hyper-parameters (previously inline magic numbers).
N_EPOCHS = 50
N_WARMUP_EPOCHS = 3   # the first epochs are excluded from the timing average
LEARNING_RATE = 1e-2

g, features, labels, train_mask, test_mask = load_cora_data()
net = Net()
optimizer = th.optim.Adam(net.parameters(), lr=LEARNING_RATE)

dur = []
for epoch in range(N_EPOCHS):
    t0 = time.time()

    # Full-batch forward pass; the loss is computed on the training nodes only.
    net.train()
    logits = net(g, features)
    logp = F.log_softmax(logits, 1)
    loss = F.nll_loss(logp[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if epoch >= N_WARMUP_EPOCHS:
        dur.append(time.time() - t0)

    acc = evaluate(net, g, features, labels, test_mask)
    acc_train = evaluate(net, g, features, labels, train_mask)
    # np.mean([]) emits a "Mean of empty slice" RuntimeWarning during warm-up;
    # report NaN explicitly instead (the printed text is unchanged: "nan").
    mean_dur = np.mean(dur) if dur else float('nan')
    print("Epoch {:05d} | Loss {:.4f} | Test Acc {:.4f} |Train Acc {:.4f} | Time(s) {:.4f}".format(
            epoch, loss.item(), acc, acc_train, mean_dur))

Downloading /root/.dgl/cora_v2.zip from https://data.dgl.ai/dataset/cora_v2.zip...
Extracting file to /root/.dgl/cora_v2
Finished data loading and preprocessing.
  NumNodes: 2708
  NumEdges: 10556
  NumFeats: 1433
  NumClasses: 7
  NumTrainingSamples: 140
  NumValidationSamples: 500
  NumTestSamples: 1000
Done saving data into cached files.


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Epoch 00000 | Loss 1.9466 | Test Acc 0.3610 |Train Acc 0.2786 | Time(s) nan
Epoch 00001 | Loss 1.8027 | Test Acc 0.3800 |Train Acc 0.3714 | Time(s) nan
Epoch 00002 | Loss 1.6946 | Test Acc 0.4940 |Train Acc 0.4929 | Time(s) nan
Epoch 00003 | Loss 1.5949 | Test Acc 0.5880 |Train Acc 0.6071 | Time(s) 0.0349
Epoch 00004 | Loss 1.5015 | Test Acc 0.6510 |Train Acc 0.6857 | Time(s) 0.0356
Epoch 00005 | Loss 1.4218 | Test Acc 0.6510 |Train Acc 0.7071 | Time(s) 0.0350
Epoch 00006 | Loss 1.3509 | Test Acc 0.6540 |Train Acc 0.7429 | Time(s) 0.0346
Epoch 00007 | Loss 1.2859 | Test Acc 0.6600 |Train Acc 0.7786 | Time(s) 0.0345
Epoch 00008 | Loss 1.2248 | Test Acc 0.6710 |Train Acc 0.7929 | Time(s) 0.0346
Epoch 00009 | Loss 1.1650 | Test Acc 0.6810 |Train Acc 0.7929 | Time(s) 0.0344
Epoch 00010 | Loss 1.1067 | Test Acc 0.6860 |Train Acc 0.8214 | Time(s) 0.0343
Epoch 00011 | Loss 1.0510 | Test Acc 0.7000 |Train Acc 0.8429 | Time(s) 0.0342
Epoch 00012 | Loss 0.9975 | Test Acc 0.7060 |Train Acc 0.8571

In [5]:

from pyvis.network import Network
import networkx as nx


# pyvis canvas for the interactive graph view; notebook=True enables inline
# rendering. '500px' / '800px' are presumably height / width -- confirm
# against the pyvis Network signature.
g_render = Network('500px', '800px',notebook=True)

# filter edge_data 
def get_edges(g, labels, mask, with_mask=False):
    """Return the edges of ``g`` as an iterator of ``(src, dst)`` node-id pairs.

    Args:
        g: graph exposing ``all_edges()`` (two id tensors) and ``nodes()``.
        labels: unused; kept for backward compatibility with existing callers.
        mask: boolean node mask, only read when ``with_mask`` is True.
        with_mask: when True, keep only edges whose source OR destination
            node is selected by ``mask``; otherwise return every edge.

    Returns:
        A single-use ``zip`` iterator over ``(src, dst)`` numpy integer pairs.
    """
    src, dst = g.all_edges()
    src, dst = src.detach().numpy(), dst.detach().numpy()

    if with_mask:
        filtred_nodes = g.nodes()[mask].detach().numpy()
        # Vectorised membership test: the previous per-edge `in ndarray`
        # check rescanned the whole node array for every edge.
        keep = np.isin(src, filtred_nodes) | np.isin(dst, filtred_nodes)
        src, dst = src[keep], dst[keep]

    return zip(src, dst)

def get_eval_label(model, g, features, labels, mask=None):
    """Return a per-node boolean tensor: True where the prediction is correct.

    The model is put in eval mode and run without gradient tracking; a node is
    correct when the index of its largest logit equals its label.

    Args:
        model: callable as ``model(g, features)`` returning per-node logits.
        g: graph passed through to the model.
        features: node features passed through to the model.
        labels: ground-truth class index per node.
        mask: ignored -- the result always covers every node, and callers
            index it themselves. Kept (now optional) for backward
            compatibility.
    """
    model.eval()
    with th.no_grad():
        logits = model(g, features)
        _, indices = th.max(logits, dim=1)
        return indices == labels
# Output file for the interactive view. A relative path is used for both the
# write and the read so the cell no longer depends on Colab's /content dir.
GRAPH_HTML = 'nx.html'

# Keep only edges that touch at least one training node.
edge_data = get_edges(g, labels, mask=train_mask, with_mask=True)
# get_eval_label does not use its mask argument, so group_list holds one
# correct/incorrect flag for every node of g and can be indexed by node id.
group_list = get_eval_label(net, g, features, labels, test_mask).detach().numpy()

# Populate the pyvis nodes and edges; nodes are grouped by whether the model
# classified them correctly ('True' / 'False').
for e in edge_data:
    src = str(e[0])
    dst = str(e[1])
    label_src = str(group_list[e[0]])
    label_dst = str(group_list[e[1]])
    g_render.add_node(src, title=src, group=label_src)
    g_render.add_node(dst, title=dst, group=label_dst)
    g_render.add_edge(src, dst)

g_render.show_buttons(filter_=['physics'])
g_render.show(GRAPH_HTML)
import IPython.display
IPython.display.HTML(filename=GRAPH_HTML)

In [6]:

## subgraph on edges 

### edge mask: keep an edge when its src OR its dst is in the node mask

def get_edges_mask(g, nodes_mask):
    """Return a boolean tensor with one entry per edge of ``g``.

    An entry is True when the edge's source OR destination node is selected by
    ``nodes_mask``. Entries follow the order returned by ``g.all_edges()``.

    Args:
        g: graph exposing ``all_edges()`` (two id tensors) and ``nodes()``.
        nodes_mask: boolean mask over the nodes of ``g``.
    """
    src, dst = g.all_edges()
    src, dst = src.detach().numpy(), dst.detach().numpy()
    filtred_nodes = g.nodes()[nodes_mask].detach().numpy()
    # Vectorised membership test instead of a per-edge `in ndarray` scan; it
    # also yields a bool tensor (not a float one) when the graph has no edges.
    keep = np.isin(src, filtred_nodes) | np.isin(dst, filtred_nodes)
    return th.tensor(keep)
# Build the subgraph of edges that touch at least one test node.
mask = test_mask
edges_mask = get_edges_mask(g, mask)
### construct sub graph
# preserve_nodes=True presumably keeps every node of g (with its original id)
# in the subgraph, which the id-keyed colour mapping below relies on --
# confirm against the dgl.edge_subgraph documentation for the installed version.
sub_g = dgl.edge_subgraph(g, edges_mask, preserve_nodes=True)
# to nx
sub_g_nx = sub_g.to_networkx().to_undirected()

### add node attrs
# Test nodes are coloured by prediction correctness (green = correct,
# red = wrong); nodes outside the mask get no 'color' attribute.
sub_g_labels = get_eval_label(net, g, features, labels, test_mask)[mask].detach().numpy()
sub_g_labels = [('green' if lab else 'red') for lab in sub_g_labels]
filtred_nodes = g.nodes()[mask].detach().numpy()
zip_iterator = zip(filtred_nodes, sub_g_labels)
group_attr = dict(zip_iterator)
nx.set_node_attributes(sub_g_nx, group_attr, "color")

### delete isolated nodes
sub_g_nx.remove_nodes_from(list(nx.isolates(sub_g_nx)))

### relabel for pyvis: node ids become strings
int2label = {i: str(i) for i in sub_g_nx.nodes()}
sub_g_nx = nx.relabel_nodes(sub_g_nx, int2label)

# render
g_render = Network('500px', '800px', notebook=True)
g_render.from_nx(sub_g_nx)
# Write and read the HTML through the same relative path so the cell no
# longer depends on Colab's /content directory.
g_render.show('t.html')
import IPython.display
IPython.display.HTML(filename='t.html')

In [7]:
# node embedding
import networkx as nx
from node2vec import Node2Vec

# Graph to embed: the test-node subgraph built in the previous cell.
graph = sub_g_nx

# Precompute probabilities and generate walks - **ON WINDOWS ONLY WORKS WITH workers=1**
# 64-dimensional embeddings from 50 walks of length 10, using 4 workers.
# NOTE(review): no seed is set here, so walks (and the embedding) will
# presumably differ between runs -- confirm and seed if reproducibility matters.
node2vec = Node2Vec(graph, dimensions=64, walk_length=10, num_walks=50, workers=4)  # Use temp_folder for big graphs

# Embed nodes: `model` is read later through `model.wv`.
model = node2vec.fit(window=3,) 

HBox(children=(FloatProgress(value=0.0, description='Computing transition probabilities', max=2190.0, style=Pr…




In [8]:
# Per-node marker colour and size, in sub_g_nx.nodes() order.
# Nodes carrying a 'color' attribute keep it and are drawn larger; all other
# nodes are drawn small and black.
colors = []
size = []
for _, attr in sub_g_nx.nodes(data=True):
    # Explicit key test instead of a bare `except:`, which would also have
    # hidden unrelated errors.
    if 'color' in attr:
        colors.append(attr['color'])
        size.append(10)
    else:
        colors.append('black')
        size.append(5)

In [9]:
import numpy as np
from sklearn.manifold import TSNE
# Build the embedding matrix in sub_g_nx.nodes() order so that row i matches
# colors[i] / size[i]. `model.wv.vectors` is ordered by the word2vec
# vocabulary, not by graph node order, so using it directly attaches colours
# and sizes to the wrong points.
node_order = [str(node) for node in sub_g_nx.nodes()]
X = np.array([model.wv[node] for node in node_order])
# Project the embeddings to 2-D for plotting.
X_embedded = TSNE(n_components=2).fit_transform(X)

In [19]:

import pandas as pd
import plotly.graph_objects as go

# 2-D coordinates of every node, plus its colour label, as a DataFrame.
tsne_x, tsne_y = X_embedded[:, 0], X_embedded[:, 1]
dict_ = {'x': tsne_x, 'y': tsne_y, 'label': np.array(colors)}
df = pd.DataFrame(dict_)

# Scatter plot of the projection; marker colour and size come from the
# per-node `colors` / `size` lists.
scatter_trace = go.Scatter(
    x=tsne_x,
    y=tsne_y,
    mode='markers',
    marker=dict(color=colors, size=size),
)
fig = go.Figure(data=scatter_trace)

fig.show()

In [38]:
# Probe a single grid cell: count the points with x in [60, 65) and
# y in [-60, -55).
pair1, pair2 = (60, 65), (-60, -55)

x_lo, x_hi = pair1
filter_x = df[(df['x'] >= x_lo) & (df['x'] < x_hi)]
y_lo, y_hi = pair2
filter_xy = filter_x[(filter_x['y'] >= y_lo) & (filter_x['y'] < y_hi)]
len(filter_xy)

0

In [45]:
import plotly.graph_objects as go
import numpy as np



# Bin edges of the error grid (step 5).
# NOTE(review): the ranges are hard-coded; points falling outside
# [-70, 65) x [-70, 75) are not counted -- confirm they cover X_embedded.
xe = np.arange(-70, 70, 5)
ye = np.arange(-70, 80, 5)

## compute error matrix
# Consecutive edges form half-open bins [lo, hi).
x_pairs = list(zip(xe[:-1], xe[1:]))
y_pairs = list(zip(ye[:-1], ye[1:]))

# Only misclassified ('red') points contribute to the error count, so filter
# them once up front instead of counting labels inside a bare try/except.
red_points = df[df['label'] == 'red']

# error_matrix[i][j] = number of red points in x-bin i and y-bin j.
error_matrix = []
for x_lo, x_hi in x_pairs:
    red_in_x = red_points[(red_points['x'] >= x_lo) & (red_points['x'] < x_hi)]
    error = []
    for y_lo, y_hi in y_pairs:
        in_y = (red_in_x['y'] >= y_lo) & (red_in_x['y'] < y_hi)
        error.append(int(in_y.sum()))
    error_matrix.append(error)

# Plotly's Heatmap reads z row-by-row along y (z[j][i] is the cell at y-bin j,
# x-bin i), so the x-major error_matrix has to be transposed; otherwise the
# heatmap does not line up with the scatter overlay.
fig = go.Figure(data=go.Heatmap(
          x=xe,
          y=ye,
          z=np.array(error_matrix).T,
          colorscale='Reds'))

# Overlay the individual points on top of the error density.
fig.add_trace(go.Scatter(
    mode='markers',
    x=X_embedded[:, 0], y=X_embedded[:, 1], marker=dict(color=colors)
))

fig.show()

In [46]:
# Sanity check: error_matrix is (number of x bins, number of y bins).
np.array(error_matrix).shape

(27, 29)

In [44]:
# Sanity check: number of y bin edges (one more than the number of y bins).
ye.shape

(30,)