# 1. Setup

In [112]:
import networkx as nx
import os
import numpy as np
import math
import torch

  from .autonotebook import tqdm as notebook_tqdm


# 2. Data Preprocessing  
Data Structure:
1. **gList** <Dict>: containing total 31 graphs, which 30 from Synthetic and 1 from youtube,using filename as key  
2. element of gList <Dict>: 'graph':nx.Graph();'score': <Dict> with 'node' and 'score'

In [76]:
# Input data
dpath = ".\\data\\"
gList = dict()

for root, dirs, files in os.walk(dpath):
    for file in files:
        file_path = os.path.join(root, file)
        if 'score' not in file:
            # Process nodes and edges
            gList[file] = dict()
            gList[file]['graph']=nx.Graph()
            with open(file_path,'r') as f:
                content = f.readlines()
                edges = []
                for line in content:
                    if 'com' not in file:
                        nodes = line[:-1].split('\t')
                    else:
                        continue # after finish all code run code with com
                        nodes = line[:-1].split(" ")
                    # Create edge tuple and append
                    edges.append((int(nodes[0]),int(nodes[1])))
                gList[file]['graph'].add_edges_from(edges)
                print("{} has {} nodes, {} edges".format(file,gList[file]['graph'].number_of_nodes(),gList[file]['graph'].number_of_edges()))
            
            # Process scores
            scorefile = file.replace(".txt","_score.txt")
            gList[file]['score'] = dict()
            score_file_path = os.path.join(root,scorefile) 
            with open(score_file_path,'r') as f:
                content = f.readlines()
                for line in content:
                    if 'com' not in file:
                        node_score = line[:-1].split('\t')
                    else:
                        continue # after finish all code run code with com
                        node_score = line[:-1].split(" ")
                    gList[file]['score'][int(node_score[0])] = float(node_score[1])

0.txt has 5000 nodes, 19982 edges
1.txt has 5000 nodes, 19981 edges
10.txt has 5000 nodes, 19980 edges
11.txt has 5000 nodes, 19983 edges
12.txt has 5000 nodes, 19983 edges
13.txt has 5000 nodes, 19984 edges
14.txt has 5000 nodes, 19982 edges
15.txt has 5000 nodes, 19984 edges
16.txt has 5000 nodes, 19982 edges
17.txt has 5000 nodes, 19981 edges
18.txt has 5000 nodes, 19984 edges
19.txt has 5000 nodes, 19981 edges
2.txt has 5000 nodes, 19980 edges
20.txt has 5000 nodes, 19983 edges
21.txt has 5000 nodes, 19982 edges
22.txt has 5000 nodes, 19982 edges
23.txt has 5000 nodes, 19981 edges
24.txt has 5000 nodes, 19984 edges
25.txt has 5000 nodes, 19982 edges
26.txt has 5000 nodes, 19984 edges
27.txt has 5000 nodes, 19983 edges
28.txt has 5000 nodes, 19982 edges
29.txt has 5000 nodes, 19983 edges
3.txt has 5000 nodes, 19982 edges
4.txt has 5000 nodes, 19984 edges
5.txt has 5000 nodes, 19981 edges
6.txt has 5000 nodes, 19984 edges
7.txt has 5000 nodes, 19983 edges
8.txt has 5000 nodes, 19983 

# 3. DrBC

In [108]:
# Prepare nodes initial feature X [dv,1,1]
def gen_nodes_feature(G):
    deg = np.array(list(dict(sorted(dict(g.degree()).items())).values()))
    X = np.ones((3,len(deg)))
    X[0,:]=deg
    return X.T

In [110]:
X=gen_nodes_feature(g)
norms = np.linalg.norm(X,axis = 1,keepdims=True)
print(norms.shape)
X_norm = X/norms
print(X_norm)

(5000, 1)
[[0.99998249 0.00418403 0.00418403]
 [0.99996844 0.0056178  0.0056178 ]
 [0.99995496 0.00671111 0.00671111]
 ...
 [0.94280904 0.23570226 0.23570226]
 [0.94280904 0.23570226 0.23570226]
 [0.94280904 0.23570226 0.23570226]]


## 3a. DrBC encoder function

In [114]:
torch.from_numpy(X_norm[0])

tensor([1.0000, 0.0042, 0.0042], dtype=torch.float64)

In [127]:
# Define GRU cell
def GRU(hv,hn):
    hv_t = torch.from_numpy(hv)
    hn_t = torch.from_numpy(hn)
    u = torch.sigmoid(hv_t+hn_t)
    r = torch.sigmoid(hv_t+hn_t)
    f = torch.tanh(torch.mul(hv_t,r)+hn_t)
    return torch.mul(u,f)+torch.mul((1-u),hv_t)

In [136]:
def encoder(G,L = 5):
    adj_mat = nx.to_numpy_matrix(g)
    deg = dict(G.degree())
    X = gen_nodes_feature(G)
    H = []
    norms1 = np.linalg.norm(X,axis = 1,keepdims=True)
    H.append(X/norms1)
    for l in range(1,L):
        H.append(H[-1])
        Hn = []
        for node in G.nodes():
            hn = 0
            degv = deg[node]
            for neigh in list(G.adj[node]):
                den = math.sqrt(degv+1)*math.sqrt(deg[neigh]+1)
                hn += H[l-1][neigh]/np.full(3,den)
                H[l][node] = GRU(H[l-1][node],hn)
        H[l] = H[l]/np.linalg.norm(H[l],axis = 1,keepdims=True)
    return np.max(H,axis = 0)

In [118]:
g = gList['0.txt']['graph']

In [119]:
nx.to_numpy_matrix(g)

matrix([[0., 1., 1., ..., 0., 0., 0.],
        [1., 0., 1., ..., 0., 0., 0.],
        [1., 1., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [120]:
deg=np.array(list(dict(sorted(dict(g.degree()).items())).values()))
len(deg)

5000

In [121]:
dict(g.degree())

{0: 239,
 4: 196,
 5: 220,
 8: 76,
 10: 102,
 12: 80,
 15: 46,
 16: 61,
 20: 88,
 22: 93,
 26: 79,
 30: 41,
 37: 41,
 39: 49,
 41: 31,
 42: 30,
 51: 52,
 54: 80,
 55: 25,
 59: 64,
 60: 29,
 62: 40,
 63: 30,
 69: 27,
 80: 7,
 84: 31,
 91: 56,
 96: 36,
 104: 23,
 105: 29,
 115: 30,
 118: 28,
 130: 6,
 132: 13,
 137: 18,
 143: 8,
 151: 36,
 155: 20,
 159: 23,
 172: 32,
 174: 12,
 191: 25,
 200: 14,
 216: 17,
 233: 20,
 239: 23,
 240: 12,
 252: 22,
 254: 15,
 262: 20,
 263: 7,
 265: 21,
 271: 34,
 273: 23,
 283: 21,
 296: 8,
 297: 8,
 307: 24,
 311: 20,
 331: 10,
 347: 20,
 370: 13,
 396: 24,
 399: 11,
 429: 27,
 440: 12,
 445: 6,
 462: 13,
 475: 21,
 479: 9,
 487: 14,
 501: 11,
 505: 16,
 556: 5,
 569: 8,
 573: 6,
 604: 13,
 608: 7,
 616: 11,
 664: 14,
 665: 12,
 675: 10,
 684: 10,
 696: 13,
 700: 11,
 705: 10,
 709: 22,
 717: 7,
 720: 10,
 784: 7,
 794: 9,
 832: 7,
 844: 9,
 846: 4,
 862: 10,
 871: 5,
 886: 10,
 889: 10,
 909: 8,
 925: 13,
 947: 10,
 976: 10,
 1008: 7,
 1055: 8,
 1075: 6

In [137]:
encoder(g)

array([[0.99998603, 0.99849511, 0.99849511],
       [0.99994552, 0.99720624, 0.99720624],
       [0.99989171, 0.99639426, 0.99639426],
       ...,
       [0.79089234, 0.67422084, 0.67422084],
       [0.70745371, 0.5524996 , 0.5524996 ],
       [0.69461085, 0.55201545, 0.55201545]])

In [135]:
import numpy as np

# Create a list of NumPy matrices
matrix_list = [np.array([[11, 12], [3, 4]]), np.array([[5, 6], [7, 8]]), np.array([[9, 10], [11, 12]])]

# Find the largest row for each row
largest_rows = np.max(matrix_list, axis=0)

# Print the largest rows
print(largest_rows)


[[11 12]
 [11 12]]
