# 1. Setup

In [1]:
import networkx as nx
import os
import numpy as np
import math
import torch
from torch import nn
import torch.optim as optim

  from .autonotebook import tqdm as notebook_tqdm


# 2. Data Preprocessing  
Data Structure:
1. **gList** <Dict>: contains 31 graphs in total (30 synthetic graphs and 1 YouTube graph), keyed by filename  
2. Each element of gList is a <Dict>: 'graph' holds an nx.Graph(); 'score' holds a <Dict> mapping each node id to its score

In [2]:
# Input data
# The trailing "" keeps a directory separator on the end of the path
# (".\\data\\" on Windows, "./data/" elsewhere), so the value is unchanged on
# Windows while the notebook also runs on other platforms.
dpath = os.path.join(".", "data", "")
gList = dict()

# Files whose name contains 'com' are space-delimited instead of
# tab-delimited. They are registered in gList but not parsed yet
# ("after finish all code run code with com"): set this to True to load them.
LOAD_COM = False


def _read_rows(path, sep):
    """Return every non-blank line of `path`, split on `sep`.

    Only the line terminator is stripped, so a final line without a trailing
    newline keeps its last character.
    """
    with open(path, 'r') as f:
        return [line.rstrip("\r\n").split(sep) for line in f if line.strip()]


for root, dirs, files in os.walk(dpath):
    for file in files:
        if 'score' in file:
            # Score files are read together with their graph file below.
            continue

        file_path = os.path.join(root, file)
        is_com = 'com' in file
        sep = " " if is_com else "\t"
        skip_parsing = is_com and not LOAD_COM

        # Process nodes and edges
        gList[file] = dict()
        gList[file]['graph'] = nx.Graph()
        edge_rows = [] if skip_parsing else _read_rows(file_path, sep)
        # Create edge tuples from the first two columns of every row
        edges = [(int(row[0]), int(row[1])) for row in edge_rows]
        gList[file]['graph'].add_edges_from(edges)
        print("{} has {} nodes, {} edges".format(file,gList[file]['graph'].number_of_nodes(),gList[file]['graph'].number_of_edges()))

        # Process scores: "<name>.txt" is paired with "<name>_score.txt"
        scorefile = file.replace(".txt","_score.txt")
        gList[file]['score'] = dict()
        score_file_path = os.path.join(root, scorefile)
        score_rows = [] if skip_parsing else _read_rows(score_file_path, sep)
        for node_score in score_rows:
            gList[file]['score'][int(node_score[0])] = float(node_score[1])

0.txt has 5000 nodes, 19982 edges
1.txt has 5000 nodes, 19981 edges
10.txt has 5000 nodes, 19980 edges
11.txt has 5000 nodes, 19983 edges
12.txt has 5000 nodes, 19983 edges
13.txt has 5000 nodes, 19984 edges
14.txt has 5000 nodes, 19982 edges
15.txt has 5000 nodes, 19984 edges
16.txt has 5000 nodes, 19982 edges
17.txt has 5000 nodes, 19981 edges
18.txt has 5000 nodes, 19984 edges
19.txt has 5000 nodes, 19981 edges
2.txt has 5000 nodes, 19980 edges
20.txt has 5000 nodes, 19983 edges
21.txt has 5000 nodes, 19982 edges
22.txt has 5000 nodes, 19982 edges
23.txt has 5000 nodes, 19981 edges
24.txt has 5000 nodes, 19984 edges
25.txt has 5000 nodes, 19982 edges
26.txt has 5000 nodes, 19984 edges
27.txt has 5000 nodes, 19983 edges
28.txt has 5000 nodes, 19982 edges
29.txt has 5000 nodes, 19983 edges
3.txt has 5000 nodes, 19982 edges
4.txt has 5000 nodes, 19984 edges
5.txt has 5000 nodes, 19981 edges
6.txt has 5000 nodes, 19984 edges
7.txt has 5000 nodes, 19983 edges
8.txt has 5000 nodes, 19983 

# 3. DrBC

In [3]:
# Working example for this section: the graph loaded from "0.txt".
g = gList['0.txt']['graph']

In [4]:
# Prepare nodes initial feature X [dv,1,1]
def gen_nodes_feature(G):
    """Build the initial node feature matrix [d_v, 1, 1] for graph ``G``.

    Parameters
    ----------
    G : graph-like
        Object whose ``degree()`` yields ``(node, degree)`` pairs
        (e.g. an ``nx.Graph``).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(number_of_nodes, 3)``. Rows are ordered by ascending
        node id; column 0 is the node degree, columns 1 and 2 are ones.
    """
    # Read the degrees from the argument G, not from a notebook-level graph,
    # so the function works for whichever graph it is given.
    degree_of = dict(G.degree())
    deg = np.array([degree_of[node] for node in sorted(degree_of)])
    X = np.ones((3, len(deg)))
    X[0, :] = deg
    return X.T

In [5]:
# Initial features for the example graph, then row-wise L2 normalisation.
X = gen_nodes_feature(g)
# One Euclidean norm per row; keepdims leaves a column vector that
# broadcasts against X in the division below.
norms = np.linalg.norm(X, axis=1, keepdims=True)
print(norms.shape)
X_norm = np.divide(X, norms)
print(X_norm)

(5000, 1)
[[0.99998249 0.00418403 0.00418403]
 [0.99996844 0.0056178  0.0056178 ]
 [0.99995496 0.00671111 0.00671111]
 ...
 [0.94280904 0.23570226 0.23570226]
 [0.94280904 0.23570226 0.23570226]
 [0.94280904 0.23570226 0.23570226]]


## 3a. DrBC encoder function

In [25]:
class DrBCEncoder(nn.Module):
    """Encoder that produces one embedding per node of a fixed graph.

    The input features go through Linear -> ReLU -> BatchNorm, then
    ``num_layers - 1`` rounds of neighbourhood aggregation followed by a GRU
    cell and BatchNorm. The result is the element-wise maximum over the
    embeddings produced by all layers.

    Parameters
    ----------
    input_size : int
        Number of input features per node.
    hidden_size : int
        Embedding width.
    num_layers : int
        Total number of layers, counting the initial linear layer.
    G : graph-like
        Object exposing ``degree()``, ``nodes()`` and ``adj[node]``
        (e.g. an ``nx.Graph``). Its structure is captured at construction
        time; later changes to ``G`` are not picked up.

    Notes
    -----
    Row ``i`` of every feature matrix is taken to belong to the ``i``-th node
    in ascending node-id order. For labels ``0..N-1`` this is the identity
    mapping.
    """

    def __init__(self, input_size, hidden_size, num_layers,G):
        super(DrBCEncoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.layer1 = nn.Linear(input_size,hidden_size)
        self.relu = nn.ReLU()
        self.norm1 = nn.BatchNorm1d(hidden_size)
        self.gru_cell = nn.GRUCell(hidden_size, hidden_size,bias = False)
        self.norm2 = nn.BatchNorm1d(hidden_size)
        self.G = G
        self.deg = dict(self.G.degree())
        self._build_edge_index()

    def _build_edge_index(self):
        """Precompute, once, the (target row, source row, weight) of every edge."""
        # Map node labels to row positions so labels need not be 0..N-1.
        row_of = {node: i for i, node in enumerate(sorted(self.G.nodes()))}
        dst, src, coef = [], [], []
        for node in self.G.nodes():
            degv = self.deg[node]
            for neigh in self.G.adj[node]:
                dst.append(row_of[node])
                src.append(row_of[neigh])
                coef.append(1/(math.sqrt(degv+1)*math.sqrt(self.deg[neigh]+1)))
        # Plain attributes (not buffers) so the module's state_dict is unchanged.
        self._dst = torch.tensor(dst, dtype=torch.long)
        self._src = torch.tensor(src, dtype=torch.long)
        self._coef = torch.tensor(coef, dtype=torch.float32)

    def forward(self, x):
        """Return the node embeddings for feature matrix ``x``.

        ``x`` has one row per node and ``input_size`` columns; the result has
        one row per node and ``hidden_size`` columns.
        """
        x = self.layer1(x)
        x = self.relu(x)
        x = self.norm1(x)
        output = [x]
        for _ in range(self.num_layers-1):
            hn = self.calHn(x)
            # nn.GRUCell is called as (input, hidden): the neighbourhood
            # aggregate is the input, and the node's own embedding is the
            # state that the cell updates.
            x = self.gru_cell(hn, x)
            x = self.norm2(x)
            output.append(x)
        # Element-wise maximum across the per-layer embeddings.
        output, _ = torch.max(torch.stack(output), dim=0)
        return output

    def calHn(self,x):
        """Aggregate neighbour embeddings for every node.

        Row ``v`` of the result is the sum over the neighbours ``u`` of ``v``
        of ``x[u] / (sqrt(deg(v) + 1) * sqrt(deg(u) + 1))``. Nodes without
        neighbours get a zero row. The result has the shape, dtype and device
        of ``x``.
        """
        dst = self._dst.to(x.device)
        src = self._src.to(x.device)
        coef = self._coef.to(device=x.device, dtype=x.dtype)
        # One weighted message per directed edge, summed into its target row.
        messages = x[src] * coef.unsqueeze(1)
        return torch.zeros_like(x).index_add(0, dst, messages)
    
# Build the encoder for the example graph and run a single forward pass
input_size, hidden_size, num_layers = 3, 32, 5
encoder = DrBCEncoder(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    G=g,
)
# The normalised features are converted to a float32 tensor for the model.
out = encoder(torch.FloatTensor(X_norm))
print(out.shape)
print(out)

torch.Size([5000, 32])
tensor([[-0.5139,  2.0427,  2.5759,  ...,  2.6434,  2.5304,  3.3350],
        [-2.3558,  0.4919,  3.1532,  ...,  1.9962,  2.4635,  3.6587],
        [-0.3457,  0.6449,  2.0438,  ...,  1.4955,  2.5203,  2.9065],
        ...,
        [ 2.0724,  1.3120, -0.3375,  ...,  0.9927,  0.4154,  0.0000],
        [ 1.0374,  0.9425,  0.8812,  ...,  0.9927,  1.0440,  0.6344],
        [ 1.0374,  2.0443,  1.1548,  ...,  1.5469,  1.4470,  0.0000]],
       grad_fn=<MaxBackward0>)


In [None]:
# Weight-free GRU-style update on NumPy inputs
def GRU(hv,hn):
    """Blend the state ``hv`` with a candidate built from ``hv`` and ``hn``.

    Both arguments are NumPy arrays; the result is a torch tensor.
    The update and reset gates are the same quantity, ``sigmoid(hv + hn)``.
    """
    state = torch.from_numpy(hv)
    neighbours = torch.from_numpy(hn)
    # Update gate and reset gate share one value, so compute it once.
    gate = torch.sigmoid(state + neighbours)
    candidate = torch.tanh(torch.mul(state, gate) + neighbours)
    return torch.mul(gate, candidate) + torch.mul((1 - gate), state)

In [None]:
def encoder(G,L = 5):
    """NumPy prototype of the layer-wise node encoder.

    Starts from the row-normalised features of ``gen_nodes_feature(G)`` and
    applies ``L - 1`` rounds of: aggregate the neighbours' previous-layer
    rows with weight ``1 / (sqrt(deg(v) + 1) * sqrt(deg(u) + 1))``, update
    the node with ``GRU``, then L2-normalise every row.

    Parameters
    ----------
    G : nx.Graph
        Graph to encode. Node labels are used directly as row indices, so
        they are expected to be the integers ``0..N-1``.
    L : int
        Number of layers, counting the initial features.

    Returns
    -------
    numpy.ndarray
        Element-wise maximum over the ``L`` per-layer matrices, one row per node.

    NOTE: other cells assign a model instance to the same name ``encoder``;
    whichever cell ran last decides what the name refers to.
    """
    deg = dict(G.degree())
    X = gen_nodes_feature(G)
    H = [X / np.linalg.norm(X, axis=1, keepdims=True)]
    for l in range(1, L):
        prev = H[l-1]
        # Work on a copy: writing into the same array would overwrite the
        # previous layer while it is still being read.
        cur = prev.copy()
        for node in G.nodes():
            neighbours = list(G.adj[node])
            if not neighbours:
                # No neighbours: the node keeps its previous-layer row.
                continue
            degv = deg[node]
            hn = np.zeros(prev.shape[1])
            for neigh in neighbours:
                den = math.sqrt(degv+1)*math.sqrt(deg[neigh]+1)
                hn += prev[neigh] / den
            # Update once per node, after the full neighbourhood is summed.
            cur[node] = GRU(prev[node], hn).numpy()
        H.append(cur / np.linalg.norm(cur, axis=1, keepdims=True))
    return np.max(H,axis = 0)

In [None]:
# Call `encoder` on the example graph and display the result.
# NOTE(review): `encoder` is also assigned a DrBCEncoder instance in other
# cells, so what this call does depends on which cell ran last.
encoder(g)

In [None]:
# Rebuild the encoder for the example graph with a hidden width of 64
input_size, hidden_size, num_layers = 3, 64, 5
encoder = DrBCEncoder(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    G=g,
)

In [None]:
# Forward pass of the current `encoder` on the normalised features,
# then show the shape of the result.
out = encoder(torch.FloatTensor(X_norm))
print(out.shape)

In [20]:
# Three 3x3 integer matrices used to illustrate torch.max over a stack
A = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
B = torch.tensor([[9, 8, 7], [6, 5, 4], [3, 2, 1]])
C = torch.tensor([[2, 4, 6], [8, 7, 9], [1, 3, 5]])

# Stack them along a new leading dimension: shape (3, 3, 3)
mat = torch.stack((A, B, C))

# Show the stacked tensor
print(mat)

# Element-wise maximum across the three matrices (dim 0); the result carries
# both the max values and the index of the matrix each value came from
mat.max(dim=0)


# print output matrix
#print(output_matrix)

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]],

        [[9, 8, 7],
         [6, 5, 4],
         [3, 2, 1]],

        [[2, 4, 6],
         [8, 7, 9],
         [1, 3, 5]]])


torch.return_types.max(
values=tensor([[9, 8, 7],
        [8, 7, 9],
        [7, 8, 9]]),
indices=tensor([[1, 1, 1],
        [2, 2, 2],
        [0, 0, 0]]))

In [15]:
for matrix i

[tensor([[ 0.5302, -0.3618,  0.3227, -0.1702],
        [ 0.4083, -0.3624,  0.1434, -0.2685],
        [-0.1534, -1.2297, -0.8141,  0.9433]]), tensor([[-0.3487, -0.9476, -0.5586,  0.6799],
        [-1.2015, -1.3125, -0.9144, -1.3478],
        [ 0.8463,  1.2838, -0.6756,  0.5241]]), tensor([[-0.0757,  0.5446, -0.0743,  0.3097],
        [-0.3237,  0.0104,  1.4562,  0.3755],
        [-0.7474, -1.5096,  1.4763,  0.5483]])]


In [None]:
num_epochs = 5

# Mean-squared-error loss, optimised with Adam over the encoder's parameters
criterion = nn.MSELoss()
optimizer = optim.Adam(encoder.parameters(), lr=0.001)

# Train the model
# NOTE(review): `train_loader` is not defined in the cells shown here; it has
# to exist before this cell runs.
for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(train_loader):
        # Each batch is an (inputs, labels) pair
        inputs, labels = data

        # Clear the gradients accumulated by the previous step
        optimizer.zero_grad()

        # Forward pass, loss, backward pass, parameter update
        outputs = encoder(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Report the mean loss over every block of 100 mini-batches
        running_loss += loss.item()
        if (i + 1) % 100 == 0:
            print('[%d, %5d] loss: %.3f' %
                  (epoch + 1, i + 1, running_loss / 100))
            running_loss = 0.0


## 3b. Decoder: 2-layer MLP