# Experimenting with our own Approach

## TODO:

- [x] import CORA dataset
- [ ] implement our approach: consider k-hop neighbors' input features and the current node's hidden state
- [ ] experiment a first model with GCN as the message passing scheme (later we'll experiment with GAT and GraphSAGE)
- [ ] track over-smoothing with **MAD** and **MADGap** over: 
    - [ ] K hops considered
    - [ ] epochs

#### Side notes

- Try to inverse the k-hop neighbors (start from remote and move down)
- Early stop for nodes that don't have k-hop neighbors (just pass the hidden state)

## Setup environment

- load packages from google drive (to install once)
- configure working directory (to download datasets)

```
# This is formatted as code
```



In [2]:
# setup colab environment
import os, sys
import os.path as osp
from google.colab import drive
drive.mount('/content/mnt')
nb_path = '/content/notebooks'
os.symlink('/content/mnt/My Drive/Colab Notebooks', nb_path)
sys.path.insert(0, nb_path)  # or append(nb_path)

Mounted at /content/mnt


In [3]:
!wget https://raw.githubusercontent.com/AchrafAsh/gnn-receptive-fields/main/data.py
!wget https://raw.githubusercontent.com/AchrafAsh/gnn-receptive-fields/main/utils.py

--2021-05-28 14:18:57--  https://raw.githubusercontent.com/AchrafAsh/gnn-receptive-fields/main/data.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.110.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1072 (1.0K) [text/plain]
Saving to: ‘data.py’


2021-05-28 14:18:57 (72.8 MB/s) - ‘data.py’ saved [1072/1072]

--2021-05-28 14:18:57--  https://raw.githubusercontent.com/AchrafAsh/gnn-receptive-fields/main/utils.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2333 (2.3K) [text/plain]
Saving to: ‘utils.py’


2021-05-28 14:18:58 (39.4 MB/s) - ‘utils.py’ saved [2333/2333]



In [5]:
# import utility functions
from data import load_dataset

In [7]:
path = osp.join(os.getcwd(), 'data')
cora_dataset = load_dataset(path, 'Cora')
G = cora_dataset[0] # only graph of the dataset

# MAD_cora = mean_average_distance(x=G.x)
# MADGap_cora = mean_average_distance_gap(x=G.x, adj_matrix=tg.utils.to_dense_adj(G.edge_index)[0])
# print(f'Initial MAD for Cora: {MAD_cora}')
# print(f'Initial MADGap for Cora: {MADGap_cora}')

## Understanding data structures

🎯 Filter edge_index to keep only the k-hop neighbors

In [12]:
# Cora has only one graph
G = cora_dataset[0]

In [18]:
# Shape [2, 10556]
G.edge_index.shape

torch.Size([2, 10556])

In [19]:
# Look how the edge_index is made
for i in range(G.edge_index.size(-1)):
    node_i, node_j = G.edge_index[:, i]
    print(f"{node_i} → {node_j}")

    if i == 2: break

0 → 633
0 → 1862
0 → 2582


In [31]:
import time
from torch_geometric.utils import to_dense_adj, dense_to_sparse

In [32]:
def filter_neighbors(edge_index, k=1):
    """Find k-hop neighbors recursevely

    Args:
        edge_index (tensor): sparse adjacency matrix composed of edge indices
        k (int): hop of neighbors to keep (e.g. k=1 is the regular adjacency matrix)
    
    Returns:
        tensor: sparse matrix containing edge indices of the k-hop neighbors
    """
    print(f"k: {k}")
    
    if k==1:
        return edge_index
    
    t_start = time.perf_counter()

    # Step 1: get the adjacency matrix
    A  = to_dense_adj(edge_index=edge_index)
    # Step 2: find k-step neighbors
    for i in range(A.size(-2)):
        for j in range(A.size(-1)):
            if j == i: continue
            if A[0, i, j] == 1:
                for l in range(A.size(-2)):
                    if l == i: continue
                    if A[0, j, l] == 1:
                        A[0, i, l] = 1
    
    t_end = time.perf_counter()
    print(f"Time: {t_end - t_start}")
    return filter_neighbors(dense_to_sparse(A), k-1)

In [33]:
filter_neighbors(G.edge_index, k=3)

k: 3


KeyboardInterrupt: ignored


## Our Approach

In [8]:
def xavier(tensor):
    """Initialize weight matrix with Xavier distribution

    Args:
        tensor (tensor): weigh matrix
    Return:
        tensor - weight matrix initialized
    """
    if tensor is not None:
        stdv = math.sqrt(6.0 / (tensor.size(-2) + tensor.size(-2)))
        tensor.data.uniform_(-stdv, stdv)

def zeros(tensor):
    """Initialize bias vector with all zeros

    Args:
        tensor (tensor): bias vector
    
    Return
        tensor - bias vector initialized with zeros
    """
    if tensor is not None:
        tensor.data.fill_(0)

In [None]:
from torch_geometric.nn import MessagePassing

class KHopGCNConv(MessagePassing):
    def __init__(self, in_channels, out_channels, k):
        self(KHopNet, self).__init__(aggr='add')
        self.k = k
        self.lin = torch.nn.Linear(in_channels, out_channels)
        self.reset_parameters()
    
    def reset_parameters(self):
        xavier(self.lin.weight)
        zeros(self.lin.bias)
    
    def forward(self, x, h, edge_index):
        """
        Args:
            x (tensor): node input features, with shape [N, in_channels]
            h (tensor): node hidden states, with shape [N, in_channels]
            edge_index (tensor): graph edges, with shape [2, E]
            k (int): distance from the neighbors
        """
        # Step 1: filter edges to keep only k-hop neighbors
        edge_index = filter_neighbors(edge_index, num_nodes=x.size(0), k=self.k)

        # Step 2: Linearly transform node feature matrix.
        x = self.lin(x)
        h = self.lin(h)

        # Step 3: Compute normalization.
        row, col = edge_index
        deg = degree(col, x.size(0), dtype=x.dtype)
        deg_inv_sqrt = deg.pow(-0.5)
        norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Step 4-5: Start propagating messages.
        return self.propagate(edge_index, x=x, h=h, norm=norm)
    
    def message(self, x_i, x_j):
        # x_j are input features of the neighbors

        # Step 4: Normalize node features.
        return nrom.view(-1, 1) * x_j
    
    def update(self, aggr_out, norm, h):
        # Step 5: add target node hidden state and return new node embeddings
        aggr_out += norm.view(-1, 1) * h
        return aggr_out

### The KHopNet (Our Approach - full model)

In [None]:
class KHopNet(torch.nn.Module):
    def __init__(self, dataset, K):
        super(KHopNet, self).__init__()
        self.conv1 = KHopNetConv(dataset.num_features, dataset.num_features, k=1)
        self.conv2 = KHopNetConv(dataset.num_features, dataset.num_features, k=2)
        self.conv3 = KHopNetConv(dataset.num_features, dataset.num_classes, k=3)
    
    def reset_parameters(self):
        self.conv1.reset_parameters()
        self.conv2.reset_parameters()
        self.conv3.reset_parameters()
    
    def forward(self, data, dropout=.3):
        x, edge_index = data.x, data.edge_index
        x = F.relu(self.conv1(x, edge_index))
        x = F.dropout(x, p=dropout, training=self.training)
        x = F.relu(self.conv2(x, edge_index))
        x = F.dropout(x, p=dropout, training=self.training)
        x = self.conv3(x, edge_index)
        return F.log_softmax(x, dim=1)

In [None]:
def train(model, optimizer, data):
    model.train()
    optimizer.zero_grad()
    out = model(data)
    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    loss.backward()
    optimizer.step()
    return loss

def evaluate(model, data):
    model.eval()

    with torch.no_grad():
        logits = model(data)
    
    outs = {}
    for key in ['train', 'val', 'test']:
        mask = data[f'{key}_mask']
        loss = F.nll_loss(logits[mask], data.y[mask]).item()
        pred = logits[mask].max(1)[1]
        acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()

        outs[f'{key}_loss'] = loss
        outs[f'{key}_acc'] = acc
    
    return outs

def run(dataset, model, runs, epochs, lr, weight_decay, early_stopping):
    val_losses, accs, durations = [], [], []
    for _ in range(runs):
        data = data.to(device)
        model.to(device).reset_parameters()
        optimizer = Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

        if torch.cuda.is_available():
            torch.cuda.synchronize()
        
        t_start = time.perf_counter()

        best_val_loss = float('inf')
        test_acc = 0
        val_loss_history = []

        for epoch in range(1, epochs+1):
            train(model, optimizer, data)
            eval_info = evaluate(model, data)
            eval_info['epoch'] = epoch
        
            if eval_info['val_loss'] < best_val_loss:
                best_val_loss = eval_info['val_loss']
                test_acc = eval_info['test_acc']
            
            val_loss_history.append(eval_info['val_loss'])
            if early_stopping > 0 and epoch > epochs // 2:
                tmp = tensor(val_loss_history[-(early_stopping + 1):-1])
                if eval_info['val_loss'] > tmp.mean().item():
                    break

        if torch.cuda.is_available():
            torch.cuda.synchronize()

        t_end = time.perf_counter()

        val_losses.append(best_val_loss)
        accs.append(test_acc)
        durations.append(t_end - t_start)

    loss, acc, duration = tensor(val_losses), tensor(accs), tensor(durations)

    print('Val Loss: {:.4f}, Test Accuracy: {:.3f} ± {:.3f}, Duration: {:.3f}'.
          format(loss.mean().item(),
                 acc.mean().item(),
                 acc.std().item(),
                 duration.mean().item()))