**WEIGHTED CONVOLUTION ON DYNAMIC GRAPHS**

In [1]:
import torch
import torch.nn as nn
from torch_geometric.nn import GCNConv

We are going to construct a convolutions on dynamic graphs. 
Input for this module is a sequence of dynamic graphs $\mathbb{G}_i = \{\mathcal{G}_i^1,...\mathcal{G}_i^T\}$, where graph $\mathcal{G}_i^t \in \mathbb{G}_i$ has a sequene of elements represented as $\{e_{i,j}^t \in \mathbb{R}^F, \forall v_{i,j} \in \mathcal{V}_i\}$. (F is the dimention of element representation equal to `in_features` and *i* is the considered household).

For each graph $\mathcal{G}_i$ the output of this modelue is a new sequence representation, which we will denote as  $\{c_{i,j}^t \in \mathbb{R}^{F'}, \forall v_{i,j} \in \mathcal{V}_i\}$. (F' is the new dimension equal to `out_features`).

To reduce the parameter scale and also make our method flexible to deal with sequences with variable lengths, a parameter sharing strategy is adopted. The weighted convolutions are implemented by propagating information of elements in each dynamic graphs as follows. For graph $\mathcal{G}_i$
$$c_{i,j}^{t,l+1} = \sigma\left( b^l + \sum_{k \in N_{i,j}^t \cup \{j\}}   A_i^t[j,k] \cdot \left( W^t c_{i,k}^{t,l} \right) \right),$$ where $A_i^t[j,k]$ represents the item in j-th row and k-th column of matrix $A_i^t$, which is the edge weight of $v_{i,j}$ and $v_{i,k}$ in graph $\mathcal{G}_i^t$.

We are going to override the `nn.Module` for constructing our convolutional layer.

**Convolutional layer**
For the convolutions, we're going to use the `GCNConv` layer from the PyG library. The convolutions are realized as follows:

$$\mathbf{X}^{\prime} = \mathbf{\hat{D}}^{-1/2} \mathbf{\hat{A}}
\mathbf{\hat{D}}^{-1/2} \mathbf{X} \mathbf{\Theta}$$, where $\mathbf{\hat{A}} = \mathbf{A + I}$ is the adjacency matrix of a graph with inserted self-loops, and $\mathbf{\hat{D}}$ is its diagonal degree matrix.

PyG makes the use of convolutions simple by simpy asking us to input the node feature tensor of shape `[num_of_nodes, num_of_features]` and its Sparse transposed adjecency matrix `adj_t`, which takes into account the weights in our graphs.

Here are some other terms needed to understand the following code:


`nn.ModuleList()` - Holds submodules in a list. <br>
`nn.ReLU()` - Applies the rectified linear unit function element-wise: ReLU(x) = max(0,x) <br>
`nn.BatchNorm1d` - Applies Batch Normalization over a 2D or 3D input. $y=\frac{x-E[x]}{\sqrt{var[x]+\epsilon}} \cdot \gamma + \beta$, The mean and standard-deviation are calculated per-dimension over the mini-batches and \gammaγ and \betaβ are learnable parameter vectors of size C (where C is the input size).

In [34]:
class weighted_GCN(nn.Module):
    def __init__(self, in_features, hidden_sizes, out_features):
        '''
        :param in_features: int, number of input features
        :param hidden_sizes: List[int], list of integers of hidden sizes
        :param out_features: int, number of output features
        '''
        super(weighted_GCN, self).__init__()
        # we are going to use 3 layers, first graph conv we wrote before, ReLu function and normalization
        gcns, relus, bns = nn.ModuleList(), nn.ModuleList(), nn.ModuleList()
        
        # layers for hidden_size
        input_size = in_features
        for hidden_size in hidden_sizes:
            # go through all the layers and call all three functions
            gcns.append(GCNConv(input_size, hidden_size)) 
            relus.append(nn.ReLU())
            bns.append(nn.BatchNorm1d(hidden_size))
            input_size = hidden_size # next layer start size will be output from one layer before
        
        # output layer
        gcns.append(GCNConv(hidden_sizes[-1], out_features))
        relus.append(nn.ReLU())
        bns.append(nn.BatchNorm1d(out_features))
        self.gcns, self.relus, self.bns = gcns, relus, bns

    def forward(self, x, adj_t):
        """
        :param graph: dgl.DGLGraph
        :param node_features: torch.Tensor shape (n_1+n_2+..., n_features)
               edges_weight: torch.Tensor shape (T, n_1^2+n_2^2+...)
        :return:
        """
        h = x
        # calculate
        i = 0 #this is here to check if how many iterations we're running. Comment out if not needed
        for gcn, relu, bn in zip(self.gcns, self.relus, self.bns):
            
            print(f'iteration {i}')
            #run the Convolutional layer
            h = gcn(h, adj_t)
            #run the batch norm
            h = bn(h.transpose(1, -1)).transpose(1, -1)
            #run the ReLu
            h = relu(h)
            i += 1
        return h

We are going to represents this embeddings as a matrix $C_{i,j} \in \mathbb{R}^{T \times F'}$, where each row $t$ represents $c_{i,j}^t$. <br>

`class stacked_weighted_GCN_blocks` will construct such matrices.

In [35]:

## TODO: This still isn't used.
class stacked_weighted_GCN_blocks(nn.ModuleList):
    def __init__(self, *args, **kwargs):
        super(stacked_weighted_GCN_blocks, self).__init__(*args, **kwargs)

    def forward(self, *input):
        nodes_feature, edge_weights = input
        h = nodes_feature
        for module in self:
            h = module(h, edge_weights)
        return h

In [31]:
import os
import os.path as osp
import networkx as nx
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.utils import erdos_renyi_graph, to_networkx, from_networkx
import torch_geometric.transforms as T
import torch_sparse
from torch_geometric.data import InMemoryDataset, download_url


#Here we specify our final network dimensions ##TODO: Make this prettier
embedding_dim = 2
hidden_dims = [256, 256]

data_list = []

#This is just a test -- we're only constructing the graphs for houshold id 22 as a proof of concept!
for filename in os.listdir("../data/Test-Graphs/content/Graphs/"):
    if filename[:3] != "22_": continue
    
    
    ## we construct a NX graph and cast it to pytorch.data.Data
    G = nx.Graph(nx.read_pajek(os.path.join("../data/Test-Graphs/content/Graphs/",filename)))
    data = from_networkx(G, group_node_attrs=None)
    
    ## Then, we override the data.x in data to get the desired format of the dimensions.
    ## Here, we're inputting two custom features -- the degree and pagerank centrality of nodes.
    ## In the long term, we're probably just going to go with a tensor of zeros here.
    x = torch.tensor([list(dict(G.degree()).values()),
list(dict(nx.algorithms.link_analysis.pagerank_alg.pagerank(G)).values())]).t()
    data.x = x
    data_list.append(data)

## We initialize a model
model = weighted_GCN(data_list[0].num_features, hidden_dims, embedding_dim)


## Here we look at how our data instance looks and try to run one instance through the model.
print(data_list[0])
o = model(data_list[0].x, torch_sparse.SparseTensor(row=data_list[0].edge_index[0], col=data_list[0].edge_index[1], value=data_list[0].weight).t())

Data(x=[109, 2], edge_index=[2, 841], y=[109], id=[109], shape=[109], weight=[841])
0
1
2


Processing...
Done!


Here is an attempt of me constructing a PyG dataset. It's not working as intended atm. 
## TODO.

In [32]:

class ShoppingDataset(Dataset):
    def __init__(self, root, transform=None, pre_transform=None):
        super().__init__(root, transform, pre_transform)

    @property
    def raw_file_names(self):
        return  os.listdir("../data/Test-Graphs/content/Graphs/")

    @property
    def processed_file_names(self):
        return 'not_implemented.pt'

    def download(self):
        # Download to `self.raw_dir`.
        pass
    
    def process(self):
        idx = 0
        for i, dat in enumerate(data_list):
            torch.save(data, os.path.join(self.processed_dir, f'data_{i}.pt'))
#         for raw_path in self.raw_paths:
#             # Read data from `raw_path`.
#             data = Data(...)

#             if self.pre_filter is not None and not self.pre_filter(data):
#                 continue

#             if self.pre_transform is not None:
#                 data = self.pre_transform(data)

#             torch.save(data, osp.join(self.processed_dir, f'data_{idx}.pt'))
#             idx += 1

    def len(self):
        return len(self.processed_file_names)

    def get(self, idx):
        data = torch.load(osp.join(self.processed_dir, f'data_{idx}.pt'))
        return data

ds = ShoppingDataset(root='../data/ShoppingDataset/')

ValueError: not enough values to unpack (expected 3, got 1)