# Testing usage of sparse matrices in graph NN model

In [1]:
# Training concurrency
# These environment variables are assigned before torch is imported in the next cell.
import os
# Value exported for the OpenMP thread-count variable
os.environ['OMP_NUM_THREADS'] = '4'
# Value exported for the CUDA device-visibility variable
os.environ['CUDA_VISIBLE_DEVICES'] = '1'

# Global switch tested when np_to_torch is set up: a true value makes it call .cuda()
cuda = False

In [2]:
# System imports
from __future__ import print_function
import os
import multiprocessing as mp
from timeit import default_timer as timer

# Externals
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.metrics
from sklearn.model_selection import train_test_split

# Torch imports
import torch
from torch.autograd import Variable
import torch.nn as nn

# Local imports
from graph import load_graphs, SparseGraph, feature_scale, graph_from_sparse
from model import SegmentClassifier
from estimator import Estimator

%matplotlib notebook

In [3]:
def np_to_torch(x, volatile=False):
    """
    Wrap a numpy array in a float32 torch Variable.

    Parameters
    ----------
    x : numpy array
        Cast with ``astype(np.float32)`` before being handed to
        ``torch.from_numpy``.
    volatile : bool
        Forwarded unchanged to the ``Variable`` constructor.

    Returns
    -------
    The new Variable, moved to the GPU with ``.cuda()`` when the
    notebook-level ``cuda`` flag is true.
    """
    var = Variable(torch.from_numpy(x.astype(np.float32)), volatile=volatile)
    # The flag is looked up on each call, so a single definition covers
    # both the CPU and the GPU configuration.
    return var.cuda() if cuda else var


def torch_to_np(x):
    """Move `x` to the CPU and return its underlying data as a numpy array."""
    return x.cpu().data.numpy()

In [6]:
# Directory that holds the per-event 'event%06i.npz' files read below.
# This is an absolute, site-specific path; adjust it when running elsewhere.
input_dir = '/global/cscratch1/sd/sfarrell/heptrkx/hit_graphs_mu10_003/data'

In [7]:
# Number of event files to load, counting up from event index 0
n_samples = 32

In [8]:
%%time

# Build the file paths event000000.npz, event000001.npz, ... for the first n_samples events
filenames = [os.path.join(input_dir, 'event%06i.npz' % i) for i in range(n_samples)]
# load_graphs and SparseGraph come from the local `graph` module;
# presumably this yields one SparseGraph per file -- confirm against that module.
graphs = load_graphs(filenames, SparseGraph)

CPU times: user 83.9 ms, sys: 27.2 ms, total: 111 ms
Wall time: 689 ms


In [10]:
class EdgeNetwork(nn.Module):
    """
    Edge-scoring module.

    The features of the two nodes attached to every edge are gathered
    through the association matrices, concatenated, and pushed through
    a small fully-connected network that ends in a sigmoid, producing
    one weight per edge.
    """

    def __init__(self, input_dim, hidden_dim=8, hidden_activation=nn.Tanh):
        super(EdgeNetwork, self).__init__()
        # Two node feature vectors are concatenated for each edge
        edge_feature_dim = 2 * input_dim
        layers = [
            nn.Linear(edge_feature_dim, hidden_dim),
            hidden_activation(),
            nn.Linear(hidden_dim, 1),
            nn.Sigmoid(),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, X, Ri, Ro):
        """Return one sigmoid score per edge for every graph in the batch."""
        # For each edge, pick up the features of the node on the Ro side
        # and then of the node on the Ri side
        gathered = [torch.bmm(R.transpose(1, 2), X) for R in (Ro, Ri)]
        edge_features = torch.cat(gathered, dim=2)
        # Score every edge, then drop the trailing length-1 axis
        scores = self.network(edge_features)
        return scores.squeeze(-1)

class NodeNetwork(nn.Module):
    """
    Node-update module.

    Every node collects edge-weighted features from its neighbors, kept
    separate for the input and the output side, and a fully-connected
    network turns those two aggregates together with the node's current
    features into the node's new features.
    """

    def __init__(self, input_dim, output_dim, hidden_activation=nn.Tanh):
        super(NodeNetwork, self).__init__()
        # Input-side aggregate, output-side aggregate and the node's own features
        stacked_dim = 3 * input_dim
        layers = [
            nn.Linear(stacked_dim, output_dim),
            hidden_activation(),
            nn.Linear(output_dim, output_dim),
            hidden_activation(),
        ]
        self.network = nn.Sequential(*layers)

    def forward(self, X, e, Ri, Ro):
        """Return the updated node features for every graph in the batch."""
        # Per-edge features of the node selected by Ro and by Ri
        edge_feats_o = torch.bmm(Ro.transpose(1, 2), X)
        edge_feats_i = torch.bmm(Ri.transpose(1, 2), X)
        # Insert an axis so the edge weights broadcast across the node axis
        edge_w = e.unsqueeze(1)
        # Weighted aggregation of the opposite end's features back onto the nodes
        msg_i = torch.bmm(Ri * edge_w, edge_feats_o)
        msg_o = torch.bmm(Ro * edge_w, edge_feats_i)
        return self.network(torch.cat([msg_i, msg_o, X], dim=2))

class SegmentClassifier(nn.Module):
    """
    Graph neural network that classifies the segments (edges) of a graph.

    An input network lifts the node features into a hidden representation,
    which is then refined by alternating applications of one shared edge
    network and one shared node network.
    """

    def __init__(self, input_dim=2, hidden_dim=8, n_iters=3, hidden_activation=nn.Tanh):
        super(SegmentClassifier, self).__init__()
        self.n_iters = n_iters
        # The edge and node networks see the hidden features with the raw inputs appended
        combined_dim = input_dim + hidden_dim
        # Input network
        self.input_network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim), hidden_activation())
        # Edge network
        self.edge_network = EdgeNetwork(combined_dim, hidden_dim, hidden_activation)
        # Node network
        self.node_network = NodeNetwork(combined_dim, hidden_dim, hidden_activation)

    def forward(self, inputs):
        """Run the model on (X, Ri, Ro) and return the final edge scores."""
        X, Ri, Ro = inputs

        def with_inputs(hidden):
            # Shortcut connection: append the raw inputs to the hidden features
            return torch.cat([hidden, X], dim=-1)

        # Initial hidden representation
        H = with_inputs(self.input_network(X))
        # Alternate edge scoring and node updates
        for _ in range(self.n_iters):
            edge_weights = self.edge_network(H, Ri, Ro)
            H = with_inputs(self.node_network(H, edge_weights, Ri, Ro))
        # The final pass of the edge network gives the output scores
        return self.edge_network(H, Ri, Ro)

In [16]:
# Model config
# Size of the hidden node representation
hidden_dim = 8
# Number of edge-network / node-network iterations in the forward pass
n_iters = 1

# Construct the model
# The input dimension is taken from the length of feature_scale (local `graph` module)
n_features = feature_scale.shape[0]
# Note: SegmentClassifier here is the class defined in this notebook, which
# shadows the one imported from the local `model` module.
model = SegmentClassifier(input_dim=n_features, hidden_dim=hidden_dim, n_iters=n_iters)

# Display the module structure
model

SegmentClassifier(
  (input_network): Sequential(
    (0): Linear(in_features=3, out_features=8)
    (1): Tanh()
  )
  (edge_network): EdgeNetwork(
    (network): Sequential(
      (0): Linear(in_features=22, out_features=8)
      (1): Tanh()
      (2): Linear(in_features=8, out_features=1)
      (3): Sigmoid()
    )
  )
  (node_network): NodeNetwork(
    (network): Sequential(
      (0): Linear(in_features=33, out_features=8)
      (1): Tanh()
      (2): Linear(in_features=8, out_features=8)
      (3): Tanh()
    )
  )
)

In [15]:
# Display the model's layer structure
model

SegmentClassifier(
  (input_network): Sequential(
    (0): Linear(in_features=3, out_features=8)
    (1): Tanh()
  )
  (edge_network): EdgeNetwork(
    (network): Sequential(
      (0): Linear(in_features=22, out_features=8)
      (1): Tanh()
      (2): Linear(in_features=8, out_features=1)
      (3): Sigmoid()
    )
  )
  (node_network): NodeNetwork(
    (network): Sequential(
      (0): Linear(in_features=33, out_features=8)
      (1): Tanh()
      (2): Linear(in_features=8, out_features=8)
      (3): Tanh()
    )
  )
)

## Prepare a single sample batch (w/o sparse rep)

In [19]:
# Convert the first loaded graph with graph_from_sparse (local `graph` module);
# the result's X, Ri, Ro and y attributes are used to build the batch below.
g = graph_from_sparse(graphs[0])

In [21]:
# Shape of the node feature matrix of this graph
g.X.shape

(75, 3)

In [27]:
# Indexing with [None] prepends an axis of length 1, so every array becomes
# a batch holding a single graph. volatile=True is passed through
# np_to_torch to the Variable constructor.
batch_X = np_to_torch(g.X[None], volatile=True)
batch_Ri = np_to_torch(g.Ri[None], volatile=True)
batch_Ro = np_to_torch(g.Ro[None], volatile=True)
batch_y = np_to_torch(g.y[None], volatile=True)

In [28]:
# SegmentClassifier.forward unpacks its argument in the order (X, Ri, Ro)
batch_inputs = [batch_X, batch_Ri, batch_Ro]

### Test forward pass of model

In [30]:
# Run the model once on the single-graph batch
batch_outputs = model(batch_inputs)

In [31]:
# Size of the model output (the final edge-network scores)
batch_outputs.size()

torch.Size([1, 64])

In [32]:
# Size of the target tensor, for comparison with the model output size
batch_y.size()

torch.Size([1, 64])

## Prepare batch with sparse rep

In [82]:
# Work directly with the first loaded graph, without the graph_from_sparse conversion
sg = graphs[0]
# Node count from the first axis of the feature matrix,
# edge count from the first axis of the label array
n_nodes = sg.X.shape[0]
n_edges = sg.y.shape[0]
n_nodes, n_edges

(75, 64)

In [107]:
# Stack the row and column index arrays of each association matrix into a
# single index tensor (row indices first, column indices second).
Ri_idx = torch.LongTensor(np.stack([sg.Ri_rows, sg.Ri_cols]))
Ro_idx = torch.LongTensor(np.stack([sg.Ro_rows, sg.Ro_cols]))
# One value of 1 per edge; the same values tensor is used for both matrices.
# NOTE(review): this assumes Ri and Ro each have exactly n_edges nonzero
# entries -- confirm against the SparseGraph definition.
R_val = torch.FloatTensor(np.ones((n_edges), np.uint8))
# Both sparse matrices are declared with size (n_nodes, n_edges)
R_size = torch.Size((n_nodes, n_edges))
Ri_sparse = torch.sparse.FloatTensor(Ri_idx, R_val, R_size)
Ro_sparse = torch.sparse.FloatTensor(Ro_idx, R_val, R_size)

In [108]:
# Try a basic matrix multiplication now
# Dense node features as a float32 Variable; no batch axis is added here
X = np_to_torch(sg.X, volatile=True)

In [109]:
# Size of the dense feature Variable
X.size()

torch.Size([75, 3])

In [110]:
# Size of the sparse association matrix; it matches R_size = (n_nodes, n_edges)
Ri_sparse.size()

torch.Size([75, 64])

In [112]:
# Wrap the sparse tensor in a Variable
Ri = Variable(Ri_sparse, volatile=True)

In [113]:
# Transposed-sparse x dense product with both operands wrapped in Variables.
# The transpose is taken on the underlying tensor (Ri.data) and the result is
# re-wrapped, rather than calling .t() on the Variable itself.
# With the PyTorch version this notebook was run on, the product raises a
# RuntimeError. The error is the point of the cell, so it is caught and
# printed, which lets the rest of the notebook run from a fresh kernel.
try:
    print(torch.matmul(Variable(Ri.data.t(), volatile=True), X))
except RuntimeError as err:
    print('RuntimeError: %s' % err)

RuntimeError: Expected object of type Variable[torch.sparse.FloatTensor] but found type Variable[torch.FloatTensor] for argument #1 'mat2'

In [117]:
# The same transposed-sparse x dense product on plain tensors, without any
# Variable wrapper; this form runs and returns a result.
torch.matmul(Ri_sparse.t(), torch.from_numpy(sg.X))


 0.0711  0.9412 -0.0785
 0.0712  0.1191 -0.2477
 0.0728  0.4440  0.2846
 0.0724  0.0888  0.1267
 0.0711 -0.0567  0.2096
 0.0724 -0.1605 -0.0438
 0.0726 -0.1782 -0.1080
 0.0717 -0.5060  0.0992
 0.0718 -0.7842  0.1394
 0.0718  0.9885 -0.4870
 0.0724  0.2019 -0.1206
 0.0718  0.6120  0.2349
 0.1157  0.9437 -0.1137
 0.1165  0.1215 -0.3912
 0.1155  0.4480  0.4649
 0.1168  0.0867  0.2179
 0.1160 -0.0528  0.3564
 0.1158 -0.1634 -0.0567
 0.1167 -0.1809 -0.1599
 0.1153 -0.5088  0.1731
 0.1162 -0.7871  0.2393
 0.1166  0.2055 -0.1806
 0.1155  0.6152  0.3915
 0.1715  0.9467 -0.1579
 0.1722  0.0840  0.3320
 0.1718 -0.1672 -0.0733
 0.1718 -0.1672 -0.0733
 0.1712 -0.1842 -0.2242
 0.1714 -0.5124  0.2682
 0.1730 -0.7909  0.3674
 0.1724  0.2101 -0.2564
 0.2593  0.9514 -0.2300
 0.2591  0.0798  0.5150
 0.2588 -0.1730 -0.1030
 0.2570 -0.1894 -0.3210
 0.2588 -0.1730 -0.1030
 0.2570 -0.1894 -0.3210
 0.2624 -0.5182  0.4240
 0.2565 -0.7965  0.5510
 0.2606  0.2173 -0.3760
 0.3605  0.9569 -0.3090
 0.3564  0.0751

In [119]:
# Same product through torch.mm, again with both operands wrapped in Variables.
# With the PyTorch version this notebook was run on, this raises a
# RuntimeError. The error is the point of the cell, so it is caught and
# printed, which lets the rest of the notebook run from a fresh kernel.
try:
    print(torch.mm(Variable(Ri_sparse.t()), Variable(torch.from_numpy(sg.X))))
except RuntimeError as err:
    print('RuntimeError: %s' % err)

RuntimeError: Expected object of type Variable[torch.sparse.FloatTensor] but found type Variable[torch.FloatTensor] for argument #1 'mat2'

## Simpler test

In [125]:
# Short alias for the Variable class, used in the minimal example below
V = torch.autograd.Variable

In [123]:
# Minimal 2x3 sparse matrix with three nonzero entries:
# (row 0, col 2) = 3, (row 1, col 0) = 4, (row 1, col 2) = 5
i = torch.LongTensor([[0, 1, 1],
                      [2, 0, 2]])
v = torch.FloatTensor([3, 4, 5])
x1 = torch.sparse.FloatTensor(i, v, torch.Size([2,3]))

In [127]:
# Dense 3x2 matrix of random normal values; no seed is set, so the values change between runs
x2 = torch.randn(3, 2)

In [128]:
# Sparse x dense product on plain tensors: this works
torch.mm(x1, x2)


-0.4517  5.4612
-9.3581  4.9100
[torch.FloatTensor of size 2x2]

In [129]:
# The same sparse x dense product with both operands wrapped in Variables.
# With the PyTorch version this notebook was run on, this raises a
# RuntimeError. The error is the point of the cell, so it is caught and
# printed, which lets the notebook run to the end from a fresh kernel.
try:
    print(torch.mm(V(x1), V(x2)))
except RuntimeError as err:
    print('RuntimeError: %s' % err)

RuntimeError: Expected object of type Variable[torch.sparse.FloatTensor] but found type Variable[torch.FloatTensor] for argument #1 'mat2'

## Discussion

Transpose is broken on sparse Variables. I saw a pull request that fixes it in PyTorch, but I'm not yet sure whether it has made it into a release. A bigger issue, perhaps, is that I can't do sparse-dense multiplication with Variables.

It seems this isn't supported yet; see:
https://github.com/pytorch/pytorch/issues/2389