## Imports

In [1]:
import torch
import torch.nn as nn
import torch_sparse

In [3]:
# Print the GPU model, driver and CUDA version of the machine running this notebook.
!nvidia-smi

Tue Dec 19 15:15:27 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 515.86.01    Driver Version: 515.86.01    CUDA Version: 11.7     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA A100 80G...  On   | 00000000:61:00.0 Off |                    0 |
| N/A   38C    P0    54W / 300W |      0MiB / 81920MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

## Parameters

In [4]:
# Problem size: a num_neurons x num_neurons matrix with synapses_per_neuron
# randomly placed entries per neuron.
num_neurons = 100_000
synapses_per_neuron = 10

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed PyTorch's random number generators so the randomly generated benchmark
# data is reproducible after Restart Kernel -> Run All.
SEED = 0
torch.manual_seed(SEED);  # trailing ';' hides the returned Generator repr

In [23]:
# Build a random sparse num_neurons x num_neurons matrix (in COO and CSR
# layouts) and a dense vector to multiply it with.

# Every neuron i contributes synapses_per_neuron entries to column i; the row
# of each entry is drawn uniformly at random. Everything is generated in one
# vectorized shot instead of a Python loop over all neurons.
num_synapses = num_neurons * synapses_per_neuron
synapses = torch.randint(0, num_neurons, (num_synapses,))
synapse_root = torch.arange(num_neurons).repeat_interleave(synapses_per_neuron)
# Row 0 holds the row indices, row 1 the column indices (COO convention).
indices = torch.stack((synapses, synapse_root)).to(device)
values = torch.randn(num_synapses).to(device)

# Random rows can repeat within a column; coalesce sorts the entries and merges
# such duplicates, so `indices`/`values` describe the same matrix as the
# tensors built below.
indices, values = torch_sparse.coalesce(indices, values, num_neurons, num_neurons)

# `indices` and `values` already live on `device`, so the tensors derived from
# them do too; no further transfer is needed.
coo_matrix = torch.sparse_coo_tensor(indices, values, (num_neurons, num_neurons)).coalesce()
csr_matrix = coo_matrix.to_sparse_csr()
# Dense baseline, disabled. NOTE(review): with the default float32 dtype a
# dense 100000 x 100000 matrix is about 40 GB.
# dense_matrix = coo_matrix.to_dense().to(device)
dense_vector = torch.randn(num_neurons).to(device)
# (num_neurons, 1) view of the same vector for the matrix-matrix APIs.
dense_vector_as_matrix = dense_vector[:, None]

## On GPU

In [12]:
%timeit torch.mv(coo_matrix, dense_vector)

64.5 µs ± 1.08 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [13]:
%timeit torch.mv(csr_matrix, dense_vector)

27.7 µs ± 7.2 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [14]:
%timeit torch.sparse.mm(coo_matrix, dense_vector_as_matrix)

59.1 µs ± 183 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [15]:
%timeit torch.sparse.mm(csr_matrix, dense_vector_as_matrix)

33.7 µs ± 65.7 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [16]:
%timeit torch_sparse.spmm(indices, values, num_neurons, num_neurons, dense_vector_as_matrix)

58.1 µs ± 902 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [11]:
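# Dense baseline (disabled): needs the commented-out `dense_matrix` construction to be enabled first.
# %timeit torch.mv(dense_matrix, dense_vector)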

## On CPU

In [17]:
# Rebind every benchmark operand to a host-memory copy so that the cells that
# follow time the CPU implementations.
coo_matrix, csr_matrix = coo_matrix.to('cpu'), csr_matrix.to('cpu')
# dense_matrix = dense_matrix.to('cpu')
dense_vector, dense_vector_as_matrix = (
    dense_vector.to('cpu'),
    dense_vector_as_matrix.to('cpu'),
)
indices, values = indices.to('cpu'), values.to('cpu')

In [18]:
# CPU: COO sparse matrix x dense vector via torch.mv.
%timeit torch.mv(coo_matrix, dense_vector)

17.1 ms ± 490 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
# CPU: CSR sparse matrix x dense vector via torch.mv.
%timeit torch.mv(csr_matrix, dense_vector)

1.23 ms ± 18.6 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [20]:
# CPU: COO sparse matrix x (num_neurons, 1) dense matrix via torch.sparse.mm.
%timeit torch.sparse.mm(coo_matrix, dense_vector_as_matrix)

18.3 ms ± 133 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [21]:
# CPU: CSR sparse matrix x (num_neurons, 1) dense matrix via torch.sparse.mm.
%timeit torch.sparse.mm(csr_matrix, dense_vector_as_matrix)

2.04 ms ± 110 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [22]:
# CPU: torch_sparse.spmm on the raw (indices, values) pair x (num_neurons, 1) dense matrix.
%timeit torch_sparse.spmm(indices, values, num_neurons, num_neurons, dense_vector_as_matrix)

8 ms ± 111 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
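# Dense baseline (disabled): needs the commented-out `dense_matrix` construction and CPU transfer to be enabled first.
# %timeit torch.mv(dense_matrix, dense_vector)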