# Pylops - CUDA basic linear operators

### Author: M.Ravasi

In this notebook we will experiment with Pytorch to assess its usability as backend for CUDA enabled operators

In [1]:
!pip install pylops

Collecting pylops
[?25l  Downloading https://files.pythonhosted.org/packages/ba/5a/dc9d93cd0f9ba3ea9a77c30c92865f07523ebf2fc391dff19aeca2f2b848/pylops-1.4.0-py3-none-any.whl (141kB)
[K     |████████████████████████████████| 143kB 9.5MB/s 
Installing collected packages: pylops
Successfully installed pylops-1.4.0


In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
%pylab inline

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
import scipy as sp

import pylops
from pylops.utils import dottest

import torch
import torch.nn as nn

Populating the interactive namespace from numpy and matplotlib


In [3]:
import torch

device = 'cpu'
if torch.cuda.device_count() > 0 and torch.cuda.is_available():
    print("Cuda installed! Running on GPU!")
    device = 'cuda'
else:
    print("No GPU available!")

Cuda installed! Running on GPU!


# Diagonal

In [51]:
nx = 100000
x = np.ones(nx)
Dop = pylops.Diagonal(np.arange(nx))
dottest(Dop, nx, nx, verb=True)

y  = Dop*x
y1 = Dop.H*x

Dot test passed, v^T(Opu)=-6482379.245740 - u^T(Op^Tv)=-6482379.245740


In [0]:
from pylops import LinearOperator

class Diagonal_cuda(LinearOperator):
    def __init__(self, diag, dtype='float64'):
        self.diag = diag
        self.dtype = np.dtype(dtype)
        self.explicit = False

    def _matvec(self, x):
        y = self.diag*x
        return y

    def _rmatvec(self, x):
        y = self.diag*x
        return y

In [53]:
x_cuda = torch.from_numpy(np.ones(nx, dtype=np.float32)).to(device)
diag_cuda = torch.from_numpy(np.arange(nx, dtype=np.float32)).to(device)
Dop_cuda = Diagonal_cuda(diag_cuda)
y_cuda = Dop_cuda._matvec(x_cuda)

print('x  = ',x_cuda[0:5])
print('D*x  = ',y[0:5])

x  =  tensor([1., 1., 1., 1., 1.], device='cuda:0')
D*x  =  [0. 1. 2. 3. 4.]


In [54]:
% timeit -n 10 Dop._matvec(x)
% timeit -n 10 Dop_cuda._matvec(x_cuda)

10 loops, best of 3: 276 µs per loop
10 loops, best of 3: 9.92 µs per loop
