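## Patching 2D

This notebook compares the previous `Patch2D` implementation (imported from `patch2dol` as `Patch2DOLD`) with the current `pylops.signalprocessing.Patch2D`, the latter both without and with a broadcasted `FFT2D` operator. It first checks that the forward and adjoint results of the implementations agree, and then times both passes (with `%timeit` on NumPy, or with `cupyx.profiler.benchmark` when `USE_CUPY = True`).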

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
# Suppress every warning for the rest of the session (presumably to keep the
# cell outputs below uncluttered).
# NOTE(review): this also hides deprecation warnings from the libraries being
# benchmarked -- consider narrowing the filter.
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import pylops

from pylops.signalprocessing.patch2d import patch2d_design
from pylops.utils.describe import describe

from pylops.signalprocessing import Patch2D
from patch2dol import Patch2D as Patch2DOLD

# Backend switch read by the following cells: when True, `np` is rebound to
# CuPy and timings use `cupyx.profiler.benchmark`; when False, NumPy and
# `%timeit` are used.
USE_CUPY = False

In [2]:
# Pick the array backend and define the dtype/conversion aliases that go with it.
if not USE_CUPY:
    # CPU path: `np` stays NumPy, so "convert to NumPy" is a plain asarray.
    np_float, np_floatc = np.float64, np.complex128
    np_asarray = np_asnumpy = np.asarray
    fftengine = 'scipy'
else:
    # GPU path: from this point on `np` refers to CuPy.
    import cupy as np
    from cupyx.profiler import benchmark
    np_float, np_floatc = np.float32, np.complex64
    np_asarray, np_asnumpy = np.asarray, np.asnumpy
    mempool = np.get_default_memory_pool()
    fftengine = 'numpy'

In [3]:
def bench_Op(Op, x):
    """Apply ``Op`` to ``x`` with the ``@`` operator and return the result.

    Parameters
    ----------
    Op : object supporting ``Op @ x``
    x : operand on the right-hand side of ``@``

    Returns
    -------
    The value of ``Op @ x``.
    """
    result = Op @ x
    return result

def bench_OpH(Op, x):
    """Apply ``Op.H`` to ``x`` with the ``@`` operator and return the result.

    Parameters
    ----------
    Op : object exposing an ``H`` attribute that supports ``Op.H @ x``
    x : operand on the right-hand side of ``@``

    Returns
    -------
    The value of ``Op.H @ x``.
    """
    adjoint = Op.H
    return adjoint @ x

In [4]:
# Taper options forwarded to the patching operators built in the next cell.
tapertype = 'cosine'  # passed as `tapertype` to both the old and new Patch2D
savetaper = True      # passed as `savetaper` to the new Patch2D only

In [5]:
# Patching geometry, passed positionally to patch2d_design and the Patch2D
# constructors. NOTE(review): presumably nwin is the data-side patch size,
# nover the overlap between patches and nop the model-side size of each patch
# (consistent with the start/end indices printed below) -- confirm against the
# pylops Patch2D documentation.
nwin = (42, 34)
nover = (10, 4)
nop = (64, 64)
#dimsd = (200, 200) # small
dimsd = (1000, 500) # large

# Seed the global RNG so that the random data (and hence x) are identical on
# every Restart & Run All; the seed value itself is arbitrary.
np.random.seed(0)
y = np.random.normal(0, 1, dimsd[0]*dimsd[1]).reshape(dimsd).astype(np_float)

# Derive the number of windows and the model dimensions for this geometry.
nwins, dims, mwin_inends, dwin_inends = patch2d_design(dimsd, nwin, nover, nop)

# no operator broadcast
# (Op is a single-patch FFT2D; the old and new Patch2D both wrap its adjoint)
Op = pylops.signalprocessing.FFT2D(nwin, nffts=nop, dtype=np_floatc)
Slid = Patch2DOLD(Op.H, dims, dimsd, nwin, nover, nop, tapertype=tapertype)
Slid1a = Patch2D(Op.H, dims, dimsd, nwin, nover, nop, tapertype=tapertype, savetaper=savetaper)

# with operator broadcast
# (Op is deliberately rebound here: the FFT2D is now defined over
# (*nwins, *nwin); Slid and Slid1a above keep the operator they were built with)
Op = pylops.signalprocessing.FFT2D((*nwins, *nwin), nffts=nop, dtype=np_floatc)
Slid1b = Patch2D(Op.H, dims, dimsd, nwin, nover, nop, tapertype=tapertype, savetaper=savetaper)

# Model vector used by the checks and timings below: adjoint of the old
# operator applied to the flattened data.
x = Slid.H * y.ravel()

 576 608 640 672 704 736 768 800 832 864 896 928], end:[ 42  74 106 138 170 202 234 266 298 330 362 394 426 458 490 522 554 586
 618 650 682 714 746 778 810 842 874 906 938 970] / start:[  0  30  60  90 120 150 180 210 240 270 300 330 360 390 420 450], end:[ 34  64  94 124 154 184 214 244 274 304 334 364 394 424 454 484]
  896  960 1024 1088 1152 1216 1280 1344 1408 1472 1536 1600 1664 1728
 1792 1856], end:[  64  128  192  256  320  384  448  512  576  640  704  768  832  896
  960 1024 1088 1152 1216 1280 1344 1408 1472 1536 1600 1664 1728 1792
 1856 1920] / start:[  0  64 128 192 256 320 384 448 512 576 640 704 768 832 896 960], end:[  64  128  192  256  320  384  448  512  576  640  704  768  832  896
  960 1024]


In [6]:
# For each new operator (without / with Op broadcast), print whether its
# forward result on x and its adjoint result on y match the old implementation.
for _new in (Slid1a, Slid1b):
    print(np.allclose(Slid @ x, _new @ x), np.allclose(Slid.H @ y, _new.H @ y))

True True
True True


In [7]:
if not USE_CUPY:
    %timeit -n 5 -r 50 Slid * x # OLD
    %timeit -n 5 -r 50 Slid1a * x # NEW
    %timeit -n 5 -r 50 Slid1b * x # NEW with Op broadcasted
else:
    print(benchmark(bench_Op, (Slid, x,), n_repeat=500))
    print(benchmark(bench_Op, (Slid1a, x,), n_repeat=500))
    print(benchmark(bench_Op, (Slid1b, x,), n_repeat=500))

92 ms ± 2.11 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)
47.4 ms ± 3.35 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)
31.2 ms ± 1.89 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)


In [8]:
if not USE_CUPY:
    %timeit -n 5 -r 50 Slid.H * y # OLD
    %timeit -n 5 -r 50 Slid1a.H * y # NEW
    %timeit -n 5 -r 50 Slid1b.H * y # NEW with Op broadcasted
else:
    print(benchmark(bench_OpH, (Slid, y,), n_repeat=500))
    print(benchmark(bench_OpH, (Slid1a, y,), n_repeat=500))
    print(benchmark(bench_OpH, (Slid1b, y,), n_repeat=500))

50.5 ms ± 2.17 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)
37.5 ms ± 2 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)
24.1 ms ± 1.03 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)
