## Sliding 2D

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

import numpy as np
import matplotlib.pyplot as plt
import scipy as sp
import pylops

from pympler import asizeof
from pylops.signalprocessing.sliding2d import sliding2d_design
from pylops.utils.describe import describe

from pylops.signalprocessing import Sliding2D
from sliding2dold import Sliding2D as Sliding2DOLD

USE_CUPY = False

In [2]:
# Select the array backend. When USE_CUPY is set, the name `np` is rebound to
# CuPy so that every later `np.*` call in the notebook runs on the GPU.
if USE_CUPY:
    import cupy as np
    # `benchmark` is called by the timing cells in their CuPy branch; it must
    # be imported here, otherwise those cells fail with a NameError.
    from cupyx.profiler import benchmark
    np_asarray = np.asarray
    np_asnumpy = np.asnumpy  # device -> host copy
    np_float = np.float32
    np_floatc = np.complex64
    mempool = np.get_default_memory_pool()
    fftengine = 'numpy'
else:
    np_asarray = np.asarray
    np_asnumpy = np.asarray  # already on host: plain asarray
    np_float = np.float64
    np_floatc = np.complex128
    fftengine = 'scipy'

In [3]:
def bench_Op(Op, x):
    """Apply ``Op`` to ``x`` with the ``@`` operator and return the result.

    Thin callable wrapper so the forward application can be handed to a
    benchmarking routine as ``func(*args)``.
    """
    forward = Op @ x
    return forward

def bench_OpH(Op, x):
    """Apply ``Op.H`` to ``x`` with the ``@`` operator and return the result.

    Thin callable wrapper so the application of ``Op.H`` can be handed to a
    benchmarking routine as ``func(*args)``.
    """
    OpH = Op.H
    adjoint = OpH @ x
    return adjoint

In [10]:
# Taper options forwarded to the Sliding2D constructors further down:
# `tapertype` goes to every operator, `savetaper` only to the new Sliding2D.
tapertype = 'cosine'
savetaper = False

In [11]:
# Problem size: `nwin` and `nover` are handed to sliding2d_design / Sliding2D
# as the window and overlap arguments; `nop` is the FFT length of each window.
nwin = 26
nover = 3
nop = 64
#dimsd = (300, 200) # small
dimsd = (3000, 200) # large

# Seed the global generator so a Restart & Run All reproduces the same input
# data (np.random.seed exists in both NumPy and CuPy).
np.random.seed(0)
y = np.random.normal(0, 1, dimsd[0]*dimsd[1]).reshape(dimsd).astype(np_float)

# Number of windows and model-space dimensions for the chosen windowing; with
# real=True the last FFT axis keeps only (nop + 2) // 2 samples.
nwins, dims, _, _ = sliding2d_design(dimsd, nwin, nover, (nop, (nop + 2) // 2))

# no operator broadcast: one FFT2D sized for a single window, used by both the
# legacy and the new Sliding2D
Op = pylops.signalprocessing.FFT2D((nwin, dimsd[1]), nffts=(nop, nop),
                                   real=True, dtype=np_floatc)
Slid = Sliding2DOLD(Op.H, dims, dimsd, nwin, nover, tapertype=tapertype)
Slid1a = Sliding2D(Op.H, dims, dimsd, nwin, nover, tapertype=tapertype, savetaper=savetaper)

# with operator broadcast: a single FFT2D acting on all `nwins` windows at once
# along its last two axes. dtype is passed explicitly so this operator uses the
# same complex precision as the non-broadcast one for every backend.
Op = pylops.signalprocessing.FFT2D((nwins, nwin, dimsd[1]), nffts=(nop, nop),
                                   axes=(-2, -1), real=True, dtype=np_floatc)
Slid1b = Sliding2D(Op.H, dims, dimsd, nwin, nover, tapertype=tapertype, savetaper=savetaper)

# Model vector used as input for the forward benchmarks
x = Slid.H * y.ravel()

  322  345  368  391  414  437  460  483  506  529  552  575  598  621
  644  667  690  713  736  759  782  805  828  851  874  897  920  943
  966  989 1012 1035 1058 1081 1104 1127 1150 1173 1196 1219 1242 1265
 1288 1311 1334 1357 1380 1403 1426 1449 1472 1495 1518 1541 1564 1587
 1610 1633 1656 1679 1702 1725 1748 1771 1794 1817 1840 1863 1886 1909
 1932 1955 1978 2001 2024 2047 2070 2093 2116 2139 2162 2185 2208 2231
 2254 2277 2300 2323 2346 2369 2392 2415 2438 2461 2484 2507 2530 2553
 2576 2599 2622 2645 2668 2691 2714 2737 2760 2783 2806 2829 2852 2875
 2898 2921 2944 2967], end:[  26   49   72   95  118  141  164  187  210  233  256  279  302  325
  348  371  394  417  440  463  486  509  532  555  578  601  624  647
  670  693  716  739  762  785  808  831  854  877  900  923  946  969
  992 1015 1038 1061 1084 1107 1130 1153 1176 1199 1222 1245 1268 1291
 1314 1337 1360 1383 1406 1429 1452 1475 1498 1521 1544 1567 1590 1613
 1636 1659 1682 1705 1728 1751 1774 1797 1820 1843

In [12]:
# Results of the legacy operator serve as the reference for both new variants
forward_ref = Slid @ x
adjoint_ref = Slid.H @ y

# One line per candidate: (forward matches, adjoint matches)
for candidate in (Slid1a, Slid1b):
    print(np.allclose(forward_ref, candidate @ x), np.allclose(adjoint_ref, candidate.H @ y))

True True
True True


In [7]:
if not USE_CUPY:
    %timeit -n 5 -r 50 Slid * x # OLD
    %timeit -n 5 -r 50 Slid1a * x # NEW
    %timeit -n 5 -r 50 Slid1b * x # NEW with Op broadcasted
else:
    print(benchmark(bench_Op, (Slid, x,), n_repeat=500))
    print(benchmark(bench_Op, (Slid1a, x,), n_repeat=500))
    print(benchmark(bench_Op, (Slid1b, x,), n_repeat=500))

122 ms ± 2.41 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)
16.1 ms ± 1.33 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)
8.36 ms ± 1.03 ms per loop (mean ± std. dev. of 50 runs, 5 loops each)


In [8]:
if not USE_CUPY:
    %timeit -n 5 -r 50 Slid.H * y # OLD
    %timeit -n 5 -r 50 Slid1a.H * y # NEW
    %timeit -n 5 -r 50 Slid1b.H * y # NEW with Op broadcasted
else:
    print(benchmark(bench_OpH, (Slid, y,), n_repeat=500))
    print(benchmark(bench_OpH, (Slid1a, y,), n_repeat=500))
    print(benchmark(bench_OpH, (Slid1b, y,), n_repeat=500))

11.1 ms ± 335 µs per loop (mean ± std. dev. of 50 runs, 5 loops each)
8.26 ms ± 305 µs per loop (mean ± std. dev. of 50 runs, 5 loops each)
4.05 ms ± 163 µs per loop (mean ± std. dev. of 50 runs, 5 loops each)


In [9]:
# Deep size of each operator as reported by pympler, scaled by 1e-6:
# legacy, new, new with broadcast Op
print(*(asizeof.asizeof(operator) * 1e-6 for operator in (Slid, Slid1a, Slid1b)))

0.576624 5.418712 5.418736
