From 1df557e1c2bd87e35581b1029cd9571262ce4e28 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 9 May 2024 10:21:18 -0400 Subject: [PATCH 01/65] add simple basis benchmark and plotting script [skip ci] --- bbenchmark/bbenchmark.py | 57 ++++++++++++++++++++++++++++++ bbenchmark/benchmark_gpu0.pkl | Bin 0 -> 307 bytes bbenchmark/benchmark_host.pkl | Bin 0 -> 307 bytes bbenchmark/plot_bb.py | 64 ++++++++++++++++++++++++++++++++++ 4 files changed, 121 insertions(+) create mode 100644 bbenchmark/bbenchmark.py create mode 100644 bbenchmark/benchmark_gpu0.pkl create mode 100644 bbenchmark/benchmark_host.pkl create mode 100644 bbenchmark/plot_bb.py diff --git a/bbenchmark/bbenchmark.py b/bbenchmark/bbenchmark.py new file mode 100644 index 0000000000..6c6320c101 --- /dev/null +++ b/bbenchmark/bbenchmark.py @@ -0,0 +1,57 @@ +import os +import pickle +from pprint import pprint +from time import perf_counter, time + +import matplotlib.pyplot as plt +import numpy as np +from aspire.basis import FFBBasis2D, FLEBasis2D +from aspire.downloader import emdb_2660 +from aspire.noise import WhiteNoiseAdder +from aspire.source import ArrayImageSource, Simulation + +# Download and cache volume map +vol = emdb_2660().astype(np.float64) # doubles +cached_image_fn = "simulated_images.npy" + +if os.path.exists(cached_image_fn): + print(f"Loading cached image source from {cached_image_fn}.") + sim = ArrayImageSource(np.load(cached_image_fn)) +else: + print("Generating Simulated Datatset") + sim = Simulation( + n=512, C=1, vols=vol, noise_adder=WhiteNoiseAdder.from_snr(0.1) + ).cache() + print(f"Saving to {cached_image_fn}") + np.save(cached_image_fn, sim.images[:].asnumpy()) + + +TIMES = {} +for L in [32, 64, 128, 256]: + print(f"Begin L={L}") + src = sim.downsample(L) + imgs = src.images[:] + TIMES[L] = {} + for basis_type in [FFBBasis2D, FLEBasis2D]: + # Construct basis + TIMES[L][basis_type.__name__] = {} + basis = basis_type(L, dtype=src.dtype) + + # Time expanding into basis + tic = perf_counter() + coef = basis.evaluate_t(imgs) + toc = perf_counter() + TIMES[L][basis_type.__name__]["evaluate_t"] = toc - tic + + # Time expanding back into images + tic = perf_counter() + _ = coef.evaluate() + toc = perf_counter() + TIMES[L][basis_type.__name__]["evaluate"] = toc - tic + + +pprint(TIMES) + + +with open(f"benchmark_{int(time())}.pkl", "wb") as fh: + pickle.dump(TIMES, fh) diff --git a/bbenchmark/benchmark_gpu0.pkl b/bbenchmark/benchmark_gpu0.pkl new file mode 100644 index 0000000000000000000000000000000000000000..e702dd442dced27acb26fcc3b3e395ce9f465585 GIT binary patch literal 307 zcmZo*nX19a00y;FG`tmnL=Tsno0C&wab~fR%M>s_wJb5GG_fQ#zGRBK{fB=m-#lPo z=;45g0>v(0*=`CnqZFvs#}!Fy28+7`_dcgM4hDt{R(A)1g!$SUKxL)g4nT7=m_P)J zyZseA`*#jt74}bVm#$9$s>oo2$TFJo@? J0igC$Jpf$(W#0e* literal 0 HcmV?d00001 diff --git a/bbenchmark/benchmark_host.pkl b/bbenchmark/benchmark_host.pkl new file mode 100644 index 0000000000000000000000000000000000000000..dc0dd2a1769fc52c9470986e74eb64864f59e7fe GIT binary patch literal 307 zcmZo*nX19a00y;FG`tmnL=Tsno0C&wab~fR%M>s_wJb5GG_fQ#zGRBK{l>F_wm|hg z957L!*k;SlN}yONP^*tClGY3scLzS3waO9<3>mEM4m>kTmTLf&m3lh>&COr}5iIWZ zmm4fH4}ewJUv3m%3ovVBHPKy1(9iTm;ktG~fPnyM$W- zvToDPIg_qJbek#on}>jO`!X;hX?O6pRZ8-OC=s5uZo?jA?UmgRJ_s6sonHA^-k?wb IsJ&DV0PQbeIsgCw literal 0 HcmV?d00001 diff --git a/bbenchmark/plot_bb.py b/bbenchmark/plot_bb.py new file mode 100644 index 0000000000..05f5350f4b --- /dev/null +++ b/bbenchmark/plot_bb.py @@ -0,0 +1,64 @@ +import os +import pickle +from pprint import pprint + +import matplotlib.pyplot as plt +import numpy as np + +host_fn = "benchmark_host.pkl" +gpu_fn = "benchmark_gpu0.pkl" + + +with open(host_fn, "rb") as fh: + host_times = pickle.load(fh) + +with open(gpu_fn, "rb") as fh: + gpu_times = pickle.load(fh) + +markers = {"FFBBasis2D": "8", "FLEBasis2D": "s"} + +# Evaluate_t +Ls = list(host_times.keys()) +for basis_type in markers.keys(): + plt.plot( + Ls, + [host_times[L][basis_type]["evaluate_t"] for L in Ls], + marker=markers[basis_type], + color="blue", + label=basis_type + "-host", + ) + plt.plot( + Ls, + [gpu_times[L][basis_type]["evaluate_t"] for L in Ls], + marker=markers[basis_type], + color="green", + label=basis_type + "-gpu", + ) +plt.title("Basis `evaluate_t` Permformance - Batch of 512 Images") +plt.xlabel("Image Pixel L (LxL)") +plt.ylabel("Time (seconds)") +plt.legend() +plt.savefig("evaluate_t.png") +plt.show() + +for basis_type in markers.keys(): + plt.plot( + Ls, + [host_times[L][basis_type]["evaluate"] for L in Ls], + marker=markers[basis_type], + color="blue", + label=basis_type + "-host", + ) + plt.plot( + Ls, + [gpu_times[L][basis_type]["evaluate"] for L in Ls], + marker=markers[basis_type], + color="green", + label=basis_type + "-gpu", + ) +plt.title("Basis `evaluate` Permformance - Batch of 512 Images") +plt.xlabel("Image Pixel L (LxL)") +plt.ylabel("Time (seconds)") +plt.legend() +plt.savefig("evaluate.png") +plt.show() From 3a9e509aa43952403bf12c1f3e732fd2a4a1dcd2 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 4 Jun 2024 10:26:19 -0400 Subject: [PATCH 02/65] convert cufinufft towards cupy, keeping result on dvice --- pyproject.toml | 10 ++++----- src/aspire/nufft/cufinufft.py | 40 +++++++++++++++++------------------ 2 files changed, 25 insertions(+), 25 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3cd57981ef..fcc0f7cf4f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,11 +61,11 @@ dependencies = [ "Source" = "https://github.com/ComputationalCryoEM/ASPIRE-Python" [project.optional-dependencies] -gpu-102 = ["pycuda", "cupy-cuda102", "cufinufft==1.3"] -gpu-110 = ["pycuda", "cupy-cuda110", "cufinufft==1.3"] -gpu-111 = ["pycuda", "cupy-cuda111", "cufinufft==1.3"] -gpu-11x = ["pycuda", "cupy-cuda11x", "cufinufft==1.3"] -gpu-12x = ["pycuda", "cupy-cuda12x", "cufinufft==2.2.0"] +gpu-102 = ["cupy-cuda102", "cufinufft==1.3"] +gpu-110 = ["cupy-cuda110", "cufinufft==1.3"] +gpu-111 = ["cupy-cuda111", "cufinufft==1.3"] +gpu-11x = ["cupy-cuda11x", "cufinufft==1.3"] +gpu-12x = ["cupy-cuda12x", "cufinufft==2.2.0"] dev = [ "black", "bumpversion", diff --git a/src/aspire/nufft/cufinufft.py b/src/aspire/nufft/cufinufft.py index 465c0b23f9..2dceb08b80 100644 --- a/src/aspire/nufft/cufinufft.py +++ b/src/aspire/nufft/cufinufft.py @@ -1,9 +1,7 @@ import logging +import cupy as cp import numpy as np -import pycuda.autoinit # noqa: F401 -import pycuda.driver as cuda # noqa: F401 -import pycuda.gpuarray as gpuarray # noqa: F401 from cufinufft import Plan as cufPlan from aspire.nufft import Plan @@ -85,7 +83,7 @@ def __init__(self, sz, fourier_pts, epsilon=1e-8, ntransforms=1, **kwargs): # Note, I store self.fourier_pts_gpu so the GPUArrray life # is tied to instance, instead of this method. - self.fourier_pts_gpu = gpuarray.to_gpu(self.fourier_pts) + self.fourier_pts_gpu = cp.array(self.fourier_pts) self._transform_plan.setpts(*self.fourier_pts_gpu) self._adjoint_plan.setpts(*self.fourier_pts_gpu) @@ -99,7 +97,7 @@ def transform(self, signal): For a batch, signal should have shape `(*sz, ntransforms)`. :returns: Transformed signal of shape `num_pts` or - `(ntransforms, num_pts)`. + `(ntransforms, num_pts)` as CuPy array. """ # Check we're not forcing a dtype workaround for ASPIRE-Python/703, @@ -113,6 +111,8 @@ def transform(self, signal): " In the future this will be an error." ) + signal = cp.asarray(signal, dtype=self.complex_dtype) + sig_shape = signal.shape res_shape = self.num_pts # Note, there is a corner case for ntransforms == 1. @@ -134,17 +134,16 @@ def transform(self, signal): sig_shape == self.sz ), f"Signal frame to be transformed must have shape {self.sz}" - signal_gpu = gpuarray.to_gpu( - np.ascontiguousarray(signal, dtype=self.complex_dtype) - ) + result = cp.empty(res_shape, dtype=self.complex_dtype) - result_gpu = gpuarray.GPUArray(res_shape, dtype=self.complex_dtype) + if signal.dtype != self.complex_dtype: + signal = signal.astype(self.complex_dtype) - self._transform_plan.execute(signal_gpu, out=result_gpu) + self._transform_plan.execute(signal, out=result) - result = result_gpu.get() # ASPIRE-Python/703 - result = result.astype(complex_type(self._original_dtype), copy=False) + if result.dtype != complex_type(self._original_dtype): + result = result.astype(complex_type(self._original_dtype)) return result @@ -156,7 +155,7 @@ def adjoint(self, signal): this should be a a 1D array of len `num_pts`. For a batch, signal should have shape `(ntransforms, num_pts)`. - :returns: Transformed signal `(sz)` or `(sz, ntransforms)`. + :returns: Transformed signal `(sz)` or `(sz, ntransforms)` as CuPy array. """ # Check we're not forcing a dtype workaround for ASPIRE-Python/703, @@ -170,6 +169,8 @@ def adjoint(self, signal): " In the future this will be an error." ) + signal = cp.asarray(signal, dtype=self.complex_dtype) + res_shape = self.sz # Note, there is a corner case for ntransforms == 1. if self.ntransforms > 1 or (self.ntransforms == 1 and len(signal.shape) == 2): @@ -181,16 +182,15 @@ def adjoint(self, signal): ), "For multiple transforms, signal stack length should match ntransforms {self.ntransforms}." res_shape = (self.ntransforms, *self.sz) - signal_gpu = gpuarray.to_gpu( - np.ascontiguousarray(signal, dtype=self.complex_dtype) - ) + result = cp.empty(res_shape, dtype=self.complex_dtype) - result_gpu = gpuarray.GPUArray(res_shape, dtype=self.complex_dtype) + if signal.dtype != self.complex_dtype: + signal = signal.astype(self.complex_dtype) - self._adjoint_plan.execute(signal_gpu, out=result_gpu) + self._adjoint_plan.execute(signal, out=result) - result = result_gpu.get() # ASPIRE-Python/703 - result = result.astype(complex_type(self._original_dtype), copy=False) + if result.dtype != complex_type(self._original_dtype): + result = result.astype(complex_type(self._original_dtype)) return result From 4269f10ab3c88d2c10782e04187ea6260608c1b1 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 4 Jun 2024 10:49:11 -0400 Subject: [PATCH 03/65] convert anufft and nufft towards detecting whether to keep array on gpu [skip ci] --- src/aspire/nufft/__init__.py | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/src/aspire/nufft/__init__.py b/src/aspire/nufft/__init__.py index aa7c3a4adf..f748a10d84 100644 --- a/src/aspire/nufft/__init__.py +++ b/src/aspire/nufft/__init__.py @@ -2,6 +2,11 @@ import numpy as np +try: + import cupy as cp +except ModuleNotFoundError: + cp = None + from aspire import config from aspire.utils import LogFilterByCount, complex_type, real_type @@ -152,6 +157,9 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): Selects best available package from `nfft` `backends` configuration list. + When sig_f is provided as a CuPy gpu array with a cufinufft + backend, result is maintained on GPU. + :param sig_f: Array representing the signal(s) in Fourier space to be transformed. \ sig_f either matches length of fourier_pts or sig_f.shape is stack of (`ntransforms`, ...). :param fourier_pts: The points in Fourier space where the Fourier transform is to be calculated, @@ -162,6 +170,10 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): """ + on_gpu = False + if cp and isinstance(sig_f, cp.ndarray): + on_gpu = True + if fourier_pts.dtype != real_type(sig_f.dtype): raise RuntimeError( "anufft passed inconsistent dtypes." @@ -181,7 +193,13 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): sz=sz, fourier_pts=fourier_pts, ntransforms=ntransforms, epsilon=epsilon ) adjoint = plan.adjoint(sig_f) - return np.real(adjoint) if real else adjoint + + adjoint = adjoint.real if real else adjoint + + if not on_gpu: + adjoint = adjoint.get() + + return adjoint def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): @@ -191,6 +209,9 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): Selects best available package from `nfft` `backends` configuration list. + When sig_f is provided as a CuPy gpu array with a cufinufft + backend, result is maintained on GPU. + :param sig_f: Array representing the signal(s) in real space to be transformed. \ sig_f either matches `sz` or sig_f.shape is stack of (..., `ntransforms`). :param fourier_pts: The points in Fourier space where the Fourier transform is to be calculated, @@ -200,6 +221,10 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): """ + on_gpu = False + if cp and isinstance(sig_f, cp.ndarray): + on_gpu = True + if fourier_pts.dtype != real_type(sig_f.dtype): raise RuntimeError( "nufft passed inconsistent dtypes." @@ -229,4 +254,10 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): sz=sz, fourier_pts=fourier_pts, ntransforms=ntransforms, epsilon=epsilon ) transform = plan.transform(sig_f) - return np.real(transform) if real else transform + + transform = transform.real if real else transform + + if not on_gpu: + transform = transform.get() + + return transform From 99d60d1883975f5df944cf563cd277d7e47a5961 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 10:17:06 -0400 Subject: [PATCH 04/65] whitespace --- bbenchmark/bbenchmark.py | 1 + 1 file changed, 1 insertion(+) diff --git a/bbenchmark/bbenchmark.py b/bbenchmark/bbenchmark.py index 6c6320c101..01aac6e7eb 100644 --- a/bbenchmark/bbenchmark.py +++ b/bbenchmark/bbenchmark.py @@ -5,6 +5,7 @@ import matplotlib.pyplot as plt import numpy as np + from aspire.basis import FFBBasis2D, FLEBasis2D from aspire.downloader import emdb_2660 from aspire.noise import WhiteNoiseAdder From 967e3da6eae3b03c1d4d6fee308bf544e9157317 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 10:16:54 -0400 Subject: [PATCH 05/65] add sparse cupy gpu wrapper and tests for methods in use --- src/aspire/numeric/__init__.py | 19 ++++++++++++ tests/test_numeric_sparse.py | 54 ++++++++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+) create mode 100644 tests/test_numeric_sparse.py diff --git a/src/aspire/numeric/__init__.py b/src/aspire/numeric/__init__.py index d298f131e4..95283e5d87 100644 --- a/src/aspire/numeric/__init__.py +++ b/src/aspire/numeric/__init__.py @@ -35,3 +35,22 @@ def fft_object(which): fft = fft_object(config["common"]["fft"].as_str()) + + +# Configure `sparse` in tandem with `numeric` as the arrays generally will need to interoperate. +def sparse_object(which): + if which == "cupy": + from cupyx.scipy import sparse as SparseClass + + # CuPy imports don't work the same as scipy + from cupyx.scipy.sparse.linalg import eigsh + + SparseClass.linalg.eigsh = eigsh + elif which == "numpy": + from scipy import sparse as SparseClass + else: + raise RuntimeError(f"Invalid selection for sparse module: {which}") + return SparseClass + + +sparse = sparse_object(config["common"]["numeric"].as_str()) diff --git a/tests/test_numeric_sparse.py b/tests/test_numeric_sparse.py new file mode 100644 index 0000000000..3964419e21 --- /dev/null +++ b/tests/test_numeric_sparse.py @@ -0,0 +1,54 @@ +import numpy as np +import pytest + +from aspire.numeric import numeric_object, sparse_object + +# If cupy is not available, skip this entire test module +pytest.importorskip("cupy") + +NUMERICS = ["numpy", "cupy"] + + +@pytest.fixture(params=NUMERICS, ids=lambda x: f"{x}", scope="module") +def backends(request): + xp = numeric_object(request.param) + sparse = sparse_object(request.param) + return xp, sparse + + +def test_csr_matrix(backends): + """ + Create csr_matrix and multiply with an `xp` array. + """ + xp, sparse = backends + + m, n = 10, 10 + jdx = xp.arange(10) + idx = xp.arange(10) + vals = xp.random.random(10) + + # Compute dense matmul + _A = np.diag(xp.asnumpy(vals)) + _B = np.random.random((10, 20)) + _C = _A @ _B + + # Compute matmul using sparse csr + A = sparse.csr_matrix((vals, (jdx, idx)), shape=(m, n), dtype=np.float64) + B = xp.array(_B) + C = A @ B + + # Compare + np.testing.assert_allclose(_C, xp.asnumpy(C)) + + +def test_eigsh(backends): + """ + Invoke sparse eigsh call with `xp` arrays. + """ + xp, sparse = backends + + A = xp.eye(123) + + lamb, _ = sparse.linalg.eigsh(A) + np.testing.assert_allclose(xp.asnumpy(lamb), 1.0) + print(lamb) From f131f73cd45eb93e65f20a18089011e5bfb3a5b1 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 15:11:20 -0400 Subject: [PATCH 06/65] fixup mn --- tests/test_numeric_sparse.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_numeric_sparse.py b/tests/test_numeric_sparse.py index 3964419e21..45c28bef31 100644 --- a/tests/test_numeric_sparse.py +++ b/tests/test_numeric_sparse.py @@ -23,13 +23,13 @@ def test_csr_matrix(backends): xp, sparse = backends m, n = 10, 10 - jdx = xp.arange(10) - idx = xp.arange(10) + jdx = xp.arange(m) + idx = xp.arange(n) vals = xp.random.random(10) # Compute dense matmul _A = np.diag(xp.asnumpy(vals)) - _B = np.random.random((10, 20)) + _B = np.random.random((n, 20)) _C = _A @ _B # Compute matmul using sparse csr From 1afb05811e4d2df84fbb0c3add5cf1965f8bb174 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 10:46:35 -0400 Subject: [PATCH 07/65] first pass migrating FLE to cupy via xp --- src/aspire/basis/fle_2d.py | 37 ++++++++++++++++++-------------- src/aspire/basis/fle_2d_utils.py | 16 ++++++++------ src/aspire/config_default.yaml | 2 +- 3 files changed, 31 insertions(+), 24 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index 423d37c093..ce6f04d3a2 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -1,7 +1,6 @@ import logging import numpy as np -import scipy.sparse as sparse from scipy.fft import dct, idct from scipy.special import jv @@ -13,7 +12,7 @@ transform_complex_to_real, ) from aspire.nufft import anufft, nufft -from aspire.numeric import fft +from aspire.numeric import fft, sparse, xp from aspire.operators import DiagMatrix from aspire.utils import complex_type, grid_2d @@ -440,7 +439,7 @@ def _create_basis_functions(self): """ Generate the actual basis functions as Python lambda operators """ - norm_constants = np.zeros(self.count) + norm_constants = xp.zeros(self.count) basis_functions = [None] * self.count for i in range(self.count): # parameters defining the basis function: bessel order and which bessel root @@ -537,13 +536,14 @@ def _step2_t(self, z): num_img = z.shape[0] # Compute FFT along angular nodes betas = fft.fft(z, axis=2) / self.num_angular_nodes + betas = xp.asarray(betas) # RM betas = betas[:, :, self.nus] - betas = np.conj(betas) - betas = np.swapaxes(betas, 0, 2) + betas = betas.conj() + betas = betas.swapaxes(0, 2) betas = betas.reshape(-1, self.num_radial_nodes * num_img) betas = self.c2r_nus @ betas betas = betas.reshape(-1, self.num_radial_nodes, num_img) - betas = np.real(np.swapaxes(betas, 0, 2)) + betas = betas.swapaxes(0, 2).real return betas def _step3_t(self, betas): @@ -554,18 +554,20 @@ def _step3_t(self, betas): """ num_img = betas.shape[0] if self.num_interp > self.num_radial_nodes: + betas = xp.asnumpy(betas) betas = dct(betas, axis=1, type=2) / (2 * self.num_radial_nodes) zeros = np.zeros(betas.shape) betas = np.concatenate((betas, zeros), axis=1) betas = idct(betas, axis=1, type=2) * 2 * betas.shape[1] - betas = np.moveaxis(betas, 0, -1) + betas = xp.asarray(betas) + betas = xp.moveaxis(betas, 0, -1) - coefs = np.zeros((self.count, num_img), dtype=np.float64) + coefs = xp.zeros((self.count, num_img), dtype=np.float64) for i in range(self.ell_p_max + 1): coefs[self.idx_list[i]] = self.A3[i] @ betas[:, i, :] coefs = coefs.T - return coefs * self.norm_constants / self.h + return xp.asnumpy(coefs * self.norm_constants / self.h) def _step3(self, coefs): """ @@ -574,19 +576,20 @@ def _step3(self, coefs): Uses barycenteric interpolation in reverse to compute values of Betas at Chebyshev nodes, given an array of FLE coefficients. """ - coefs = coefs.copy().reshape(-1, self.count) + coefs = xp.asarray(coefs.reshape(-1, self.count)) num_img = coefs.shape[0] coefs *= self.h * self.norm_constants coefs = coefs.T - out = np.zeros( + out = xp.zeros( (self.num_interp, 2 * self.max_ell + 1, num_img), dtype=np.float64, ) for i in range(self.ell_p_max + 1): out[:, i, :] = self.A3_T[i] @ coefs[self.idx_list[i]] - out = np.moveaxis(out, -1, 0) + out = xp.moveaxis(out, -1, 0) if self.num_interp > self.num_radial_nodes: + out = xp.asnumpy(out) # RM out = dct(out, axis=1, type=2) out = out[:, : self.num_radial_nodes, :] out = idct(out, axis=1, type=2) @@ -599,19 +602,21 @@ def _step2(self, betas): to images). Uses the IFFT to convert Beta values into Fourier-space images. """ + betas = xp.asarray(betas) num_img = betas.shape[0] - tmp = np.zeros( + tmp = xp.zeros( (num_img, self.num_radial_nodes, self.num_angular_nodes), dtype=np.complex128, ) - betas = np.swapaxes(betas, 0, 2) + betas = betas.swapaxes(0, 2) betas = betas.reshape(-1, self.num_radial_nodes * num_img) betas = self.r2c_nus @ betas betas = betas.reshape(-1, self.num_radial_nodes, num_img) - betas = np.swapaxes(betas, 0, 2) + betas = betas.swapaxes(0, 2) - tmp[:, :, self.nus] = np.conj(betas) + tmp[:, :, self.nus] = betas.conj() + tmp = xp.asnumpy(tmp) # rm z = fft.ifft(tmp, axis=2) return z diff --git a/src/aspire/basis/fle_2d_utils.py b/src/aspire/basis/fle_2d_utils.py index cde0cd11bf..23d1441a68 100644 --- a/src/aspire/basis/fle_2d_utils.py +++ b/src/aspire/basis/fle_2d_utils.py @@ -1,5 +1,6 @@ import numpy as np -import scipy.sparse as sparse + +from aspire.numeric import sparse, xp def transform_complex_to_real(B, ells): @@ -43,9 +44,9 @@ def precomp_transform_complex_to_real(ells): """ count = len(ells) num_nonzero = np.sum(ells == 0) + 2 * np.sum(ells != 0) - idx = np.zeros(num_nonzero, dtype=int) - jdx = np.zeros(num_nonzero, dtype=int) - vals = np.zeros(num_nonzero, dtype=np.complex128) + idx = xp.zeros(num_nonzero, dtype=int) + jdx = xp.zeros(num_nonzero, dtype=int) + vals = xp.zeros(num_nonzero, dtype=np.complex128) k = 0 for i in range(count): @@ -190,9 +191,10 @@ def barycentric_interp_sparse(target_points, known_points, numsparse): # note that const cancels in numerator and denominator vals = vals / denom.reshape(-1, 1) - vals = vals.flatten() - idx = idx.flatten() - jdx = jdx.flatten() + # TODO, migrate more of this method towards `xp` + vals = xp.array(vals.flatten()) + idx = xp.array(idx.flatten()) + jdx = xp.array(jdx.flatten()) # A is the linear operator mapping the function values from the fixed source # points to the fixed target points. # A(i,j) = \ell(x[i] ) w_j/(x[i] - xs[j]), with the notation in Eq. 3.3 diff --git a/src/aspire/config_default.yaml b/src/aspire/config_default.yaml index def78983c0..26176a97ac 100644 --- a/src/aspire/config_default.yaml +++ b/src/aspire/config_default.yaml @@ -1,7 +1,7 @@ version: 0.12.3 common: # numeric module to use - one of numpy/cupy - numeric: numpy + numeric: cupy # fft backend to use - one of pyfftw/scipy/cupy/mkl fft: scipy From 257d69f94d157a230647211fc50eba256ecd4d63 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 11:02:32 -0400 Subject: [PATCH 08/65] add dct/idct to pyfftw, scipy, cupy wrappers --- src/aspire/basis/fle_2d.py | 13 ++++++------- src/aspire/numeric/cupy_fft.py | 6 ++++++ src/aspire/numeric/pyfftw_fft.py | 6 ++++++ src/aspire/numeric/scipy_fft.py | 6 ++++++ 4 files changed, 24 insertions(+), 7 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index ce6f04d3a2..5675becf86 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -1,7 +1,6 @@ import logging import numpy as np -from scipy.fft import dct, idct from scipy.special import jv from aspire.basis import Coef, FBBasisMixin, SteerableBasis2D @@ -555,10 +554,10 @@ def _step3_t(self, betas): num_img = betas.shape[0] if self.num_interp > self.num_radial_nodes: betas = xp.asnumpy(betas) - betas = dct(betas, axis=1, type=2) / (2 * self.num_radial_nodes) + betas = fft.dct(betas, axis=1, type=2) / (2 * self.num_radial_nodes) zeros = np.zeros(betas.shape) betas = np.concatenate((betas, zeros), axis=1) - betas = idct(betas, axis=1, type=2) * 2 * betas.shape[1] + betas = fft.idct(betas, axis=1, type=2) * 2 * betas.shape[1] betas = xp.asarray(betas) betas = xp.moveaxis(betas, 0, -1) @@ -590,9 +589,9 @@ def _step3(self, coefs): out = xp.moveaxis(out, -1, 0) if self.num_interp > self.num_radial_nodes: out = xp.asnumpy(out) # RM - out = dct(out, axis=1, type=2) + out = fft.dct(out, axis=1, type=2) out = out[:, : self.num_radial_nodes, :] - out = idct(out, axis=1, type=2) + out = fft.idct(out, axis=1, type=2) return out @@ -736,10 +735,10 @@ def _radial_convolve_weights(self, b): b = np.squeeze(b) b = np.array(b) if self.num_interp > self.num_radial_nodes: - b = dct(b, axis=0, type=2) / (2 * self.num_radial_nodes) + b = fft.dct(b, axis=0, type=2) / (2 * self.num_radial_nodes) bz = np.zeros(b.shape) b = np.concatenate((b, bz), axis=0) - b = idct(b, axis=0, type=2) * 2 * b.shape[0] + b = fft.idct(b, axis=0, type=2) * 2 * b.shape[0] a = np.zeros(self.count, dtype=np.float64) y = [None] * (self.ell_p_max + 1) for i in range(self.ell_p_max + 1): diff --git a/src/aspire/numeric/cupy_fft.py b/src/aspire/numeric/cupy_fft.py index 4f45f92117..3327c78f7d 100644 --- a/src/aspire/numeric/cupy_fft.py +++ b/src/aspire/numeric/cupy_fft.py @@ -33,3 +33,9 @@ def fftshift(self, x, axes=None): def ifftshift(self, x, axes=None): return cp.fft.ifftshift(x, axes=axes) + + def dct(self, *args, **kwargs): + return cp.fft.dct(*args, **kwargs) + + def idct(self, *args, **kwargs): + return cp.fft.idct(*args, **kwargs) diff --git a/src/aspire/numeric/pyfftw_fft.py b/src/aspire/numeric/pyfftw_fft.py index 9cfdd45210..afcad98d28 100644 --- a/src/aspire/numeric/pyfftw_fft.py +++ b/src/aspire/numeric/pyfftw_fft.py @@ -159,3 +159,9 @@ def fftshift(self, a, axes=None): def ifftshift(self, a, axes=None): return scipy_fft.ifftshift(a, axes=axes) + + def dct(self, *args, **kwargs): + return scipy_fft.dct(*args, **kwargs) + + def idct(self, *args, **kwargs): + return scipy_fft.idct(*args, **kwargs) diff --git a/src/aspire/numeric/scipy_fft.py b/src/aspire/numeric/scipy_fft.py index c5a392f96b..d78e463803 100644 --- a/src/aspire/numeric/scipy_fft.py +++ b/src/aspire/numeric/scipy_fft.py @@ -33,3 +33,9 @@ def fftshift(self, x, axes=None): def ifftshift(self, x, axes=None): return sp.fft.ifftshift(x, axes=axes) + + def dct(self, *args, **kwargs): + return sp.fft.dct(*args, **kwargs) + + def idct(self, *args, **kwargs): + return sp.fft.idct(*args, **kwargs) From 64578372fe61448106eb4f06808606778dc0d04d Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 11:52:13 -0400 Subject: [PATCH 09/65] phase 2, fle internals --- src/aspire/basis/fle_2d.py | 15 ++++----------- src/aspire/config_default.yaml | 2 +- src/aspire/image/image.py | 7 +++++-- src/aspire/numeric/__init__.py | 15 +++++++++++++++ src/aspire/numeric/cupy_fft.py | 5 +++-- src/aspire/numeric/numpy.py | 7 ++++++- 6 files changed, 34 insertions(+), 17 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index 5675becf86..6215baaca8 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -496,7 +496,7 @@ def _evaluate_t(self, imgs): coefficients. """ # See Section 3.5 - imgs = imgs.copy() + imgs = xp.array(imgs) # Copy here, mutating. imgs[:, self.radial_mask] = 0 z = self._step1_t(imgs) b = self._step2_t(z) @@ -513,7 +513,7 @@ def _step1_t(self, im): """ im = im.reshape(-1, self.nres, self.nres).astype(complex_type(self.dtype)) num_img = im.shape[0] - z = np.zeros( + z = xp.zeros( (num_img, self.num_radial_nodes, self.num_angular_nodes), dtype=complex_type(self.dtype), ) @@ -535,7 +535,6 @@ def _step2_t(self, z): num_img = z.shape[0] # Compute FFT along angular nodes betas = fft.fft(z, axis=2) / self.num_angular_nodes - betas = xp.asarray(betas) # RM betas = betas[:, :, self.nus] betas = betas.conj() betas = betas.swapaxes(0, 2) @@ -553,12 +552,9 @@ def _step3_t(self, betas): """ num_img = betas.shape[0] if self.num_interp > self.num_radial_nodes: - betas = xp.asnumpy(betas) betas = fft.dct(betas, axis=1, type=2) / (2 * self.num_radial_nodes) - zeros = np.zeros(betas.shape) - betas = np.concatenate((betas, zeros), axis=1) + betas = xp.concatenate((betas, xp.zeros(betas.shape)), axis=1) betas = fft.idct(betas, axis=1, type=2) * 2 * betas.shape[1] - betas = xp.asarray(betas) betas = xp.moveaxis(betas, 0, -1) coefs = xp.zeros((self.count, num_img), dtype=np.float64) @@ -588,7 +584,6 @@ def _step3(self, coefs): out[:, i, :] = self.A3_T[i] @ coefs[self.idx_list[i]] out = xp.moveaxis(out, -1, 0) if self.num_interp > self.num_radial_nodes: - out = xp.asnumpy(out) # RM out = fft.dct(out, axis=1, type=2) out = out[:, : self.num_radial_nodes, :] out = fft.idct(out, axis=1, type=2) @@ -601,7 +596,6 @@ def _step2(self, betas): to images). Uses the IFFT to convert Beta values into Fourier-space images. """ - betas = xp.asarray(betas) num_img = betas.shape[0] tmp = xp.zeros( (num_img, self.num_radial_nodes, self.num_angular_nodes), @@ -615,7 +609,6 @@ def _step2(self, betas): betas = betas.swapaxes(0, 2) tmp[:, :, self.nus] = betas.conj() - tmp = xp.asnumpy(tmp) # rm z = fft.ifft(tmp, axis=2) return z @@ -639,7 +632,7 @@ def _step1(self, z): im = im.reshape(num_img, self.nres, self.nres) im[:, self.radial_mask] = 0 - return im + return xp.asnumpy(im) def _create_dense_matrix(self): """ diff --git a/src/aspire/config_default.yaml b/src/aspire/config_default.yaml index 26176a97ac..fed4cea50a 100644 --- a/src/aspire/config_default.yaml +++ b/src/aspire/config_default.yaml @@ -3,7 +3,7 @@ common: # numeric module to use - one of numpy/cupy numeric: cupy # fft backend to use - one of pyfftw/scipy/cupy/mkl - fft: scipy + fft: cupy # Set cache directory for ASPIRE example data. # By default the cache location will be set by pooch.os_cache(), diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index fd160d1644..892b6accd0 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -357,11 +357,14 @@ def downsample(self, ds_res): im = self.stack_reshape(-1) # compute FT with centered 0-frequency - fx = fft.centered_fft2(im._data) + fx = xp.asnumpy(fft.centered_fft2(xp.asarray(im._data))) # crop 2D Fourier transform for each image crop_fx = np.array([crop_pad_2d(fx[i], ds_res) for i in range(self.n_images)]) # take back to real space, discard complex part, and scale - out = np.real(fft.centered_ifft2(crop_fx)) * (ds_res**2 / self.resolution**2) + out = fft.centered_ifft2(xp.asarray(crop_fx)).real * ( + ds_res**2 / self.resolution**2 + ) + out = xp.asnumpy(out) return self.__class__(out).stack_reshape(original_stack_shape) diff --git a/src/aspire/numeric/__init__.py b/src/aspire/numeric/__init__.py index 95283e5d87..be88775498 100644 --- a/src/aspire/numeric/__init__.py +++ b/src/aspire/numeric/__init__.py @@ -36,6 +36,21 @@ def fft_object(which): fft = fft_object(config["common"]["fft"].as_str()) +# Sanity check. +if (config["common"]["numeric"].as_str() == "cupy") and ( + config["common"]["fft"].as_str() != "cupy" +): + raise RuntimeError( + "Using `cupy` numeric backend without `cupy` fft is unsupported." + ) + +if (config["common"]["fft"].as_str() == "cupy") and ( + config["common"]["numeric"].as_str() != "cupy" +): + raise RuntimeError( + "Using `cupy` fft without `cupy` numeric backend is unsupported." + ) + # Configure `sparse` in tandem with `numeric` as the arrays generally will need to interoperate. def sparse_object(which): diff --git a/src/aspire/numeric/cupy_fft.py b/src/aspire/numeric/cupy_fft.py index 3327c78f7d..29939e504c 100644 --- a/src/aspire/numeric/cupy_fft.py +++ b/src/aspire/numeric/cupy_fft.py @@ -1,4 +1,5 @@ import cupy as cp +import cupyx.scipy.fft as cufft from aspire.numeric.base_fft import FFT @@ -35,7 +36,7 @@ def ifftshift(self, x, axes=None): return cp.fft.ifftshift(x, axes=axes) def dct(self, *args, **kwargs): - return cp.fft.dct(*args, **kwargs) + return cufft.dct(*args, **kwargs) def idct(self, *args, **kwargs): - return cp.fft.idct(*args, **kwargs) + return cufft.idct(*args, **kwargs) diff --git a/src/aspire/numeric/numpy.py b/src/aspire/numeric/numpy.py index 3237c2c3ad..9367409c78 100644 --- a/src/aspire/numeric/numpy.py +++ b/src/aspire/numeric/numpy.py @@ -1,8 +1,13 @@ +import cupy as cp import numpy as np class Numpy: - asnumpy = staticmethod(lambda x: x) + @staticmethod + def asnumpy(x): + if isinstance(x, cp.ndarray): + x = x.get() + return x def __getattr__(self, item): """ From eb5ffedde05b077d2c98554dde7c1e3951969238 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 12:20:05 -0400 Subject: [PATCH 10/65] mem cleanup workaround --- src/aspire/basis/fle_2d.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index 6215baaca8..4278331e1a 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -1,3 +1,4 @@ +import gc import logging import numpy as np @@ -18,6 +19,19 @@ logger = logging.getLogger(__name__) +def _cleanup(): + """ + Utility for informing python+cupy to cleanup memory held by old vars. + """ + gc.collect() + try: + import cupy + + cupy.get_default_memory_pool().free_all_blocks() + except ModuleNotFoundError: + pass + + class FLEBasis2D(SteerableBasis2D, FBBasisMixin): """ Define a derived class for Fast Fourier Bessel 2D expansion using interpolation @@ -499,12 +513,20 @@ def _evaluate_t(self, imgs): imgs = xp.array(imgs) # Copy here, mutating. imgs[:, self.radial_mask] = 0 z = self._step1_t(imgs) + del imgs + _cleanup() + b = self._step2_t(z) + del z + _cleanup() + coefs = self._step3_t(b) + del b + _cleanup() # return in FB order coefs = coefs[..., self._fle_to_fb_indices] - return coefs.astype(self.coefficient_dtype, copy=False) + return xp.asnumpy(coefs.astype(self.coefficient_dtype)) def _step1_t(self, im): """ @@ -562,7 +584,7 @@ def _step3_t(self, betas): coefs[self.idx_list[i]] = self.A3[i] @ betas[:, i, :] coefs = coefs.T - return xp.asnumpy(coefs * self.norm_constants / self.h) + return coefs * self.norm_constants / self.h def _step3(self, coefs): """ From e24c1acfa5ce5e58b599c910a469827ee12c1ab4 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 14:54:57 -0400 Subject: [PATCH 11/65] cupy eigvals needs large problem or nans... --- tests/test_numeric_sparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_numeric_sparse.py b/tests/test_numeric_sparse.py index 45c28bef31..120a7de49f 100644 --- a/tests/test_numeric_sparse.py +++ b/tests/test_numeric_sparse.py @@ -47,7 +47,7 @@ def test_eigsh(backends): """ xp, sparse = backends - A = xp.eye(123) + A = xp.eye(1234) lamb, _ = sparse.linalg.eigsh(A) np.testing.assert_allclose(xp.asnumpy(lamb), 1.0) From d3f5993bb4bdbd9ed61930d8ad3c4e5e63ad9451 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 14:55:34 -0400 Subject: [PATCH 12/65] crop pad updates --- src/aspire/image/image.py | 6 +++--- src/aspire/utils/coor_trans.py | 14 +++++++++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index 892b6accd0..d04bfe4e25 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -357,11 +357,11 @@ def downsample(self, ds_res): im = self.stack_reshape(-1) # compute FT with centered 0-frequency - fx = xp.asnumpy(fft.centered_fft2(xp.asarray(im._data))) + fx = fft.centered_fft2(xp.asarray(im._data)) # crop 2D Fourier transform for each image - crop_fx = np.array([crop_pad_2d(fx[i], ds_res) for i in range(self.n_images)]) + crop_fx = crop_pad_2d(fx, ds_res) # take back to real space, discard complex part, and scale - out = fft.centered_ifft2(xp.asarray(crop_fx)).real * ( + out = fft.centered_ifft2(crop_fx).real * ( ds_res**2 / self.resolution**2 ) out = xp.asnumpy(out) diff --git a/src/aspire/utils/coor_trans.py b/src/aspire/utils/coor_trans.py index e909e2f394..844f218551 100644 --- a/src/aspire/utils/coor_trans.py +++ b/src/aspire/utils/coor_trans.py @@ -8,6 +8,7 @@ from numpy.linalg import norm from scipy.linalg import svd +from aspire.numeric import xp from aspire.utils.random import Random from aspire.utils.rotation import Rotation @@ -368,23 +369,26 @@ def rots_to_clmatrix(rots, n_theta): def crop_pad_2d(im, size, fill_value=0): """ - :param im: A 2-dimensional numpy array + :param im: A >=2-dimensional numpy array :param size: Integer size of cropped/padded output - :return: A numpy array of shape (size, size) + :return: A numpy array of shape (..., size, size) """ - im_y, im_x = im.shape + im_y, im_x = im.shape[-2:] # shift terms start_x = math.floor(im_x / 2) - math.floor(size / 2) start_y = math.floor(im_y / 2) - math.floor(size / 2) # cropping if size <= min(im_y, im_x): - return im[start_y : start_y + size, start_x : start_x + size] + return im[..., start_y : start_y + size, start_x : start_x + size] # padding elif size >= max(im_y, im_x): + # Determine shape + shape = list(im.shape[:-2]) + shape.extend([size,size]) # ensure that we return in the same dtype as the input - to_return = fill_value * np.ones((size, size), dtype=im.dtype) + to_return = xp.full(shape, fill_value, dtype=im.dtype) # when padding, start_x and start_y are negative since size is larger # than im_x and im_y; the below line calculates where the original image # is placed in relation to the (now-larger) box size From 1c5c17f96f219f457966f1a555860ef8bf40f26a Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 5 Jun 2024 14:56:56 -0400 Subject: [PATCH 13/65] tox cleanup [skip ci] --- src/aspire/image/image.py | 4 +--- src/aspire/utils/coor_trans.py | 2 +- tests/test_numeric_sparse.py | 1 - 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index d04bfe4e25..a0a954f2d9 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -361,9 +361,7 @@ def downsample(self, ds_res): # crop 2D Fourier transform for each image crop_fx = crop_pad_2d(fx, ds_res) # take back to real space, discard complex part, and scale - out = fft.centered_ifft2(crop_fx).real * ( - ds_res**2 / self.resolution**2 - ) + out = fft.centered_ifft2(crop_fx).real * (ds_res**2 / self.resolution**2) out = xp.asnumpy(out) return self.__class__(out).stack_reshape(original_stack_shape) diff --git a/src/aspire/utils/coor_trans.py b/src/aspire/utils/coor_trans.py index 844f218551..dfb1c630f1 100644 --- a/src/aspire/utils/coor_trans.py +++ b/src/aspire/utils/coor_trans.py @@ -386,7 +386,7 @@ def crop_pad_2d(im, size, fill_value=0): elif size >= max(im_y, im_x): # Determine shape shape = list(im.shape[:-2]) - shape.extend([size,size]) + shape.extend([size, size]) # ensure that we return in the same dtype as the input to_return = xp.full(shape, fill_value, dtype=im.dtype) # when padding, start_x and start_y are negative since size is larger diff --git a/tests/test_numeric_sparse.py b/tests/test_numeric_sparse.py index 120a7de49f..288e41a176 100644 --- a/tests/test_numeric_sparse.py +++ b/tests/test_numeric_sparse.py @@ -51,4 +51,3 @@ def test_eigsh(backends): lamb, _ = sparse.linalg.eigsh(A) np.testing.assert_allclose(xp.asnumpy(lamb), 1.0) - print(lamb) From 02504556ccc212e65e606463e660ce89c632d812 Mon Sep 17 00:00:00 2001 From: "Joshua C. Carmichael" Date: Tue, 4 Jun 2024 16:06:28 -0400 Subject: [PATCH 14/65] evaluate_t on gpu. --- src/aspire/basis/ffb_2d.py | 37 +++++++++++++++++-------------------- src/aspire/image/image.py | 2 +- 2 files changed, 18 insertions(+), 21 deletions(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index 5a5c7c3f27..e9e6ab90a8 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -193,56 +193,53 @@ def _evaluate_t(self, x): n_images = x.shape[0] # resamping x in a polar Fourier gird using nonuniform discrete Fourier transform - pf = nufft(x, 2 * pi * freqs) - pf = np.reshape(pf, (n_images, n_r, n_theta)) + pf = nufft(xp.array(x), 2 * pi * freqs) + pf = pf.reshape(n_images, n_r, n_theta) # Recover "negative" frequencies from "positive" half plane. - pf = np.concatenate((pf, pf.conjugate()), axis=2) + pf = xp.concatenate((pf, pf.conjugate()), axis=2) # evaluate radial integral using the Gauss-Legendre quadrature rule - for i_r in range(0, n_r): - pf[:, i_r, :] = pf[:, i_r, :] * ( - self._precomp["gl_weights"][i_r] * self._precomp["gl_nodes"][i_r] - ) + pf = pf * (xp.array(self._precomp["gl_weights"]) * xp.array(self._precomp["gl_nodes"]))[None, :, None] # 1D FFT on the angular dimension for each concentric circle - pf = 2 * pi / (2 * n_theta) * xp.asnumpy(fft.fft(xp.asarray(pf))) + pf = 2 * xp.pi / (2 * n_theta) * fft.fft(pf) # This only makes it easier to slice the array later. - v = np.zeros((n_images, self.count), dtype=x.dtype) + v = xp.zeros((n_images, self.count), dtype=x.dtype) # go through each basis function and find the corresponding coefficient ind = 0 - idx = ind + np.arange(self.k_max[0]) + idx = ind + xp.arange(self.k_max[0]) # include the normalization factor of angular part into radial part - radial_norm = self._precomp["radial"] / np.expand_dims(self.angular_norms, 1) + radial_norm = xp.array(self._precomp["radial"] / np.expand_dims(self.angular_norms, 1)) v[:, self._zero_angular_inds] = pf[:, :, 0].real @ radial_norm[idx].T - ind = ind + np.size(idx) + ind = ind + idx.size ind_pos = ind for ell in range(1, self.ell_max + 1): - idx = ind + np.arange(self.k_max[ell]) - idx_pos = ind_pos + np.arange(self.k_max[ell]) + idx = ind + xp.arange(self.k_max[ell]) + idx_pos = ind_pos + xp.arange(self.k_max[ell]) idx_neg = idx_pos + self.k_max[ell] v_ell = pf[:, :, ell] @ radial_norm[idx].T if np.mod(ell, 2) == 0: - v_pos = np.real(v_ell) - v_neg = -np.imag(v_ell) + v_pos = v_ell.real + v_neg = -v_ell.imag else: - v_pos = np.imag(v_ell) - v_neg = np.real(v_ell) + v_pos = v_ell.imag + v_neg = v_ell.real v[:, idx_pos] = v_pos v[:, idx_neg] = v_neg - ind = ind + np.size(idx) + ind = ind + idx.size ind_pos = ind_pos + 2 * self.k_max[ell] - return v + return xp.asnumpy(v) def filter_to_basis_mat(self, f, **kwargs): """ diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index a0a954f2d9..d0727eb71f 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -364,7 +364,7 @@ def downsample(self, ds_res): out = fft.centered_ifft2(crop_fx).real * (ds_res**2 / self.resolution**2) out = xp.asnumpy(out) - return self.__class__(out).stack_reshape(original_stack_shape) + return self.__class__(np.array(out.get())).stack_reshape(original_stack_shape) def filter(self, filter): """ From 51459ce059c45cd0153e504a6f1857906b8f5b85 Mon Sep 17 00:00:00 2001 From: "Joshua C. Carmichael" Date: Wed, 5 Jun 2024 16:06:21 -0400 Subject: [PATCH 15/65] Optimize ffb2d for gpu. --- src/aspire/basis/ffb_2d.py | 56 ++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index e9e6ab90a8..996e44f95a 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -105,6 +105,7 @@ def _evaluate(self, v): coordinate basis. This is Image instance with resolution of `self.sz` and the first dimension correspond to remaining dimension of `v`. """ + v = xp.array(v) sz_roll = v.shape[:-1] v = v.reshape(-1, self.count) @@ -112,27 +113,29 @@ def _evaluate(self, v): n_data = v.shape[0] # get information on polar grids from precomputed data - n_theta = np.size(self._precomp["freqs"], 2) - n_r = np.size(self._precomp["freqs"], 1) + n_theta = self._precomp["freqs"].shape[2] + n_r = self._precomp["freqs"].shape[1] # go through each basis function and find corresponding coefficient - pf = np.zeros((n_data, 2 * n_theta, n_r), dtype=complex_type(self.dtype)) + pf = xp.zeros((n_data, 2 * n_theta, n_r), dtype=complex_type(self.dtype)) ind = 0 - idx = ind + np.arange(self.k_max[0], dtype=int) + idx = ind + xp.arange(self.k_max[0], dtype=int) # include the normalization factor of angular part into radial part - radial_norm = self._precomp["radial"] / np.expand_dims(self.angular_norms, 1) - pf[:, 0, :] = v[:, self._zero_angular_inds] @ radial_norm[idx] - ind = ind + np.size(idx) + radial_norm = xp.array(self._precomp["radial"]) / xp.array( + np.expand_dims(self.angular_norms, 1) + ) + pf[:, 0, :] = v[:, xp.array(self._zero_angular_inds)] @ radial_norm[idx] + ind = ind + idx.size ind_pos = ind for ell in range(1, self.ell_max + 1): - idx = ind + np.arange(self.k_max[ell], dtype=int) - idx_pos = ind_pos + np.arange(self.k_max[ell], dtype=int) - idx_neg = idx_pos + self.k_max[ell] + idx = ind + xp.arange(self.k_max[ell], dtype=int) + idx_pos = ind_pos + xp.arange(self.k_max[ell], dtype=int) + idx_neg = idx_pos + xp.array(self.k_max[ell]) v_ell = (v[:, idx_pos] - 1j * v[:, idx_neg]) / 2.0 @@ -147,22 +150,19 @@ def _evaluate(self, v): else: pf[:, 2 * n_theta - ell, :] = -pf_ell.conjugate() - ind = ind + np.size(idx) - ind_pos = ind_pos + 2 * self.k_max[ell] + ind = ind + idx.size + ind_pos = ind_pos + 2 * xp.array(self.k_max[ell]) # 1D inverse FFT in the degree of polar angle - pf = 2 * pi * xp.asnumpy(fft.ifft(xp.asarray(pf), axis=1)) + pf = 2 * xp.pi * fft.ifft(xp.asarray(pf), axis=1) # Only need "positive" frequencies. - hsize = int(np.size(pf, 1) / 2) + hsize = int(pf.shape[1] / 2) pf = pf[:, 0:hsize, :] - - for i_r in range(0, n_r): - pf[..., i_r] = pf[..., i_r] * ( - self._precomp["gl_weights"][i_r] * self._precomp["gl_nodes"][i_r] - ) - - pf = np.reshape(pf, (n_data, n_r * n_theta)) + pf *= ( + xp.array(self._precomp["gl_weights"]) * xp.array(self._precomp["gl_nodes"]) + )[None, None, :] + pf = pf.reshape(n_data, n_r * n_theta) # perform inverse non-uniformly FFT transform back to 2D coordinate basis freqs = m_reshape(self._precomp["freqs"], (2, n_r * n_theta)) @@ -172,7 +172,7 @@ def _evaluate(self, v): # Return X as Image instance with the last two dimensions as *self.sz x = x.reshape((*sz_roll, *self.sz)) - return x + return xp.asnumpy(x) def _evaluate_t(self, x): """ @@ -200,7 +200,13 @@ def _evaluate_t(self, x): pf = xp.concatenate((pf, pf.conjugate()), axis=2) # evaluate radial integral using the Gauss-Legendre quadrature rule - pf = pf * (xp.array(self._precomp["gl_weights"]) * xp.array(self._precomp["gl_nodes"]))[None, :, None] + pf = ( + pf + * ( + xp.array(self._precomp["gl_weights"]) + * xp.array(self._precomp["gl_nodes"]) + )[None, :, None] + ) # 1D FFT on the angular dimension for each concentric circle pf = 2 * xp.pi / (2 * n_theta) * fft.fft(pf) @@ -213,7 +219,9 @@ def _evaluate_t(self, x): idx = ind + xp.arange(self.k_max[0]) # include the normalization factor of angular part into radial part - radial_norm = xp.array(self._precomp["radial"] / np.expand_dims(self.angular_norms, 1)) + radial_norm = xp.array( + self._precomp["radial"] / np.expand_dims(self.angular_norms, 1) + ) v[:, self._zero_angular_inds] = pf[:, :, 0].real @ radial_norm[idx].T ind = ind + idx.size From da85a5da84a87871a3ec3c19e07e821bc216a128 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Tue, 11 Jun 2024 11:01:51 -0400 Subject: [PATCH 16/65] downsample return --- src/aspire/image/image.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index d0727eb71f..a0a954f2d9 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -364,7 +364,7 @@ def downsample(self, ds_res): out = fft.centered_ifft2(crop_fx).real * (ds_res**2 / self.resolution**2) out = xp.asnumpy(out) - return self.__class__(np.array(out.get())).stack_reshape(original_stack_shape) + return self.__class__(out).stack_reshape(original_stack_shape) def filter(self, filter): """ From d9095611f80d8c0a9db0cf4f11aa7ebb29c2c41c Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Tue, 11 Jun 2024 11:43:52 -0400 Subject: [PATCH 17/65] remove unnecessary xp.array --- src/aspire/basis/ffb_2d.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index 996e44f95a..4971b44f77 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -154,7 +154,7 @@ def _evaluate(self, v): ind_pos = ind_pos + 2 * xp.array(self.k_max[ell]) # 1D inverse FFT in the degree of polar angle - pf = 2 * xp.pi * fft.ifft(xp.asarray(pf), axis=1) + pf = 2 * xp.pi * fft.ifft(pf, axis=1) # Only need "positive" frequencies. hsize = int(pf.shape[1] / 2) From f1c296da759020b7e335e94b739fd5757c23c53b Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Tue, 11 Jun 2024 15:24:56 -0400 Subject: [PATCH 18/65] convert pf to complex --- src/aspire/basis/ffb_2d.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index 4971b44f77..821800ee54 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -167,7 +167,9 @@ def _evaluate(self, v): # perform inverse non-uniformly FFT transform back to 2D coordinate basis freqs = m_reshape(self._precomp["freqs"], (2, n_r * n_theta)) - x = 2 * anufft(pf, 2 * pi * freqs, self.sz, real=True) + x = 2 * anufft( + pf.astype(complex_type(self.dtype)), 2 * pi * freqs, self.sz, real=True + ) # Return X as Image instance with the last two dimensions as *self.sz x = x.reshape((*sz_roll, *self.sz)) From 5d02abf7994daa552f69313407fb185dfa663f7d Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Thu, 13 Jun 2024 08:59:56 -0400 Subject: [PATCH 19/65] precompute radial_norm and gl_weighted_nodes in build. --- src/aspire/basis/ffb_2d.py | 38 +++++++++++++++++--------------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index 821800ee54..891ab2600d 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -58,6 +58,16 @@ def _build(self): # precompute the basis functions in 2D grids self._precomp = self._precomp() + # include the normalization factor of angular part into radial part + self.radial_norm = xp.array(self._precomp["radial"]) / xp.array( + np.expand_dims(self.angular_norms, 1) + ) + + # precompute weighted nodes + self.gl_weighted_nodes = xp.array(self._precomp["gl_weights"]) * xp.array( + self._precomp["gl_nodes"] + ) + def _precomp(self): """ Precomute the basis functions on a polar Fourier grid @@ -123,11 +133,7 @@ def _evaluate(self, v): idx = ind + xp.arange(self.k_max[0], dtype=int) - # include the normalization factor of angular part into radial part - radial_norm = xp.array(self._precomp["radial"]) / xp.array( - np.expand_dims(self.angular_norms, 1) - ) - pf[:, 0, :] = v[:, xp.array(self._zero_angular_inds)] @ radial_norm[idx] + pf[:, 0, :] = v[:, xp.array(self._zero_angular_inds)] @ self.radial_norm[idx] ind = ind + idx.size ind_pos = ind @@ -142,7 +148,7 @@ def _evaluate(self, v): if np.mod(ell, 2) == 1: v_ell = 1j * v_ell - pf_ell = v_ell @ radial_norm[idx] + pf_ell = v_ell @ self.radial_norm[idx] pf[:, ell, :] = pf_ell if np.mod(ell, 2) == 0: @@ -159,9 +165,7 @@ def _evaluate(self, v): # Only need "positive" frequencies. hsize = int(pf.shape[1] / 2) pf = pf[:, 0:hsize, :] - pf *= ( - xp.array(self._precomp["gl_weights"]) * xp.array(self._precomp["gl_nodes"]) - )[None, None, :] + pf *= self.gl_weighted_nodes[None, None, :] pf = pf.reshape(n_data, n_r * n_theta) # perform inverse non-uniformly FFT transform back to 2D coordinate basis @@ -202,13 +206,7 @@ def _evaluate_t(self, x): pf = xp.concatenate((pf, pf.conjugate()), axis=2) # evaluate radial integral using the Gauss-Legendre quadrature rule - pf = ( - pf - * ( - xp.array(self._precomp["gl_weights"]) - * xp.array(self._precomp["gl_nodes"]) - )[None, :, None] - ) + pf *= self.gl_weighted_nodes[None, :, None] # 1D FFT on the angular dimension for each concentric circle pf = 2 * xp.pi / (2 * n_theta) * fft.fft(pf) @@ -221,10 +219,8 @@ def _evaluate_t(self, x): idx = ind + xp.arange(self.k_max[0]) # include the normalization factor of angular part into radial part - radial_norm = xp.array( - self._precomp["radial"] / np.expand_dims(self.angular_norms, 1) - ) - v[:, self._zero_angular_inds] = pf[:, :, 0].real @ radial_norm[idx].T + + v[:, self._zero_angular_inds] = pf[:, :, 0].real @ self.radial_norm[idx].T ind = ind + idx.size ind_pos = ind @@ -233,7 +229,7 @@ def _evaluate_t(self, x): idx_pos = ind_pos + xp.arange(self.k_max[ell]) idx_neg = idx_pos + self.k_max[ell] - v_ell = pf[:, :, ell] @ radial_norm[idx].T + v_ell = pf[:, :, ell] @ self.radial_norm[idx].T if np.mod(ell, 2) == 0: v_pos = v_ell.real From 6a832a746a844b499a7722ca6030b7a87234cd1d Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Thu, 13 Jun 2024 09:07:46 -0400 Subject: [PATCH 20/65] remove comment --- src/aspire/basis/ffb_2d.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index 891ab2600d..1c7ed8cad1 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -218,8 +218,6 @@ def _evaluate_t(self, x): ind = 0 idx = ind + xp.arange(self.k_max[0]) - # include the normalization factor of angular part into radial part - v[:, self._zero_angular_inds] = pf[:, :, 0].real @ self.radial_norm[idx].T ind = ind + idx.size From 1c27b06a8dae11aae8a22d107c9b6deab6d61b54 Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Thu, 13 Jun 2024 10:42:35 -0400 Subject: [PATCH 21/65] use asarray --- src/aspire/basis/ffb_2d.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index 1c7ed8cad1..f44e959149 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -59,12 +59,12 @@ def _build(self): self._precomp = self._precomp() # include the normalization factor of angular part into radial part - self.radial_norm = xp.array(self._precomp["radial"]) / xp.array( + self.radial_norm = xp.asarray(self._precomp["radial"]) / xp.asarray( np.expand_dims(self.angular_norms, 1) ) # precompute weighted nodes - self.gl_weighted_nodes = xp.array(self._precomp["gl_weights"]) * xp.array( + self.gl_weighted_nodes = xp.asarray(self._precomp["gl_weights"]) * xp.asarray( self._precomp["gl_nodes"] ) @@ -115,7 +115,7 @@ def _evaluate(self, v): coordinate basis. This is Image instance with resolution of `self.sz` and the first dimension correspond to remaining dimension of `v`. """ - v = xp.array(v) + v = xp.asarray(v) sz_roll = v.shape[:-1] v = v.reshape(-1, self.count) @@ -133,7 +133,7 @@ def _evaluate(self, v): idx = ind + xp.arange(self.k_max[0], dtype=int) - pf[:, 0, :] = v[:, xp.array(self._zero_angular_inds)] @ self.radial_norm[idx] + pf[:, 0, :] = v[:, xp.asarray(self._zero_angular_inds)] @ self.radial_norm[idx] ind = ind + idx.size ind_pos = ind @@ -141,7 +141,7 @@ def _evaluate(self, v): for ell in range(1, self.ell_max + 1): idx = ind + xp.arange(self.k_max[ell], dtype=int) idx_pos = ind_pos + xp.arange(self.k_max[ell], dtype=int) - idx_neg = idx_pos + xp.array(self.k_max[ell]) + idx_neg = idx_pos + xp.asarray(self.k_max[ell]) v_ell = (v[:, idx_pos] - 1j * v[:, idx_neg]) / 2.0 @@ -157,7 +157,7 @@ def _evaluate(self, v): pf[:, 2 * n_theta - ell, :] = -pf_ell.conjugate() ind = ind + idx.size - ind_pos = ind_pos + 2 * xp.array(self.k_max[ell]) + ind_pos = ind_pos + 2 * xp.asarray(self.k_max[ell]) # 1D inverse FFT in the degree of polar angle pf = 2 * xp.pi * fft.ifft(pf, axis=1) @@ -199,7 +199,7 @@ def _evaluate_t(self, x): n_images = x.shape[0] # resamping x in a polar Fourier gird using nonuniform discrete Fourier transform - pf = nufft(xp.array(x), 2 * pi * freqs) + pf = nufft(xp.asarray(x), 2 * pi * freqs) pf = pf.reshape(n_images, n_r, n_theta) # Recover "negative" frequencies from "positive" half plane. From 69701a6e19372ea9f660838a433c97e55d77943f Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Thu, 13 Jun 2024 13:36:16 -0400 Subject: [PATCH 22/65] Remove cupy.fill culprit. un-cupy indices. --- src/aspire/basis/ffb_2d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index f44e959149..116e009757 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -141,7 +141,7 @@ def _evaluate(self, v): for ell in range(1, self.ell_max + 1): idx = ind + xp.arange(self.k_max[ell], dtype=int) idx_pos = ind_pos + xp.arange(self.k_max[ell], dtype=int) - idx_neg = idx_pos + xp.asarray(self.k_max[ell]) + idx_neg = idx_pos + self.k_max[ell] v_ell = (v[:, idx_pos] - 1j * v[:, idx_neg]) / 2.0 @@ -157,7 +157,7 @@ def _evaluate(self, v): pf[:, 2 * n_theta - ell, :] = -pf_ell.conjugate() ind = ind + idx.size - ind_pos = ind_pos + 2 * xp.asarray(self.k_max[ell]) + ind_pos = ind_pos + 2 * self.k_max[ell] # 1D inverse FFT in the degree of polar angle pf = 2 * xp.pi * fft.ifft(pf, axis=1) From 0302ab68e57c923cb5b43f7a437c034b9261d1fe Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Thu, 13 Jun 2024 14:10:48 -0400 Subject: [PATCH 23/65] cupy.fill culprit in fle_2d. sparse indices. --- src/aspire/basis/fle_2d_utils.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/aspire/basis/fle_2d_utils.py b/src/aspire/basis/fle_2d_utils.py index 23d1441a68..33f237165e 100644 --- a/src/aspire/basis/fle_2d_utils.py +++ b/src/aspire/basis/fle_2d_utils.py @@ -44,9 +44,9 @@ def precomp_transform_complex_to_real(ells): """ count = len(ells) num_nonzero = np.sum(ells == 0) + 2 * np.sum(ells != 0) - idx = xp.zeros(num_nonzero, dtype=int) - jdx = xp.zeros(num_nonzero, dtype=int) - vals = xp.zeros(num_nonzero, dtype=np.complex128) + idx = np.zeros(num_nonzero, dtype=int) + jdx = np.zeros(num_nonzero, dtype=int) + vals = np.zeros(num_nonzero, dtype=np.complex128) k = 0 for i in range(count): @@ -86,7 +86,11 @@ def precomp_transform_complex_to_real(ells): jdx[k] = i + 1 k = k + 1 - A = sparse.csr_matrix((vals, (idx, jdx)), shape=(count, count), dtype=np.complex128) + A = sparse.csr_matrix( + (xp.asarray(vals), (xp.asarray(idx), xp.asarray(jdx))), + shape=(count, count), + dtype=np.complex128, + ) return A.conjugate() From 4794493737107d4de22fad6cfbd1d729280ee21a Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 13 Jun 2024 10:58:04 -0400 Subject: [PATCH 24/65] bare min vol hack --- src/aspire/volume/volume.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index b6c100db36..7883f59190 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -475,15 +475,16 @@ def downsample(self, ds_res, mask=None): v = self.stack_reshape(-1) # take 3D Fourier transform of each volume in the stack - fx = fft.fftshift(fft.fftn(v._data, axes=(1, 2, 3))) + fx = xp.asnumpy(fft.fftshift(fft.fftn(xp.asarray(v._data), axes=(1, 2, 3)))) # crop each volume to the desired resolution in frequency space crop_fx = ( np.array([crop_pad_3d(fx[i, :, :, :], ds_res) for i in range(self.n_vols)]) * mask ) # inverse Fourier transform of each volume - out = fft.ifftn(fft.ifftshift(crop_fx), axes=(1, 2, 3)) * ( - ds_res**3 / self.resolution**3 + out = xp.asnumpy( + fft.ifftn(fft.ifftshift(xp.asarray(crop_fx)), axes=(1, 2, 3)) + * (ds_res**3 / self.resolution**3) ) # returns a new Volume object return self.__class__( From e6ee1bd569d5e703ed16196fe5ad670e5837cae6 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 13 Jun 2024 11:33:04 -0400 Subject: [PATCH 25/65] bare min ffb3d hacks [skip ci] --- src/aspire/basis/ffb_3d.py | 52 ++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/src/aspire/basis/ffb_3d.py b/src/aspire/basis/ffb_3d.py index 6362a9a703..1ac5fd62ff 100644 --- a/src/aspire/basis/ffb_3d.py +++ b/src/aspire/basis/ffb_3d.py @@ -6,6 +6,7 @@ from aspire.basis import FBBasis3D from aspire.basis.basis_utils import lgwt, norm_assoc_legendre, sph_bessel from aspire.nufft import anufft, nufft +from aspire.numeric import xp from aspire.utils.matlab_compat import m_flatten, m_reshape logger = logging.getLogger(__name__) @@ -146,10 +147,10 @@ def _precomp(self): ) return { - "radial_wtd": radial_wtd, - "ang_phi_wtd_even": ang_phi_wtd_even, - "ang_phi_wtd_odd": ang_phi_wtd_odd, - "ang_theta_wtd": ang_theta_wtd, + "radial_wtd": xp.asarray(radial_wtd), + "ang_phi_wtd_even": [xp.asarray(x) for x in ang_phi_wtd_even], + "ang_phi_wtd_odd": [xp.asarray(x) for x in ang_phi_wtd_odd], + "ang_theta_wtd": xp.asarray(ang_theta_wtd), "fourier_pts": fourier_pts, } @@ -163,6 +164,7 @@ def _evaluate(self, v): coordinate basis. This is an array whose last three dimensions equal `self.sz` and the remaining dimensions correspond to `v`. """ + v = xp.asarray(v) # roll dimensions of v sz_roll = v.shape[:-1] v = v.reshape((-1, self.count)) @@ -175,7 +177,7 @@ def _evaluate(self, v): # number of 3D image samples n_data = v.shape[0] - u_even = np.zeros( + u_even = xp.zeros( ( n_r, int(2 * self.ell_max + 1), @@ -184,7 +186,7 @@ def _evaluate(self, v): ), dtype=v.dtype, ) - u_odd = np.zeros( + u_odd = xp.zeros( (n_r, int(2 * self.ell_max + 1), n_data, int(np.ceil(self.ell_max / 2))), dtype=v.dtype, ) @@ -216,10 +218,10 @@ def _evaluate(self, v): int((ell - 1) / 2), ] = v_ell - u_even = np.transpose(u_even, (3, 0, 1, 2)) - u_odd = np.transpose(u_odd, (3, 0, 1, 2)) - w_even = np.zeros((n_phi, n_r, n_data, 2 * self.ell_max + 1), dtype=v.dtype) - w_odd = np.zeros((n_phi, n_r, n_data, 2 * self.ell_max + 1), dtype=v.dtype) + u_even = xp.transpose(u_even, (3, 0, 1, 2)) + u_odd = xp.transpose(u_odd, (3, 0, 1, 2)) + w_even = xp.zeros((n_phi, n_r, n_data, 2 * self.ell_max + 1), dtype=v.dtype) + w_odd = xp.zeros((n_phi, n_r, n_data, 2 * self.ell_max + 1), dtype=v.dtype) # evaluate the phi parts for m in range(0, self.ell_max + 1): @@ -252,8 +254,8 @@ def _evaluate(self, v): w_even[:, :, :, self.ell_max + sgn * m] = w_m_even w_odd[:, :, :, self.ell_max + sgn * m] = w_m_odd - w_even = np.transpose(w_even, (3, 0, 1, 2)) - w_odd = np.transpose(w_odd, (3, 0, 1, 2)) + w_even = xp.transpose(w_even, (3, 0, 1, 2)) + w_odd = xp.transpose(w_odd, (3, 0, 1, 2)) u_even = w_even u_odd = w_odd @@ -266,7 +268,7 @@ def _evaluate(self, v): pf = w_even + 1j * w_odd pf = m_reshape(pf, (n_theta * n_phi * n_r, n_data)) - pf = np.moveaxis(pf, 0, -1) + pf = xp.moveaxis(pf, 0, -1) # perform inverse non-uniformly FFT transformation back to 3D rectangular coordinates freqs = m_reshape(self._precomp["fourier_pts"], (3, n_r * n_theta * n_phi)) @@ -275,7 +277,7 @@ def _evaluate(self, v): # Roll, return the x with the last three dimensions as self.sz # Higher dimensions should be like v. x = x.reshape((*sz_roll, *self.sz)) - return x + return xp.asnumpy(x) def _evaluate_t(self, x): """ @@ -288,6 +290,7 @@ def _evaluate_t(self, x): `self.count` and whose remaining dimensions correspond to higher dimensions of `x`. """ + x = xp.asarray(x) # roll dimensions sz_roll = x.shape[:-3] x = x.reshape((-1, *self.sz)) @@ -303,20 +306,21 @@ def _evaluate_t(self, x): pf = m_reshape(pf.T, (n_theta, n_phi * n_r * n_data)) # evaluate the theta parts - u_even = self._precomp["ang_theta_wtd"].T @ np.real(pf) - u_odd = self._precomp["ang_theta_wtd"].T @ np.imag(pf) + tmp = self._precomp["ang_theta_wtd"].T + u_even = tmp @ xp.real(pf) + u_odd = tmp @ xp.imag(pf) u_even = m_reshape(u_even, (2 * self.ell_max + 1, n_phi, n_r, n_data)) u_odd = m_reshape(u_odd, (2 * self.ell_max + 1, n_phi, n_r, n_data)) - u_even = np.transpose(u_even, (1, 2, 3, 0)) - u_odd = np.transpose(u_odd, (1, 2, 3, 0)) + u_even = xp.transpose(u_even, (1, 2, 3, 0)) + u_odd = xp.transpose(u_odd, (1, 2, 3, 0)) - w_even = np.zeros( + w_even = xp.zeros( (int(np.floor(self.ell_max / 2) + 1), n_r, 2 * self.ell_max + 1, n_data), dtype=x.dtype, ) - w_odd = np.zeros( + w_odd = xp.zeros( (int(np.ceil(self.ell_max / 2)), n_r, 2 * self.ell_max + 1, n_data), dtype=x.dtype, ) @@ -351,11 +355,11 @@ def _evaluate_t(self, x): end = np.size(w_odd, 0) w_odd[end - n_odd_ell : end, :, self.ell_max + sgn * m, :] = w_m_odd - w_even = np.transpose(w_even, (1, 2, 3, 0)) - w_odd = np.transpose(w_odd, (1, 2, 3, 0)) + w_even = xp.transpose(w_even, (1, 2, 3, 0)) + w_odd = xp.transpose(w_odd, (1, 2, 3, 0)) # evaluate the radial parts - v = np.zeros((n_data, self.count), dtype=x.dtype) + v = xp.zeros((n_data, self.count), dtype=x.dtype) for ell in range(0, self.ell_max + 1): k_max_ell = self.k_max[ell] radial_wtd = self._precomp["radial_wtd"][:, 0:k_max_ell, ell] @@ -388,4 +392,4 @@ def _evaluate_t(self, x): # Roll dimensions, last dimension should be self.count, # Higher dimensions like x. v = v.reshape((*sz_roll, self.count)) - return v + return xp.asnumpy(v) From c852070792181fa56b28674702a00e29c2af51cf Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 13 Jun 2024 14:58:00 -0400 Subject: [PATCH 26/65] better style --- src/aspire/basis/ffb_3d.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/aspire/basis/ffb_3d.py b/src/aspire/basis/ffb_3d.py index 1ac5fd62ff..5740900a34 100644 --- a/src/aspire/basis/ffb_3d.py +++ b/src/aspire/basis/ffb_3d.py @@ -218,8 +218,8 @@ def _evaluate(self, v): int((ell - 1) / 2), ] = v_ell - u_even = xp.transpose(u_even, (3, 0, 1, 2)) - u_odd = xp.transpose(u_odd, (3, 0, 1, 2)) + u_even = u_even.transpose((3, 0, 1, 2)) + u_odd = u_odd.transpose((3, 0, 1, 2)) w_even = xp.zeros((n_phi, n_r, n_data, 2 * self.ell_max + 1), dtype=v.dtype) w_odd = xp.zeros((n_phi, n_r, n_data, 2 * self.ell_max + 1), dtype=v.dtype) @@ -254,8 +254,8 @@ def _evaluate(self, v): w_even[:, :, :, self.ell_max + sgn * m] = w_m_even w_odd[:, :, :, self.ell_max + sgn * m] = w_m_odd - w_even = xp.transpose(w_even, (3, 0, 1, 2)) - w_odd = xp.transpose(w_odd, (3, 0, 1, 2)) + w_even = w_even.transpose((3, 0, 1, 2)) + w_odd = w_odd.transpose((3, 0, 1, 2)) u_even = w_even u_odd = w_odd @@ -307,14 +307,14 @@ def _evaluate_t(self, x): # evaluate the theta parts tmp = self._precomp["ang_theta_wtd"].T - u_even = tmp @ xp.real(pf) - u_odd = tmp @ xp.imag(pf) + u_even = tmp @ pf.real + u_odd = tmp @ pf.imag u_even = m_reshape(u_even, (2 * self.ell_max + 1, n_phi, n_r, n_data)) u_odd = m_reshape(u_odd, (2 * self.ell_max + 1, n_phi, n_r, n_data)) - u_even = xp.transpose(u_even, (1, 2, 3, 0)) - u_odd = xp.transpose(u_odd, (1, 2, 3, 0)) + u_even = u_even.transpose((1, 2, 3, 0)) + u_odd = u_odd.transpose((1, 2, 3, 0)) w_even = xp.zeros( (int(np.floor(self.ell_max / 2) + 1), n_r, 2 * self.ell_max + 1, n_data), @@ -355,8 +355,8 @@ def _evaluate_t(self, x): end = np.size(w_odd, 0) w_odd[end - n_odd_ell : end, :, self.ell_max + sgn * m, :] = w_m_odd - w_even = xp.transpose(w_even, (1, 2, 3, 0)) - w_odd = xp.transpose(w_odd, (1, 2, 3, 0)) + w_even = w_even.transpose((1, 2, 3, 0)) + w_odd = w_odd.transpose((1, 2, 3, 0)) # evaluate the radial parts v = xp.zeros((n_data, self.count), dtype=x.dtype) From 75acfb19cea42396615e99ab3e9a162ac3f8f21e Mon Sep 17 00:00:00 2001 From: Josh Carmichael Date: Thu, 13 Jun 2024 15:11:04 -0400 Subject: [PATCH 27/65] last cupy fill --- src/aspire/basis/fle_2d.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index 4278331e1a..df1d66c608 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -452,7 +452,7 @@ def _create_basis_functions(self): """ Generate the actual basis functions as Python lambda operators """ - norm_constants = xp.zeros(self.count) + norm_constants = np.zeros(self.count) basis_functions = [None] * self.count for i in range(self.count): # parameters defining the basis function: bessel order and which bessel root @@ -481,7 +481,7 @@ def _create_basis_functions(self): norm_constants[i] = c - self.norm_constants = norm_constants + self.norm_constants = xp.asarray(norm_constants) self.basis_functions = basis_functions def _evaluate(self, coefs): From ab8ca85713186e9ef370a1309ab434112c0eb527 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 18 Jun 2024 09:39:42 -0400 Subject: [PATCH 28/65] revert config to numpy/scipy --- src/aspire/config_default.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aspire/config_default.yaml b/src/aspire/config_default.yaml index fed4cea50a..def78983c0 100644 --- a/src/aspire/config_default.yaml +++ b/src/aspire/config_default.yaml @@ -1,9 +1,9 @@ version: 0.12.3 common: # numeric module to use - one of numpy/cupy - numeric: cupy + numeric: numpy # fft backend to use - one of pyfftw/scipy/cupy/mkl - fft: cupy + fft: scipy # Set cache directory for ASPIRE example data. # By default the cache location will be set by pooch.os_cache(), From 54289abc29d8f7cf65cc2d62e20382ba9211b9ed Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 18 Jun 2024 16:13:28 -0400 Subject: [PATCH 29/65] fft host array preservation --- src/aspire/numeric/cupy_fft.py | 45 +++++++++++++++++++++++++++++++--- 1 file changed, 41 insertions(+), 4 deletions(-) diff --git a/src/aspire/numeric/cupy_fft.py b/src/aspire/numeric/cupy_fft.py index 29939e504c..7d73367bc7 100644 --- a/src/aspire/numeric/cupy_fft.py +++ b/src/aspire/numeric/cupy_fft.py @@ -1,9 +1,36 @@ +import functools + import cupy as cp import cupyx.scipy.fft as cufft from aspire.numeric.base_fft import FFT +def _preserve_host(func): + """ + Method decorator that returns a numpy/cupy array result when passed a numpy/cupy array input. + + This improves the flexibility of our FFT wrappers by allowing for incremental code changes. + """ + + @functools.wraps(func) # Pass metadata (eg name and doctrings) from `func` + def wrapper(self, x, *args, **kwargs): + + _host = False + if not isinstance(x, cp.ndarray): + _host = True + x = cp.asarray(x) + + res = func(self, x, *args, **kwargs) + + if _host: + res = res.get() + + return res + + return wrapper + + class CupyFFT(FFT): """ Define a unified wrapper class for Cupy FFT functions @@ -11,32 +38,42 @@ class CupyFFT(FFT): To be consistent with Scipy and Pyfftw, not all arguments are included. """ + @_preserve_host def fft(self, x, axis=-1, workers=-1): return cp.fft.fft(x, axis=axis) + @_preserve_host def ifft(self, x, axis=-1, workers=-1): return cp.fft.ifft(x, axis=axis) + @_preserve_host def fft2(self, x, axes=(-2, -1), workers=-1): return cp.fft.fft2(x, axes=axes) + @_preserve_host def ifft2(self, x, axes=(-2, -1), workers=-1): return cp.fft.ifft2(x, axes=axes) + @_preserve_host def fftn(self, x, axes=None, workers=-1): return cp.fft.fftn(x, axes=axes) + @_preserve_host def ifftn(self, x, axes=None, workers=-1): return cp.fft.ifftn(x, axes=axes) + @_preserve_host def fftshift(self, x, axes=None): return cp.fft.fftshift(x, axes=axes) + @_preserve_host def ifftshift(self, x, axes=None): return cp.fft.ifftshift(x, axes=axes) - def dct(self, *args, **kwargs): - return cufft.dct(*args, **kwargs) + @_preserve_host + def dct(self, x, **kwargs): + return cufft.dct(x, **kwargs) - def idct(self, *args, **kwargs): - return cufft.idct(*args, **kwargs) + @_preserve_host + def idct(self, x, **kwargs): + return cufft.idct(x, **kwargs) From fc15024dcdc8441026eb692d7aef7a5aec16b01a Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 Jun 2024 09:29:34 -0400 Subject: [PATCH 30/65] interop crop_pad_2d --- src/aspire/utils/coor_trans.py | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/aspire/utils/coor_trans.py b/src/aspire/utils/coor_trans.py index dfb1c630f1..771c5bd5af 100644 --- a/src/aspire/utils/coor_trans.py +++ b/src/aspire/utils/coor_trans.py @@ -369,6 +369,11 @@ def rots_to_clmatrix(rots, n_theta): def crop_pad_2d(im, size, fill_value=0): """ + Crop/pads `im` according to `size`. + + Padding will use `fill_value`. + Return's host/gpu array based on `im`. + :param im: A >=2-dimensional numpy array :param size: Integer size of cropped/padded output :return: A numpy array of shape (..., size, size) @@ -387,8 +392,16 @@ def crop_pad_2d(im, size, fill_value=0): # Determine shape shape = list(im.shape[:-2]) shape.extend([size, size]) - # ensure that we return in the same dtype as the input - to_return = xp.full(shape, fill_value, dtype=im.dtype) + + # Ensure that we return in the same dtype as the input + _full = np.full # Default to numpy array + if isinstance(im, xp.ndarray): + # Use cupy when `im` _and_ xp are cupy ndarray + # Avoids having to handle when cupy is not installed + _full = xp.full + + to_return = _full(shape, fill_value, dtype=im.dtype) + # when padding, start_x and start_y are negative since size is larger # than im_x and im_y; the below line calculates where the original image # is placed in relation to the (now-larger) box size From 0bb40179ce4c1d7e3a1468812f943a76bebd1e14 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 Jun 2024 10:11:49 -0400 Subject: [PATCH 31/65] interop fle radial convolve --- src/aspire/basis/fle_2d.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index df1d66c608..332b84f64d 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -721,10 +721,12 @@ def radial_convolve(self, coefs, radial_img): "`radial_convolve` currently only implemented for 1D stacks." ) - coefs = coefs.asnumpy() + # Potentially migrate to GPU + coefs = xp.asarray(coefs.asnumpy()) + radial_img = xp.asarray(radial_img) num_img = coefs.shape[0] - coefs_conv = np.zeros(coefs.shape) + coefs_conv = xp.zeros(coefs.shape) # Convert to internal FLE indices ordering coefs = coefs[..., self._fb_to_fle_indices] @@ -736,25 +738,26 @@ def radial_convolve(self, coefs, radial_img): weights = self._radial_convolve_weights(b) b = weights / (self.h**2) b = b.reshape(self.count) - coefs_conv[k, :] = np.real(self.c2r @ (b * (self.r2c @ _coefs).flatten())) + coefs_conv[k, :] = (self.c2r @ (b * (self.r2c @ _coefs).flatten())).real # Convert from internal FLE ordering to FB convention coefs_conv = coefs_conv[..., self._fle_to_fb_indices] - return Coef(self, coefs_conv) + # Return as Coef on host + return Coef(self, xp.asnumpy(coefs_conv)) def _radial_convolve_weights(self, b): """ Helper function for step 3 of convolving with a radial function. """ - b = np.squeeze(b) - b = np.array(b) + b = xp.squeeze(b) + b = xp.array(b) # implies copy if self.num_interp > self.num_radial_nodes: b = fft.dct(b, axis=0, type=2) / (2 * self.num_radial_nodes) - bz = np.zeros(b.shape) - b = np.concatenate((b, bz), axis=0) + bz = xp.zeros(b.shape) + b = xp.concatenate((b, bz), axis=0) b = fft.idct(b, axis=0, type=2) * 2 * b.shape[0] - a = np.zeros(self.count, dtype=np.float64) + a = xp.zeros(self.count, dtype=np.float64) y = [None] * (self.ell_p_max + 1) for i in range(self.ell_p_max + 1): y[i] = (self.A3[i] @ b[:, 0]).flatten() From 48b93e72df07c490f5a1fa6e6b9af8a04b4a94a2 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 Jun 2024 14:05:33 -0400 Subject: [PATCH 32/65] cleanup --- src/aspire/utils/coor_trans.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aspire/utils/coor_trans.py b/src/aspire/utils/coor_trans.py index 771c5bd5af..ef7857c994 100644 --- a/src/aspire/utils/coor_trans.py +++ b/src/aspire/utils/coor_trans.py @@ -401,7 +401,7 @@ def crop_pad_2d(im, size, fill_value=0): _full = xp.full to_return = _full(shape, fill_value, dtype=im.dtype) - + # when padding, start_x and start_y are negative since size is larger # than im_x and im_y; the below line calculates where the original image # is placed in relation to the (now-larger) box size From c6a5c1616d803b5f8424c4ca8fd6dbb027a3c271 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 Jun 2024 14:06:43 -0400 Subject: [PATCH 33/65] remove bbenchmark code, hackathon over --- bbenchmark/bbenchmark.py | 58 ------------------------------ bbenchmark/benchmark_gpu0.pkl | Bin 307 -> 0 bytes bbenchmark/benchmark_host.pkl | Bin 307 -> 0 bytes bbenchmark/plot_bb.py | 64 ---------------------------------- 4 files changed, 122 deletions(-) delete mode 100644 bbenchmark/bbenchmark.py delete mode 100644 bbenchmark/benchmark_gpu0.pkl delete mode 100644 bbenchmark/benchmark_host.pkl delete mode 100644 bbenchmark/plot_bb.py diff --git a/bbenchmark/bbenchmark.py b/bbenchmark/bbenchmark.py deleted file mode 100644 index 01aac6e7eb..0000000000 --- a/bbenchmark/bbenchmark.py +++ /dev/null @@ -1,58 +0,0 @@ -import os -import pickle -from pprint import pprint -from time import perf_counter, time - -import matplotlib.pyplot as plt -import numpy as np - -from aspire.basis import FFBBasis2D, FLEBasis2D -from aspire.downloader import emdb_2660 -from aspire.noise import WhiteNoiseAdder -from aspire.source import ArrayImageSource, Simulation - -# Download and cache volume map -vol = emdb_2660().astype(np.float64) # doubles -cached_image_fn = "simulated_images.npy" - -if os.path.exists(cached_image_fn): - print(f"Loading cached image source from {cached_image_fn}.") - sim = ArrayImageSource(np.load(cached_image_fn)) -else: - print("Generating Simulated Datatset") - sim = Simulation( - n=512, C=1, vols=vol, noise_adder=WhiteNoiseAdder.from_snr(0.1) - ).cache() - print(f"Saving to {cached_image_fn}") - np.save(cached_image_fn, sim.images[:].asnumpy()) - - -TIMES = {} -for L in [32, 64, 128, 256]: - print(f"Begin L={L}") - src = sim.downsample(L) - imgs = src.images[:] - TIMES[L] = {} - for basis_type in [FFBBasis2D, FLEBasis2D]: - # Construct basis - TIMES[L][basis_type.__name__] = {} - basis = basis_type(L, dtype=src.dtype) - - # Time expanding into basis - tic = perf_counter() - coef = basis.evaluate_t(imgs) - toc = perf_counter() - TIMES[L][basis_type.__name__]["evaluate_t"] = toc - tic - - # Time expanding back into images - tic = perf_counter() - _ = coef.evaluate() - toc = perf_counter() - TIMES[L][basis_type.__name__]["evaluate"] = toc - tic - - -pprint(TIMES) - - -with open(f"benchmark_{int(time())}.pkl", "wb") as fh: - pickle.dump(TIMES, fh) diff --git a/bbenchmark/benchmark_gpu0.pkl b/bbenchmark/benchmark_gpu0.pkl deleted file mode 100644 index e702dd442dced27acb26fcc3b3e395ce9f465585..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 307 zcmZo*nX19a00y;FG`tmnL=Tsno0C&wab~fR%M>s_wJb5GG_fQ#zGRBK{fB=m-#lPo z=;45g0>v(0*=`CnqZFvs#}!Fy28+7`_dcgM4hDt{R(A)1g!$SUKxL)g4nT7=m_P)J zyZseA`*#jt74}bVm#$9$s>oo2$TFJo@? J0igC$Jpf$(W#0e* diff --git a/bbenchmark/benchmark_host.pkl b/bbenchmark/benchmark_host.pkl deleted file mode 100644 index dc0dd2a1769fc52c9470986e74eb64864f59e7fe..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 307 zcmZo*nX19a00y;FG`tmnL=Tsno0C&wab~fR%M>s_wJb5GG_fQ#zGRBK{l>F_wm|hg z957L!*k;SlN}yONP^*tClGY3scLzS3waO9<3>mEM4m>kTmTLf&m3lh>&COr}5iIWZ zmm4fH4}ewJUv3m%3ovVBHPKy1(9iTm;ktG~fPnyM$W- zvToDPIg_qJbek#on}>jO`!X;hX?O6pRZ8-OC=s5uZo?jA?UmgRJ_s6sonHA^-k?wb IsJ&DV0PQbeIsgCw diff --git a/bbenchmark/plot_bb.py b/bbenchmark/plot_bb.py deleted file mode 100644 index 05f5350f4b..0000000000 --- a/bbenchmark/plot_bb.py +++ /dev/null @@ -1,64 +0,0 @@ -import os -import pickle -from pprint import pprint - -import matplotlib.pyplot as plt -import numpy as np - -host_fn = "benchmark_host.pkl" -gpu_fn = "benchmark_gpu0.pkl" - - -with open(host_fn, "rb") as fh: - host_times = pickle.load(fh) - -with open(gpu_fn, "rb") as fh: - gpu_times = pickle.load(fh) - -markers = {"FFBBasis2D": "8", "FLEBasis2D": "s"} - -# Evaluate_t -Ls = list(host_times.keys()) -for basis_type in markers.keys(): - plt.plot( - Ls, - [host_times[L][basis_type]["evaluate_t"] for L in Ls], - marker=markers[basis_type], - color="blue", - label=basis_type + "-host", - ) - plt.plot( - Ls, - [gpu_times[L][basis_type]["evaluate_t"] for L in Ls], - marker=markers[basis_type], - color="green", - label=basis_type + "-gpu", - ) -plt.title("Basis `evaluate_t` Permformance - Batch of 512 Images") -plt.xlabel("Image Pixel L (LxL)") -plt.ylabel("Time (seconds)") -plt.legend() -plt.savefig("evaluate_t.png") -plt.show() - -for basis_type in markers.keys(): - plt.plot( - Ls, - [host_times[L][basis_type]["evaluate"] for L in Ls], - marker=markers[basis_type], - color="blue", - label=basis_type + "-host", - ) - plt.plot( - Ls, - [gpu_times[L][basis_type]["evaluate"] for L in Ls], - marker=markers[basis_type], - color="green", - label=basis_type + "-gpu", - ) -plt.title("Basis `evaluate` Permformance - Batch of 512 Images") -plt.xlabel("Image Pixel L (LxL)") -plt.ylabel("Time (seconds)") -plt.legend() -plt.savefig("evaluate.png") -plt.show() From d968df3f22bac472cfaed66b6d671e7391ef7795 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 Jun 2024 14:25:30 -0400 Subject: [PATCH 34/65] fix interop cp check --- src/aspire/nufft/__init__.py | 12 +++++++----- src/aspire/numeric/numpy.py | 9 +++++++-- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/src/aspire/nufft/__init__.py b/src/aspire/nufft/__init__.py index f748a10d84..c2db4f2e8e 100644 --- a/src/aspire/nufft/__init__.py +++ b/src/aspire/nufft/__init__.py @@ -2,13 +2,15 @@ import numpy as np +from aspire import config +from aspire.utils import LogFilterByCount, complex_type, real_type + +cp = None try: import cupy as cp except ModuleNotFoundError: - cp = None + pass -from aspire import config -from aspire.utils import LogFilterByCount, complex_type, real_type logger = logging.getLogger(__name__) @@ -196,7 +198,7 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): adjoint = adjoint.real if real else adjoint - if not on_gpu: + if cp and not on_gpu: adjoint = adjoint.get() return adjoint @@ -257,7 +259,7 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): transform = transform.real if real else transform - if not on_gpu: + if cp and not on_gpu: transform = transform.get() return transform diff --git a/src/aspire/numeric/numpy.py b/src/aspire/numeric/numpy.py index 9367409c78..07627399f9 100644 --- a/src/aspire/numeric/numpy.py +++ b/src/aspire/numeric/numpy.py @@ -1,11 +1,16 @@ -import cupy as cp import numpy as np +cp = None +try: + import cupy as cp +except ModuleNotFoundError: + pass + class Numpy: @staticmethod def asnumpy(x): - if isinstance(x, cp.ndarray): + if cp and isinstance(x, cp.ndarray): x = x.get() return x From 29b990c9d194791d5aa0009311cd77ef2a98f430 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 Jun 2024 15:22:03 -0400 Subject: [PATCH 35/65] use cupy modes on ampere_gpu jobs --- .github/workflows/workflow.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index f2d5472e52..24528a6b72 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -148,6 +148,8 @@ jobs: echo "WORK_DIR=${WORK_DIR}" >> $GITHUB_ENV echo -e "ray:\n temp_dir: ${WORK_DIR}\n" > ${WORK_DIR}/config.yaml echo -e "common:\n cache_dir: ${CI_CACHE_DIR}\n" >> ${WORK_DIR}/config.yaml + echo -e " numeric: cupy\n" >> ${WORK_DIR}/config.yaml + echo -e " fft: cupy\n" >> ${WORK_DIR}/config.yaml echo "Log the config: ${WORK_DIR}/config.yaml" cat ${WORK_DIR}/config.yaml - name: Run From 3baf067414a8a5b71bcd25b7bf92a9a73db1d787 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 Jun 2024 15:24:25 -0400 Subject: [PATCH 36/65] ws cleanup in gha config gen --- .github/workflows/workflow.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index 24528a6b72..fce5a7f6d4 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -147,8 +147,8 @@ jobs: echo "Stash the WORK_DIR to GitHub env so we can clean it up later." echo "WORK_DIR=${WORK_DIR}" >> $GITHUB_ENV echo -e "ray:\n temp_dir: ${WORK_DIR}\n" > ${WORK_DIR}/config.yaml - echo -e "common:\n cache_dir: ${CI_CACHE_DIR}\n" >> ${WORK_DIR}/config.yaml - echo -e " numeric: cupy\n" >> ${WORK_DIR}/config.yaml + echo -e "common:\n cache_dir: ${CI_CACHE_DIR}" >> ${WORK_DIR}/config.yaml + echo -e " numeric: cupy" >> ${WORK_DIR}/config.yaml echo -e " fft: cupy\n" >> ${WORK_DIR}/config.yaml echo "Log the config: ${WORK_DIR}/config.yaml" cat ${WORK_DIR}/config.yaml From eecfbdfe069911e352173bc6d76f1be2a41cfa19 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 20 Jun 2024 17:14:46 -0400 Subject: [PATCH 37/65] remove older GPU environments. --- pyproject.toml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index fcc0f7cf4f..c9c25a9976 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -61,10 +61,6 @@ dependencies = [ "Source" = "https://github.com/ComputationalCryoEM/ASPIRE-Python" [project.optional-dependencies] -gpu-102 = ["cupy-cuda102", "cufinufft==1.3"] -gpu-110 = ["cupy-cuda110", "cufinufft==1.3"] -gpu-111 = ["cupy-cuda111", "cufinufft==1.3"] -gpu-11x = ["cupy-cuda11x", "cufinufft==1.3"] gpu-12x = ["cupy-cuda12x", "cufinufft==2.2.0"] dev = [ "black", From 795905208ac89f9d362b4b4053b68505b44b4ee8 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 21 Jun 2024 09:47:04 -0400 Subject: [PATCH 38/65] fle basis to mat xp conversion --- src/aspire/basis/fle_2d.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index 332b84f64d..e82dbb5d2f 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -786,20 +786,26 @@ def filter_to_basis_mat(self, f, **kwargs): # get 2D grid in polar coordinate k_vals, wts = lgwt(n_k, 0, 0.5, dtype=self.dtype) - k, theta = np.meshgrid( - k_vals, np.arange(n_theta) * 2 * np.pi / (2 * n_theta), indexing="ij" + k, theta = xp.meshgrid( + xp.asarray(k_vals), + xp.arange(n_theta) * 2 * np.pi / (2 * n_theta), + indexing="ij", ) # Get function values in polar 2D grid and average out angle contribution # NOTE: should probably just let the ctf objects handle this... - omegax = k * np.cos(theta) - omegay = k * np.sin(theta) - omega = 2 * np.pi * np.vstack((omegax.flatten("C"), omegay.flatten("C"))) - - h_vals2d = h_fun(omega).reshape(n_k, n_theta).astype(self.dtype) - h_vals = np.sum(h_vals2d, axis=1) / n_theta + omegax = k * xp.cos(theta) + omegay = k * xp.sin(theta) + omega = 2 * xp.pi * xp.vstack((omegax.flatten("C"), omegay.flatten("C"))) + + h_vals2d = ( + xp.asarray(h_fun(omega)) + .reshape(n_k, n_theta) + .astype(self.dtype, copy=False) + ) + h_vals = xp.sum(h_vals2d, axis=1) / n_theta - h_basis = np.zeros(self.count, dtype=self.dtype) + h_basis = xp.zeros(self.count, dtype=self.dtype) # For now we just need to handle 1D (stack of one ctf) for j in range(self.ell_p_max + 1): h_basis[self.idx_list[j]] = self.A3[j] @ h_vals @@ -807,4 +813,4 @@ def filter_to_basis_mat(self, f, **kwargs): # Convert from internal FLE ordering to FB convention h_basis = h_basis[self._fle_to_fb_indices] - return DiagMatrix(h_basis) + return DiagMatrix(xp.asnumpy(h_basis)) From c429d0243e3cb28e8e8a8397a2733c8eb2a4e74c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 21 Jun 2024 10:14:39 -0400 Subject: [PATCH 39/65] better eigsh sanity check --- tests/test_numeric_sparse.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_numeric_sparse.py b/tests/test_numeric_sparse.py index 288e41a176..5a8227fe47 100644 --- a/tests/test_numeric_sparse.py +++ b/tests/test_numeric_sparse.py @@ -47,7 +47,8 @@ def test_eigsh(backends): """ xp, sparse = backends - A = xp.eye(1234) + n = 123 + A = xp.diag(xp.arange(1, n + 1, dtype=np.float64)) - lamb, _ = sparse.linalg.eigsh(A) - np.testing.assert_allclose(xp.asnumpy(lamb), 1.0) + lamb, _ = sparse.linalg.eigsh(A, k=1) + np.testing.assert_allclose(xp.asnumpy(lamb), n) From bb8a913ddf86afa079ef5434778022a1cad1966a Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 24 Jun 2024 10:50:24 -0400 Subject: [PATCH 40/65] cupy fft accuracy casting work around --- src/aspire/numeric/cupy_fft.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/src/aspire/numeric/cupy_fft.py b/src/aspire/numeric/cupy_fft.py index 7d73367bc7..043780c7a7 100644 --- a/src/aspire/numeric/cupy_fft.py +++ b/src/aspire/numeric/cupy_fft.py @@ -2,6 +2,7 @@ import cupy as cp import cupyx.scipy.fft as cufft +import numpy as np from aspire.numeric.base_fft import FFT @@ -16,6 +17,17 @@ def _preserve_host(func): @functools.wraps(func) # Pass metadata (eg name and doctrings) from `func` def wrapper(self, x, *args, **kwargs): + # CuPy's single precision FFT appears to be too inaccurate for + # many of our unit tests, so the signal is upcast and recast + # on return. + _singles = False + if x.dtype == np.float32: + _singles = True + x = x.astype(np.float64) + elif x.dtype == np.complex64: + _singles = True + x = x.astype(np.complex128) + _host = False if not isinstance(x, cp.ndarray): _host = True @@ -26,6 +38,12 @@ def wrapper(self, x, *args, **kwargs): if _host: res = res.get() + # Recast if needed. + if _singles and res.dtype == np.float64: + res = res.astype(np.float32) + elif _singles and res.dtype == np.complex128: + res = res.astype(np.complex64) + return res return wrapper From 4204efe5a210ab750550935bb164d1d0f7e96503 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 24 Jun 2024 10:51:01 -0400 Subject: [PATCH 41/65] some numpy/cupy interop tweaks --- src/aspire/image/image.py | 15 ++++++++------- src/aspire/volume/volume.py | 8 ++++---- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index a0a954f2d9..13c6e122de 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -377,17 +377,18 @@ def filter(self, filter): im = self.stack_reshape(-1) - filter_values = filter.evaluate_grid(self.resolution) + filter_values = xp.asarray(filter.evaluate_grid(self.resolution)) - im_f = xp.asnumpy(fft.centered_fft2(xp.asarray(im._data))) + im_f = fft.centered_fft2(xp.asarray(im._data)) # TODO: why are these different? Doesn't the broadcast work? if im_f.ndim > filter_values.ndim: im_f *= filter_values else: im_f = filter_values * im_f - im = xp.asnumpy(fft.centered_ifft2(xp.asarray(im_f))) - im = np.real(im) + + im = fft.centered_ifft2(im_f) + im = xp.asnumpy(im.real) return self.__class__(im).stack_reshape(original_stack_shape) @@ -461,7 +462,7 @@ def _im_translate(self, shifts): shifts = shifts.astype(self.dtype) L = self.resolution - im_f = xp.asnumpy(fft.fft2(xp.asarray(im))) + im_f = xp.asnumpy(fft.fft2(xp.asarray(im))) # todo grid_shifted = fft.ifftshift( xp.asarray(np.ceil(np.arange(-L / 2, L / 2, dtype=self.dtype))) ) @@ -477,8 +478,8 @@ def _im_translate(self, shifts): ) mult_f = np.exp(-1j * phase_shifts) im_translated_f = im_f * mult_f - im_translated = xp.asnumpy(fft.ifft2(xp.asarray(im_translated_f))) - im_translated = np.real(im_translated) + im_translated = fft.ifft2(xp.asarray(im_translated_f)) + im_translated = xp.asnumpy(im_translated.real) # Reshape to stack shape return self.__class__(im_translated).stack_reshape(stack_shape) diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index 7883f59190..b7e4245ede 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -342,13 +342,13 @@ def project(self, rot_matrices): if rot_matrices.ndim == 2: rot_matrices = np.expand_dims(rot_matrices, axis=0) - data = self._data + data = xp.asarray(self._data) n_rots = rot_matrices.shape[0] pts_rot = rotated_grids(self.resolution, rot_matrices) if n_rots == self.n_vols: # Apply rotations to Volumes element-wise. - im_f = np.empty( + im_f = xp.empty( (self.n_vols, self.resolution**2), dtype=complex_type(self.dtype) ) pts_rot = pts_rot.reshape((3, n_rots, self.resolution**2)) @@ -370,9 +370,9 @@ def project(self, rot_matrices): im_f[:, 0, :] = 0 im_f[:, :, 0] = 0 - im_f = xp.asnumpy(fft.centered_ifft2(xp.asarray(im_f))) + im_f = fft.centered_ifft2(im_f) - return aspire.image.Image(np.real(im_f)) + return aspire.image.Image(xp.asnumpy(im_f.real)) def to_vec(self): """Returns an N x resolution ** 3 array.""" From c9d3a92253c6234adabbc5a44cb3700f56963321 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 24 Jun 2024 10:57:14 -0400 Subject: [PATCH 42/65] more image interop tweaks --- src/aspire/image/image.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index 13c6e122de..c8cc9cbe24 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -459,15 +459,15 @@ def _im_translate(self, shifts): n_shifts == 1 or n_shifts == self.n_images ), "number of shifts must be 1 or match the number of images" # Cast shifts to this instance's internal dtype - shifts = shifts.astype(self.dtype) + shifts = xp.asarray(shifts, dtype=self.dtype) L = self.resolution - im_f = xp.asnumpy(fft.fft2(xp.asarray(im))) # todo + im_f = fft.fft2(xp.asarray(im)) grid_shifted = fft.ifftshift( - xp.asarray(np.ceil(np.arange(-L / 2, L / 2, dtype=self.dtype))) + xp.ceil(xp.arange(-L / 2, L / 2, dtype=self.dtype)) ) - grid_1d = xp.asnumpy(grid_shifted) * 2 * np.pi / L - om_x, om_y = np.meshgrid(grid_1d, grid_1d, indexing="ij") + grid_1d = grid_shifted * 2 * xp.pi / L + om_x, om_y = xp.meshgrid(grid_1d, grid_1d, indexing="ij") phase_shifts_x = -shifts[:, 0].reshape((n_shifts, 1, 1)) phase_shifts_y = -shifts[:, 1].reshape((n_shifts, 1, 1)) @@ -476,9 +476,9 @@ def _im_translate(self, shifts): om_x[np.newaxis, :, :] * phase_shifts_x + om_y[np.newaxis, :, :] * phase_shifts_y ) - mult_f = np.exp(-1j * phase_shifts) + mult_f = xp.exp(-1j * phase_shifts) im_translated_f = im_f * mult_f - im_translated = fft.ifft2(xp.asarray(im_translated_f)) + im_translated = fft.ifft2(im_translated_f) im_translated = xp.asnumpy(im_translated.real) # Reshape to stack shape From 59ff57effb120bad53e0a6bf05616b21acc5110e Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 24 Jun 2024 14:40:29 -0400 Subject: [PATCH 43/65] misc xp/numeric wrapper cleanup --- src/aspire/nufft/__init__.py | 12 ++++++------ src/aspire/numeric/cupy_fft.py | 12 ++++++++++-- src/aspire/numeric/numpy.py | 4 ++++ src/aspire/numeric/pyfftw_fft.py | 8 ++++---- src/aspire/numeric/scipy_fft.py | 8 ++++---- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/aspire/nufft/__init__.py b/src/aspire/nufft/__init__.py index c2db4f2e8e..953f55ce3e 100644 --- a/src/aspire/nufft/__init__.py +++ b/src/aspire/nufft/__init__.py @@ -172,9 +172,9 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): """ - on_gpu = False + _on_gpu = False if cp and isinstance(sig_f, cp.ndarray): - on_gpu = True + _on_gpu = True if fourier_pts.dtype != real_type(sig_f.dtype): raise RuntimeError( @@ -198,7 +198,7 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): adjoint = adjoint.real if real else adjoint - if cp and not on_gpu: + if cp and not _on_gpu: adjoint = adjoint.get() return adjoint @@ -223,9 +223,9 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): """ - on_gpu = False + _on_gpu = False if cp and isinstance(sig_f, cp.ndarray): - on_gpu = True + _on_gpu = True if fourier_pts.dtype != real_type(sig_f.dtype): raise RuntimeError( @@ -259,7 +259,7 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): transform = transform.real if real else transform - if cp and not on_gpu: + if cp and not _on_gpu: transform = transform.get() return transform diff --git a/src/aspire/numeric/cupy_fft.py b/src/aspire/numeric/cupy_fft.py index 043780c7a7..f67937813f 100644 --- a/src/aspire/numeric/cupy_fft.py +++ b/src/aspire/numeric/cupy_fft.py @@ -7,11 +7,16 @@ from aspire.numeric.base_fft import FFT +# This improves the flexibility of our FFT wrappers by allowing for +# incremental code changes and testing. def _preserve_host(func): """ - Method decorator that returns a numpy/cupy array result when passed a numpy/cupy array input. + Method decorator that returns a numpy/cupy array result when + passed a numpy/cupy array input respectively. - This improves the flexibility of our FFT wrappers by allowing for incremental code changes. + At the time of writing this wrapper will also upcast cupy FFT + operations to doubles as the precision in singles can cause + accuracy issues. """ @functools.wraps(func) # Pass metadata (eg name and doctrings) from `func` @@ -20,6 +25,9 @@ def wrapper(self, x, *args, **kwargs): # CuPy's single precision FFT appears to be too inaccurate for # many of our unit tests, so the signal is upcast and recast # on return. + # Todo, discuss with Joakim whether we want this upcasting + # business configurable or keep singles, both in conjunction + # with xfailing the tests. _singles = False if x.dtype == np.float32: _singles = True diff --git a/src/aspire/numeric/numpy.py b/src/aspire/numeric/numpy.py index 07627399f9..ddc8355816 100644 --- a/src/aspire/numeric/numpy.py +++ b/src/aspire/numeric/numpy.py @@ -8,8 +8,12 @@ class Numpy: + # This can be required when mixing nufft/fft/numpy backend combinations. @staticmethod def asnumpy(x): + """ + Ensure `asnumpy` is always available and returns a numpy array. + """ if cp and isinstance(x, cp.ndarray): x = x.get() return x diff --git a/src/aspire/numeric/pyfftw_fft.py b/src/aspire/numeric/pyfftw_fft.py index afcad98d28..95a8ea80f7 100644 --- a/src/aspire/numeric/pyfftw_fft.py +++ b/src/aspire/numeric/pyfftw_fft.py @@ -160,8 +160,8 @@ def fftshift(self, a, axes=None): def ifftshift(self, a, axes=None): return scipy_fft.ifftshift(a, axes=axes) - def dct(self, *args, **kwargs): - return scipy_fft.dct(*args, **kwargs) + def dct(self, x, **kwargs): + return scipy_fft.dct(x, **kwargs) - def idct(self, *args, **kwargs): - return scipy_fft.idct(*args, **kwargs) + def idct(self, x, **kwargs): + return scipy_fft.idct(x, **kwargs) diff --git a/src/aspire/numeric/scipy_fft.py b/src/aspire/numeric/scipy_fft.py index d78e463803..3891d45671 100644 --- a/src/aspire/numeric/scipy_fft.py +++ b/src/aspire/numeric/scipy_fft.py @@ -34,8 +34,8 @@ def fftshift(self, x, axes=None): def ifftshift(self, x, axes=None): return sp.fft.ifftshift(x, axes=axes) - def dct(self, *args, **kwargs): - return sp.fft.dct(*args, **kwargs) + def dct(self, x, **kwargs): + return sp.fft.dct(x, **kwargs) - def idct(self, *args, **kwargs): - return sp.fft.idct(*args, **kwargs) + def idct(self, x, **kwargs): + return sp.fft.idct(x, **kwargs) From f62e93549783087180c40ca60f7b451aa6ba083b Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 26 Jun 2024 14:20:20 -0400 Subject: [PATCH 44/65] precache fle x y grids on gpu --- src/aspire/basis/fle_2d.py | 22 ++++++++++------------ src/aspire/nufft/cufinufft.py | 14 +++++--------- 2 files changed, 15 insertions(+), 21 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index e82dbb5d2f..2ca41a2994 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -308,12 +308,13 @@ def _compute_nufft_points(self): * np.arange(self.num_angular_nodes // 2, dtype=self.dtype) / self.num_angular_nodes ) - x = np.cos(phi).reshape(1, self.num_angular_nodes // 2) - y = np.sin(phi).reshape(1, self.num_angular_nodes // 2) - x = x * nodes * h - y = y * nodes * h - self.grid_x = x.flatten() - self.grid_y = y.flatten() + grid_xy = np.empty( + (2, self.num_radial_nodes, self.num_angular_nodes // 2), dtype=self.dtype + ) + grid_xy[0] = np.cos(phi) # x + grid_xy[1] = np.sin(phi) # y + grid_xy *= nodes * h + self.grid_xy = xp.asarray(grid_xy.reshape(2, -1)) def _build_interpolation_matrix(self): """ @@ -531,7 +532,7 @@ def _evaluate_t(self, imgs): def _step1_t(self, im): """ Step 1 of the adjoint transformation (images to coefficients). - Calculates the NUFFT of the image on gridpoints `self.grid_x` and `self.grid_y`. + Calculates the NUFFT of the image on gridpoints `grid_x` and `grid_y`. """ im = im.reshape(-1, self.nres, self.nres).astype(complex_type(self.dtype)) num_img = im.shape[0] @@ -539,10 +540,7 @@ def _step1_t(self, im): (num_img, self.num_radial_nodes, self.num_angular_nodes), dtype=complex_type(self.dtype), ) - _z = ( - nufft(im, np.stack((self.grid_x, self.grid_y)), epsilon=self.epsilon) - * self.h**2 - ) + _z = nufft(im, self.grid_xy, epsilon=self.epsilon) * self.h**2 _z = _z.reshape(num_img, self.num_radial_nodes, self.num_angular_nodes // 2) z[:, :, : self.num_angular_nodes // 2] = _z z[:, :, self.num_angular_nodes // 2 :] = np.conj(_z) @@ -645,7 +643,7 @@ def _step1(self, z): z = z[:, :, : self.num_angular_nodes // 2].reshape(num_img, -1) im = anufft( z.astype(complex_type(self.dtype)), - np.stack((self.grid_x, self.grid_y)), + self.grid_xy, (self.nres, self.nres), epsilon=self.epsilon, ) diff --git a/src/aspire/nufft/cufinufft.py b/src/aspire/nufft/cufinufft.py index 2dceb08b80..c1d15ff686 100644 --- a/src/aspire/nufft/cufinufft.py +++ b/src/aspire/nufft/cufinufft.py @@ -51,11 +51,11 @@ def __init__(self, sz, fourier_pts, epsilon=1e-8, ntransforms=1, **kwargs): "cufinufft has caught a non C_CONTIGUOUS array," " `fourier_pts` will be copied to C_CONTIGUOUS." ) - self.fourier_pts = np.ascontiguousarray( - np.mod(fourier_pts + np.pi, 2 * np.pi) - np.pi, dtype=self.dtype + self.fourier_pts = cp.ascontiguousarray( + cp.mod(cp.asarray(fourier_pts, dtype=self.dtype) + cp.pi, 2 * cp.pi) - cp.pi ) - self.num_pts = fourier_pts.shape[1] + self.num_pts = self.fourier_pts.shape[1] self.epsilon = max(epsilon, np.finfo(self.dtype).eps) self._transform_plan = cufPlan( @@ -81,12 +81,8 @@ def __init__(self, sz, fourier_pts, epsilon=1e-8, ntransforms=1, **kwargs): **self.adjoint_opts, ) - # Note, I store self.fourier_pts_gpu so the GPUArrray life - # is tied to instance, instead of this method. - self.fourier_pts_gpu = cp.array(self.fourier_pts) - - self._transform_plan.setpts(*self.fourier_pts_gpu) - self._adjoint_plan.setpts(*self.fourier_pts_gpu) + self._transform_plan.setpts(*self.fourier_pts) + self._adjoint_plan.setpts(*self.fourier_pts) def transform(self, signal): """ From 3f1208baff9e9b99cadbce3d48e2749401e8d7f5 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 27 Jun 2024 15:32:28 -0400 Subject: [PATCH 45/65] Rm unneeded gc call --- src/aspire/basis/fle_2d.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index 2ca41a2994..ba7d4636e9 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -1,4 +1,3 @@ -import gc import logging import numpy as np @@ -21,9 +20,8 @@ def _cleanup(): """ - Utility for informing python+cupy to cleanup memory held by old vars. + Utility for informing cupy to cleanup memory held by old vars. """ - gc.collect() try: import cupy @@ -511,18 +509,18 @@ def _evaluate_t(self, imgs): coefficients. """ # See Section 3.5 - imgs = xp.array(imgs) # Copy here, mutating. + imgs = xp.array(imgs) # Intentionally copying here, mutating. imgs[:, self.radial_mask] = 0 z = self._step1_t(imgs) - del imgs + del imgs # inform python we're done with imgs _cleanup() b = self._step2_t(z) - del z + del z # inform python we're done with z _cleanup() coefs = self._step3_t(b) - del b + del b # inform python we're done with b _cleanup() # return in FB order From cba8d7c5ee6179722de14b39f190440798494d9b Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 28 Jun 2024 08:50:25 -0400 Subject: [PATCH 46/65] Add cupy GPU options to config tutorial --- gallery/tutorials/configuration.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/gallery/tutorials/configuration.py b/gallery/tutorials/configuration.py index 819ff9b675..75354bc422 100644 --- a/gallery/tutorials/configuration.py +++ b/gallery/tutorials/configuration.py @@ -102,6 +102,36 @@ time.sleep(1) print("Done Loop 2\n") +# %% +# Enabling GPU Acceleration +# ------------------------- +# Enabling GPU acceleration requires installing supporting software +# packages and small config changes. Installing the supporting +# software is most easily accomplished by installing ASPIRE with one +# of the published GPU extensions, for example ``pip install +# "aspire[dev,gpu_12x]"``. Once the packages are installed users +# should automatically find that the NUFFT calls are running on the +# GPU. Additional acceleration is achieved by enabling `cupy` for +# `numeric` and `fft` components. +# +# .. code-block:: yaml +# +# common: +# # numeric module to use - one of numpy/cupy +# numeric: cupy +# # fft backend to use - one of pyfftw/scipy/cupy/mkl +# fft: cupy +# +# Alternatively, like other config options, this can be changed +# dynamically with code. +# +# .. code-block:: python +# +# from aspire import config +# +# config["common"]["numeric"] = "cupy" +# config["common"]["fft"] = "cupy" +# # %% # Resolution From f9cc8925a5cdee1b9891b75b3d3bbeeb5e5b1939 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 28 Jun 2024 09:12:55 -0400 Subject: [PATCH 47/65] update GPU install docs --- docs/source/installation.rst | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/docs/source/installation.rst b/docs/source/installation.rst index 4a48e3a505..5fa608ecdf 100644 --- a/docs/source/installation.rst +++ b/docs/source/installation.rst @@ -129,10 +129,10 @@ an M1 laptop: Installing GPU Extensions ************************* -ASPIRE does support GPUs, depending on several external packages. The -collection of GPU extensions can be installed using ``pip``. -Extensions are grouped based on CUDA versions. To find the CUDA -driver version, run ``nvidia-smi`` on the intended system. +ASPIRE does support using a GPU, depending on several external +packages. The collection of GPU extensions can be installed using +``pip``. Extensions are grouped based on CUDA versions. To find the +CUDA driver version, run ``nvidia-smi`` on the intended system. .. list-table:: CUDA GPU Extension Versions :widths: 25 25 @@ -140,14 +140,6 @@ driver version, run ``nvidia-smi`` on the intended system. * - CUDA Version - ASPIRE Extension - * - 10.2 - - gpu-102 - * - 11.0 - - gpu-110 - * - 11.1 - - gpu-111 - * - >=11.2 - - gpu-11x * - >=12 - gpu-12x @@ -164,12 +156,15 @@ the command below would install GPU packages required for ASPIRE. By default if the required GPU extensions are correctly installed, -ASPIRE should automatically begin using the GPU for select components -(such as those using ``nufft``). - -Because GPU extensions depend on several third party packages and -libraries, we can only offer limited support if one of the packages -has a problem on your system. +ASPIRE should automatically begin using the GPU calls to our ``nufft`` module. + +Using GPU in other areas of the code is still an experimental feature +and requires a minor configuration setting to enable ``cupy``. See the +:ref:`sphx_glr_auto_tutorials_configuration.py` for details. Because +GPU extensions depend on several third party softwares and machines +vary wildly, we can only offer limited support if one of the packages +has a problem on your system. We are currently expanding GPU code +coverage. Generating Documentation ************************ From 7675e6638e6e29918fde691cdf44d52bd943ad3a Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 28 Jun 2024 10:14:57 -0400 Subject: [PATCH 48/65] improve crop 3d xp interop --- src/aspire/utils/coor_trans.py | 63 ++++++++++++++++++++++++---------- 1 file changed, 45 insertions(+), 18 deletions(-) diff --git a/src/aspire/utils/coor_trans.py b/src/aspire/utils/coor_trans.py index ef7857c994..457e29f9f8 100644 --- a/src/aspire/utils/coor_trans.py +++ b/src/aspire/utils/coor_trans.py @@ -376,7 +376,7 @@ def crop_pad_2d(im, size, fill_value=0): :param im: A >=2-dimensional numpy array :param size: Integer size of cropped/padded output - :return: A numpy array of shape (..., size, size) + :return: Array of shape (..., size, size) """ im_y, im_x = im.shape[-2:] @@ -393,7 +393,7 @@ def crop_pad_2d(im, size, fill_value=0): shape = list(im.shape[:-2]) shape.extend([size, size]) - # Ensure that we return in the same dtype as the input + # Ensure that we return the same dtype as the input _full = np.full # Default to numpy array if isinstance(im, xp.ndarray): # Use cupy when `im` _and_ xp are cupy ndarray @@ -405,34 +405,61 @@ def crop_pad_2d(im, size, fill_value=0): # when padding, start_x and start_y are negative since size is larger # than im_x and im_y; the below line calculates where the original image # is placed in relation to the (now-larger) box size - to_return[-start_y : im_y - start_y, -start_x : im_x - start_x] = im + to_return[..., -start_y : im_y - start_y, -start_x : im_x - start_x] = im return to_return else: # target size is between mat_x and mat_y raise ValueError("Cannot crop and pad an image at the same time.") -def crop_pad_3d(im, size, fill_value=0): - im_y, im_x, im_z = im.shape +def crop_pad_3d(vol, size, fill_value=0): + """ + Crop/pads `vol` according to `size`. + + Padding will use `fill_value`. + Return's host/gpu array based on `vol`. + + :param vol: A >=3-dimensional numpy array + :param size: Integer size of cropped/padded output + :return: Array of shape (..., size, size, size) + """ + + vol_z, vol_y, vol_x = vol.shape[-3:] # shift terms - start_x = math.floor(im_x / 2) - math.floor(size / 2) - start_y = math.floor(im_y / 2) - math.floor(size / 2) - start_z = math.floor(im_z / 2) - math.floor(size / 2) + start_z = math.floor(vol_z / 2) - math.floor(size / 2) + start_y = math.floor(vol_y / 2) - math.floor(size / 2) + start_x = math.floor(vol_x / 2) - math.floor(size / 2) # cropping - if size <= min(im_y, im_x, im_z): - return im[ - start_y : start_y + size, start_x : start_x + size, start_z : start_z + size + if size <= min(vol_z, vol_y, vol_x): + return vol[ + ..., + start_z : start_z + size, + start_y : start_y + size, + start_x : start_x + size, ] # padding - elif size >= max(im_y, im_x, im_z): - to_return = fill_value * np.ones((size, size, size), dtype=im.dtype) + elif size >= max(vol_z, vol_y, vol_x): + # Determine shape + shape = list(vol.shape[:-3]) + shape.extend([size, size, size]) + + # Ensure that we return the same dtype as the input + _full = np.full # Default to numpy array + if isinstance(vol, xp.ndarray): + # Use cupy when `vol` _and_ xp are cupy ndarray + # Avoids having to handle when cupy is not installed + _full = xp.full + + to_return = _full(shape, fill_value, dtype=vol.dtype) + to_return[ - -start_y : im_y - start_y, - -start_x : im_x - start_x, - -start_z : im_z - start_z, - ] = im + ..., + -start_z : vol_z - start_z, + -start_y : vol_y - start_y, + -start_x : vol_x - start_x, + ] = vol return to_return else: - # target size is between min and max of (im_y, im_x, im_z) + # target size is between min and max of (vol_y, vol_x, vol_z) raise ValueError("Cannot crop and pad a volume at the same time.") From 11986f32bae996af151507897c2545fe37cc3c44 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 11:13:04 -0400 Subject: [PATCH 49/65] ffb2d self review cleanup --- src/aspire/basis/ffb_2d.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py index 116e009757..8d46e8419c 100644 --- a/src/aspire/basis/ffb_2d.py +++ b/src/aspire/basis/ffb_2d.py @@ -131,16 +131,16 @@ def _evaluate(self, v): ind = 0 - idx = ind + xp.arange(self.k_max[0], dtype=int) + idx = ind + np.arange(self.k_max[0], dtype=int) - pf[:, 0, :] = v[:, xp.asarray(self._zero_angular_inds)] @ self.radial_norm[idx] + pf[:, 0, :] = v[:, self._zero_angular_inds] @ self.radial_norm[idx] ind = ind + idx.size ind_pos = ind for ell in range(1, self.ell_max + 1): idx = ind + xp.arange(self.k_max[ell], dtype=int) - idx_pos = ind_pos + xp.arange(self.k_max[ell], dtype=int) + idx_pos = ind_pos + np.arange(self.k_max[ell], dtype=int) idx_neg = idx_pos + self.k_max[ell] v_ell = (v[:, idx_pos] - 1j * v[:, idx_neg]) / 2.0 @@ -171,9 +171,7 @@ def _evaluate(self, v): # perform inverse non-uniformly FFT transform back to 2D coordinate basis freqs = m_reshape(self._precomp["freqs"], (2, n_r * n_theta)) - x = 2 * anufft( - pf.astype(complex_type(self.dtype)), 2 * pi * freqs, self.sz, real=True - ) + x = 2 * anufft(pf, 2 * pi * freqs, self.sz, real=True) # Return X as Image instance with the last two dimensions as *self.sz x = x.reshape((*sz_roll, *self.sz)) @@ -206,7 +204,7 @@ def _evaluate_t(self, x): pf = xp.concatenate((pf, pf.conjugate()), axis=2) # evaluate radial integral using the Gauss-Legendre quadrature rule - pf *= self.gl_weighted_nodes[None, :, None] + pf = pf * self.gl_weighted_nodes[None, :, None] # 1D FFT on the angular dimension for each concentric circle pf = 2 * xp.pi / (2 * n_theta) * fft.fft(pf) From 1b89886f8a6e7f4f19e095b9208f17649ca657ff Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 14:43:57 -0400 Subject: [PATCH 50/65] ffb3d move more grid precomp to gpu --- src/aspire/basis/ffb_3d.py | 76 +++++++++++++++++++------------------- 1 file changed, 39 insertions(+), 37 deletions(-) diff --git a/src/aspire/basis/ffb_3d.py b/src/aspire/basis/ffb_3d.py index 5740900a34..7a2509382f 100644 --- a/src/aspire/basis/ffb_3d.py +++ b/src/aspire/basis/ffb_3d.py @@ -1,7 +1,6 @@ import logging import numpy as np -from numpy import pi from aspire.basis import FBBasis3D from aspire.basis.basis_utils import lgwt, norm_assoc_legendre, sph_bessel @@ -61,26 +60,29 @@ def _precomp(self): r, wt_r = lgwt(n_r, 0.0, self.kcut, dtype=self.dtype) z, wt_z = lgwt(n_phi, -1, 1, dtype=self.dtype) - r = m_reshape(r, (n_r, 1)) - wt_r = m_reshape(wt_r, (n_r, 1)) - z = m_reshape(z, (n_phi, 1)) - wt_z = m_reshape(wt_z, (n_phi, 1)) - phi = np.arccos(z) + r = m_reshape(xp.asarray(r), (n_r, 1)) + rh = xp.asnumpy(r) + wt_r = m_reshape(xp.asarray(wt_r), (n_r, 1)) + z = m_reshape(xp.asarray(z), (n_phi, 1)) + wt_z = m_reshape(xp.asarray(wt_z), (n_phi, 1)) + phi = xp.arccos(z) wt_phi = wt_z - theta = 2 * pi * np.arange(n_theta, dtype=self.dtype).T / (2 * n_theta) + theta = 2 * xp.pi * xp.arange(n_theta, dtype=self.dtype).T / (2 * n_theta) theta = m_reshape(theta, (n_theta, 1)) # evaluate basis function in the radial dimension - radial_wtd = np.zeros( + radial_wtd = xp.zeros( shape=(n_r, np.max(self.k_max), self.ell_max + 1), dtype=self.dtype ) for ell in range(0, self.ell_max + 1): k_max_ell = self.k_max[ell] - rmat = r * self.r0[ell][0:k_max_ell].T / self.kcut - radial_ell = np.zeros_like(rmat) + rmat = rh * self.r0[ell][0:k_max_ell].T / self.kcut # host + radial_ell = xp.zeros_like(rmat) for ik in range(0, k_max_ell): - radial_ell[:, ik] = sph_bessel(ell, rmat[:, ik]) - nrm = np.abs(sph_bessel(ell + 1, self.r0[ell][0:k_max_ell].T) / 4) + radial_ell[:, ik] = xp.asarray(sph_bessel(ell, rmat[:, ik])) + nrm = xp.abs( + xp.asarray(sph_bessel(ell + 1, self.r0[ell][0:k_max_ell].T)) / 4 + ) radial_ell = radial_ell / nrm radial_ell_wtd = r**2 * wt_r * radial_ell radial_wtd[:, 0:k_max_ell, ell] = radial_ell_wtd @@ -95,14 +97,14 @@ def _precomp(self): - np.mod(self.ell_max, 2) * np.mod(m, 2) ) n_odd_ell = int(self.ell_max - m + 1 - n_even_ell) - phi_wtd_m_even = np.zeros((n_phi, n_even_ell), dtype=phi.dtype) - phi_wtd_m_odd = np.zeros((n_phi, n_odd_ell), dtype=phi.dtype) + phi_wtd_m_even = xp.zeros((n_phi, n_even_ell), dtype=phi.dtype) + phi_wtd_m_odd = xp.zeros((n_phi, n_odd_ell), dtype=phi.dtype) ind_even = 0 ind_odd = 0 for ell in range(m, self.ell_max + 1): - phi_m_ell = norm_assoc_legendre(ell, m, z) - nrm_inv = np.sqrt(0.5 / pi) + phi_m_ell = xp.asarray(norm_assoc_legendre(ell, m, z)) + nrm_inv = np.sqrt(0.5 / np.pi) phi_m_ell = nrm_inv * phi_m_ell phi_wtd_m_ell = wt_phi * phi_m_ell if np.mod(ell, 2) == 0: @@ -116,41 +118,41 @@ def _precomp(self): ang_phi_wtd_odd.append(phi_wtd_m_odd) # evaluate basis function in the theta dimension - ang_theta = np.zeros((n_theta, 2 * self.ell_max + 1), dtype=theta.dtype) + ang_theta = xp.zeros((n_theta, 2 * self.ell_max + 1), dtype=theta.dtype) - ang_theta[:, 0 : self.ell_max] = np.sqrt(2) * np.sin( - theta @ m_reshape(np.arange(self.ell_max, 0, -1), (1, self.ell_max)) + ang_theta[:, 0 : self.ell_max] = np.sqrt(2) * xp.sin( + theta @ m_reshape(xp.arange(self.ell_max, 0, -1), (1, self.ell_max)) ) - ang_theta[:, self.ell_max] = np.ones(n_theta, dtype=theta.dtype) - ang_theta[:, self.ell_max + 1 : 2 * self.ell_max + 1] = np.sqrt(2) * np.cos( - theta @ m_reshape(np.arange(1, self.ell_max + 1), (1, self.ell_max)) + ang_theta[:, self.ell_max] = xp.ones(n_theta, dtype=theta.dtype) + ang_theta[:, self.ell_max + 1 : 2 * self.ell_max + 1] = np.sqrt(2) * xp.cos( + theta @ m_reshape(xp.arange(1, self.ell_max + 1), (1, self.ell_max)) ) - ang_theta_wtd = (2 * pi / n_theta) * ang_theta + ang_theta_wtd = (2 * np.pi / n_theta) * ang_theta - theta_grid, phi_grid, r_grid = np.meshgrid( - theta, phi, r, sparse=False, indexing="ij" + theta_grid, phi_grid, r_grid = xp.meshgrid( + theta.flatten(), phi.flatten(), r.flatten(), sparse=False, indexing="ij" ) - fourier_x = m_flatten(r_grid * np.cos(theta_grid) * np.sin(phi_grid)) - fourier_y = m_flatten(r_grid * np.sin(theta_grid) * np.sin(phi_grid)) - fourier_z = m_flatten(r_grid * np.cos(phi_grid)) + fourier_x = m_flatten(r_grid * xp.cos(theta_grid) * xp.sin(phi_grid)) + fourier_y = m_flatten(r_grid * xp.sin(theta_grid) * xp.sin(phi_grid)) + fourier_z = m_flatten(r_grid * xp.cos(phi_grid)) fourier_pts = ( 2 - * pi - * np.vstack( + * xp.pi + * xp.vstack( ( - fourier_z[np.newaxis, ...], - fourier_y[np.newaxis, ...], - fourier_x[np.newaxis, ...], + fourier_z[xp.newaxis, ...], + fourier_y[xp.newaxis, ...], + fourier_x[xp.newaxis, ...], ) ) ) return { - "radial_wtd": xp.asarray(radial_wtd), - "ang_phi_wtd_even": [xp.asarray(x) for x in ang_phi_wtd_even], - "ang_phi_wtd_odd": [xp.asarray(x) for x in ang_phi_wtd_odd], - "ang_theta_wtd": xp.asarray(ang_theta_wtd), + "radial_wtd": radial_wtd, + "ang_phi_wtd_even": ang_phi_wtd_even, + "ang_phi_wtd_odd": ang_phi_wtd_odd, + "ang_theta_wtd": ang_theta_wtd, "fourier_pts": fourier_pts, } From d9d313b8132eb7b63024f1242bc0854a91c9b747 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 14:58:08 -0400 Subject: [PATCH 51/65] Move more FLE2D grid precomp to GPU --- src/aspire/basis/fle_2d.py | 23 +++++++++++++---------- src/aspire/basis/fle_2d_utils.py | 1 - 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index ba7d4636e9..631161d0fe 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -21,6 +21,9 @@ def _cleanup(): """ Utility for informing cupy to cleanup memory held by old vars. + + This method is designed to be safely called even when `CuPy` is + not installed, in which case it is a no-op. """ try: import cupy @@ -288,10 +291,10 @@ def _compute_nufft_points(self): self.num_angular_nodes = num_angular_nodes # create gridpoints - nodes = 1 - (2 * np.arange(self.num_radial_nodes, dtype=self.dtype) + 1) / ( + nodes = 1 - (2 * xp.arange(self.num_radial_nodes, dtype=self.dtype) + 1) / ( 2 * self.num_radial_nodes ) - nodes = (np.cos(np.pi * nodes) + 1) / 2 + nodes = (xp.cos(np.pi * nodes) + 1) / 2 nodes = ( self.greatest_lambda - self.smallest_lambda ) * nodes + self.smallest_lambda @@ -302,17 +305,17 @@ def _compute_nufft_points(self): phi = ( 2 - * np.pi - * np.arange(self.num_angular_nodes // 2, dtype=self.dtype) + * xp.pi + * xp.arange(self.num_angular_nodes // 2, dtype=self.dtype) / self.num_angular_nodes ) - grid_xy = np.empty( + grid_xy = xp.empty( (2, self.num_radial_nodes, self.num_angular_nodes // 2), dtype=self.dtype ) - grid_xy[0] = np.cos(phi) # x - grid_xy[1] = np.sin(phi) # y - grid_xy *= nodes * h - self.grid_xy = xp.asarray(grid_xy.reshape(2, -1)) + grid_xy[0] = xp.cos(phi) # x + grid_xy[1] = xp.sin(phi) # y + grid_xy = grid_xy * nodes * h + self.grid_xy = grid_xy.reshape(2, -1) def _build_interpolation_matrix(self): """ @@ -530,7 +533,7 @@ def _evaluate_t(self, imgs): def _step1_t(self, im): """ Step 1 of the adjoint transformation (images to coefficients). - Calculates the NUFFT of the image on gridpoints `grid_x` and `grid_y`. + Calculates the NUFFT of the image on gridpoints `grid_xy`. """ im = im.reshape(-1, self.nres, self.nres).astype(complex_type(self.dtype)) num_img = im.shape[0] diff --git a/src/aspire/basis/fle_2d_utils.py b/src/aspire/basis/fle_2d_utils.py index 33f237165e..ea459988b0 100644 --- a/src/aspire/basis/fle_2d_utils.py +++ b/src/aspire/basis/fle_2d_utils.py @@ -195,7 +195,6 @@ def barycentric_interp_sparse(target_points, known_points, numsparse): # note that const cancels in numerator and denominator vals = vals / denom.reshape(-1, 1) - # TODO, migrate more of this method towards `xp` vals = xp.array(vals.flatten()) idx = xp.array(idx.flatten()) jdx = xp.array(jdx.flatten()) From 4f9922224b7d4109ecd564c5b42678024f53b68c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 15:05:58 -0400 Subject: [PATCH 52/65] image self review cleanup --- src/aspire/image/image.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index c8cc9cbe24..ba383f49e9 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -356,6 +356,10 @@ def downsample(self, ds_res): original_stack_shape = self.stack_shape im = self.stack_reshape(-1) + # Note image data is intentionally migrated via `xp.asarray` + # because all of the subsequent calls until `asnumpy` are GPU + # when xp and fft in `cupy` mode. + # compute FT with centered 0-frequency fx = fft.centered_fft2(xp.asarray(im._data)) # crop 2D Fourier transform for each image @@ -377,17 +381,16 @@ def filter(self, filter): im = self.stack_reshape(-1) + # Note image and filter data is intentionally migrated via + # `xp.asarray` because all of the subsequent calls until + # `asnumpy` are GPU when xp and fft in `cupy` mode. filter_values = xp.asarray(filter.evaluate_grid(self.resolution)) + # Convolve im_f = fft.centered_fft2(xp.asarray(im._data)) - - # TODO: why are these different? Doesn't the broadcast work? - if im_f.ndim > filter_values.ndim: - im_f *= filter_values - else: - im_f = filter_values * im_f - + im_f = filter_values * im_f im = fft.centered_ifft2(im_f) + im = xp.asnumpy(im.real) return self.__class__(im).stack_reshape(original_stack_shape) From 19cb24f5fc9c1982bc25b880a5ddc36a00f96f77 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 15:10:56 -0400 Subject: [PATCH 53/65] var name improvement --- src/aspire/nufft/__init__.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/aspire/nufft/__init__.py b/src/aspire/nufft/__init__.py index 953f55ce3e..fcfe182918 100644 --- a/src/aspire/nufft/__init__.py +++ b/src/aspire/nufft/__init__.py @@ -172,9 +172,9 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): """ - _on_gpu = False + _keep_on_gpu = False if cp and isinstance(sig_f, cp.ndarray): - _on_gpu = True + _keep_on_gpu = True if fourier_pts.dtype != real_type(sig_f.dtype): raise RuntimeError( @@ -198,7 +198,7 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): adjoint = adjoint.real if real else adjoint - if cp and not _on_gpu: + if cp and not _keep_on_gpu: adjoint = adjoint.get() return adjoint @@ -223,9 +223,9 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): """ - _on_gpu = False + _keep_on_gpu = False if cp and isinstance(sig_f, cp.ndarray): - _on_gpu = True + _keep_on_gpu = True if fourier_pts.dtype != real_type(sig_f.dtype): raise RuntimeError( @@ -259,7 +259,7 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): transform = transform.real if real else transform - if cp and not _on_gpu: + if cp and not _keep_on_gpu: transform = transform.get() return transform From d1f41a21895610a7239a2f4d9faa531cdb4c6db7 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 15:16:42 -0400 Subject: [PATCH 54/65] minor crop pad string improvements --- src/aspire/utils/coor_trans.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/aspire/utils/coor_trans.py b/src/aspire/utils/coor_trans.py index 457e29f9f8..53f86714c8 100644 --- a/src/aspire/utils/coor_trans.py +++ b/src/aspire/utils/coor_trans.py @@ -409,7 +409,11 @@ def crop_pad_2d(im, size, fill_value=0): return to_return else: # target size is between mat_x and mat_y - raise ValueError("Cannot crop and pad an image at the same time.") + raise ValueError( + "Cannot crop and pad Image at the same time." + "If this is really what you intended," + " make two seperate calls for cropping and padding." + ) def crop_pad_3d(vol, size, fill_value=0): @@ -461,5 +465,9 @@ def crop_pad_3d(vol, size, fill_value=0): ] = vol return to_return else: - # target size is between min and max of (vol_y, vol_x, vol_z) - raise ValueError("Cannot crop and pad a volume at the same time.") + # target size is between min and max of (vol_x, vol_y, vol_z) + raise ValueError( + "Cannot crop and pad Volume at the same time." + "If this is really what you intended," + " make two seperate calls for cropping and padding." + ) From 0bde3ceedf2c04e218c8b6105aa69416f61a65bd Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 15:23:54 -0400 Subject: [PATCH 55/65] Update volume downsample with crop_pad_3d improvements --- src/aspire/volume/volume.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/src/aspire/volume/volume.py b/src/aspire/volume/volume.py index b7e4245ede..0f01ef5e61 100644 --- a/src/aspire/volume/volume.py +++ b/src/aspire/volume/volume.py @@ -468,27 +468,27 @@ def downsample(self, ds_res, mask=None): :param ds_res: Desired resolution. :param mask: Optional NumPy array mask to multiply in Fourier space. """ - if mask is None: - mask = 1.0 original_stack_shape = self.stack_shape v = self.stack_reshape(-1) # take 3D Fourier transform of each volume in the stack - fx = xp.asnumpy(fft.fftshift(fft.fftn(xp.asarray(v._data), axes=(1, 2, 3)))) + fx = fft.fftshift(fft.fftn(xp.asarray(v._data), axes=(1, 2, 3))) + # crop each volume to the desired resolution in frequency space - crop_fx = ( - np.array([crop_pad_3d(fx[i, :, :, :], ds_res) for i in range(self.n_vols)]) - * mask - ) + fx = crop_pad_3d(fx, ds_res) + + # Optionally apply mask + if mask is not None: + fx = fx * xp.asarray(mask) + # inverse Fourier transform of each volume - out = xp.asnumpy( - fft.ifftn(fft.ifftshift(xp.asarray(crop_fx)), axes=(1, 2, 3)) - * (ds_res**3 / self.resolution**3) - ) + out = fft.ifftn(fft.ifftshift(fx), axes=(1, 2, 3)).real + out = out.real * (ds_res**3 / self.resolution**3) + # returns a new Volume object return self.__class__( - np.real(out), symmetry_group=self.symmetry_group + xp.asnumpy(out), symmetry_group=self.symmetry_group ).stack_reshape(original_stack_shape) def shift(self): From fe3ab8d2e8f8aaa575ff7aa3cc3fd282099dbe22 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 15:25:59 -0400 Subject: [PATCH 56/65] add docstring --- tests/test_numeric_sparse.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/test_numeric_sparse.py b/tests/test_numeric_sparse.py index 5a8227fe47..e58aa02e6a 100644 --- a/tests/test_numeric_sparse.py +++ b/tests/test_numeric_sparse.py @@ -1,3 +1,7 @@ +""" +Tests basic numpy/cupy functionality of sparse numeric wrappers. +""" + import numpy as np import pytest From f80e191d7c5102c3f13881a89d7840598badfc3a Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 16:02:58 -0400 Subject: [PATCH 57/65] enforce filter dtype --- src/aspire/image/image.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index ba383f49e9..0fb3b0dc0f 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -384,7 +384,9 @@ def filter(self, filter): # Note image and filter data is intentionally migrated via # `xp.asarray` because all of the subsequent calls until # `asnumpy` are GPU when xp and fft in `cupy` mode. - filter_values = xp.asarray(filter.evaluate_grid(self.resolution)) + filter_values = xp.asarray( + filter.evaluate_grid(self.resolution), dtype=self.dtype + ) # Convolve im_f = fft.centered_fft2(xp.asarray(im._data)) From a71ba260f2b09094856d881c0d8bbc08f7792640 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 1 Jul 2024 16:18:53 -0400 Subject: [PATCH 58/65] explicitly force C order before cufinufft call --- src/aspire/nufft/cufinufft.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/aspire/nufft/cufinufft.py b/src/aspire/nufft/cufinufft.py index c1d15ff686..218fbd5fb7 100644 --- a/src/aspire/nufft/cufinufft.py +++ b/src/aspire/nufft/cufinufft.py @@ -165,7 +165,8 @@ def adjoint(self, signal): " In the future this will be an error." ) - signal = cp.asarray(signal, dtype=self.complex_dtype) + # Note, if not C order, cuFINUFFT will copy-cast anyway. + signal = cp.asarray(signal, order="C", dtype=self.complex_dtype) res_shape = self.sz # Note, there is a corner case for ntransforms == 1. From 333ce7608ccb0bd41ee6443b65b0b439f2ea32d7 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 2 Jul 2024 10:56:38 -0400 Subject: [PATCH 59/65] Add dtype note and utest tolerance for singles --- src/aspire/image/image.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py index 0fb3b0dc0f..b04f2b4b3d 100644 --- a/src/aspire/image/image.py +++ b/src/aspire/image/image.py @@ -384,6 +384,8 @@ def filter(self, filter): # Note image and filter data is intentionally migrated via # `xp.asarray` because all of the subsequent calls until # `asnumpy` are GPU when xp and fft in `cupy` mode. + # + # Second note, filter dtype may not match image dtype. filter_values = xp.asarray( filter.evaluate_grid(self.resolution), dtype=self.dtype ) From 9fd86f96e5114c636d7de27e7b087929cb7ffdf1 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 15 Jul 2024 10:35:56 -0400 Subject: [PATCH 60/65] configuration doc wording (strings) --- gallery/tutorials/configuration.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gallery/tutorials/configuration.py b/gallery/tutorials/configuration.py index 75354bc422..372d97df06 100644 --- a/gallery/tutorials/configuration.py +++ b/gallery/tutorials/configuration.py @@ -110,7 +110,7 @@ # software is most easily accomplished by installing ASPIRE with one # of the published GPU extensions, for example ``pip install # "aspire[dev,gpu_12x]"``. Once the packages are installed users -# should automatically find that the NUFFT calls are running on the +# should find that the NUFFT calls are automatically running on the # GPU. Additional acceleration is achieved by enabling `cupy` for # `numeric` and `fft` components. # From 80780db57a505e5a1fd504717ab7e25c8c4ec1bf Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 15 Jul 2024 11:04:40 -0400 Subject: [PATCH 61/65] keep a few more vars as cupy --- src/aspire/basis/fle_2d.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/aspire/basis/fle_2d.py b/src/aspire/basis/fle_2d.py index 631161d0fe..76330e6fba 100644 --- a/src/aspire/basis/fle_2d.py +++ b/src/aspire/basis/fle_2d.py @@ -544,7 +544,7 @@ def _step1_t(self, im): _z = nufft(im, self.grid_xy, epsilon=self.epsilon) * self.h**2 _z = _z.reshape(num_img, self.num_radial_nodes, self.num_angular_nodes // 2) z[:, :, : self.num_angular_nodes // 2] = _z - z[:, :, self.num_angular_nodes // 2 :] = np.conj(_z) + z[:, :, self.num_angular_nodes // 2 :] = _z.conj() return z def _step2_t(self, z): @@ -643,13 +643,13 @@ def _step1(self, z): num_img = z.shape[0] z = z[:, :, : self.num_angular_nodes // 2].reshape(num_img, -1) im = anufft( - z.astype(complex_type(self.dtype)), + z.astype(complex_type(self.dtype), copy=False), self.grid_xy, (self.nres, self.nres), epsilon=self.epsilon, ) - im = im + np.conj(im) - im = np.real(im) + im = im + im.conj() + im = im.real im = im.reshape(num_img, self.nres, self.nres) im[:, self.radial_mask] = 0 From bf9e2b467c1cbaaec87a793251e79f7c7a8d7cb0 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 15 Jul 2024 15:09:41 -0400 Subject: [PATCH 62/65] replace xp.newaxis with None --- src/aspire/basis/ffb_3d.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/aspire/basis/ffb_3d.py b/src/aspire/basis/ffb_3d.py index 7a2509382f..4137e572a9 100644 --- a/src/aspire/basis/ffb_3d.py +++ b/src/aspire/basis/ffb_3d.py @@ -141,9 +141,9 @@ def _precomp(self): * xp.pi * xp.vstack( ( - fourier_z[xp.newaxis, ...], - fourier_y[xp.newaxis, ...], - fourier_x[xp.newaxis, ...], + fourier_z[None, ...], + fourier_y[None, ...], + fourier_x[None, ...], ) ) ) From b2834dd97b38ad21434e39176b812a883e16ed79 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 23 Jul 2024 08:40:19 -0400 Subject: [PATCH 63/65] put cache dir on new line --- .github/workflows/workflow.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/workflow.yml b/.github/workflows/workflow.yml index fce5a7f6d4..c41b221ec4 100644 --- a/.github/workflows/workflow.yml +++ b/.github/workflows/workflow.yml @@ -147,7 +147,8 @@ jobs: echo "Stash the WORK_DIR to GitHub env so we can clean it up later." echo "WORK_DIR=${WORK_DIR}" >> $GITHUB_ENV echo -e "ray:\n temp_dir: ${WORK_DIR}\n" > ${WORK_DIR}/config.yaml - echo -e "common:\n cache_dir: ${CI_CACHE_DIR}" >> ${WORK_DIR}/config.yaml + echo -e "common:" >> ${WORK_DIR}/config.yaml + echo -e " cache_dir: ${CI_CACHE_DIR}" >> ${WORK_DIR}/config.yaml echo -e " numeric: cupy" >> ${WORK_DIR}/config.yaml echo -e " fft: cupy\n" >> ${WORK_DIR}/config.yaml echo "Log the config: ${WORK_DIR}/config.yaml" From bb696e297cf90d1b666b3648890fa2ac988b6aa4 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 23 Jul 2024 08:52:57 -0400 Subject: [PATCH 64/65] rename tmp to ang_theta_wtd_trans --- src/aspire/basis/ffb_3d.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/aspire/basis/ffb_3d.py b/src/aspire/basis/ffb_3d.py index 4137e572a9..7f0821b99a 100644 --- a/src/aspire/basis/ffb_3d.py +++ b/src/aspire/basis/ffb_3d.py @@ -308,9 +308,9 @@ def _evaluate_t(self, x): pf = m_reshape(pf.T, (n_theta, n_phi * n_r * n_data)) # evaluate the theta parts - tmp = self._precomp["ang_theta_wtd"].T - u_even = tmp @ pf.real - u_odd = tmp @ pf.imag + ang_theta_wtd_trans = self._precomp["ang_theta_wtd"].T + u_even = ang_theta_wtd_trans @ pf.real + u_odd = ang_theta_wtd_trans @ pf.imag u_even = m_reshape(u_even, (2 * self.ell_max + 1, n_phi, n_r, n_data)) u_odd = m_reshape(u_odd, (2 * self.ell_max + 1, n_phi, n_r, n_data)) From e22d562685ea797db3bc4557d987c854ca1e5689 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 23 Jul 2024 08:57:42 -0400 Subject: [PATCH 65/65] gpu to GPU and rm dev comment --- src/aspire/nufft/__init__.py | 4 ++-- src/aspire/numeric/cupy_fft.py | 3 --- src/aspire/utils/coor_trans.py | 4 ++-- tests/test_orient_sdp.py | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/src/aspire/nufft/__init__.py b/src/aspire/nufft/__init__.py index fcfe182918..07d92c736c 100644 --- a/src/aspire/nufft/__init__.py +++ b/src/aspire/nufft/__init__.py @@ -159,7 +159,7 @@ def anufft(sig_f, fourier_pts, sz, real=False, epsilon=1e-8): Selects best available package from `nfft` `backends` configuration list. - When sig_f is provided as a CuPy gpu array with a cufinufft + When sig_f is provided as a CuPy GPU array with a cufinufft backend, result is maintained on GPU. :param sig_f: Array representing the signal(s) in Fourier space to be transformed. \ @@ -211,7 +211,7 @@ def nufft(sig_f, fourier_pts, real=False, epsilon=1e-8): Selects best available package from `nfft` `backends` configuration list. - When sig_f is provided as a CuPy gpu array with a cufinufft + When sig_f is provided as a CuPy GPU array with a cufinufft backend, result is maintained on GPU. :param sig_f: Array representing the signal(s) in real space to be transformed. \ diff --git a/src/aspire/numeric/cupy_fft.py b/src/aspire/numeric/cupy_fft.py index f67937813f..6ad6a4e9da 100644 --- a/src/aspire/numeric/cupy_fft.py +++ b/src/aspire/numeric/cupy_fft.py @@ -25,9 +25,6 @@ def wrapper(self, x, *args, **kwargs): # CuPy's single precision FFT appears to be too inaccurate for # many of our unit tests, so the signal is upcast and recast # on return. - # Todo, discuss with Joakim whether we want this upcasting - # business configurable or keep singles, both in conjunction - # with xfailing the tests. _singles = False if x.dtype == np.float32: _singles = True diff --git a/src/aspire/utils/coor_trans.py b/src/aspire/utils/coor_trans.py index 53f86714c8..cad8fb0295 100644 --- a/src/aspire/utils/coor_trans.py +++ b/src/aspire/utils/coor_trans.py @@ -372,7 +372,7 @@ def crop_pad_2d(im, size, fill_value=0): Crop/pads `im` according to `size`. Padding will use `fill_value`. - Return's host/gpu array based on `im`. + Return's host/GPU array based on `im`. :param im: A >=2-dimensional numpy array :param size: Integer size of cropped/padded output @@ -421,7 +421,7 @@ def crop_pad_3d(vol, size, fill_value=0): Crop/pads `vol` according to `size`. Padding will use `fill_value`. - Return's host/gpu array based on `vol`. + Return's host/GPU array based on `vol`. :param vol: A >=3-dimensional numpy array :param size: Integer size of cropped/padded output diff --git a/tests/test_orient_sdp.py b/tests/test_orient_sdp.py index a161d2fdd7..22658ee06a 100644 --- a/tests/test_orient_sdp.py +++ b/tests/test_orient_sdp.py @@ -77,7 +77,7 @@ def test_estimate_rotations(src_orient_est_fixture): src, orient_est = src_orient_est_fixture if backend_available("cufinufft") and src.dtype == np.float32: - pytest.skip("CI on gpu fails for singles.") + pytest.skip("CI on GPU fails for singles.") orient_est.estimate_rotations()