From 6a0d5b9a49464c7c71134b0785be66d46fc75558 Mon Sep 17 00:00:00 2001
From: "Joshua C. Carmichael" <jc5485@della-gpu.princeton.edu>
Date: Tue, 4 Jun 2024 16:06:28 -0400
Subject: [PATCH 1/4] evaluate_t on gpu.

---
 src/aspire/basis/ffb_2d.py | 37 +++++++++++++++++--------------------
 src/aspire/image/image.py  |  2 +-
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py
index 5a5c7c3f27..e9e6ab90a8 100644
--- a/src/aspire/basis/ffb_2d.py
+++ b/src/aspire/basis/ffb_2d.py
@@ -193,56 +193,53 @@ def _evaluate_t(self, x):
         n_images = x.shape[0]
 
         # resamping x in a polar Fourier gird using nonuniform discrete Fourier transform
-        pf = nufft(x, 2 * pi * freqs)
-        pf = np.reshape(pf, (n_images, n_r, n_theta))
+        pf = nufft(xp.array(x), 2 * pi * freqs)
+        pf = pf.reshape(n_images, n_r, n_theta)
 
         # Recover "negative" frequencies from "positive" half plane.
-        pf = np.concatenate((pf, pf.conjugate()), axis=2)
+        pf = xp.concatenate((pf, pf.conjugate()), axis=2)
 
         # evaluate radial integral using the Gauss-Legendre quadrature rule
-        for i_r in range(0, n_r):
-            pf[:, i_r, :] = pf[:, i_r, :] * (
-                self._precomp["gl_weights"][i_r] * self._precomp["gl_nodes"][i_r]
-            )
+        pf = pf * (xp.array(self._precomp["gl_weights"]) * xp.array(self._precomp["gl_nodes"]))[None, :, None]
 
         #  1D FFT on the angular dimension for each concentric circle
-        pf = 2 * pi / (2 * n_theta) * xp.asnumpy(fft.fft(xp.asarray(pf)))
+        pf = 2 * xp.pi / (2 * n_theta) * fft.fft(pf)
 
         # This only makes it easier to slice the array later.
-        v = np.zeros((n_images, self.count), dtype=x.dtype)
+        v = xp.zeros((n_images, self.count), dtype=x.dtype)
 
         # go through each basis function and find the corresponding coefficient
         ind = 0
-        idx = ind + np.arange(self.k_max[0])
+        idx = ind + xp.arange(self.k_max[0])
 
         # include the normalization factor of angular part into radial part
-        radial_norm = self._precomp["radial"] / np.expand_dims(self.angular_norms, 1)
+        radial_norm = xp.array(self._precomp["radial"] / np.expand_dims(self.angular_norms, 1))
         v[:, self._zero_angular_inds] = pf[:, :, 0].real @ radial_norm[idx].T
-        ind = ind + np.size(idx)
+        ind = ind + idx.size
 
         ind_pos = ind
         for ell in range(1, self.ell_max + 1):
-            idx = ind + np.arange(self.k_max[ell])
-            idx_pos = ind_pos + np.arange(self.k_max[ell])
+            idx = ind + xp.arange(self.k_max[ell])
+            idx_pos = ind_pos + xp.arange(self.k_max[ell])
             idx_neg = idx_pos + self.k_max[ell]
 
             v_ell = pf[:, :, ell] @ radial_norm[idx].T
 
             if np.mod(ell, 2) == 0:
-                v_pos = np.real(v_ell)
-                v_neg = -np.imag(v_ell)
+                v_pos = v_ell.real
+                v_neg = -v_ell.imag
             else:
-                v_pos = np.imag(v_ell)
-                v_neg = np.real(v_ell)
+                v_pos = v_ell.imag
+                v_neg = v_ell.real
 
             v[:, idx_pos] = v_pos
             v[:, idx_neg] = v_neg
 
-            ind = ind + np.size(idx)
+            ind = ind + idx.size
 
             ind_pos = ind_pos + 2 * self.k_max[ell]
 
-        return v
+        return xp.asnumpy(v)
 
     def filter_to_basis_mat(self, f, **kwargs):
         """
diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py
index 12c58c6308..a5cf718c4d 100644
--- a/src/aspire/image/image.py
+++ b/src/aspire/image/image.py
@@ -364,7 +364,7 @@ def downsample(self, ds_res):
         out = fft.centered_ifft2(crop_fx).real * (ds_res**2 / self.resolution**2)
         out = xp.asnumpy(out)
 
-        return self.__class__(out).stack_reshape(original_stack_shape)
+        return self.__class__(np.array(out.get())).stack_reshape(original_stack_shape)
 
     def filter(self, filter):
         """

From 699d112763e7f383de70c13d5c6ccd65993c16c5 Mon Sep 17 00:00:00 2001
From: "Joshua C. Carmichael" <jc5485@della-gpu.princeton.edu>
Date: Wed, 5 Jun 2024 16:06:21 -0400
Subject: [PATCH 2/4] Optimize ffb2d for gpu.

---
 src/aspire/basis/ffb_2d.py | 56 ++++++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 24 deletions(-)

diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py
index e9e6ab90a8..996e44f95a 100644
--- a/src/aspire/basis/ffb_2d.py
+++ b/src/aspire/basis/ffb_2d.py
@@ -105,6 +105,7 @@ def _evaluate(self, v):
             coordinate basis. This is Image instance with resolution of `self.sz`
             and the first dimension correspond to remaining dimension of `v`.
         """
+        v = xp.array(v)
         sz_roll = v.shape[:-1]
         v = v.reshape(-1, self.count)
 
@@ -112,27 +113,29 @@ def _evaluate(self, v):
         n_data = v.shape[0]
 
         # get information on polar grids from precomputed data
-        n_theta = np.size(self._precomp["freqs"], 2)
-        n_r = np.size(self._precomp["freqs"], 1)
+        n_theta = self._precomp["freqs"].shape[2]
+        n_r = self._precomp["freqs"].shape[1]
 
         # go through  each basis function and find corresponding coefficient
-        pf = np.zeros((n_data, 2 * n_theta, n_r), dtype=complex_type(self.dtype))
+        pf = xp.zeros((n_data, 2 * n_theta, n_r), dtype=complex_type(self.dtype))
 
         ind = 0
 
-        idx = ind + np.arange(self.k_max[0], dtype=int)
+        idx = ind + xp.arange(self.k_max[0], dtype=int)
 
         # include the normalization factor of angular part into radial part
-        radial_norm = self._precomp["radial"] / np.expand_dims(self.angular_norms, 1)
-        pf[:, 0, :] = v[:, self._zero_angular_inds] @ radial_norm[idx]
-        ind = ind + np.size(idx)
+        radial_norm = xp.array(self._precomp["radial"]) / xp.array(
+            np.expand_dims(self.angular_norms, 1)
+        )
+        pf[:, 0, :] = v[:, xp.array(self._zero_angular_inds)] @ radial_norm[idx]
+        ind = ind + idx.size
 
         ind_pos = ind
 
         for ell in range(1, self.ell_max + 1):
-            idx = ind + np.arange(self.k_max[ell], dtype=int)
-            idx_pos = ind_pos + np.arange(self.k_max[ell], dtype=int)
-            idx_neg = idx_pos + self.k_max[ell]
+            idx = ind + xp.arange(self.k_max[ell], dtype=int)
+            idx_pos = ind_pos + xp.arange(self.k_max[ell], dtype=int)
+            idx_neg = idx_pos + xp.array(self.k_max[ell])
 
             v_ell = (v[:, idx_pos] - 1j * v[:, idx_neg]) / 2.0
 
@@ -147,22 +150,19 @@ def _evaluate(self, v):
             else:
                 pf[:, 2 * n_theta - ell, :] = -pf_ell.conjugate()
 
-            ind = ind + np.size(idx)
-            ind_pos = ind_pos + 2 * self.k_max[ell]
+            ind = ind + idx.size
+            ind_pos = ind_pos + 2 * xp.array(self.k_max[ell])
 
         # 1D inverse FFT in the degree of polar angle
-        pf = 2 * pi * xp.asnumpy(fft.ifft(xp.asarray(pf), axis=1))
+        pf = 2 * xp.pi * fft.ifft(xp.asarray(pf), axis=1)
 
         # Only need "positive" frequencies.
-        hsize = int(np.size(pf, 1) / 2)
+        hsize = int(pf.shape[1] / 2)
         pf = pf[:, 0:hsize, :]
-
-        for i_r in range(0, n_r):
-            pf[..., i_r] = pf[..., i_r] * (
-                self._precomp["gl_weights"][i_r] * self._precomp["gl_nodes"][i_r]
-            )
-
-        pf = np.reshape(pf, (n_data, n_r * n_theta))
+        pf *= (
+            xp.array(self._precomp["gl_weights"]) * xp.array(self._precomp["gl_nodes"])
+        )[None, None, :]
+        pf = pf.reshape(n_data, n_r * n_theta)
 
         # perform inverse non-uniformly FFT transform back to 2D coordinate basis
         freqs = m_reshape(self._precomp["freqs"], (2, n_r * n_theta))
@@ -172,7 +172,7 @@ def _evaluate(self, v):
         # Return X as Image instance with the last two dimensions as *self.sz
         x = x.reshape((*sz_roll, *self.sz))
 
-        return x
+        return xp.asnumpy(x)
 
     def _evaluate_t(self, x):
         """
@@ -200,7 +200,13 @@ def _evaluate_t(self, x):
         pf = xp.concatenate((pf, pf.conjugate()), axis=2)
 
         # evaluate radial integral using the Gauss-Legendre quadrature rule
-        pf = pf * (xp.array(self._precomp["gl_weights"]) * xp.array(self._precomp["gl_nodes"]))[None, :, None]
+        pf = (
+            pf
+            * (
+                xp.array(self._precomp["gl_weights"])
+                * xp.array(self._precomp["gl_nodes"])
+            )[None, :, None]
+        )
 
         #  1D FFT on the angular dimension for each concentric circle
         pf = 2 * xp.pi / (2 * n_theta) * fft.fft(pf)
@@ -213,7 +219,9 @@ def _evaluate_t(self, x):
         idx = ind + xp.arange(self.k_max[0])
 
         # include the normalization factor of angular part into radial part
-        radial_norm = xp.array(self._precomp["radial"] / np.expand_dims(self.angular_norms, 1))
+        radial_norm = xp.array(
+            self._precomp["radial"] / np.expand_dims(self.angular_norms, 1)
+        )
         v[:, self._zero_angular_inds] = pf[:, :, 0].real @ radial_norm[idx].T
         ind = ind + idx.size
 

From faff4f0e756c7c24d842a1128d62037763cdc4d5 Mon Sep 17 00:00:00 2001
From: Josh Carmichael <carmichael@princeton.edu>
Date: Tue, 11 Jun 2024 11:01:51 -0400
Subject: [PATCH 3/4] downsample return

---
 src/aspire/image/image.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/aspire/image/image.py b/src/aspire/image/image.py
index a5cf718c4d..12c58c6308 100644
--- a/src/aspire/image/image.py
+++ b/src/aspire/image/image.py
@@ -364,7 +364,7 @@ def downsample(self, ds_res):
         out = fft.centered_ifft2(crop_fx).real * (ds_res**2 / self.resolution**2)
         out = xp.asnumpy(out)
 
-        return self.__class__(np.array(out.get())).stack_reshape(original_stack_shape)
+        return self.__class__(out).stack_reshape(original_stack_shape)
 
     def filter(self, filter):
         """

From 629373f34ea952dd38efa08598a8bc4a8a09090c Mon Sep 17 00:00:00 2001
From: Josh Carmichael <carmichael@princeton.edu>
Date: Tue, 11 Jun 2024 11:43:52 -0400
Subject: [PATCH 4/4] remove unnecessary xp.array

---
 src/aspire/basis/ffb_2d.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/aspire/basis/ffb_2d.py b/src/aspire/basis/ffb_2d.py
index 996e44f95a..4971b44f77 100644
--- a/src/aspire/basis/ffb_2d.py
+++ b/src/aspire/basis/ffb_2d.py
@@ -154,7 +154,7 @@ def _evaluate(self, v):
             ind_pos = ind_pos + 2 * xp.array(self.k_max[ell])
 
         # 1D inverse FFT in the degree of polar angle
-        pf = 2 * xp.pi * fft.ifft(xp.asarray(pf), axis=1)
+        pf = 2 * xp.pi * fft.ifft(pf, axis=1)
 
         # Only need "positive" frequencies.
         hsize = int(pf.shape[1] / 2)