From ab54323799ea0db9b9c6a4b74747e175d6e1a3fc Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 27 Mar 2024 14:48:21 -0400 Subject: [PATCH 01/60] init add --- src/aspire/abinitio/commonline_sync3n.py | 269 +++++++++++++++++++++++ 1 file changed, 269 insertions(+) create mode 100644 src/aspire/abinitio/commonline_sync3n.py diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py new file mode 100644 index 0000000000..a31ec87032 --- /dev/null +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -0,0 +1,269 @@ +import logging + +import numpy as np +from numpy.linalg import eigh, norm, svd + +from aspire.abinitio import CLOrient3D, SyncVotingMixin +from aspire.operators import PolarFT +from aspire.utils import ( + J_conjugate, + Rotation, + all_pairs, + all_triplets, + anorm, + cyclic_rotations, + tqdm, + trange, +) +from aspire.utils.random import randn + +logger = logging.getLogger(__name__) + + +class CLSync3N(CLOrient3D, SyncVotingMixin): + """ + Define a class to estimate 3D orientations using common lines (2017) methods. + """ + + def __init__( + self, + src, + n_rad=None, + n_theta=None, + max_shift=0.15, + shift_step=1, + epsilon=1e-3, + max_iters=1000, + degree_res=1, + seed=None, + mask=True, + ): + """ + Initialize object for estimating 3D orientations. + + :param src: The source object of 2D denoised or class-averaged images with metadata + :param n_rad: The number of points in the radial direction + :param n_theta: The number of points in the theta direction + :param max_shift: Maximum range for shifts as a proportion of resolution. Default = 0.15. + :param shift_step: Resolution of shift estimation in pixels. Default = 1 pixel. + :param epsilon: Tolerance for the power method. + :param max_iter: Maximum iterations for the power method. + :param degree_res: Degree resolution for estimating in-plane rotations. + :param seed: Optional seed for RNG. + :param mask: Option to mask `src.images` with a fuzzy mask (boolean). + Default, `True`, applies a mask. + """ + + super().__init__( + src, + n_rad=n_rad, + n_theta=n_theta, + max_shift=max_shift, + shift_step=shift_step, + mask=mask, + ) + + self.epsilon = epsilon + self.max_iters = max_iters + self.degree_res = degree_res + self.seed = seed + + def estimate_rotations(self): + """ + Estimate rotation matrices for molecules with C3 or C4 symmetry. + + :return: Array of rotation matrices, size n_imgx3x3. + """ + Rij0 = self._estimate_relative_viewing_directions() + + logger.info("Performing global handedness synchronization.") + Rij = self._global_J_sync(Rij0) + + # sync3n + S = cryo_sync3n_syncmatrix(Rij) + + # optionally S weights + + # S to rot + # cryo_sync3n_S_to_rot(S) + + self.rotations = Ris + + ########################################### + # The hackberries taste like hackberries # + ########################################### + def cryo_sync3n_S_to_rot(S): + """ + S is (n_img, n_img, 3,3) + """ + + # Convert S to stupid shape + S = np.transpose(S, (0, 2, 1, 3)).reshape(3 * n_img, 3 * n_img) + + # Extract three eigenvectors corresponding to non-zero eigenvalues. + d, v = stable_eigsh(S, 10) + sort_idx = np.argsort(-d) + logger.info( + f"Top 10 eigenvalues from synchronization voting matrix: {d[sort_idx]}" + ) + + # Only need the top 3 eigen-vectors. + v = v[:, sort_idx[:3]] + + v1 = v[: 3 * n_img : 3].T.copy() + v2 = v[1 : 3 * n_img : 3].T.copy() + v3 = v[2 : 3 * n_img : 3].T.copy() + + rotations = np.empty((n_img, 3, 3), dtype=self.dtype) + rotations[:, :, 0] = v1.T + rotations[:, :, 1] = v2.T + rotations[:, :, 2] = v3.T + # Make sure that we got rotations by enforcing R to be + # a rotation (in case the error is large) + rotations = nearest_rotations(rotations) + + return rotations + + def cryo_sync3n_syncmatrix(Rij): + + S = np.zeros((self.n_img, self.n_img, 3, 3), dtype=self.dtype) + I = np.eye(3, dtype=self.dtype) + + idx = 0 + for i in range(self.n_img): + # S( (3*i-2):(3*i) , (3*i-2):(3*i) ) = I; % Rii = I + S[i, i] = I + for j in range(i + 1, N): + idx += 1 + # S( (3*i-2):(3*i) , (3*j-2):(3*j) ) = Rij(:,:,idx); % Rij + S[i, j] = Rij[idx] + # S( (3*j-2):(3*j) , (3*i-2):(3*i) ) = Rij(:,:,idx)'; % Rji = Rij' + S[j, i] = Rij[idx].T + + return S + + ########################################### + # Primary Methods # + ########################################### + + def _estimate_relative_viewing_directions(self): + """ + Estimate the relative viewing directions vij = vi*vj^T, i epsilon: + itr += 1 + vec_new = self._signs_times_v(vijs, vec) + vec_new = vec_new / norm(vec_new) + residual = norm(vec_new - vec) + vec = vec_new + logger.info( + f"Iteration {itr}, residual {round(residual, 5)} (target {epsilon})" + ) + + # We need only the signs of the eigenvector + J_sync = np.sign(vec) + + return J_sync From f8cff85af445d3cdf3162ff952307a17dd750e20 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 27 Mar 2024 16:19:48 -0400 Subject: [PATCH 02/60] fix typos --- src/aspire/abinitio/__init__.py | 1 + src/aspire/abinitio/commonline_sync3n.py | 112 ++++++++++++++++++++--- 2 files changed, 100 insertions(+), 13 deletions(-) diff --git a/src/aspire/abinitio/__init__.py b/src/aspire/abinitio/__init__.py index ff14cc2d45..9d4b0f483c 100644 --- a/src/aspire/abinitio/__init__.py +++ b/src/aspire/abinitio/__init__.py @@ -4,6 +4,7 @@ # isort: off from .commonline_sync import CLSyncVoting +from .commonline_sync3n import CLSync3N from .commonline_c3_c4 import CLSymmetryC3C4 from .commonline_cn import CLSymmetryCn from .commonline_c2 import CLSymmetryC2 diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index a31ec87032..efe6c8b179 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -16,6 +16,8 @@ trange, ) from aspire.utils.random import randn +from aspire.utils.matlab_compat import stable_eigsh +from aspire.utils import nearest_rotations logger = logging.getLogger(__name__) @@ -32,7 +34,7 @@ def __init__( n_theta=None, max_shift=0.15, shift_step=1, - epsilon=1e-3, + epsilon=1e-2, max_iters=1000, degree_res=1, seed=None, @@ -80,25 +82,25 @@ def estimate_rotations(self): Rij = self._global_J_sync(Rij0) # sync3n - S = cryo_sync3n_syncmatrix(Rij) + S = self.cryo_sync3n_syncmatrix(Rij) # optionally S weights # S to rot - # cryo_sync3n_S_to_rot(S) + Ris = self.cryo_sync3n_S_to_rot(S) self.rotations = Ris ########################################### # The hackberries taste like hackberries # ########################################### - def cryo_sync3n_S_to_rot(S): + def cryo_sync3n_S_to_rot(self, S): """ S is (n_img, n_img, 3,3) """ # Convert S to stupid shape - S = np.transpose(S, (0, 2, 1, 3)).reshape(3 * n_img, 3 * n_img) + S = np.transpose(S, (0, 2, 1, 3)).reshape(3 * self.n_img, 3 * self.n_img) # Extract three eigenvectors corresponding to non-zero eigenvalues. d, v = stable_eigsh(S, 10) @@ -110,11 +112,11 @@ def cryo_sync3n_S_to_rot(S): # Only need the top 3 eigen-vectors. v = v[:, sort_idx[:3]] - v1 = v[: 3 * n_img : 3].T.copy() - v2 = v[1 : 3 * n_img : 3].T.copy() - v3 = v[2 : 3 * n_img : 3].T.copy() + v1 = v[: 3 * self.n_img : 3].T.copy() + v2 = v[1 : 3 * self.n_img : 3].T.copy() + v3 = v[2 : 3 * self.n_img : 3].T.copy() - rotations = np.empty((n_img, 3, 3), dtype=self.dtype) + rotations = np.empty((self.n_img, 3, 3), dtype=self.dtype) rotations[:, :, 0] = v1.T rotations[:, :, 1] = v2.T rotations[:, :, 2] = v3.T @@ -124,7 +126,7 @@ def cryo_sync3n_S_to_rot(S): return rotations - def cryo_sync3n_syncmatrix(Rij): + def cryo_sync3n_syncmatrix(self, Rij): S = np.zeros((self.n_img, self.n_img, 3, 3), dtype=self.dtype) I = np.eye(3, dtype=self.dtype) @@ -133,12 +135,12 @@ def cryo_sync3n_syncmatrix(Rij): for i in range(self.n_img): # S( (3*i-2):(3*i) , (3*i-2):(3*i) ) = I; % Rii = I S[i, i] = I - for j in range(i + 1, N): - idx += 1 + for j in range(i + 1, self.n_img): # S( (3*i-2):(3*i) , (3*j-2):(3*j) ) = Rij(:,:,idx); % Rij S[i, j] = Rij[idx] # S( (3*j-2):(3*j) , (3*i-2):(3*i) ) = Rij(:,:,idx)'; % Rji = Rij' S[j, i] = Rij[idx].T + idx += 1 return S @@ -156,7 +158,7 @@ def _estimate_relative_viewing_directions(self): self.build_clmatrix() # Step 4: Calculate relative rotations - Rijs = self._estimate_all_Rijs_c3_c4(clmatrix) + Rijs = self._estimate_all_Rijs_c3_c4(self.clmatrix) return Rijs @@ -267,3 +269,87 @@ def _J_sync_power_method(self, vijs): J_sync = np.sign(vec) return J_sync + def _signs_times_v(self, vijs, vec): + """ + Multiplication of the J-synchronization matrix by a candidate eigenvector. + + The J-synchronization matrix is a matrix representation of the handedness graph, Gamma, whose set of + nodes consists of the estimates vijs and whose set of edges consists of the undirected edges between + all triplets of estimates vij, vjk, and vik, where i Date: Tue, 2 Apr 2024 11:33:49 -0400 Subject: [PATCH 03/60] cleanup S init and usage, func names, etc --- src/aspire/abinitio/commonline_sync3n.py | 103 +++++++++++------------ 1 file changed, 51 insertions(+), 52 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index efe6c8b179..ce41d687f0 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -1,30 +1,19 @@ import logging import numpy as np -from numpy.linalg import eigh, norm, svd +from numpy.linalg import norm from aspire.abinitio import CLOrient3D, SyncVotingMixin -from aspire.operators import PolarFT -from aspire.utils import ( - J_conjugate, - Rotation, - all_pairs, - all_triplets, - anorm, - cyclic_rotations, - tqdm, - trange, -) -from aspire.utils.random import randn +from aspire.utils import J_conjugate, all_pairs, all_triplets, nearest_rotations from aspire.utils.matlab_compat import stable_eigsh -from aspire.utils import nearest_rotations +from aspire.utils.random import randn logger = logging.getLogger(__name__) class CLSync3N(CLOrient3D, SyncVotingMixin): """ - Define a class to estimate 3D orientations using common lines (2017) methods. + Define a class to estimate 3D orientations using common lines Sync3N methods (2017). """ def __init__( @@ -70,78 +59,87 @@ def __init__( self.degree_res = degree_res self.seed = seed + ########################################### + # High level algorithm steps # + ########################################### def estimate_rotations(self): """ - Estimate rotation matrices for molecules with C3 or C4 symmetry. + Estimate rotation matrices. :return: Array of rotation matrices, size n_imgx3x3. """ + + # Initial estimate of viewing directions Rij0 = self._estimate_relative_viewing_directions() - logger.info("Performing global handedness synchronization.") + # Compute and apply global handedness Rij = self._global_J_sync(Rij0) - # sync3n - S = self.cryo_sync3n_syncmatrix(Rij) + # Build sync3n matrix + S = self._construct_sync3n_matrix(Rij) - # optionally S weights + # Optionally S weights + # todo - # S to rot - Ris = self.cryo_sync3n_S_to_rot(S) + # Yield rotations from S + Ris = self._sync3n_S_to_rot(S) self.rotations = Ris ########################################### # The hackberries taste like hackberries # ########################################### - def cryo_sync3n_S_to_rot(self, S): + def _sync3n_S_to_rot(self, S, n_eigs=4): """ - S is (n_img, n_img, 3,3) + Use eigen decomposition of S to estimate transforms, + then project transforms to nearest rotations. """ - # Convert S to stupid shape - S = np.transpose(S, (0, 2, 1, 3)).reshape(3 * self.n_img, 3 * self.n_img) + if n_eigs < 3: + raise ValueError( + f"n_eigs must be greater than 3, default is 4. Invoked with {n_eigs}" + ) # Extract three eigenvectors corresponding to non-zero eigenvalues. - d, v = stable_eigsh(S, 10) + d, v = stable_eigsh(S, n_eigs) sort_idx = np.argsort(-d) logger.info( - f"Top 10 eigenvalues from synchronization voting matrix: {d[sort_idx]}" + f"Top {n_eigs} eigenvalues from synchronization voting matrix: {d[sort_idx]}" ) # Only need the top 3 eigen-vectors. v = v[:, sort_idx[:3]] - v1 = v[: 3 * self.n_img : 3].T.copy() - v2 = v[1 : 3 * self.n_img : 3].T.copy() - v3 = v[2 : 3 * self.n_img : 3].T.copy() + # Yield estimated rotations from the eigen-vectors + v = v.reshape(3, self.n_img, 3) + rotations = np.transpose(v, (1, 0, 2)) # Check, may be (1, 2 , 0) for T - rotations = np.empty((self.n_img, 3, 3), dtype=self.dtype) - rotations[:, :, 0] = v1.T - rotations[:, :, 1] = v2.T - rotations[:, :, 2] = v3.T - # Make sure that we got rotations by enforcing R to be - # a rotation (in case the error is large) + # Enforce we are returning actual rotations rotations = nearest_rotations(rotations) return rotations - def cryo_sync3n_syncmatrix(self, Rij): + def _construct_sync3n_matrix(self, Rij): + """ + Construct sync3n matrix from estimated rotations Rij. + """ - S = np.zeros((self.n_img, self.n_img, 3, 3), dtype=self.dtype) - I = np.eye(3, dtype=self.dtype) + # Initialize S with diag identity blocks + n = self.n_img + S = np.eye(3 * n, dtype=self.dtype).reshape(n, 3, n, 3) idx = 0 - for i in range(self.n_img): - # S( (3*i-2):(3*i) , (3*i-2):(3*i) ) = I; % Rii = I - S[i, i] = I - for j in range(i + 1, self.n_img): + for i in range(n): + for j in range(i + 1, n): # S( (3*i-2):(3*i) , (3*j-2):(3*j) ) = Rij(:,:,idx); % Rij - S[i, j] = Rij[idx] + S[i, :, j, :] = Rij[idx] # S( (3*j-2):(3*j) , (3*i-2):(3*i) ) = Rij(:,:,idx)'; % Rji = Rij' - S[j, i] = Rij[idx].T + S[j, :, i, :] = Rij[idx].T idx += 1 + # Convert S shape to 3Nx3N + S = S.reshape(3 * n, 3 * n) + return S ########################################### @@ -154,22 +152,22 @@ def _estimate_relative_viewing_directions(self): vi is the third row of the i'th rotation matrix Ri. """ logger.info(f"Estimating relative viewing directions for {self.n_img} images.") - # Step 1: Detect a single pair of common-lines between each pair of images + # Detect a single pair of common-lines between each pair of images self.build_clmatrix() - # Step 4: Calculate relative rotations + # Calculate relative rotations Rijs = self._estimate_all_Rijs_c3_c4(self.clmatrix) return Rijs def _global_J_sync(self, vijs): """ """ - n_img = self.n_img # Determine relative handedness of vijs. sign_ij_J = self._J_sync_power_method(vijs) # Synchronize vijs + logger.info("Applying global handedness synchronization.") for i, sign in enumerate(sign_ij_J): if sign == -1: vijs[i] = J_conjugate(vijs[i]) @@ -240,6 +238,9 @@ def _J_sync_power_method(self, vijs): i'th entry indicates whether the i'th relative orientation matrix will be J-conjugated. """ + logger.info( + "Initiating power method to estimate J-synchronization matrix eigenvector." + ) # Set power method tolerance and maximum iterations. epsilon = self.epsilon max_iters = self.max_iters @@ -252,9 +253,6 @@ def _J_sync_power_method(self, vijs): itr = 0 # Power method iterations - logger.info( - "Initiating power method to estimate J-synchronization matrix eigenvector." - ) while itr < max_iters and residual > epsilon: itr += 1 vec_new = self._signs_times_v(vijs, vec) @@ -269,6 +267,7 @@ def _J_sync_power_method(self, vijs): J_sync = np.sign(vec) return J_sync + def _signs_times_v(self, vijs, vec): """ Multiplication of the J-synchronization matrix by a candidate eigenvector. From 2f087d16404bfa70adc9ace874f2315ebfbf162f Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 4 Apr 2024 09:47:30 -0400 Subject: [PATCH 04/60] stub in W --- src/aspire/abinitio/commonline_sync3n.py | 47 ++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index ce41d687f0..0204886cd9 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -59,6 +59,10 @@ def __init__( self.degree_res = degree_res self.seed = seed + # Sync3N specific vars + self._W = None + self._D_null = 1e-13 + ########################################### # High level algorithm steps # ########################################### @@ -100,6 +104,42 @@ def _sync3n_S_to_rot(self, S, n_eigs=4): f"n_eigs must be greater than 3, default is 4. Invoked with {n_eigs}" ) + if self._W is not None: + W = self._W + if not W.shape == (self.n_img, self.n_img): + raise RuntimeError( + f"Shape of W should be {(self.n_img, self.n_img)}." + f" Received {W.shape}." + ) + # Initialize D + D = np.mean(W, axis=1) # D, check axis + + Dhalf = D + # Compute mask of trouble D values + nulls = np.abs(D) < self._D_null + # Avoid trouble values when exponentiating + Dhalf[~nulls] = Dhalf[~nulls] ** (-0.5) + # Flush trouble values to zero + Dhalf[nulls] = 0 + # expand diagonal + Dhalf = np.diag(Dhalf) + + # Report W Diagnostic + W_normalized = Dhalf**2 @ W + nzidx = np.sum(W_normalized, axis=1) != 0 + err = np.linalg.norm(np.sum(W_normalized[nzidx], axis=1) - self.n_img) + if err > 1e-10: + logger.warning(f"Large Weights Matrix Normalization Error: {err}") + + # Make W of size 3Nx3N + W = np.kron(W, np.ones((3, 3))) + + # Make Dhalf of size 3Nx3N + Dhalf = np.diag(np.kron(np.diag(Dhalf), np.ones((1, 3)))[0]) + + # Apply weights to S + S = Dhalf @ (W * S) @ Dhalf + # Extract three eigenvectors corresponding to non-zero eigenvalues. d, v = stable_eigsh(S, n_eigs) sort_idx = np.argsort(-d) @@ -110,6 +150,13 @@ def _sync3n_S_to_rot(self, S, n_eigs=4): # Only need the top 3 eigen-vectors. v = v[:, sort_idx[:3]] + # Cancel symmetrization when using weights W + if self._W is not None: + # Untill now we used a symmetrized variant of the weighted Sync matrix, + # thus we didn't get the right eigenvectors. to fix that we just need + # to multiply: + v = Dhalf @ v + # Yield estimated rotations from the eigen-vectors v = v.reshape(3, self.n_img, 3) rotations = np.transpose(v, (1, 0, 2)) # Check, may be (1, 2 , 0) for T From 0f772411a3e2b70376bbf6b4cd9c9dc0291c6caa Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 5 Apr 2024 10:13:12 -0400 Subject: [PATCH 05/60] stub in W --- src/aspire/abinitio/commonline_sync3n.py | 31 +++++++++++++++++------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 0204886cd9..b66bb6a833 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -28,6 +28,7 @@ def __init__( degree_res=1, seed=None, mask=True, + S_weighting=False, ): """ Initialize object for estimating 3D orientations. @@ -60,7 +61,7 @@ def __init__( self.seed = seed # Sync3N specific vars - self._W = None + self.S_weighting = S_weighting self._D_null = 1e-13 ########################################### @@ -82,18 +83,20 @@ def estimate_rotations(self): # Build sync3n matrix S = self._construct_sync3n_matrix(Rij) - # Optionally S weights - # todo + # Optionally compute S weights + W = None + if self.S_weighting is True: + W = self._syncmatrix_weights(Rij) # Yield rotations from S - Ris = self._sync3n_S_to_rot(S) + Ris = self._sync3n_S_to_rot(S, W) self.rotations = Ris ########################################### # The hackberries taste like hackberries # ########################################### - def _sync3n_S_to_rot(self, S, n_eigs=4): + def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): """ Use eigen decomposition of S to estimate transforms, then project transforms to nearest rotations. @@ -104,8 +107,8 @@ def _sync3n_S_to_rot(self, S, n_eigs=4): f"n_eigs must be greater than 3, default is 4. Invoked with {n_eigs}" ) - if self._W is not None: - W = self._W + if W is not None: + logger.info("Applying weights to synchronization matrix.") if not W.shape == (self.n_img, self.n_img): raise RuntimeError( f"Shape of W should be {(self.n_img, self.n_img)}." @@ -151,7 +154,7 @@ def _sync3n_S_to_rot(self, S, n_eigs=4): v = v[:, sort_idx[:3]] # Cancel symmetrization when using weights W - if self._W is not None: + if W is not None: # Untill now we used a symmetrized variant of the weighted Sync matrix, # thus we didn't get the right eigenvectors. to fix that we just need # to multiply: @@ -159,7 +162,7 @@ def _sync3n_S_to_rot(self, S, n_eigs=4): # Yield estimated rotations from the eigen-vectors v = v.reshape(3, self.n_img, 3) - rotations = np.transpose(v, (1, 0, 2)) # Check, may be (1, 2 , 0) for T + rotations = np.transpose(v, (1, 0, 2)) # Enforce we are returning actual rotations rotations = nearest_rotations(rotations) @@ -189,6 +192,16 @@ def _construct_sync3n_matrix(self, Rij): return S + def _syncmatrix_weights(self, Rij): + """ + Given relative rotations matrix `Rij`, + compute probability weights for S. + """ + logger.info("Computing synchronization matrix weights.") + # Test with identity weights, + # todo, port cryo_sync3n_syncmatrix_weights + return np.ones((self.n_img, self.n_img)) + ########################################### # Primary Methods # ########################################### From 1d3cbeffdbdef6d6fcff781d85e7669df38f6e1b Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 5 Apr 2024 15:48:31 -0400 Subject: [PATCH 06/60] begin stubbing in actual S weight computation --- src/aspire/abinitio/commonline_sync3n.py | 77 ++++++++++++++++++++++-- 1 file changed, 72 insertions(+), 5 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index b66bb6a833..8deef9c790 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -192,15 +192,82 @@ def _construct_sync3n_matrix(self, Rij): return S - def _syncmatrix_weights(self, Rij): + def _syncmatrix_weights( + self, + Rij, + permitted_inconsistency=1.5, + p_domain_limit=0.7, + max_iterations=12, + min_p_permitted=0.04, + ): """ Given relative rotations matrix `Rij`, - compute probability weights for S. + compute and return probability weights for S. """ logger.info("Computing synchronization matrix weights.") - # Test with identity weights, - # todo, port cryo_sync3n_syncmatrix_weights - return np.ones((self.n_img, self.n_img)) + + def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): + # Get inistial estimate for Pij + P, sigma, Rsquare, Pij, hist, fit, cum_scores = self._triangle_scores( + Rij, hist, Pmin, Pmax + ) + + # Check if P and Pij are consistent + mean_Pij = np.mean(Pij) + too_low = P < mean_Pij / permitted_inconsistency + too_high = P > mean_Pij * permitted_inconsistency + inconsistent = too_low | too_high + + # Check trend + if prev_too_low is not None and too_low != prev_too_low: + p_domain_limit = np.sqrt(p_domain_limit) + + # define limits for next P estimation + if too_high: + if P < min_p_permitted: + logger.error( + "Triangles Scores are too bad distributed, whatever small P we force." + ) + + Pmax = P + if Pmax is not None: + Pmax = Pmax * p_domain_limit + + Pmin = Pmax * p_domain_limit + else: + Pmin = P + if Pmin is not None: + Pmin = Pmin / p_domain_limit + + Pmax = Pmin / p_domain_limit + + return inconsistent, Pij, (too_low, Pmin, Pmax, hist) + + # Repeat iteratively until estimations of P & Pij are consistent + i = 0 + res = (None,) * 4 + inconsistent = True + while inconsistent and i < max_iterations: + inconsistent, Pij, res = body(*res) + + # Pack W + # N = 0.5 * (1 + np.sqrt(1+8*Rij.shape[2])) #? what + W = np.zeros((self.n_img, self.n_img)) + idx = 0 + for i in range(self.n_img): + for j in range(i, self.n_img): + W[i, j] = Pij[idx] + W[j, i] = Pij[idx] + idx += 1 + + return W + + def _triangle_scores(self, Rij, hist, Pmin, Pmax): + """ + Todo + """ + # return P, sigma, Rsquare, Pij, hist, fit, cum_scores + pass ########################################### # Primary Methods # From 906aa2d077458224353de114c3d31da335022ddc Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 8 Apr 2024 10:02:35 -0400 Subject: [PATCH 07/60] fix typo bug --- src/aspire/abinitio/commonline_sync3n.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 8deef9c790..68b13dd747 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -226,18 +226,20 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): if too_high: if P < min_p_permitted: logger.error( - "Triangles Scores are too bad distributed, whatever small P we force." + "Triangles Scores are poorly distributed, whatever small P we force." ) - Pmax = P if Pmax is not None: Pmax = Pmax * p_domain_limit + else: + Pmax = P Pmin = Pmax * p_domain_limit - else: - Pmin = P + else: # too low if Pmin is not None: Pmin = Pmin / p_domain_limit + else: + Pmin = P Pmax = Pmin / p_domain_limit @@ -251,7 +253,6 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): inconsistent, Pij, res = body(*res) # Pack W - # N = 0.5 * (1 + np.sqrt(1+8*Rij.shape[2])) #? what W = np.zeros((self.n_img, self.n_img)) idx = 0 for i in range(self.n_img): From 8b14db3a401278cf25f0946bc01dfee055162e77 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 8 Apr 2024 16:06:44 -0400 Subject: [PATCH 08/60] fix rot reshape bug and stub in probability_scores --- src/aspire/abinitio/commonline_sync3n.py | 190 ++++++++++++++++++++++- 1 file changed, 183 insertions(+), 7 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 68b13dd747..2f21c7da4b 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -2,6 +2,7 @@ import numpy as np from numpy.linalg import norm +from scipy.optimize import curve_fit from aspire.abinitio import CLOrient3D, SyncVotingMixin from aspire.utils import J_conjugate, all_pairs, all_triplets, nearest_rotations @@ -96,7 +97,7 @@ def estimate_rotations(self): ########################################### # The hackberries taste like hackberries # ########################################### - def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): + def _sync3n_S_to_rot(self, S, W=None, n_eigs=10): """ Use eigen decomposition of S to estimate transforms, then project transforms to nearest rotations. @@ -104,7 +105,7 @@ def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): if n_eigs < 3: raise ValueError( - f"n_eigs must be greater than 3, default is 4. Invoked with {n_eigs}" + f"n_eigs must be greater than 3, default is 10. Invoked with {n_eigs}" ) if W is not None: @@ -161,8 +162,7 @@ def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): v = Dhalf @ v # Yield estimated rotations from the eigen-vectors - v = v.reshape(3, self.n_img, 3) - rotations = np.transpose(v, (1, 0, 2)) + rotations = v.reshape(self.n_img, 3, 3).transpose(0, 2, 1) # Enforce we are returning actual rotations rotations = nearest_rotations(rotations) @@ -263,12 +263,188 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): return W - def _triangle_scores(self, Rij, hist, Pmin, Pmax): + def _triangle_scores_mex(self, Rijs, hist_intervals): + pass + # return cum_scores, hist_scores + + def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): + # The following is adopted from Matlab parias_probabilities_mex.c `looper` + # The code should be thread/parallel safe over `i` when results are gathered (via sum). + + # Initialize probability result arrays + ln_f_ind = np.zeros(len(Rij), dtype=self.dtype) + ln_f_arb = np.zeros(len(Rij), dtype=self.dtype) + + c = np.empty((4), dtype=self.dtype) + for i in range(self.n_img): + for j in range(i, self.n_img): + Rij = Rijs[i * self.n_img + j] + for k in range(j, self.n_img): + Rik = Rijs[i * self.n_img + k] + Rjk = Rijs[j * self.n_img + k] + + # Compute conjugated rotats + Rij_J = J_conjugate(Rij) + Rik_J = J_conjugate(Rik) + Rjk_J = J_conjugate(Rjk) + + # Compute R muls and norms + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rjk) ** 2) + c[3] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[4] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + + # Find best match + best_i = np.argmin(c) + best_val = c[best_i] + + # For each triangle side, find the best alternative + + # Compute scores + s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) + s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) + s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) + + # Update probabilities + # # Probability of pair ij having score given indicicative common line + # P2, B, b, x0, A, a + f_ij_jk = np.log( + P2 + * ( + B + * np.pow(1 - s_ij_jk, b) + * np.exp(-b / (1 - x0) * (1 - s_ij_jk)) + ) + + (1 - P2) * A * np.pow((1 - s_ij_jk), a) + ) + f_ik_jk = np.log( + P2 + * ( + B + * np.pow(1 - s_ik_jk, b) + * np.exp(-b / (1 - x0) * (1 - s_ik_jk)) + ) + + (1 - P2) * A * np.pow((1 - s_ik_jk), a) + ) + f_ij_ik = np.log( + P2 + * ( + B + * np.pow(1 - s_ij_ik, b) + * np.exp(-b / (1 - x0) * (1 - s_ij_ik)) + ) + + (1 - P2) * A * np.pow((1 - s_ij_ik), a) + ) + ln_f_ind[ij] += f_ij_jk + f_ij_ik + ln_f_ind[jk] += f_ij_jk + f_ik_jk + ln_f_ind[ik] += f_ik_jk + f_ij_ik + + # # Probability of pair ij having score given arbitrary common line + f_ij_jk = np.log(A * np.pow((1 - s_ij_jk), a)) + f_ik_jk = np.log(A * np.pow((1 - s_ik_jk), a)) + f_ij_ik = np.log(A * np.pow((1 - s_ij_ik), a)) + ln_f_arb[ij] += f_ij_jk + f_ij_ik + ln_f_arb[jk] += f_ij_jk + f_ik_jk + ln_f_arb[ik] += f_ik_jk + f_ij_ik + + return ln_f_ind, ln_f_arb + + def _triangle_scores( + self, + Rijs, + hist, + Pmin, + Pmax, + hist_intervals=100, + a=2.2, + peak2sigma=2.43e-2, + P=0.5, + b=2.5, + x0=0.78, + ): """ Todo + + :param a: magic number + :param peak2sigma: empirical relation between the location of + the peak of the histigram, and the mean error in the + common lines estimations. + AKA, magic number + :param P: + :param b: + :param x0: """ - # return P, sigma, Rsquare, Pij, hist, fit, cum_scores - pass + + Pmin = Pmin or 0 + Pmin = max(Pmin, 0) # Clamp probability to [0,1] + Pmax = Pmax or 1 + Pmax = min(Pmax, 1) # Clamp probability to [0,1] + + if hist is not None: + cum_scores, scores_hist = self._triangle_scores_mex(Rijs, hist_intervals) + + # Normalize cumulated scores + cum_scores /= len(Rij) + + # Histogram decomposition: P & sigma evaluation + h = 1 / hist_intervals + hist_x = np.arange(h / 2, 1, h) + # normalization factor of one component of the histogram + A = ( + (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) + / hist_intervals + * (a + 1) + ) + # normalization of 2nd component: B = P*N_delta/sum(f), where f is the component formula + B0 = P ** (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) / np.sum( + ((1 - hist_x) ** b) * np.exp(-b / (1 - x0) * (1 - hist_x)) + ) + start_values = np.array([B0, P, b, x0], dtype=np.float64) + lower_bounds = np.array([0, Pmin**3, 2, 0], dtype=np.float64) + upper_bounds = np.array([np.inf, Pmax**3, np.inf, 1], dtype=np.float64) + + # Fit distribution + def fun(x, B, P, b, x0, A=A, a=a): + """Function to fit. x is data vector.""" + return (1 - P) @ A * (1 - x) ** a + P * B * (1 - x) ** b * np.exp( + -b / (1 - x0) * (1 - x) + ) + + popt, pcov = curve_fit( + fun, + hist_x.astype(np.float64, copy=False), + scores_hist.astype(np.float64, copy=False), + p0=start_values, + bounds=(lower_bounds, upper_bounds), + ) + B, P, b, x0 = popt + + # Derive P and sigma + P = P ** (1 / 3) + peak = x0 # can rm later + sigma = (1 - peak) / peak2sigma + + # Initialize probability computations + # Local histograms analysis + A = a + 1 # distribution 1st component normalization factor + # distribution 2nd component normalization factor + B = B / ( + (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) / hist_intervals + ) + + # Calculate probabilities + ln_f_ind, ln_f_arb = self._pairs_probabilities(Rij, P**2, A, a, B, b, x0) + Pij = 1 / (1 + (1 - P) / P * np.exp(ln_f_arb - ln_f_ind)) + + # Fix singular output + num_nan = np.sum(np.isnan(Pij)) + if num_nan > 0: + logger.error( + f"NaN probabilities occurred {num_nan} times out of {size(Pij)}. Setting NaNs to zero." + ) + Pij = np.nan_to_num(Pij) + + return P, sigma, Rsquare, Pij, scores_hist, fit, cum_scores ########################################### # Primary Methods # From c11b90876b4fc997c36eca1a87d23732fdf04917 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 9 Apr 2024 13:07:48 -0400 Subject: [PATCH 09/60] stub in triangle scores and pair probabilities --- src/aspire/abinitio/commonline_sync3n.py | 212 ++++++++++++++++++----- 1 file changed, 170 insertions(+), 42 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 2f21c7da4b..4aba3102ce 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -5,12 +5,47 @@ from scipy.optimize import curve_fit from aspire.abinitio import CLOrient3D, SyncVotingMixin -from aspire.utils import J_conjugate, all_pairs, all_triplets, nearest_rotations +from aspire.utils import J_conjugate, all_pairs, all_triplets, nearest_rotations, trange from aspire.utils.matlab_compat import stable_eigsh from aspire.utils.random import randn logger = logging.getLogger(__name__) +# Initialize alternatives +# +# When we find the best J-configuration, we also compare it to the alternative 2nd best one. +# this comparison is done for every pair in the triplete independently. to make sure that the +# alternative is indeed different in relation to the pair, we document the differences between +# the configurations in advance: +# ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from best_conf in relation to pair + +_ALTS = np.empty((3, 4, 3), dtype=int) +# Rewrite this later. +_ALTS[0][0][0] = 1 +_ALTS[0][1][0] = 0 +_ALTS[0][2][0] = 0 +_ALTS[0][3][0] = 1 +_ALTS[1][0][0] = 2 +_ALTS[1][1][0] = 3 +_ALTS[1][2][0] = 3 +_ALTS[1][3][0] = 2 +_ALTS[0][0][1] = 2 +_ALTS[0][1][1] = 2 +_ALTS[0][2][1] = 0 +_ALTS[0][3][1] = 0 +_ALTS[1][0][1] = 3 +_ALTS[1][1][1] = 3 +_ALTS[1][2][1] = 1 +_ALTS[1][3][1] = 1 +_ALTS[0][0][2] = 1 +_ALTS[0][1][2] = 0 +_ALTS[0][2][2] = 1 +_ALTS[0][3][2] = 0 +_ALTS[1][0][2] = 3 +_ALTS[1][1][2] = 2 +_ALTS[1][2][2] = 3 +_ALTS[1][3][2] = 2 + class CLSync3N(CLOrient3D, SyncVotingMixin): """ @@ -56,6 +91,9 @@ def __init__( mask=mask, ) + # Generate pair mappings + self._pairs, self._pairs_to_linear = all_pairs(self.n_img, return_map=True) + self.epsilon = epsilon self.max_iters = max_iters self.degree_res = degree_res @@ -208,7 +246,7 @@ def _syncmatrix_weights( def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): # Get inistial estimate for Pij - P, sigma, Rsquare, Pij, hist, fit, cum_scores = self._triangle_scores( + P, sigma, Pij, hist, cum_scores = self._triangle_scores( Rij, hist, Pmin, Pmax ) @@ -249,14 +287,15 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): i = 0 res = (None,) * 4 inconsistent = True - while inconsistent and i < max_iterations: + while inconsistent and i < 1: # max_iterations: inconsistent, Pij, res = body(*res) + i += 1 # Pack W W = np.zeros((self.n_img, self.n_img)) idx = 0 for i in range(self.n_img): - for j in range(i, self.n_img): + for j in range(i + 1, self.n_img): W[i, j] = Pij[idx] W[j, i] = Pij[idx] idx += 1 @@ -264,24 +303,104 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): return W def _triangle_scores_mex(self, Rijs, hist_intervals): - pass - # return cum_scores, hist_scores + # The following is adopted from Matlab triangle_scores_mex.c + # The code should be thread/parallel safe over `i` when results are gathered (via sum). + + # Initialize probability result arrays + cum_scores = np.zeros(len(Rijs), dtype=self.dtype) + scores_hist = np.zeros(hist_intervals, dtype=self.dtype) + h = 1 / hist_intervals + + c = np.empty((4), dtype=self.dtype) + for i in trange(self.n_img, desc="Computing triangle scores"): + for j in range( + i + 1, self.n_img - 1 + ): # check bound (taken from MATLAB mex) + ij = self._pairs_to_linear[i, j] + Rij = Rijs[ij] + for k in range(j + 1, self.n_img): + ik = self._pairs_to_linear[i, k] + jk = self._pairs_to_linear[j, k] + Rik = Rijs[ik] + Rjk = Rijs[jk] + + # Compute conjugated rotats + Rij_J = J_conjugate(Rij) + Rik_J = J_conjugate(Rik) + Rjk_J = J_conjugate(Rjk) + + # Compute R muls and norms + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + + # Find best match + best_i = np.argmin(c) + best_val = c[best_i] + + # For each triangle side, find the best alternative + alt_ij_jk = c[_ALTS[0][best_i][0]] + if c[_ALTS[1][best_i][0]] < alt_ij_jk: + alt_ij_jk = c[_ALTS[1][best_i][0]] + alt_ik_jk = c[_ALTS[0][best_i][1]] + if c[_ALTS[1][best_i][1]] < alt_ik_jk: + alt_ik_jk = c[_ALTS[1][best_i][1]] + alt_ij_ik = c[_ALTS[0][best_i][2]] + if c[_ALTS[1][best_i][2]] < alt_ij_ik: + alt_ij_ik = c[_ALTS[1][best_i][2]] + + # Compute scores + s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) + s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) + s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) + + # Update cumulated scores + cum_scores[ij] += s_ij_jk + s_ij_ik + cum_scores[jk] += s_ij_jk + s_ik_jk + cum_scores[ik] += s_ik_jk + s_ij_ik + + # Update histogram + threshold = 0 + for l1 in range(hist_intervals): + threshold += h + if s_ij_jk < threshold: + break + + for l2 in range(hist_intervals): + threshold += h + if s_ik_jk < threshold: + break + + for l3 in range(hist_intervals): + threshold += h + if s_ij_ik < threshold: + break + + scores_hist[l1] += 1 + scores_hist[l2] += 1 + scores_hist[l3] += 1 + + return cum_scores, scores_hist def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): - # The following is adopted from Matlab parias_probabilities_mex.c `looper` + # The following is adopted from Matlab pairas_probabilities_mex.c `looper` # The code should be thread/parallel safe over `i` when results are gathered (via sum). # Initialize probability result arrays - ln_f_ind = np.zeros(len(Rij), dtype=self.dtype) - ln_f_arb = np.zeros(len(Rij), dtype=self.dtype) + ln_f_ind = np.zeros(len(Rijs), dtype=self.dtype) + ln_f_arb = np.zeros(len(Rijs), dtype=self.dtype) c = np.empty((4), dtype=self.dtype) - for i in range(self.n_img): - for j in range(i, self.n_img): - Rij = Rijs[i * self.n_img + j] - for k in range(j, self.n_img): - Rik = Rijs[i * self.n_img + k] - Rjk = Rijs[j * self.n_img + k] + for i in trange(self.n_img, desc="Computing pair probabilities"): + for j in range(i + 1, self.n_img - 1): + ij = self._pairs_to_linear[i, j] + Rij = Rijs[ij] + for k in range(j + 1, self.n_img): + ik = self._pairs_to_linear[i, k] + jk = self._pairs_to_linear[j, k] + Rik = Rijs[ik] + Rjk = Rijs[jk] # Compute conjugated rotats Rij_J = J_conjugate(Rij) @@ -290,15 +409,24 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): # Compute R muls and norms c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rjk) ** 2) - c[3] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[4] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) # Find best match best_i = np.argmin(c) best_val = c[best_i] # For each triangle side, find the best alternative + alt_ij_jk = c[_ALTS[0][best_i][0]] + if c[_ALTS[1][best_i][0]] < alt_ij_jk: + alt_ij_jk = c[_ALTS[1][best_i][0]] + alt_ik_jk = c[_ALTS[0][best_i][1]] + if c[_ALTS[1][best_i][1]] < alt_ik_jk: + alt_ik_jk = c[_ALTS[1][best_i][1]] + alt_ij_ik = c[_ALTS[0][best_i][2]] + if c[_ALTS[1][best_i][2]] < alt_ij_ik: + alt_ij_ik = c[_ALTS[1][best_i][2]] # Compute scores s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) @@ -312,37 +440,37 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): P2 * ( B - * np.pow(1 - s_ij_jk, b) + * np.power(1 - s_ij_jk, b) * np.exp(-b / (1 - x0) * (1 - s_ij_jk)) ) - + (1 - P2) * A * np.pow((1 - s_ij_jk), a) + + (1 - P2) * A * np.power((1 - s_ij_jk), a) ) f_ik_jk = np.log( P2 * ( B - * np.pow(1 - s_ik_jk, b) + * np.power(1 - s_ik_jk, b) * np.exp(-b / (1 - x0) * (1 - s_ik_jk)) ) - + (1 - P2) * A * np.pow((1 - s_ik_jk), a) + + (1 - P2) * A * np.power((1 - s_ik_jk), a) ) f_ij_ik = np.log( P2 * ( B - * np.pow(1 - s_ij_ik, b) + * np.power(1 - s_ij_ik, b) * np.exp(-b / (1 - x0) * (1 - s_ij_ik)) ) - + (1 - P2) * A * np.pow((1 - s_ij_ik), a) + + (1 - P2) * A * np.power((1 - s_ij_ik), a) ) ln_f_ind[ij] += f_ij_jk + f_ij_ik ln_f_ind[jk] += f_ij_jk + f_ik_jk ln_f_ind[ik] += f_ik_jk + f_ij_ik # # Probability of pair ij having score given arbitrary common line - f_ij_jk = np.log(A * np.pow((1 - s_ij_jk), a)) - f_ik_jk = np.log(A * np.pow((1 - s_ik_jk), a)) - f_ij_ik = np.log(A * np.pow((1 - s_ij_ik), a)) + f_ij_jk = np.log(A * np.power((1 - s_ij_jk), a)) + f_ik_jk = np.log(A * np.power((1 - s_ik_jk), a)) + f_ij_ik = np.log(A * np.power((1 - s_ij_ik), a)) ln_f_arb[ij] += f_ij_jk + f_ij_ik ln_f_arb[jk] += f_ij_jk + f_ik_jk ln_f_arb[ik] += f_ik_jk + f_ij_ik @@ -352,7 +480,7 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): def _triangle_scores( self, Rijs, - hist, + scores_hist, Pmin, Pmax, hist_intervals=100, @@ -380,11 +508,12 @@ def _triangle_scores( Pmax = Pmax or 1 Pmax = min(Pmax, 1) # Clamp probability to [0,1] - if hist is not None: + cum_scores = None # XXX Why do we even need cum_scores? + if scores_hist is None: cum_scores, scores_hist = self._triangle_scores_mex(Rijs, hist_intervals) # Normalize cumulated scores - cum_scores /= len(Rij) + cum_scores /= len(Rijs) # Histogram decomposition: P & sigma evaluation h = 1 / hist_intervals @@ -406,10 +535,11 @@ def _triangle_scores( # Fit distribution def fun(x, B, P, b, x0, A=A, a=a): """Function to fit. x is data vector.""" - return (1 - P) @ A * (1 - x) ** a + P * B * (1 - x) ** b * np.exp( + return (1 - P) * A * (1 - x) ** a + P * B * (1 - x) ** b * np.exp( -b / (1 - x0) * (1 - x) ) + breakpoint() popt, pcov = curve_fit( fun, hist_x.astype(np.float64, copy=False), @@ -433,18 +563,18 @@ def fun(x, B, P, b, x0, A=A, a=a): ) # Calculate probabilities - ln_f_ind, ln_f_arb = self._pairs_probabilities(Rij, P**2, A, a, B, b, x0) + ln_f_ind, ln_f_arb = self._pairs_probabilities(Rijs, P**2, A, a, B, b, x0) Pij = 1 / (1 + (1 - P) / P * np.exp(ln_f_arb - ln_f_ind)) # Fix singular output num_nan = np.sum(np.isnan(Pij)) if num_nan > 0: logger.error( - f"NaN probabilities occurred {num_nan} times out of {size(Pij)}. Setting NaNs to zero." + f"NaN probabilities occurred {num_nan} times out of {np.size(Pij)}. Setting NaNs to zero." ) Pij = np.nan_to_num(Pij) - return P, sigma, Rsquare, Pij, scores_hist, fit, cum_scores + return P, sigma, Pij, scores_hist, cum_scores ########################################### # Primary Methods # @@ -484,10 +614,9 @@ def _estimate_all_Rijs_c3_c4(self, clmatrix): """ n_img = self.n_img n_theta = self.n_theta - pairs = all_pairs(n_img) - Rijs = np.zeros((len(pairs), 3, 3)) + Rijs = np.zeros((len(self._pairs), 3, 3)) - for idx, (i, j) in enumerate(pairs): + for idx, (i, j) in enumerate(self._pairs): Rijs[idx] = self._syncmatrix_ij_vote_3n( clmatrix, i, j, np.arange(n_img), n_theta ) @@ -599,10 +728,9 @@ def _signs_times_v(self, vijs, vec): :return: New candidate eigenvector of length n-choose-2. The product of the J-sync matrix and vec. """ - # All pairs (i,j) and triplets (i,j,k) where i Date: Tue, 9 Apr 2024 13:10:10 -0400 Subject: [PATCH 10/60] tox checks [skip ci] --- src/aspire/abinitio/commonline_sync3n.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 4aba3102ce..263d104d0f 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -362,24 +362,24 @@ def _triangle_scores_mex(self, Rijs, hist_intervals): # Update histogram threshold = 0 - for l1 in range(hist_intervals): + for _l1 in range(hist_intervals): threshold += h if s_ij_jk < threshold: break - for l2 in range(hist_intervals): + for _l2 in range(hist_intervals): threshold += h if s_ik_jk < threshold: break - for l3 in range(hist_intervals): + for _l3 in range(hist_intervals): threshold += h if s_ij_ik < threshold: break - scores_hist[l1] += 1 - scores_hist[l2] += 1 - scores_hist[l3] += 1 + scores_hist[_l1] += 1 + scores_hist[_l2] += 1 + scores_hist[_l3] += 1 return cum_scores, scores_hist From 244613a279baf6270c2452c335c9dca384905131 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 9 Apr 2024 14:09:24 -0400 Subject: [PATCH 11/60] light cleanup --- src/aspire/abinitio/commonline_sync3n.py | 66 +++++++++--------------- 1 file changed, 25 insertions(+), 41 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 263d104d0f..f1f3cc0d3a 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -19,32 +19,13 @@ # the configurations in advance: # ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from best_conf in relation to pair -_ALTS = np.empty((3, 4, 3), dtype=int) -# Rewrite this later. -_ALTS[0][0][0] = 1 -_ALTS[0][1][0] = 0 -_ALTS[0][2][0] = 0 -_ALTS[0][3][0] = 1 -_ALTS[1][0][0] = 2 -_ALTS[1][1][0] = 3 -_ALTS[1][2][0] = 3 -_ALTS[1][3][0] = 2 -_ALTS[0][0][1] = 2 -_ALTS[0][1][1] = 2 -_ALTS[0][2][1] = 0 -_ALTS[0][3][1] = 0 -_ALTS[1][0][1] = 3 -_ALTS[1][1][1] = 3 -_ALTS[1][2][1] = 1 -_ALTS[1][3][1] = 1 -_ALTS[0][0][2] = 1 -_ALTS[0][1][2] = 0 -_ALTS[0][2][2] = 1 -_ALTS[0][3][2] = 0 -_ALTS[1][0][2] = 3 -_ALTS[1][1][2] = 2 -_ALTS[1][2][2] = 3 -_ALTS[1][3][2] = 2 +_ALTS = np.array( + [ + [[1, 2, 1], [0, 2, 0], [0, 0, 1], [1, 0, 0]], + [[2, 3, 3], [3, 3, 2], [3, 1, 3], [2, 1, 2]], + ], + dtype=int, +) class CLSync3N(CLOrient3D, SyncVotingMixin): @@ -114,28 +95,26 @@ def estimate_rotations(self): """ # Initial estimate of viewing directions - Rij0 = self._estimate_relative_viewing_directions() + Rijs0 = self._estimate_relative_viewing_directions() # Compute and apply global handedness - Rij = self._global_J_sync(Rij0) + Rijs = self._global_J_sync(Rijs0) # Build sync3n matrix - S = self._construct_sync3n_matrix(Rij) + S = self._construct_sync3n_matrix(Rijs) # Optionally compute S weights W = None if self.S_weighting is True: - W = self._syncmatrix_weights(Rij) + W = self._syncmatrix_weights(Rijs) # Yield rotations from S - Ris = self._sync3n_S_to_rot(S, W) - - self.rotations = Ris + self.rotations = self._sync3n_S_to_rot(S, W) ########################################### # The hackberries taste like hackberries # ########################################### - def _sync3n_S_to_rot(self, S, W=None, n_eigs=10): + def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): """ Use eigen decomposition of S to estimate transforms, then project transforms to nearest rotations. @@ -143,7 +122,7 @@ def _sync3n_S_to_rot(self, S, W=None, n_eigs=10): if n_eigs < 3: raise ValueError( - f"n_eigs must be greater than 3, default is 10. Invoked with {n_eigs}" + f"n_eigs must be greater than 3, default is 4. Invoked with {n_eigs}" ) if W is not None: @@ -232,7 +211,7 @@ def _construct_sync3n_matrix(self, Rij): def _syncmatrix_weights( self, - Rij, + Rijs, permitted_inconsistency=1.5, p_domain_limit=0.7, max_iterations=12, @@ -247,7 +226,7 @@ def _syncmatrix_weights( def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): # Get inistial estimate for Pij P, sigma, Pij, hist, cum_scores = self._triangle_scores( - Rij, hist, Pmin, Pmax + Rijs, hist, Pmin, Pmax ) # Check if P and Pij are consistent @@ -287,7 +266,7 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): i = 0 res = (None,) * 4 inconsistent = True - while inconsistent and i < 1: # max_iterations: + while inconsistent and i < max_iterations: inconsistent, Pij, res = body(*res) i += 1 @@ -343,9 +322,11 @@ def _triangle_scores_mex(self, Rijs, hist_intervals): alt_ij_jk = c[_ALTS[0][best_i][0]] if c[_ALTS[1][best_i][0]] < alt_ij_jk: alt_ij_jk = c[_ALTS[1][best_i][0]] + alt_ik_jk = c[_ALTS[0][best_i][1]] if c[_ALTS[1][best_i][1]] < alt_ik_jk: alt_ik_jk = c[_ALTS[1][best_i][1]] + alt_ij_ik = c[_ALTS[0][best_i][2]] if c[_ALTS[1][best_i][2]] < alt_ij_ik: alt_ij_ik = c[_ALTS[1][best_i][2]] @@ -539,7 +520,6 @@ def fun(x, B, P, b, x0, A=A, a=a): -b / (1 - x0) * (1 - x) ) - breakpoint() popt, pcov = curve_fit( fun, hist_x.astype(np.float64, copy=False), @@ -590,7 +570,7 @@ def _estimate_relative_viewing_directions(self): self.build_clmatrix() # Calculate relative rotations - Rijs = self._estimate_all_Rijs_c3_c4(self.clmatrix) + Rijs = self._estimate_all_Rijs(self.clmatrix) return Rijs @@ -608,7 +588,7 @@ def _global_J_sync(self, vijs): return vijs - def _estimate_all_Rijs_c3_c4(self, clmatrix): + def _estimate_all_Rijs(self, clmatrix): """ Estimate Rijs using the voting method. """ @@ -685,6 +665,10 @@ def _J_sync_power_method(self, vijs): residual = 1 itr = 0 + # XXX, I don't like that epsilon>1 (residual) returns signs of random vector + # maybe force to run once? or return vec as zeros in that case? + # Seems unintended, but easy to do. + # Power method iterations while itr < max_iters and residual > epsilon: itr += 1 From 21fb193bd156499ea695208a4d2cc35684b409ab Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 10 Apr 2024 10:40:40 -0400 Subject: [PATCH 12/60] J weighting --- src/aspire/abinitio/commonline_sync3n.py | 167 +++++++++++------------ src/aspire/utils/misc.py | 8 +- 2 files changed, 83 insertions(+), 92 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index f1f3cc0d3a..b382ada6b0 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -5,7 +5,7 @@ from scipy.optimize import curve_fit from aspire.abinitio import CLOrient3D, SyncVotingMixin -from aspire.utils import J_conjugate, all_pairs, all_triplets, nearest_rotations, trange +from aspire.utils import J_conjugate, all_pairs, nearest_rotations, trange from aspire.utils.matlab_compat import stable_eigsh from aspire.utils.random import randn @@ -27,6 +27,8 @@ dtype=int, ) +_signs_confs = np.array([[1, 1, 1], [-1, 1, -1], [-1, -1, 1], [1, -1, -1]], dtype=int) + class CLSync3N(CLOrient3D, SyncVotingMixin): """ @@ -46,6 +48,7 @@ def __init__( seed=None, mask=True, S_weighting=False, + J_weighting=False, ): """ Initialize object for estimating 3D orientations. @@ -82,6 +85,7 @@ def __init__( # Sync3N specific vars self.S_weighting = S_weighting + self.J_weighting = J_weighting self._D_null = 1e-13 ########################################### @@ -574,19 +578,18 @@ def _estimate_relative_viewing_directions(self): return Rijs - def _global_J_sync(self, vijs): + def _global_J_sync(self, Rijs): """ """ - # Determine relative handedness of vijs. - sign_ij_J = self._J_sync_power_method(vijs) + # Determine relative handedness of Rijs. + sign_ij_J = self._J_sync_power_method(Rijs) - # Synchronize vijs + # Synchronize Rijs logger.info("Applying global handedness synchronization.") - for i, sign in enumerate(sign_ij_J): - if sign == -1: - vijs[i] = J_conjugate(vijs[i]) + mask = sign_ij_J == -1 + Rijs[mask] = J_conjugate(Rijs[mask]) - return vijs + return Rijs def _estimate_all_Rijs(self, clmatrix): """ @@ -636,7 +639,7 @@ def _syncmatrix_ij_vote_3n(self, clmatrix, i, j, k_list, n_theta): # Secondary Methods for Global J Sync # ####################################### - def _J_sync_power_method(self, vijs): + def _J_sync_power_method(self, Rijs): """ Calculate the leading eigenvector of the J-synchronization matrix using the power method. @@ -645,7 +648,7 @@ def _J_sync_power_method(self, vijs): use the power method to compute the eigenvalues and eigenvectors, while constructing the matrix on-the-fly. - :param vijs: (n-choose-2)x3x3 array of estimates of relative orientation matrices. + :param Rijs: (n-choose-2)x3x3 array of estimates of relative orientation matrices. :return: An array of length n-choose-2 consisting of 1 or -1, where the sign of the i'th entry indicates whether the i'th relative orientation matrix will be J-conjugated. @@ -659,8 +662,8 @@ def _J_sync_power_method(self, vijs): max_iters = self.max_iters # Initialize candidate eigenvectors - n_vijs = vijs.shape[0] - vec = randn(n_vijs, seed=self.seed) + n_Rijs = Rijs.shape[0] + vec = randn(n_Rijs, seed=self.seed) vec = vec / norm(vec) residual = 1 itr = 0 @@ -672,7 +675,7 @@ def _J_sync_power_method(self, vijs): # Power method iterations while itr < max_iters and residual > epsilon: itr += 1 - vec_new = self._signs_times_v(vijs, vec) + vec_new = self._signs_times_v(Rijs, vec) vec_new = vec_new / norm(vec_new) residual = norm(vec_new - vec) vec = vec_new @@ -685,86 +688,74 @@ def _J_sync_power_method(self, vijs): return J_sync - def _signs_times_v(self, vijs, vec): + def _signs_times_v(self, Rijs, vec): """ - Multiplication of the J-synchronization matrix by a candidate eigenvector. - - The J-synchronization matrix is a matrix representation of the handedness graph, Gamma, whose set of - nodes consists of the estimates vijs and whose set of edges consists of the undirected edges between - all triplets of estimates vij, vjk, and vik, where i Date: Wed, 10 Apr 2024 16:04:24 -0400 Subject: [PATCH 13/60] add note about possible ij jk bug [skip ci] --- src/aspire/abinitio/commonline_sync3n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index b382ada6b0..880e63567a 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -756,6 +756,6 @@ def _signs_times_v(self, Rijs, vec): # Update vector entries new_vec[ij] += s_ij_jk * vec[jk] + s_ij_ik * vec[ik] new_vec[jk] += s_ij_jk * vec[ij] + s_ik_jk * vec[ik] - new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] + new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... return new_vec From 2e5e11fa26aad15e2218bb90815318f30503fd58 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 11 Apr 2024 15:00:00 -0400 Subject: [PATCH 14/60] hack in cupy kernel --- src/aspire/abinitio/commonline_sync3n.py | 298 ++++++++++++++++++----- 1 file changed, 234 insertions(+), 64 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 880e63567a..c0e31f39a2 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -1,6 +1,7 @@ import logging import numpy as np +import cupy as cp from numpy.linalg import norm from scipy.optimize import curve_fit @@ -689,73 +690,242 @@ def _J_sync_power_method(self, Rijs): return J_sync def _signs_times_v(self, Rijs, vec): - """ - Ported from _signs_times_v_mex.c - """ - # The code should be thread/parallel safe over `i`. - new_vec = np.zeros_like(vec) - c = np.empty((4), dtype=self.dtype) - desc = "Computing signs_times_v" - if self.J_weighting: - desc += " with J_weighting" - for i in trange(self.n_img, desc=desc): - for j in range( - i + 1, self.n_img - 1 - ): # check bound (taken from MATLAB mex) - ij = self._pairs_to_linear[i, j] - Rij = Rijs[ij] - for k in range(j + 1, self.n_img): - ik = self._pairs_to_linear[i, k] - jk = self._pairs_to_linear[j, k] - Rik = Rijs[ik] - Rjk = Rijs[jk] + # host/gpu dispatch + new_vec = _signs_times_v_host(self.n_img, Rijs, vec, self.J_weighting, _ALTS, _signs_confs) - # Compute conjugated rotats - Rij_J = J_conjugate(Rij) - Rik_J = J_conjugate(Rik) - Rjk_J = J_conjugate(Rjk) + return new_vec - # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) +def PAIR_IDX(N,I,J): + return ((2*N-I-1)*I//2+J-I-1) + +def _signs_times_v_host(n, Rijs, vec,J_weighting, _ALTS, _signs_confs): + """ + Ported from _signs_times_v_mex.c + + n: n_img + Rijs: nchoose2x3x3 array + vec: input array + new_vec: output array + J_weighting: bool + _ALTS= 2x4x3 const lut array + _signs_confs = 4x3 const lut array + """ + # The code should be thread/parallel safe over `i`. + + new_vec = np.zeros_like(vec) + + c = np.empty((4)) + desc = "Computing signs_times_v" + if J_weighting: + desc += " with J_weighting" + for i in trange(n, desc=desc): + for j in range( + i + 1, n - 1 + ): # check bound (taken from MATLAB mex) + #ij = self._pairs_to_linear[i, j] + ij = PAIR_IDX(n, i, j) + Rij = Rijs[ij] + for k in range(j + 1, n): + #ik = self._pairs_to_linear[i, k] + #jk = self._pairs_to_linear[j, k] + ik = PAIR_IDX(n, i, k) + jk = PAIR_IDX(n, j, k) + Rik = Rijs[ik] + Rjk = Rijs[jk] + + # Compute conjugated rotats + Rij_J = J_conjugate(Rij) + Rik_J = J_conjugate(Rik) + Rjk_J = J_conjugate(Rjk) + + # Compute R muls and norms + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + + # Find best match + best_i = np.argmin(c) + best_val = c[best_i] + + # MATLAB: scores_as_entries == 0 + s_ij_jk = _signs_confs[best_i][0] + s_ik_jk = _signs_confs[best_i][1] + s_ij_ik = _signs_confs[best_i][2] + + # Note there was a third J_weighting option (2) in MATLAB, + # but it was not exposed at top level. + if J_weighting: + # MATLAB: scores_as_entries == 1 + # For each triangle side, find the best alternative + alt_ij_jk = c[_ALTS[0][best_i][0]] + if c[_ALTS[1][best_i][0]] < alt_ij_jk: + alt_ij_jk = c[_ALTS[1][best_i][0]] - # Find best match - best_i = np.argmin(c) - best_val = c[best_i] + alt_ik_jk = c[_ALTS[0][best_i][1]] + if c[_ALTS[1][best_i][1]] < alt_ik_jk: + alt_ik_jk = c[_ALTS[1][best_i][1]] - # MATLAB: scores_as_entries == 0 - s_ij_jk = _signs_confs[best_i][0] - s_ik_jk = _signs_confs[best_i][1] - s_ij_ik = _signs_confs[best_i][2] - - # Note there was a third J_weighting option (2) in MATLAB, - # but it was not exposed at top level. - if self.J_weighting: - # MATLAB: scores_as_entries == 1 - # For each triangle side, find the best alternative - alt_ij_jk = c[_ALTS[0][best_i][0]] - if c[_ALTS[1][best_i][0]] < alt_ij_jk: - alt_ij_jk = c[_ALTS[1][best_i][0]] - - alt_ik_jk = c[_ALTS[0][best_i][1]] - if c[_ALTS[1][best_i][1]] < alt_ik_jk: - alt_ik_jk = c[_ALTS[1][best_i][1]] - - alt_ij_ik = c[_ALTS[0][best_i][2]] - if c[_ALTS[1][best_i][2]] < alt_ij_ik: - alt_ij_ik = c[_ALTS[1][best_i][2]] - - # Compute scores - s_ij_jk *= 1 - np.sqrt(best_val / alt_ij_jk) - s_ik_jk *= 1 - np.sqrt(best_val / alt_ik_jk) - s_ij_ik *= 1 - np.sqrt(best_val / alt_ij_ik) - - # Update vector entries - new_vec[ij] += s_ij_jk * vec[jk] + s_ij_ik * vec[ik] - new_vec[jk] += s_ij_jk * vec[ij] + s_ik_jk * vec[ik] - new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... + alt_ij_ik = c[_ALTS[0][best_i][2]] + if c[_ALTS[1][best_i][2]] < alt_ij_ik: + alt_ij_ik = c[_ALTS[1][best_i][2]] - return new_vec + # Compute scores + s_ij_jk *= 1 - np.sqrt(best_val / alt_ij_jk) + s_ik_jk *= 1 - np.sqrt(best_val / alt_ik_jk) + s_ij_ik *= 1 - np.sqrt(best_val / alt_ij_ik) + + # Update vector entries + new_vec[ij] += s_ij_jk * vec[jk] + s_ij_ik * vec[ik] + new_vec[jk] += s_ij_jk * vec[ij] + s_ik_jk * vec[ik] + new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... + + return new_vec + +def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS, _signs_confs): + """ + Ported from _signs_times_v_mex.c + + n: n_img + Rijs: nchoose2x3x3 array + vec: input array + new_vec: output array + #todo J_weighting: bool + #todo _ALTS= 2x4x3 const lut array + #todo _signs_confs = 4x3 const lut array + """ + # The code should be thread/parallel safe over `i`. + + + code = r''' + +/* from i,j indoces to the common index in the N-choose-2 sized array */ +#define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2+J-I-1) + +inline void mult_3x3(double *out, double *R1, double *R2) { +/* 3X3 matrices multiplication: out = R1*R2 */ + int i,j; + for (i=0; i<3; i++) { + for (j=0;j<3;j++) { + out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; + } + } +} + +inline void JRJ(double *R, double *A) { +/* multiple 3X3 matrix by J from both sizes: A = JRJ */ + A[0]=R[0]; + A[1]=R[1]; + A[2]=-R[2]; + A[3]=R[3]; + A[4]=R[4]; + A[5]=-R[5]; + A[6]=-R[6]; + A[7]=-R[7]; + A[8]=R[8]; +} + +inline double diff_norm_3x3(const double *R1, const double *R2) { +/* difference 2 matrices and return squared norm: ||R1-R2||^2 */ + int i; + double norm = 0; + for (i=0; i<9; i++) {norm += (R1[i]-R2[i])*(R1[i]-R2[i]);} + return norm; +} + + +extern "C" __global__ +void signs_times_v(int n, double* Rijs, const double* vec, double* new_vec) +{ + /* thread index (1d), represents "i" index */ + unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; + + /* no-op when out of bounds */ + if(i >= n) return; + + unsigned long n_pairs = n*(n-1)/2; + double c[4]={0,0,0,0}; + unsigned int ij, jk, ik; + unsigned int eig; + int best_i; + double best_val; + int s_ij_jk, s_ik_jk, s_ij_ik; + + double *Rij, *Rjk, *Rik; + double JRijJ[9], JRjkJ[9], JRikJ[9]; + double tmp[9]; + + /* le sigh */ + int signs_confs[4][3]; + signs_confs[2-1][1-1]=-1; signs_confs[2-1][3-1]=-1; + signs_confs[3-1][1-1]=-1; signs_confs[3-1][2-1]=-1; + signs_confs[4-1][2-1]=-1; signs_confs[4-1][3-1]=-1; + + for(int j=i+1; j< n - 1; j++){ + ij = PAIR_IDX(n, i, j); + for(int k=j+1; k< n; k++){ + ik = PAIR_IDX(n, i, k); + jk = PAIR_IDX(n, j, k); + + /* compute configurations matches scores */ + Rij = Rijs + 9*ij; + Rjk = Rijs + 9*jk; + Rik = Rijs + 9*ik; + + JRJ(Rij, JRijJ); + JRJ(Rjk, JRjkJ); + JRJ(Rik, JRikJ); + + mult_3x3(tmp,Rij,Rjk); + c[0] = diff_norm_3x3(tmp,Rik); + + mult_3x3(tmp,JRijJ,Rjk); + c[1] = diff_norm_3x3(tmp,Rik); + + mult_3x3(tmp,Rij,JRjkJ); + c[2] = diff_norm_3x3(tmp,Rik); + + mult_3x3(tmp,Rij,Rjk); + c[3] = diff_norm_3x3(tmp,JRikJ); + + /* find best match */ + best_i=0; best_val=c[0]; + if (c[1] Date: Thu, 11 Apr 2024 15:00:18 -0400 Subject: [PATCH 15/60] quick test file --- x.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 x.py diff --git a/x.py b/x.py new file mode 100644 index 0000000000..1c024e8de9 --- /dev/null +++ b/x.py @@ -0,0 +1,15 @@ +from aspire.abinitio.commonline_sync3n import _signs_times_v_cupy +import numpy as np +import cupy as cp + +n = 7 +n_pairs = n*(n-1)//2 +vec = np.ones(n_pairs, dtype=np.float64) +new_vec = np.zeros(n_pairs, dtype=np.float64) +#Rijs = np.random.randn(n_pairs*3*3).astype(dtype=np.float64) +Rijs = np.arange(n_pairs*3*3).astype(dtype=np.float64) + + +new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=None, _ALTS=None, _signs_confs=None) + +print(new_vec) From 1f988a8153e47e2ef88c1109a4183b03ce7d0984 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 11 Apr 2024 15:24:23 -0400 Subject: [PATCH 16/60] stashing --- src/aspire/abinitio/commonline_sync3n.py | 23 +++++++++++++++-------- x.py | 11 +++++++++-- 2 files changed, 24 insertions(+), 10 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index c0e31f39a2..b3eb882270 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -28,8 +28,6 @@ dtype=int, ) -_signs_confs = np.array([[1, 1, 1], [-1, 1, -1], [-1, -1, 1], [1, -1, -1]], dtype=int) - class CLSync3N(CLOrient3D, SyncVotingMixin): """ @@ -692,14 +690,17 @@ def _J_sync_power_method(self, Rijs): def _signs_times_v(self, Rijs, vec): # host/gpu dispatch - new_vec = _signs_times_v_host(self.n_img, Rijs, vec, self.J_weighting, _ALTS, _signs_confs) + #new_vec = _signs_times_v_host(self.n_img, Rijs, vec, self.J_weighting, _ALTS) + + assert self.J_weighting ==False, "not implemented yet" + new_vec = _signs_times_v_cupy(self.n_img, Rijs, vec, self.J_weighting, _ALTS) return new_vec def PAIR_IDX(N,I,J): return ((2*N-I-1)*I//2+J-I-1) -def _signs_times_v_host(n, Rijs, vec,J_weighting, _ALTS, _signs_confs): +def _signs_times_v_host(n, Rijs, vec,J_weighting, _ALTS): """ Ported from _signs_times_v_mex.c @@ -715,6 +716,7 @@ def _signs_times_v_host(n, Rijs, vec,J_weighting, _ALTS, _signs_confs): new_vec = np.zeros_like(vec) + _signs_confs = np.array([[1, 1, 1], [-1, 1, -1], [-1, -1, 1], [1, -1, -1]], dtype=int) c = np.empty((4)) desc = "Computing signs_times_v" if J_weighting: @@ -779,11 +781,12 @@ def _signs_times_v_host(n, Rijs, vec,J_weighting, _ALTS, _signs_confs): # Update vector entries new_vec[ij] += s_ij_jk * vec[jk] + s_ij_ik * vec[ik] new_vec[jk] += s_ij_jk * vec[ij] + s_ik_jk * vec[ik] - new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... + #new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... + new_vec[ik] += s_ij_ik * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... return new_vec -def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS, _signs_confs): +def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): """ Ported from _signs_times_v_mex.c @@ -840,9 +843,12 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS, _signs_confs): { /* thread index (1d), represents "i" index */ unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; + //unsigned int j = blockDim.y * blockIdx.y + threadIdx.y; /* no-op when out of bounds */ if(i >= n) return; + //if(j >= n-1) return; + //if(j < i+1) return; unsigned long n_pairs = n*(n-1)/2; double c[4]={0,0,0,0}; @@ -919,11 +925,12 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS, _signs_confs): new_vec_dev = cp.zeros_like(vec) # call the kernel - blkszx = 512 + blkszx = 128 nblkx = (n+blkszx-1)//blkszx - # blkszy = 512 + # blkszy = 2 # nblky = (n+blkszy-1)//blkszy + #signs_times_v((nblkx,nblky), (blkszx,blkszy), (n, Rijs_dev, vec_dev, new_vec_dev)) signs_times_v((nblkx,), (blkszx,), (n, Rijs_dev, vec_dev, new_vec_dev)) new_vec= new_vec_dev.get() diff --git a/x.py b/x.py index 1c024e8de9..e8a3a78a70 100644 --- a/x.py +++ b/x.py @@ -1,4 +1,5 @@ from aspire.abinitio.commonline_sync3n import _signs_times_v_cupy +from aspire.abinitio.commonline_sync3n import _signs_times_v_host import numpy as np import cupy as cp @@ -7,9 +8,15 @@ vec = np.ones(n_pairs, dtype=np.float64) new_vec = np.zeros(n_pairs, dtype=np.float64) #Rijs = np.random.randn(n_pairs*3*3).astype(dtype=np.float64) -Rijs = np.arange(n_pairs*3*3).astype(dtype=np.float64) +Rijs = np.arange(n_pairs*3*3).reshape(n_pairs,3,3).astype(dtype=np.float64) -new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=None, _ALTS=None, _signs_confs=None) +new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=None, _ALTS=None) +print("gpu\n") print(new_vec) + +new_vec_host = _signs_times_v_host(n, Rijs, vec, J_weighting=None, _ALTS=None) + +print("host\n",new_vec_host) + From 2712c08ec632339797e1da357330fc01f044db7e Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 11 Apr 2024 15:25:01 -0400 Subject: [PATCH 17/60] black --- src/aspire/abinitio/commonline_sync3n.py | 50 +++++++++++++----------- x.py | 15 ++++--- 2 files changed, 34 insertions(+), 31 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index b3eb882270..6769d54550 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -1,7 +1,7 @@ import logging -import numpy as np import cupy as cp +import numpy as np from numpy.linalg import norm from scipy.optimize import curve_fit @@ -690,17 +690,19 @@ def _J_sync_power_method(self, Rijs): def _signs_times_v(self, Rijs, vec): # host/gpu dispatch - #new_vec = _signs_times_v_host(self.n_img, Rijs, vec, self.J_weighting, _ALTS) + # new_vec = _signs_times_v_host(self.n_img, Rijs, vec, self.J_weighting, _ALTS) - assert self.J_weighting ==False, "not implemented yet" + assert self.J_weighting == False, "not implemented yet" new_vec = _signs_times_v_cupy(self.n_img, Rijs, vec, self.J_weighting, _ALTS) return new_vec -def PAIR_IDX(N,I,J): - return ((2*N-I-1)*I//2+J-I-1) - -def _signs_times_v_host(n, Rijs, vec,J_weighting, _ALTS): + +def PAIR_IDX(N, I, J): + return (2 * N - I - 1) * I // 2 + J - I - 1 + + +def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS): """ Ported from _signs_times_v_mex.c @@ -716,21 +718,21 @@ def _signs_times_v_host(n, Rijs, vec,J_weighting, _ALTS): new_vec = np.zeros_like(vec) - _signs_confs = np.array([[1, 1, 1], [-1, 1, -1], [-1, -1, 1], [1, -1, -1]], dtype=int) + _signs_confs = np.array( + [[1, 1, 1], [-1, 1, -1], [-1, -1, 1], [1, -1, -1]], dtype=int + ) c = np.empty((4)) desc = "Computing signs_times_v" if J_weighting: desc += " with J_weighting" for i in trange(n, desc=desc): - for j in range( - i + 1, n - 1 - ): # check bound (taken from MATLAB mex) - #ij = self._pairs_to_linear[i, j] + for j in range(i + 1, n - 1): # check bound (taken from MATLAB mex) + # ij = self._pairs_to_linear[i, j] ij = PAIR_IDX(n, i, j) Rij = Rijs[ij] for k in range(j + 1, n): - #ik = self._pairs_to_linear[i, k] - #jk = self._pairs_to_linear[j, k] + # ik = self._pairs_to_linear[i, k] + # jk = self._pairs_to_linear[j, k] ik = PAIR_IDX(n, i, k) jk = PAIR_IDX(n, j, k) Rik = Rijs[ik] @@ -781,11 +783,14 @@ def _signs_times_v_host(n, Rijs, vec,J_weighting, _ALTS): # Update vector entries new_vec[ij] += s_ij_jk * vec[jk] + s_ij_ik * vec[ik] new_vec[jk] += s_ij_jk * vec[ij] + s_ik_jk * vec[ik] - #new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... - new_vec[ik] += s_ij_ik * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... + # new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... + new_vec[ik] += ( + s_ij_ik * vec[ij] + s_ik_jk * vec[jk] + ) # jk/ik? was a bug?? worked better with s_ij_jk... return new_vec + def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): """ Ported from _signs_times_v_mex.c @@ -800,8 +805,7 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): """ # The code should be thread/parallel safe over `i`. - - code = r''' + code = r""" /* from i,j indoces to the common index in the N-choose-2 sized array */ #define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2+J-I-1) @@ -915,10 +919,10 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): } /* j */ return; }; -''' +""" module = cp.RawModule(code=code) - signs_times_v = module.get_function('signs_times_v') + signs_times_v = module.get_function("signs_times_v") Rijs_dev = cp.array(Rijs) vec_dev = cp.array(vec) @@ -926,13 +930,13 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): # call the kernel blkszx = 128 - nblkx = (n+blkszx-1)//blkszx + nblkx = (n + blkszx - 1) // blkszx # blkszy = 2 # nblky = (n+blkszy-1)//blkszy - #signs_times_v((nblkx,nblky), (blkszx,blkszy), (n, Rijs_dev, vec_dev, new_vec_dev)) + # signs_times_v((nblkx,nblky), (blkszx,blkszy), (n, Rijs_dev, vec_dev, new_vec_dev)) signs_times_v((nblkx,), (blkszx,), (n, Rijs_dev, vec_dev, new_vec_dev)) - new_vec= new_vec_dev.get() + new_vec = new_vec_dev.get() return new_vec diff --git a/x.py b/x.py index e8a3a78a70..7aab023007 100644 --- a/x.py +++ b/x.py @@ -1,14 +1,14 @@ -from aspire.abinitio.commonline_sync3n import _signs_times_v_cupy -from aspire.abinitio.commonline_sync3n import _signs_times_v_host -import numpy as np import cupy as cp +import numpy as np + +from aspire.abinitio.commonline_sync3n import _signs_times_v_cupy, _signs_times_v_host n = 7 -n_pairs = n*(n-1)//2 +n_pairs = n * (n - 1) // 2 vec = np.ones(n_pairs, dtype=np.float64) new_vec = np.zeros(n_pairs, dtype=np.float64) -#Rijs = np.random.randn(n_pairs*3*3).astype(dtype=np.float64) -Rijs = np.arange(n_pairs*3*3).reshape(n_pairs,3,3).astype(dtype=np.float64) +# Rijs = np.random.randn(n_pairs*3*3).astype(dtype=np.float64) +Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3).astype(dtype=np.float64) new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=None, _ALTS=None) @@ -18,5 +18,4 @@ new_vec_host = _signs_times_v_host(n, Rijs, vec, J_weighting=None, _ALTS=None) -print("host\n",new_vec_host) - +print("host\n", new_vec_host) From 1583cec1c2352e846eab0362b8378d80ea42cbbc Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Thu, 11 Apr 2024 16:36:58 -0400 Subject: [PATCH 18/60] debug --- src/aspire/abinitio/commonline_sync3n.py | 39 +++++++++++++----------- x.py | 12 ++++---- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 6769d54550..ac7a84238a 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -690,7 +690,7 @@ def _J_sync_power_method(self, Rijs): def _signs_times_v(self, Rijs, vec): # host/gpu dispatch - # new_vec = _signs_times_v_host(self.n_img, Rijs, vec, self.J_weighting, _ALTS) + # new_vec = _signs_times_v_host(self.n_img, Rijs, vec, self.J_weighting, _ALTS, self._pairs_to_linear) assert self.J_weighting == False, "not implemented yet" new_vec = _signs_times_v_cupy(self.n_img, Rijs, vec, self.J_weighting, _ALTS) @@ -702,7 +702,7 @@ def PAIR_IDX(N, I, J): return (2 * N - I - 1) * I // 2 + J - I - 1 -def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS): +def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): """ Ported from _signs_times_v_mex.c @@ -727,14 +727,14 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS): desc += " with J_weighting" for i in trange(n, desc=desc): for j in range(i + 1, n - 1): # check bound (taken from MATLAB mex) - # ij = self._pairs_to_linear[i, j] - ij = PAIR_IDX(n, i, j) + ij = _pairs_to_linear[i, j] + #ij = PAIR_IDX(n, i, j) Rij = Rijs[ij] for k in range(j + 1, n): - # ik = self._pairs_to_linear[i, k] - # jk = self._pairs_to_linear[j, k] - ik = PAIR_IDX(n, i, k) - jk = PAIR_IDX(n, j, k) + ik = _pairs_to_linear[i, k] + jk = _pairs_to_linear[j, k] + #ik = PAIR_IDX(n, i, k) + #jk = PAIR_IDX(n, j, k) Rik = Rijs[ik] Rjk = Rijs[jk] @@ -784,9 +784,8 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS): new_vec[ij] += s_ij_jk * vec[jk] + s_ij_ik * vec[ik] new_vec[jk] += s_ij_jk * vec[ij] + s_ik_jk * vec[ik] # new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... - new_vec[ik] += ( - s_ij_ik * vec[ij] + s_ik_jk * vec[jk] - ) # jk/ik? was a bug?? worked better with s_ij_jk... + # jk/ik? was a bug?? worked better with s_ij_jk... + new_vec[ik] += s_ij_ik * vec[ij] + s_ik_jk * vec[jk] return new_vec @@ -808,7 +807,8 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): code = r""" /* from i,j indoces to the common index in the N-choose-2 sized array */ -#define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2+J-I-1) +#define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2 + J-I-1) + inline void mult_3x3(double *out, double *R1, double *R2) { /* 3X3 matrices multiplication: out = R1*R2 */ @@ -854,10 +854,10 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): //if(j >= n-1) return; //if(j < i+1) return; - unsigned long n_pairs = n*(n-1)/2; - double c[4]={0,0,0,0}; - unsigned int ij, jk, ik; - unsigned int eig; + double c[4]; + int j, k; + for(k=0;k<4;k++){c[k]=0;} + unsigned long ij, jk, ik; int best_i; double best_val; int s_ij_jk, s_ik_jk, s_ij_ik; @@ -868,13 +868,15 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): /* le sigh */ int signs_confs[4][3]; + for(int a=0; a<4; a++) { for(k=0; k<3; k++) { signs_confs[a][k]=1; } } signs_confs[2-1][1-1]=-1; signs_confs[2-1][3-1]=-1; signs_confs[3-1][1-1]=-1; signs_confs[3-1][2-1]=-1; signs_confs[4-1][2-1]=-1; signs_confs[4-1][3-1]=-1; - for(int j=i+1; j< n - 1; j++){ + + for(j=i+1; j< n - 1; j++){ ij = PAIR_IDX(n, i, j); - for(int k=j+1; k< n; k++){ + for(k=j+1; k< n; k++){ ik = PAIR_IDX(n, i, k); jk = PAIR_IDX(n, j, k); @@ -917,6 +919,7 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): } /* k */ } /* j */ + return; }; """ diff --git a/x.py b/x.py index 7aab023007..ea15631e36 100644 --- a/x.py +++ b/x.py @@ -2,20 +2,20 @@ import numpy as np from aspire.abinitio.commonline_sync3n import _signs_times_v_cupy, _signs_times_v_host +from aspire.utils import all_pairs -n = 7 +n = 4 n_pairs = n * (n - 1) // 2 +_, _pairs_to_linear = all_pairs(n, return_map=True) + vec = np.ones(n_pairs, dtype=np.float64) -new_vec = np.zeros(n_pairs, dtype=np.float64) # Rijs = np.random.randn(n_pairs*3*3).astype(dtype=np.float64) Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3).astype(dtype=np.float64) new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=None, _ALTS=None) +print("gpu\n", new_vec) -print("gpu\n") -print(new_vec) - -new_vec_host = _signs_times_v_host(n, Rijs, vec, J_weighting=None, _ALTS=None) +new_vec_host = _signs_times_v_host(n, Rijs, vec, J_weighting=None, _ALTS=None, _pairs_to_linear=_pairs_to_linear) print("host\n", new_vec_host) From b44dea69ce0e3ce716f93f056a000863cb3e9c91 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 12 Apr 2024 11:00:11 -0400 Subject: [PATCH 19/60] stashing, cupy code mostly works --- src/aspire/abinitio/commonline_sync3n.py | 67 ++++++++++--------- x.py | 85 +++++++++++++++++++++--- 2 files changed, 111 insertions(+), 41 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index ac7a84238a..bf35c9c73e 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -312,10 +312,10 @@ def _triangle_scores_mex(self, Rijs, hist_intervals): Rjk_J = J_conjugate(Rjk) # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + c[0] = np.sum(((Rij @ Rjk.T) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk.T) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J.T) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk.T) - Rik_J) ** 2) # Find best match best_i = np.argmin(c) @@ -392,10 +392,10 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): Rjk_J = J_conjugate(Rjk) # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + c[0] = np.sum(((Rij @ Rjk.T) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk.T) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J.T) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk.T) - Rik_J) ** 2) # Find best match best_i = np.argmin(c) @@ -728,13 +728,13 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): for i in trange(n, desc=desc): for j in range(i + 1, n - 1): # check bound (taken from MATLAB mex) ij = _pairs_to_linear[i, j] - #ij = PAIR_IDX(n, i, j) + # ij = PAIR_IDX(n, i, j) Rij = Rijs[ij] for k in range(j + 1, n): ik = _pairs_to_linear[i, k] jk = _pairs_to_linear[j, k] - #ik = PAIR_IDX(n, i, k) - #jk = PAIR_IDX(n, j, k) + # ik = PAIR_IDX(n, i, k) + # jk = PAIR_IDX(n, j, k) Rik = Rijs[ik] Rjk = Rijs[jk] @@ -744,10 +744,10 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): Rjk_J = J_conjugate(Rjk) # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + c[0] = np.sum(((Rij @ Rjk.T) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk.T) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J.T) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk.T) - Rik_J) ** 2) # Find best match best_i = np.argmin(c) @@ -810,12 +810,16 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): #define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2 + J-I-1) +// DEBUG TRANS BUGS inline void mult_3x3(double *out, double *R1, double *R2) { -/* 3X3 matrices multiplication: out = R1*R2 */ +// /* 3X3 matrices multiplication: out = R1*R2 */ +// out.T = R1.T @ R2.T ? int i,j; for (i=0; i<3; i++) { for (j=0;j<3;j++) { - out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; +// out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; + out[3*i+j] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; + } } } @@ -847,15 +851,13 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): { /* thread index (1d), represents "i" index */ unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; - //unsigned int j = blockDim.y * blockIdx.y + threadIdx.y; /* no-op when out of bounds */ if(i >= n) return; - //if(j >= n-1) return; - //if(j < i+1) return; double c[4]; - int j, k; + int j; + int k; for(k=0;k<4;k++){c[k]=0;} unsigned long ij, jk, ik; int best_i; @@ -874,7 +876,7 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): signs_confs[4-1][2-1]=-1; signs_confs[4-1][3-1]=-1; - for(j=i+1; j< n - 1; j++){ + for(j=i+1; j< (n - 1); j++){ ij = PAIR_IDX(n, i, j); for(k=j+1; k< n; k++){ ik = PAIR_IDX(n, i, k); @@ -913,9 +915,10 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): s_ij_ik = signs_confs[best_i][2]; /* update multiplication */ - new_vec[ij] += s_ij_jk*vec[jk] + s_ij_ik*vec[ik]; - new_vec[jk] += s_ij_jk*vec[ij] + s_ik_jk*vec[ik]; - new_vec[ik] += s_ij_ik*vec[ij] + s_ik_jk*vec[jk]; /* ij jk bug? */ + new_vec[ij*n + i] += s_ij_jk*vec[jk] + s_ij_ik*vec[ik]; + new_vec[jk*n + i] += s_ij_jk*vec[ij] + s_ik_jk*vec[ik]; + new_vec[ik*n + i] += s_ij_ik*vec[ij] + s_ik_jk*vec[jk]; /* ij jk bug?, relating to mat mul T? */ + //new_vec[ik*n + i] += s_ij_jk*vec[ij] + s_ik_jk*vec[jk]; /* ij jk bug? */ } /* k */ } /* j */ @@ -929,17 +932,19 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): Rijs_dev = cp.array(Rijs) vec_dev = cp.array(vec) - new_vec_dev = cp.zeros_like(vec) + # 2d over i then accum to avoid race on i + new_vec_dev = cp.zeros((vec.shape[0], n)) # call the kernel - blkszx = 128 + blkszx = 512 nblkx = (n + blkszx - 1) // blkszx - # blkszy = 2 - # nblky = (n+blkszy-1)//blkszy - # signs_times_v((nblkx,nblky), (blkszx,blkszy), (n, Rijs_dev, vec_dev, new_vec_dev)) signs_times_v((nblkx,), (blkszx,), (n, Rijs_dev, vec_dev, new_vec_dev)) - new_vec = new_vec_dev.get() + # accumulate, can reuse the vec_dev array now. + cp.sum(new_vec_dev, axis=1, out=vec_dev) + + # dtoh + new_vec = vec_dev.get() return new_vec diff --git a/x.py b/x.py index ea15631e36..02d39fe255 100644 --- a/x.py +++ b/x.py @@ -1,21 +1,86 @@ +import pickle +import time +from collections import defaultdict + import cupy as cp +import matplotlib.pyplot as plt import numpy as np from aspire.abinitio.commonline_sync3n import _signs_times_v_cupy, _signs_times_v_host from aspire.utils import all_pairs -n = 4 -n_pairs = n * (n - 1) // 2 -_, _pairs_to_linear = all_pairs(n, return_map=True) -vec = np.ones(n_pairs, dtype=np.float64) -# Rijs = np.random.randn(n_pairs*3*3).astype(dtype=np.float64) -Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3).astype(dtype=np.float64) +def time_test(n): + n_pairs = n * (n - 1) // 2 + _, _pairs_to_linear = all_pairs(n, return_map=True) + + vec = np.ones(n_pairs, dtype=np.float64) + # Rijs = np.random.randn(n_pairs*3*3).astype(dtype=np.float64) + Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3).astype(dtype=np.float64) + + tic0 = time.perf_counter() + new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=None, _ALTS=None) + tic1 = time.perf_counter() + gpu_time = tic1 - tic0 + print("gpu\n", new_vec) + + tic2 = time.perf_counter() + new_vec_host = _signs_times_v_host( + n, Rijs, vec, J_weighting=None, _ALTS=None, _pairs_to_linear=_pairs_to_linear + ) + tic3 = time.perf_counter() + host_time = tic3 - tic2 + print("host\n", new_vec_host) + + print(f"\n\n\nSize:\t{n}") + print("Allclose? ", np.allclose(new_vec_host, new_vec)) + print(f"gpu_time: {gpu_time}") + print(f"host_time: {host_time}") + speedup = host_time / gpu_time + print(f"speedup: {speedup}") + + return host_time, gpu_time, speedup + + +def plotit(results): + N = np.array(list(results.keys())) + H = np.array([v["host"] for v in results.values()]) + G = np.array([v["gpu"] for v in results.values()]) + S = np.array([v["speedup"] for v in results.values()]) + + plt.plot(N, H, label="host python") + plt.plot(N, G, label="cuda") + plt.title("Walltimes (s)") + plt.legend() + plt.show() + plt.savefig("walltimes.png") + plt.clf() + + plt.plot(N, S) + plt.title("Speedup Ratio") + plt.show() + plt.savefig("speedups.png") + plt.clf() + + +def main(): + results = defaultdict(dict) + # too long...! for n in [4,16,64,100,128,200,256,512,1024,2048,3000, 4096, 10000]: + # for n in [4,16]: # test + for n in [4, 16, 64, 100, 128, 200, 512]: + h, g, s = time_test(n) + results[n]["host"] = h + results[n]["gpu"] = g + results[n]["speedup"] = s + # save in case we cancel + with open("saved_results.pkl", "wb") as f: + pickle.dump(results, f) -new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=None, _ALTS=None) -print("gpu\n", new_vec) + print() + print(results) + print() + plotit(results) -new_vec_host = _signs_times_v_host(n, Rijs, vec, J_weighting=None, _ALTS=None, _pairs_to_linear=_pairs_to_linear) -print("host\n", new_vec_host) +time_test(64) From 6cd68861f720bb58e7d842ae41914d89b7684cb4 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 12 Apr 2024 11:45:37 -0400 Subject: [PATCH 20/60] still debating that bug, but stashing here --- src/aspire/abinitio/commonline_sync3n.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index bf35c9c73e..68bc7dce9e 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -744,10 +744,10 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): Rjk_J = J_conjugate(Rjk) # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk.T) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk.T) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J.T) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk.T) - Rik_J) ** 2) + c[0] = np.sum(((Rjk @ Rij) - Rik) ** 2) + c[1] = np.sum(((Rjk @ Rij_J) - Rik) ** 2) + c[2] = np.sum(((Rjk_J @ Rij) - Rik) ** 2) + c[3] = np.sum(((Rjk @ Rij ) - Rik_J) ** 2) # Find best match best_i = np.argmin(c) @@ -817,8 +817,8 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): int i,j; for (i=0; i<3; i++) { for (j=0;j<3;j++) { -// out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; - out[3*i+j] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; + out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; +// out[3*i+j] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; } } From 37c2f34a43aca6839008b772ea41e6495ecb9478 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 16 Apr 2024 08:19:03 -0400 Subject: [PATCH 21/60] autoconf gpu [skip ci] --- src/aspire/abinitio/commonline_sync3n.py | 106 +++++++++++++---------- x.py | 1 + 2 files changed, 59 insertions(+), 48 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 68bc7dce9e..fb5722f4e5 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -1,6 +1,5 @@ import logging -import cupy as cp import numpy as np from numpy.linalg import norm from scipy.optimize import curve_fit @@ -87,6 +86,22 @@ def __init__( self.J_weighting = J_weighting self._D_null = 1e-13 + # Auto configure GPU + self._use_gpu = False + try: + import cupy as cp + + if cp.cuda.runtime.getDeviceCount() >= 1: + gpu_id = cp.cuda.runtime.getDevice() + logger.info( + f"cupy and GPU {gpu_id} found by cuda runtime; enabling cupy." + ) + self._use_gpu = True + else: + logger.info("GPU not found, defaulting to numpy.") + except ModuleNotFoundError: + logger.info("cupy not found, defaulting numpy.") + ########################################### # High level algorithm steps # ########################################### @@ -286,7 +301,6 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): def _triangle_scores_mex(self, Rijs, hist_intervals): # The following is adopted from Matlab triangle_scores_mex.c - # The code should be thread/parallel safe over `i` when results are gathered (via sum). # Initialize probability result arrays cum_scores = np.zeros(len(Rijs), dtype=self.dtype) @@ -369,7 +383,6 @@ def _triangle_scores_mex(self, Rijs, hist_intervals): def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): # The following is adopted from Matlab pairas_probabilities_mex.c `looper` - # The code should be thread/parallel safe over `i` when results are gathered (via sum). # Initialize probability result arrays ln_f_ind = np.zeros(len(Rijs), dtype=self.dtype) @@ -690,18 +703,19 @@ def _J_sync_power_method(self, Rijs): def _signs_times_v(self, Rijs, vec): # host/gpu dispatch - # new_vec = _signs_times_v_host(self.n_img, Rijs, vec, self.J_weighting, _ALTS, self._pairs_to_linear) - - assert self.J_weighting == False, "not implemented yet" - new_vec = _signs_times_v_cupy(self.n_img, Rijs, vec, self.J_weighting, _ALTS) + if self._use_gpu: + assert self.J_weighting is False, "not implemented yet" + new_vec = _signs_times_v_cupy( + self.n_img, Rijs, vec, self.J_weighting, _ALTS + ) + else: + new_vec = _signs_times_v_host( + self.n_img, Rijs, vec, self.J_weighting, _ALTS, self._pairs_to_linear + ) return new_vec -def PAIR_IDX(N, I, J): - return (2 * N - I - 1) * I // 2 + J - I - 1 - - def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): """ Ported from _signs_times_v_mex.c @@ -714,7 +728,6 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): _ALTS= 2x4x3 const lut array _signs_confs = 4x3 const lut array """ - # The code should be thread/parallel safe over `i`. new_vec = np.zeros_like(vec) @@ -728,13 +741,10 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): for i in trange(n, desc=desc): for j in range(i + 1, n - 1): # check bound (taken from MATLAB mex) ij = _pairs_to_linear[i, j] - # ij = PAIR_IDX(n, i, j) Rij = Rijs[ij] for k in range(j + 1, n): ik = _pairs_to_linear[i, k] jk = _pairs_to_linear[j, k] - # ik = PAIR_IDX(n, i, k) - # jk = PAIR_IDX(n, j, k) Rik = Rijs[ik] Rjk = Rijs[jk] @@ -747,7 +757,7 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): c[0] = np.sum(((Rjk @ Rij) - Rik) ** 2) c[1] = np.sum(((Rjk @ Rij_J) - Rik) ** 2) c[2] = np.sum(((Rjk_J @ Rij) - Rik) ** 2) - c[3] = np.sum(((Rjk @ Rij ) - Rik_J) ** 2) + c[3] = np.sum(((Rjk @ Rij) - Rik_J) ** 2) # Find best match best_i = np.argmin(c) @@ -802,7 +812,7 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): #todo _ALTS= 2x4x3 const lut array #todo _signs_confs = 4x3 const lut array """ - # The code should be thread/parallel safe over `i`. + import cupy as cp code = r""" @@ -814,35 +824,35 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): inline void mult_3x3(double *out, double *R1, double *R2) { // /* 3X3 matrices multiplication: out = R1*R2 */ // out.T = R1.T @ R2.T ? - int i,j; - for (i=0; i<3; i++) { - for (j=0;j<3;j++) { - out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; -// out[3*i+j] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; - - } - } + int i,j; + for (i=0; i<3; i++) { + for (j=0;j<3;j++) { + out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; +// out[3*i+j] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; + + } + } } inline void JRJ(double *R, double *A) { /* multiple 3X3 matrix by J from both sizes: A = JRJ */ - A[0]=R[0]; - A[1]=R[1]; - A[2]=-R[2]; - A[3]=R[3]; - A[4]=R[4]; - A[5]=-R[5]; - A[6]=-R[6]; - A[7]=-R[7]; - A[8]=R[8]; + A[0]=R[0]; + A[1]=R[1]; + A[2]=-R[2]; + A[3]=R[3]; + A[4]=R[4]; + A[5]=-R[5]; + A[6]=-R[6]; + A[7]=-R[7]; + A[8]=R[8]; } inline double diff_norm_3x3(const double *R1, const double *R2) { /* difference 2 matrices and return squared norm: ||R1-R2||^2 */ - int i; - double norm = 0; - for (i=0; i<9; i++) {norm += (R1[i]-R2[i])*(R1[i]-R2[i]);} - return norm; + int i; + double norm = 0; + for (i=0; i<9; i++) {norm += (R1[i]-R2[i])*(R1[i]-R2[i]);} + return norm; } @@ -874,7 +884,7 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): signs_confs[2-1][1-1]=-1; signs_confs[2-1][3-1]=-1; signs_confs[3-1][1-1]=-1; signs_confs[3-1][2-1]=-1; signs_confs[4-1][2-1]=-1; signs_confs[4-1][3-1]=-1; - + for(j=i+1; j< (n - 1); j++){ ij = PAIR_IDX(n, i, j); @@ -883,32 +893,32 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): jk = PAIR_IDX(n, j, k); /* compute configurations matches scores */ - Rij = Rijs + 9*ij; - Rjk = Rijs + 9*jk; + Rij = Rijs + 9*ij; + Rjk = Rijs + 9*jk; Rik = Rijs + 9*ik; - + JRJ(Rij, JRijJ); JRJ(Rjk, JRjkJ); JRJ(Rik, JRikJ); - + mult_3x3(tmp,Rij,Rjk); c[0] = diff_norm_3x3(tmp,Rik); - + mult_3x3(tmp,JRijJ,Rjk); c[1] = diff_norm_3x3(tmp,Rik); - + mult_3x3(tmp,Rij,JRjkJ); c[2] = diff_norm_3x3(tmp,Rik); - + mult_3x3(tmp,Rij,Rjk); c[3] = diff_norm_3x3(tmp,JRikJ); - + /* find best match */ best_i=0; best_val=c[0]; if (c[1] Date: Tue, 23 Apr 2024 14:20:18 -0400 Subject: [PATCH 22/60] rm debug comments after checking matmul --- src/aspire/abinitio/commonline_sync3n.py | 23 ++++++++--------------- x.py | 2 +- 2 files changed, 9 insertions(+), 16 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index fb5722f4e5..ee39d732cc 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -793,8 +793,6 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): # Update vector entries new_vec[ij] += s_ij_jk * vec[jk] + s_ij_ik * vec[ik] new_vec[jk] += s_ij_jk * vec[ij] + s_ik_jk * vec[ik] - # new_vec[ik] += s_ij_jk * vec[ij] + s_ik_jk * vec[jk] # jk/ik? was a bug?? worked better with s_ij_jk... - # jk/ik? was a bug?? worked better with s_ij_jk... new_vec[ik] += s_ij_ik * vec[ij] + s_ik_jk * vec[jk] return new_vec @@ -820,18 +818,14 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): #define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2 + J-I-1) -// DEBUG TRANS BUGS inline void mult_3x3(double *out, double *R1, double *R2) { -// /* 3X3 matrices multiplication: out = R1*R2 */ -// out.T = R1.T @ R2.T ? - int i,j; - for (i=0; i<3; i++) { - for (j=0;j<3;j++) { - out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; -// out[3*i+j] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; - - } - } + /* 3X3 matrices multiplication: out = R1*R2 */ + int i,j; + for (i=0; i<3; i++) { + for (j=0;j<3;j++) { + out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; + } + } } inline void JRJ(double *R, double *A) { @@ -927,8 +921,7 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): /* update multiplication */ new_vec[ij*n + i] += s_ij_jk*vec[jk] + s_ij_ik*vec[ik]; new_vec[jk*n + i] += s_ij_jk*vec[ij] + s_ik_jk*vec[ik]; - new_vec[ik*n + i] += s_ij_ik*vec[ij] + s_ik_jk*vec[jk]; /* ij jk bug?, relating to mat mul T? */ - //new_vec[ik*n + i] += s_ij_jk*vec[ij] + s_ik_jk*vec[jk]; /* ij jk bug? */ + new_vec[ik*n + i] += s_ij_ik*vec[ij] + s_ik_jk*vec[jk]; } /* k */ } /* j */ diff --git a/x.py b/x.py index 9bdbaf7182..a82da49ae7 100644 --- a/x.py +++ b/x.py @@ -84,4 +84,4 @@ def main(): plotit(results) -time_test(64) +time_test(128) From ea391fb0e2c13d1d59b74b8a58b5f46d4492d5eb Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 23 Apr 2024 15:57:19 -0400 Subject: [PATCH 23/60] fixup Rijk_lmnop muls [skip ci] --- src/aspire/abinitio/commonline_sync3n.py | 88 +++++++++++++++++------- x.py | 6 +- 2 files changed, 65 insertions(+), 29 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index ee39d732cc..277b87bfd5 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -705,9 +705,7 @@ def _signs_times_v(self, Rijs, vec): # host/gpu dispatch if self._use_gpu: assert self.J_weighting is False, "not implemented yet" - new_vec = _signs_times_v_cupy( - self.n_img, Rijs, vec, self.J_weighting, _ALTS - ) + new_vec = _signs_times_v_cupy(self.n_img, Rijs, vec, self.J_weighting) else: new_vec = _signs_times_v_host( self.n_img, Rijs, vec, self.J_weighting, _ALTS, self._pairs_to_linear @@ -726,7 +724,6 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): new_vec: output array J_weighting: bool _ALTS= 2x4x3 const lut array - _signs_confs = 4x3 const lut array """ new_vec = np.zeros_like(vec) @@ -734,6 +731,7 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): _signs_confs = np.array( [[1, 1, 1], [-1, 1, -1], [-1, -1, 1], [1, -1, -1]], dtype=int ) + c = np.empty((4)) desc = "Computing signs_times_v" if J_weighting: @@ -754,10 +752,10 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): Rjk_J = J_conjugate(Rjk) # Compute R muls and norms - c[0] = np.sum(((Rjk @ Rij) - Rik) ** 2) - c[1] = np.sum(((Rjk @ Rij_J) - Rik) ** 2) - c[2] = np.sum(((Rjk_J @ Rij) - Rik) ** 2) - c[3] = np.sum(((Rjk @ Rij) - Rik_J) ** 2) + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) # Find best match best_i = np.argmin(c) @@ -798,7 +796,7 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): return new_vec -def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): +def _signs_times_v_cupy(n, Rijs, vec, J_weighting): """ Ported from _signs_times_v_mex.c @@ -806,9 +804,7 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): Rijs: nchoose2x3x3 array vec: input array new_vec: output array - #todo J_weighting: bool - #todo _ALTS= 2x4x3 const lut array - #todo _signs_confs = 4x3 const lut array + J_weighting: bool """ import cupy as cp @@ -851,7 +847,7 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): extern "C" __global__ -void signs_times_v(int n, double* Rijs, const double* vec, double* new_vec) +void signs_times_v(int n, double* Rijs, const double* vec, double* new_vec, bool J_weighting) { /* thread index (1d), represents "i" index */ unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; @@ -860,24 +856,40 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): if(i >= n) return; double c[4]; - int j; - int k; + unsigned int j; + unsigned int k; for(k=0;k<4;k++){c[k]=0;} unsigned long ij, jk, ik; int best_i; double best_val; - int s_ij_jk, s_ik_jk, s_ij_ik; + double s_ij_jk, s_ik_jk, s_ij_ik; + double alt_ij_jk, alt_ij_ik, alt_ik_jk; double *Rij, *Rjk, *Rik; double JRijJ[9], JRjkJ[9], JRikJ[9]; double tmp[9]; - /* le sigh */ int signs_confs[4][3]; for(int a=0; a<4; a++) { for(k=0; k<3; k++) { signs_confs[a][k]=1; } } - signs_confs[2-1][1-1]=-1; signs_confs[2-1][3-1]=-1; - signs_confs[3-1][1-1]=-1; signs_confs[3-1][2-1]=-1; - signs_confs[4-1][2-1]=-1; signs_confs[4-1][3-1]=-1; + signs_confs[1][0]=-1; signs_confs[1][2]=-1; + signs_confs[2][0]=-1; signs_confs[2][1]=-1; + signs_confs[3][1]=-1; signs_confs[3][2]=-1; + + /* initialize alternatives */ + /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. + * this comparison is done for every pair in the triplete independently. to make sure that the + * alternative is indeed different in relation to the pair, we document the differences between + * the configurations in advance: + * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from + * best_conf in relation to pair */ + + int ALTS[2][4][3]; + ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; + ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; + ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; + ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; + ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; + ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; for(j=i+1; j< (n - 1); j++){ @@ -895,16 +907,16 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): JRJ(Rjk, JRjkJ); JRJ(Rik, JRikJ); - mult_3x3(tmp,Rij,Rjk); + mult_3x3(tmp,Rjk,Rij); c[0] = diff_norm_3x3(tmp,Rik); - mult_3x3(tmp,JRijJ,Rjk); + mult_3x3(tmp,Rjk,JRijJ); c[1] = diff_norm_3x3(tmp,Rik); - mult_3x3(tmp,Rij,JRjkJ); + mult_3x3(tmp,JRjkJ,Rij); c[2] = diff_norm_3x3(tmp,Rik); - mult_3x3(tmp,Rij,Rjk); + mult_3x3(tmp,Rjk,Rij); c[3] = diff_norm_3x3(tmp,JRikJ); /* find best match */ @@ -918,6 +930,30 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): s_ik_jk = signs_confs[best_i][1]; s_ij_ik = signs_confs[best_i][2]; + /* J weighting */ + if(J_weighting){ + /* for each triangle side, find the best alternative */ + alt_ij_jk = c[ALTS[0][best_i][0]]; + if (c[ALTS[1][best_i][0]] < alt_ij_jk){ + alt_ij_jk = c[ALTS[1][best_i][0]]; + } + + alt_ik_jk = c[ALTS[0][best_i][1]]; + if (c[ALTS[1][best_i][1]] < alt_ik_jk){ + alt_ik_jk = c[ALTS[1][best_i][1]]; + } + alt_ij_ik = c[ALTS[0][best_i][2]]; + if (c[ALTS[1][best_i][2]] < alt_ij_ik){ + alt_ij_ik = c[ALTS[1][best_i][2]]; + } + + /* Update scores */ + s_ij_jk *= 1 - sqrt(best_val / alt_ij_jk); + s_ik_jk *= 1 - sqrt(best_val / alt_ik_jk); + s_ij_ik *= 1 - sqrt(best_val / alt_ij_ik); + } + + /* update multiplication */ new_vec[ij*n + i] += s_ij_jk*vec[jk] + s_ij_ik*vec[ik]; new_vec[jk*n + i] += s_ij_jk*vec[ij] + s_ik_jk*vec[ik]; @@ -941,8 +977,8 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting, _ALTS): # call the kernel blkszx = 512 nblkx = (n + blkszx - 1) // blkszx - - signs_times_v((nblkx,), (blkszx,), (n, Rijs_dev, vec_dev, new_vec_dev)) + assert J_weighting == False + signs_times_v((nblkx,), (blkszx,), (n, Rijs_dev, vec_dev, new_vec_dev, J_weighting)) # accumulate, can reuse the vec_dev array now. cp.sum(new_vec_dev, axis=1, out=vec_dev) diff --git a/x.py b/x.py index a82da49ae7..ebc5e6d768 100644 --- a/x.py +++ b/x.py @@ -19,14 +19,14 @@ def time_test(n): Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3).astype(dtype=np.float64) tic0 = time.perf_counter() - new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=None, _ALTS=None) + new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=False) tic1 = time.perf_counter() gpu_time = tic1 - tic0 print("gpu\n", new_vec) tic2 = time.perf_counter() new_vec_host = _signs_times_v_host( - n, Rijs, vec, J_weighting=None, _ALTS=None, _pairs_to_linear=_pairs_to_linear + n, Rijs, vec, J_weighting=False, _ALTS=None, _pairs_to_linear=_pairs_to_linear ) tic3 = time.perf_counter() host_time = tic3 - tic2 @@ -84,4 +84,4 @@ def main(): plotit(results) -time_test(128) +time_test(64) From 46c72dbde8ef2f984afb980283679e52ac6e1bce Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 24 Apr 2024 09:58:57 -0400 Subject: [PATCH 24/60] re-implement matmul [skip ci] --- src/aspire/abinitio/commonline_sync3n.py | 33 ++++++++++++++---------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 277b87bfd5..510d789ac8 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -815,11 +815,18 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting): inline void mult_3x3(double *out, double *R1, double *R2) { - /* 3X3 matrices multiplication: out = R1*R2 */ - int i,j; - for (i=0; i<3; i++) { - for (j=0;j<3;j++) { - out[3*j+i] = R1[3*0+i]*R2[3*j+0] + R1[3*1+i]*R2[3*j+1] + R1[3*2+i]*R2[3*j+2]; + /* 3X3 matrices multiplication: out = R1*R2 + * Note, this differs from the MATLAB mult_3x3. + */ + + int i,j,k; + + for(i=0; i<3; i++){ + for(j=0; j<3; j++){ + out[i*3 + j] = 0; + for (k=0; k<3; k++){ + out[i*3 + j] += R1[i*3+k] * R2[k*3+j]; + } } } } @@ -907,17 +914,17 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting): JRJ(Rjk, JRjkJ); JRJ(Rik, JRikJ); - mult_3x3(tmp,Rjk,Rij); - c[0] = diff_norm_3x3(tmp,Rik); + mult_3x3(tmp, Rij, Rjk); + c[0] = diff_norm_3x3(tmp, Rik); - mult_3x3(tmp,Rjk,JRijJ); - c[1] = diff_norm_3x3(tmp,Rik); + mult_3x3(tmp, JRijJ, Rjk); + c[1] = diff_norm_3x3(tmp, Rik); - mult_3x3(tmp,JRjkJ,Rij); - c[2] = diff_norm_3x3(tmp,Rik); + mult_3x3(tmp, Rij, JRjkJ); + c[2] = diff_norm_3x3(tmp, Rik); - mult_3x3(tmp,Rjk,Rij); - c[3] = diff_norm_3x3(tmp,JRikJ); + mult_3x3(tmp, Rij, Rjk); + c[3] = diff_norm_3x3(tmp, JRikJ); /* find best match */ best_i=0; best_val=c[0]; From e7e4e293f1683a34ec3416811f9dcfa691e187b5 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 24 Apr 2024 16:43:38 -0400 Subject: [PATCH 25/60] add pairs prob kernel [skip ci] --- src/aspire/abinitio/commonline_sync3n.py | 405 ++++++++++++++++------- 1 file changed, 294 insertions(+), 111 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 510d789ac8..1f64cf7e12 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -299,7 +299,7 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): return W - def _triangle_scores_mex(self, Rijs, hist_intervals): + def _triangle_scores_inner(self, Rijs, hist_intervals): # The following is adopted from Matlab triangle_scores_mex.c # Initialize probability result arrays @@ -326,10 +326,10 @@ def _triangle_scores_mex(self, Rijs, hist_intervals): Rjk_J = J_conjugate(Rjk) # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk.T) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk.T) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J.T) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk.T) - Rik_J) ** 2) + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) # Find best match best_i = np.argmin(c) @@ -382,95 +382,15 @@ def _triangle_scores_mex(self, Rijs, hist_intervals): return cum_scores, scores_hist def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): - # The following is adopted from Matlab pairas_probabilities_mex.c `looper` - - # Initialize probability result arrays - ln_f_ind = np.zeros(len(Rijs), dtype=self.dtype) - ln_f_arb = np.zeros(len(Rijs), dtype=self.dtype) - - c = np.empty((4), dtype=self.dtype) - for i in trange(self.n_img, desc="Computing pair probabilities"): - for j in range(i + 1, self.n_img - 1): - ij = self._pairs_to_linear[i, j] - Rij = Rijs[ij] - for k in range(j + 1, self.n_img): - ik = self._pairs_to_linear[i, k] - jk = self._pairs_to_linear[j, k] - Rik = Rijs[ik] - Rjk = Rijs[jk] - - # Compute conjugated rotats - Rij_J = J_conjugate(Rij) - Rik_J = J_conjugate(Rik) - Rjk_J = J_conjugate(Rjk) - - # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk.T) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk.T) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J.T) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk.T) - Rik_J) ** 2) - - # Find best match - best_i = np.argmin(c) - best_val = c[best_i] - - # For each triangle side, find the best alternative - alt_ij_jk = c[_ALTS[0][best_i][0]] - if c[_ALTS[1][best_i][0]] < alt_ij_jk: - alt_ij_jk = c[_ALTS[1][best_i][0]] - alt_ik_jk = c[_ALTS[0][best_i][1]] - if c[_ALTS[1][best_i][1]] < alt_ik_jk: - alt_ik_jk = c[_ALTS[1][best_i][1]] - alt_ij_ik = c[_ALTS[0][best_i][2]] - if c[_ALTS[1][best_i][2]] < alt_ij_ik: - alt_ij_ik = c[_ALTS[1][best_i][2]] - - # Compute scores - s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) - s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) - s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) - - # Update probabilities - # # Probability of pair ij having score given indicicative common line - # P2, B, b, x0, A, a - f_ij_jk = np.log( - P2 - * ( - B - * np.power(1 - s_ij_jk, b) - * np.exp(-b / (1 - x0) * (1 - s_ij_jk)) - ) - + (1 - P2) * A * np.power((1 - s_ij_jk), a) - ) - f_ik_jk = np.log( - P2 - * ( - B - * np.power(1 - s_ik_jk, b) - * np.exp(-b / (1 - x0) * (1 - s_ik_jk)) - ) - + (1 - P2) * A * np.power((1 - s_ik_jk), a) - ) - f_ij_ik = np.log( - P2 - * ( - B - * np.power(1 - s_ij_ik, b) - * np.exp(-b / (1 - x0) * (1 - s_ij_ik)) - ) - + (1 - P2) * A * np.power((1 - s_ij_ik), a) - ) - ln_f_ind[ij] += f_ij_jk + f_ij_ik - ln_f_ind[jk] += f_ij_jk + f_ik_jk - ln_f_ind[ik] += f_ik_jk + f_ij_ik - - # # Probability of pair ij having score given arbitrary common line - f_ij_jk = np.log(A * np.power((1 - s_ij_jk), a)) - f_ik_jk = np.log(A * np.power((1 - s_ik_jk), a)) - f_ij_ik = np.log(A * np.power((1 - s_ij_ik), a)) - ln_f_arb[ij] += f_ij_jk + f_ij_ik - ln_f_arb[jk] += f_ij_jk + f_ik_jk - ln_f_arb[ik] += f_ik_jk + f_ij_ik + # dtype is critical for passing into C code... + params = np.arary([P2, A, a, B, b, x0], dtype=np.float64) + # host/gpu dispatch + if self._use_gpu: + ln_f_ind, ln_f_arb = _pairs_probabilities_cupy(self.n_img, Rijs, *params) + else: + ln_f_ind, ln_f_arb = _pairs_probabilities_host( + self.n_img, Rijs, *params, _ALTS, self._pairs_to_linear + ) return ln_f_ind, ln_f_arb @@ -507,7 +427,7 @@ def _triangle_scores( cum_scores = None # XXX Why do we even need cum_scores? if scores_hist is None: - cum_scores, scores_hist = self._triangle_scores_mex(Rijs, hist_intervals) + cum_scores, scores_hist = self._triangle_scores_inner(Rijs, hist_intervals) # Normalize cumulated scores cum_scores /= len(Rijs) @@ -704,7 +624,6 @@ def _signs_times_v(self, Rijs, vec): # host/gpu dispatch if self._use_gpu: - assert self.J_weighting is False, "not implemented yet" new_vec = _signs_times_v_cupy(self.n_img, Rijs, vec, self.J_weighting) else: new_vec = _signs_times_v_host( @@ -796,19 +715,8 @@ def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): return new_vec -def _signs_times_v_cupy(n, Rijs, vec, J_weighting): - """ - Ported from _signs_times_v_mex.c - - n: n_img - Rijs: nchoose2x3x3 array - vec: input array - new_vec: output array - J_weighting: bool - """ - import cupy as cp - - code = r""" +def _init_cupy_module(): + module_code = r""" /* from i,j indoces to the common index in the N-choose-2 sized array */ #define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2 + J-I-1) @@ -971,9 +879,157 @@ def _signs_times_v_cupy(n, Rijs, vec, J_weighting): return; }; + +extern "C" __global__ +void pairs_probabilities(int n, double* Rijs, double P2, double A, double a, double B, double b, double x0, double* ln_f_ind, double* ln_f_arb) +{ + /* thread index (1d), represents "i" index */ + unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; + + /* no-op when out of bounds */ + if(i >= n) return; + + double c[4]; + unsigned int j; + unsigned int k; + for(k=0;k<4;k++){c[k]=0;} + unsigned long ij, jk, ik; + int best_i; + double best_val; + double s_ij_jk, s_ik_jk, s_ij_ik; + double alt_ij_jk, alt_ij_ik, alt_ik_jk; + double f_ij_jk, f_ik_jk, f_ij_ik; + + + double *Rij, *Rjk, *Rik; + double JRijJ[9], JRjkJ[9], JRikJ[9]; + double tmp[9]; + + int signs_confs[4][3]; + for(int a=0; a<4; a++) { for(k=0; k<3; k++) { signs_confs[a][k]=1; } } + signs_confs[1][0]=-1; signs_confs[1][2]=-1; + signs_confs[2][0]=-1; signs_confs[2][1]=-1; + signs_confs[3][1]=-1; signs_confs[3][2]=-1; + + /* initialize alternatives */ + /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. + * this comparison is done for every pair in the triplete independently. to make sure that the + * alternative is indeed different in relation to the pair, we document the differences between + * the configurations in advance: + * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from + * best_conf in relation to pair */ + + int ALTS[2][4][3]; + ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; + ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; + ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; + ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; + ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; + ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; + + + for(j=i+1; j< (n - 1); j++){ + ij = PAIR_IDX(n, i, j); + for(k=j+1; k< n; k++){ + ik = PAIR_IDX(n, i, k); + jk = PAIR_IDX(n, j, k); + + /* compute configurations matches scores */ + Rij = Rijs + 9*ij; + Rjk = Rijs + 9*jk; + Rik = Rijs + 9*ik; + + JRJ(Rij, JRijJ); + JRJ(Rjk, JRjkJ); + JRJ(Rik, JRikJ); + + mult_3x3(tmp, Rij, Rjk); + c[0] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, JRijJ, Rjk); + c[1] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, JRjkJ); + c[2] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, Rjk); + c[3] = diff_norm_3x3(tmp, JRikJ); + + /* find best match */ + best_i=0; best_val=c[0]; + if (c[1] Date: Fri, 26 Apr 2024 08:59:29 -0400 Subject: [PATCH 26/60] add triangle scores cupy kernel --- src/aspire/abinitio/commonline_sync3n.py | 358 +++++++++++++++++------ 1 file changed, 271 insertions(+), 87 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 1f64cf7e12..948accdd7f 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -300,84 +300,16 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): return W def _triangle_scores_inner(self, Rijs, hist_intervals): - # The following is adopted from Matlab triangle_scores_mex.c - # Initialize probability result arrays - cum_scores = np.zeros(len(Rijs), dtype=self.dtype) - scores_hist = np.zeros(hist_intervals, dtype=self.dtype) - h = 1 / hist_intervals - - c = np.empty((4), dtype=self.dtype) - for i in trange(self.n_img, desc="Computing triangle scores"): - for j in range( - i + 1, self.n_img - 1 - ): # check bound (taken from MATLAB mex) - ij = self._pairs_to_linear[i, j] - Rij = Rijs[ij] - for k in range(j + 1, self.n_img): - ik = self._pairs_to_linear[i, k] - jk = self._pairs_to_linear[j, k] - Rik = Rijs[ik] - Rjk = Rijs[jk] - - # Compute conjugated rotats - Rij_J = J_conjugate(Rij) - Rik_J = J_conjugate(Rik) - Rjk_J = J_conjugate(Rjk) - - # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) - - # Find best match - best_i = np.argmin(c) - best_val = c[best_i] - - # For each triangle side, find the best alternative - alt_ij_jk = c[_ALTS[0][best_i][0]] - if c[_ALTS[1][best_i][0]] < alt_ij_jk: - alt_ij_jk = c[_ALTS[1][best_i][0]] - - alt_ik_jk = c[_ALTS[0][best_i][1]] - if c[_ALTS[1][best_i][1]] < alt_ik_jk: - alt_ik_jk = c[_ALTS[1][best_i][1]] - - alt_ij_ik = c[_ALTS[0][best_i][2]] - if c[_ALTS[1][best_i][2]] < alt_ij_ik: - alt_ij_ik = c[_ALTS[1][best_i][2]] - - # Compute scores - s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) - s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) - s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) - - # Update cumulated scores - cum_scores[ij] += s_ij_jk + s_ij_ik - cum_scores[jk] += s_ij_jk + s_ik_jk - cum_scores[ik] += s_ik_jk + s_ij_ik - - # Update histogram - threshold = 0 - for _l1 in range(hist_intervals): - threshold += h - if s_ij_jk < threshold: - break - - for _l2 in range(hist_intervals): - threshold += h - if s_ik_jk < threshold: - break - - for _l3 in range(hist_intervals): - threshold += h - if s_ij_ik < threshold: - break - - scores_hist[_l1] += 1 - scores_hist[_l2] += 1 - scores_hist[_l3] += 1 + # host/gpu dispatch + if self._use_gpu: + cum_scores, scores_hist = _triangle_scores_inner_cupy( + self.n_img, Rijs, hist_intervals + ) + else: + cum_scores, scores_hist = _triangle_scores_inner_host( + self.n_img, Rijs, hist_intervals, _ALTS, self._pairs_to_linear + ) return cum_scores, scores_hist @@ -980,24 +912,24 @@ def _init_cupy_module(): s_ij_jk = 1 - sqrt(best_val / alt_ij_jk); s_ik_jk = 1 - sqrt(best_val / alt_ik_jk); s_ij_ik = 1 - sqrt(best_val / alt_ij_ik); - + /* the probability of a pair ij to have the observed triangles scores, given it has an indicative common line */ f_ij_jk = log( P2*(B*pow(1-s_ij_jk,b)*exp(-b/(1-x0)*(1-s_ij_jk))) + (1-P2)*A*pow((1-s_ij_jk),a) ); f_ik_jk = log( P2*(B*pow(1-s_ik_jk,b)*exp(-b/(1-x0)*(1-s_ik_jk))) + (1-P2)*A*pow((1-s_ik_jk),a) ); f_ij_ik = log( P2*(B*pow(1-s_ij_ik,b)*exp(-b/(1-x0)*(1-s_ij_ik))) + (1-P2)*A*pow((1-s_ij_ik),a) ); - ln_f_ind[ij*n +i] += f_ij_jk + f_ij_ik; - ln_f_ind[jk*n +i] += f_ij_jk + f_ik_jk; - ln_f_ind[ik*n +i] += f_ik_jk + f_ij_ik; - + ln_f_ind[ij*n +i] += f_ij_jk + f_ij_ik; + ln_f_ind[jk*n +i] += f_ij_jk + f_ik_jk; + ln_f_ind[ik*n +i] += f_ik_jk + f_ij_ik; + /* the probability of a pair ij to have the observed triangles scores, given it has an arbitrary common line */ f_ij_jk = log( A*pow((1-s_ij_jk),a) ); - f_ik_jk = log( A*pow((1-s_ik_jk),a) ); - f_ij_ik = log( A*pow((1-s_ij_ik),a) ); - ln_f_arb[ij*n +i] += f_ij_jk + f_ij_ik; - ln_f_arb[jk*n +i] += f_ij_jk + f_ik_jk; + f_ik_jk = log( A*pow((1-s_ik_jk),a) ); + f_ij_ik = log( A*pow((1-s_ij_ik),a) ); + ln_f_arb[ij*n +i] += f_ij_jk + f_ij_ik; + ln_f_arb[jk*n +i] += f_ij_jk + f_ik_jk; ln_f_arb[ik*n +i] += f_ik_jk + f_ij_ik; @@ -1007,6 +939,138 @@ def _init_cupy_module(): return; }; + +extern "C" __global__ +void triangle_scores_inner(int n, double* Rijs, int n_intervals, double* cum_scores, double* scores_hist) +{ + /* thread index (1d), represents "i" index */ + unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; + + /* no-op when out of bounds */ + if(i >= n) return; + + double c[4]; + unsigned int j; + unsigned int k; + for(k=0;k<4;k++){c[k]=0;} + unsigned long ij, jk, ik; + int best_i; + double best_val; + double s_ij_jk, s_ik_jk, s_ij_ik; + double alt_ij_jk, alt_ij_ik, alt_ik_jk; + unsigned int l1,l2,l3; + double threshold; + double h = 1. / n_intervals; + + double *Rij, *Rjk, *Rik; + double JRijJ[9], JRjkJ[9], JRikJ[9]; + double tmp[9]; + + /* initialize alternatives */ + /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. + * this comparison is done for every pair in the triplete independently. to make sure that the + * alternative is indeed different in relation to the pair, we document the differences between + * the configurations in advance: + * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from + * best_conf in relation to pair */ + + int ALTS[2][4][3]; + ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; + ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; + ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; + ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; + ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; + ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; + + + for(j=i+1; j< (n - 1); j++){ + ij = PAIR_IDX(n, i, j); + for(k=j+1; k< n; k++){ + ik = PAIR_IDX(n, i, k); + jk = PAIR_IDX(n, j, k); + + /* compute configurations matches scores */ + Rij = Rijs + 9*ij; + Rjk = Rijs + 9*jk; + Rik = Rijs + 9*ik; + + JRJ(Rij, JRijJ); + JRJ(Rjk, JRjkJ); + JRJ(Rik, JRikJ); + + mult_3x3(tmp, Rij, Rjk); + c[0] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, JRijJ, Rjk); + c[1] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, JRjkJ); + c[2] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, Rjk); + c[3] = diff_norm_3x3(tmp, JRikJ); + + /* find best match */ + best_i=0; best_val=c[0]; + if (c[1] Date: Fri, 26 Apr 2024 09:32:52 -0400 Subject: [PATCH 27/60] initial bulk refactor --- src/aspire/abinitio/commonline_sync3n.py | 1219 +++++++--------------- 1 file changed, 400 insertions(+), 819 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 948accdd7f..7c9d65c81b 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -11,28 +11,28 @@ logger = logging.getLogger(__name__) -# Initialize alternatives -# -# When we find the best J-configuration, we also compare it to the alternative 2nd best one. -# this comparison is done for every pair in the triplete independently. to make sure that the -# alternative is indeed different in relation to the pair, we document the differences between -# the configurations in advance: -# ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from best_conf in relation to pair - -_ALTS = np.array( - [ - [[1, 2, 1], [0, 2, 0], [0, 0, 1], [1, 0, 0]], - [[2, 3, 3], [3, 3, 2], [3, 1, 3], [2, 1, 2]], - ], - dtype=int, -) - class CLSync3N(CLOrient3D, SyncVotingMixin): """ Define a class to estimate 3D orientations using common lines Sync3N methods (2017). """ + # Initialize alternatives + # + # When we find the best J-configuration, we also compare it to the alternative 2nd best one. + # this comparison is done for every pair in the triplete independently. to make sure that the + # alternative is indeed different in relation to the pair, we document the differences between + # the configurations in advance: + # ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from best_conf in relation to pair + + _ALTS = np.array( + [ + [[1, 2, 1], [0, 2, 0], [0, 0, 1], [1, 0, 0]], + [[2, 3, 3], [3, 3, 2], [3, 1, 3], [2, 1, 2]], + ], + dtype=int, + ) + def __init__( self, src, @@ -47,6 +47,7 @@ def __init__( mask=True, S_weighting=False, J_weighting=False, + hist_intervals=100, ): """ Initialize object for estimating 3D orientations. @@ -85,9 +86,10 @@ def __init__( self.S_weighting = S_weighting self.J_weighting = J_weighting self._D_null = 1e-13 + self.hist_intervals = hist_intervals # Auto configure GPU - self._use_gpu = False + self._gpu_module = None try: import cupy as cp @@ -96,9 +98,10 @@ def __init__( logger.info( f"cupy and GPU {gpu_id} found by cuda runtime; enabling cupy." ) - self._use_gpu = True + self._gpu_module = _init_cupy_module() else: logger.info("GPU not found, defaulting to numpy.") + except ModuleNotFoundError: logger.info("cupy not found, defaulting numpy.") @@ -299,17 +302,140 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): return W - def _triangle_scores_inner(self, Rijs, hist_intervals): + def _triangle_scores_inner(self, Rijs): # host/gpu dispatch - if self._use_gpu: - cum_scores, scores_hist = _triangle_scores_inner_cupy( - self.n_img, Rijs, hist_intervals - ) + if self._gpu_module: + cum_scores, scores_hist = self._triangle_scores_inner_cupy(Rijs) else: - cum_scores, scores_hist = _triangle_scores_inner_host( - self.n_img, Rijs, hist_intervals, _ALTS, self._pairs_to_linear - ) + cum_scores, scores_hist = self._triangle_scores_inner_host(Rijs) + + return cum_scores, scores_hist + + def _triangle_scores_inner_host(self, Rijs): + + # The following is adopted from Matlab triangle_scores_mex.c + + # Initialize probability result arrays + cum_scores = np.zeros(len(Rijs), dtype=Rijs.dtype) + scores_hist = np.zeros(self.hist_intervals, dtype=Rijs.dtype) + h = 1 / self.hist_intervals + + c = np.empty((4), dtype=Rijs.dtype) + for i in trange(self.n_img, desc="Computing triangle scores"): + for j in range( + i + 1, self.n_img - 1 + ): # check bound (taken from MATLAB mex) + ij = self._pairs_to_linear[i, j] + Rij = Rijs[ij] + for k in range(j + 1, self.n_img): + ik = self._pairs_to_linear[i, k] + jk = self._pairs_to_linear[j, k] + Rik = Rijs[ik] + Rjk = Rijs[jk] + + # Compute conjugated rotats + Rij_J = J_conjugate(Rij) + Rik_J = J_conjugate(Rik) + Rjk_J = J_conjugate(Rjk) + + # Compute R muls and norms + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + + # Find best match + best_i = np.argmin(c) + best_val = c[best_i] + + # For each triangle side, find the best alternative + alt_ij_jk = c[self._ALTS[0][best_i][0]] + if c[self._ALTS[1][best_i][0]] < alt_ij_jk: + alt_ij_jk = c[self._ALTS[1][best_i][0]] + + alt_ik_jk = c[self._ALTS[0][best_i][1]] + if c[self._ALTS[1][best_i][1]] < alt_ik_jk: + alt_ik_jk = c[self._ALTS[1][best_i][1]] + + alt_ij_ik = c[self._ALTS[0][best_i][2]] + if c[self._ALTS[1][best_i][2]] < alt_ij_ik: + alt_ij_ik = c[self._ALTS[1][best_i][2]] + + # Compute scores + s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) + s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) + s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) + + # Update cumulated scores + cum_scores[ij] += s_ij_jk + s_ij_ik + cum_scores[jk] += s_ij_jk + s_ik_jk + cum_scores[ik] += s_ik_jk + s_ij_ik + + # Update histogram + threshold = 0 + for _l1 in range(self.hist_intervals - 1): + threshold += h + if s_ij_jk < threshold: + break + + threshold = 0 + for _l2 in range(self.hist_intervals - 1): + threshold += h + if s_ik_jk < threshold: + break + + threshold = 0 + for _l3 in range(self.hist_intervals - 1): + threshold += h + if s_ij_ik < threshold: + break + + scores_hist[_l1] += 1 + scores_hist[_l2] += 1 + scores_hist[_l3] += 1 + + return cum_scores, scores_hist + + def _triangle_scores_inner_cupy(self, Rijs): + """ + n: n_img + Rijs: nchoose2x3x3 array + + """ + import cupy as cp + + triangle_scores = self._gpu_module.get_function("triangle_scores_inner") + + Rijs_dev = cp.array(Rijs) + + # xxx I think we can safely remove cum_scores + cum_scores_dev = cp.zeros( + (n_img * (n_img - 1) // 2, n_img), dtype=np.float64 + ) # n is for thread safety + + scores_hist_dev = cp.zeros( + (hist_intervals, n_img), dtype=np.float64 + ) # n is for thread safety + + # call the kernel + blkszx = 512 + nblkx = (n_img + blkszx - 1) // blkszx + triangle_scores( + (nblkx,), + (blkszx,), + ( + self.n_img, + Rijs_dev, + self.hist_intervals, + cum_scores_dev, + scores_hist_dev, + ), + ) + + # accumulate over thread results + cum_scores = cp.sum(cum_scores_dev, axis=1).get() + scores_hist = cp.sum(scores_hist_dev, axis=1).get() return cum_scores, scores_hist @@ -317,12 +443,136 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): # dtype is critical for passing into C code... params = np.arary([P2, A, a, B, b, x0], dtype=np.float64) # host/gpu dispatch - if self._use_gpu: - ln_f_ind, ln_f_arb = _pairs_probabilities_cupy(self.n_img, Rijs, *params) + if self._gpu_module: + ln_f_ind, ln_f_arb = self._pairs_probabilities_cupy(Rijs, *params) else: - ln_f_ind, ln_f_arb = _pairs_probabilities_host( - self.n_img, Rijs, *params, _ALTS, self._pairs_to_linear - ) + ln_f_ind, ln_f_arb = self._pairs_probabilities_host(Rijs, *params) + + return ln_f_ind, ln_f_arb + + def _pairs_probabilities_host(self, Rijs, P2, A, a, B, b, x0): + # The following is adopted from Matlab pairs_probabilities_mex.c `looper` + + # Initialize probability result arrays + ln_f_ind = np.zeros(len(Rijs), dtype=Rijs.dtype) + ln_f_arb = np.zeros(len(Rijs), dtype=Rijs.dtype) + + c = np.empty((4), dtype=Rijs.dtype) + for i in trange(self.n_img, desc="Computing pair probabilities"): + for j in range(i + 1, self.n_img - 1): + ij = self._pairs_to_linear[i, j] + Rij = Rijs[ij] + for k in range(j + 1, self.n_img): + ik = self._pairs_to_linear[i, k] + jk = self._pairs_to_linear[j, k] + Rik = Rijs[ik] + Rjk = Rijs[jk] + + # Compute conjugated rotats + Rij_J = J_conjugate(Rij) + Rik_J = J_conjugate(Rik) + Rjk_J = J_conjugate(Rjk) + + # Compute R muls and norms + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + + # Find best match + best_i = np.argmin(c) + best_val = c[best_i] + + # For each triangle side, find the best alternative + alt_ij_jk = c[self._ALTS[0][best_i][0]] + if c[self._ALTS[1][best_i][0]] < alt_ij_jk: + alt_ij_jk = c[self._ALTS[1][best_i][0]] + alt_ik_jk = c[self._ALTS[0][best_i][1]] + if c[self._ALTS[1][best_i][1]] < alt_ik_jk: + alt_ik_jk = c[self._ALTS[1][best_i][1]] + alt_ij_ik = c[self._ALTS[0][best_i][2]] + if c[self._ALTS[1][best_i][2]] < alt_ij_ik: + alt_ij_ik = c[self._ALTS[1][best_i][2]] + + # Compute scores + s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) + s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) + s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) + + # Update probabilities + # # Probability of pair ij having score given indicicative common line + # P2, B, b, x0, A, a + f_ij_jk = np.log( + P2 + * ( + B + * np.power(1 - s_ij_jk, b) + * np.exp(-b / (1 - x0) * (1 - s_ij_jk)) + ) + + (1 - P2) * A * np.power((1 - s_ij_jk), a) + ) + f_ik_jk = np.log( + P2 + * ( + B + * np.power(1 - s_ik_jk, b) + * np.exp(-b / (1 - x0) * (1 - s_ik_jk)) + ) + + (1 - P2) * A * np.power((1 - s_ik_jk), a) + ) + f_ij_ik = np.log( + P2 + * ( + B + * np.power(1 - s_ij_ik, b) + * np.exp(-b / (1 - x0) * (1 - s_ij_ik)) + ) + + (1 - P2) * A * np.power((1 - s_ij_ik), a) + ) + ln_f_ind[ij] += f_ij_jk + f_ij_ik + ln_f_ind[jk] += f_ij_jk + f_ik_jk + ln_f_ind[ik] += f_ik_jk + f_ij_ik + + # # Probability of pair ij having score given arbitrary common line + f_ij_jk = np.log(A * np.power((1 - s_ij_jk), a)) + f_ik_jk = np.log(A * np.power((1 - s_ik_jk), a)) + f_ij_ik = np.log(A * np.power((1 - s_ij_ik), a)) + ln_f_arb[ij] += f_ij_jk + f_ij_ik + ln_f_arb[jk] += f_ij_jk + f_ik_jk + ln_f_arb[ik] += f_ik_jk + f_ij_ik + + return ln_f_ind, ln_f_arb + + def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): + """ + n: n_img + Rijs: nchoose2x3x3 array + + """ + import cupy as cp + + pairs_probabilities = self._gpu_module.get_function("pairs_probabilities") + + Rijs_dev = cp.array(Rijs) + ln_f_ind_dev = cp.zeros( + (self.n_img * (self.n_img - 1) // 2, self.n_img) + ) # second dim is for thread safety + ln_f_arb_dev = cp.zeros( + (self.n_img * (self.n_img - 1) // 2, self.n_img) + ) # second dim is for thread safety + + # call the kernel + blkszx = 512 + nblkx = (self.n_img + blkszx - 1) // blkszx + pairs_probabilities( + (nblkx,), + (blkszx,), + (self.n_img, Rijs_dev, P2, A, a, B, b, x0, ln_f_ind_dev, ln_f_arb_dev), + ) + + # accumulate over thread results + ln_f_arb = cp.sum(ln_f_arb_dev, axis=1).get() + ln_f_ind = cp.sum(ln_f_ind_dev, axis=1).get() return ln_f_ind, ln_f_arb @@ -332,7 +582,6 @@ def _triangle_scores( scores_hist, Pmin, Pmax, - hist_intervals=100, a=2.2, peak2sigma=2.43e-2, P=0.5, @@ -359,7 +608,7 @@ def _triangle_scores( cum_scores = None # XXX Why do we even need cum_scores? if scores_hist is None: - cum_scores, scores_hist = self._triangle_scores_inner(Rijs, hist_intervals) + cum_scores, scores_hist = self._triangle_scores_inner(Rijs) # Normalize cumulated scores cum_scores /= len(Rijs) @@ -555,809 +804,141 @@ def _J_sync_power_method(self, Rijs): def _signs_times_v(self, Rijs, vec): # host/gpu dispatch - if self._use_gpu: - new_vec = _signs_times_v_cupy(self.n_img, Rijs, vec, self.J_weighting) + if self._gpu_module: + new_vec = self._signs_times_v_cupy(Rijs, vec) else: - new_vec = _signs_times_v_host( - self.n_img, Rijs, vec, self.J_weighting, _ALTS, self._pairs_to_linear - ) + new_vec = self._signs_times_v_host(Rijs, vec) return new_vec + def _signs_times_v_host(self, Rijs, vec): + """ + Ported from _signs_times_v_mex.c + + n: n_img + Rijs: nchoose2x3x3 array + vec: input array + new_vec: output array + J_weighting: bool + _ALTS= 2x4x3 const lut array + """ -def _signs_times_v_host(n, Rijs, vec, J_weighting, _ALTS, _pairs_to_linear): - """ - Ported from _signs_times_v_mex.c - - n: n_img - Rijs: nchoose2x3x3 array - vec: input array - new_vec: output array - J_weighting: bool - _ALTS= 2x4x3 const lut array - """ - - new_vec = np.zeros_like(vec) - - _signs_confs = np.array( - [[1, 1, 1], [-1, 1, -1], [-1, -1, 1], [1, -1, -1]], dtype=int - ) - - c = np.empty((4)) - desc = "Computing signs_times_v" - if J_weighting: - desc += " with J_weighting" - for i in trange(n, desc=desc): - for j in range(i + 1, n - 1): # check bound (taken from MATLAB mex) - ij = _pairs_to_linear[i, j] - Rij = Rijs[ij] - for k in range(j + 1, n): - ik = _pairs_to_linear[i, k] - jk = _pairs_to_linear[j, k] - Rik = Rijs[ik] - Rjk = Rijs[jk] - - # Compute conjugated rotats - Rij_J = J_conjugate(Rij) - Rik_J = J_conjugate(Rik) - Rjk_J = J_conjugate(Rjk) - - # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) - - # Find best match - best_i = np.argmin(c) - best_val = c[best_i] - - # MATLAB: scores_as_entries == 0 - s_ij_jk = _signs_confs[best_i][0] - s_ik_jk = _signs_confs[best_i][1] - s_ij_ik = _signs_confs[best_i][2] - - # Note there was a third J_weighting option (2) in MATLAB, - # but it was not exposed at top level. - if J_weighting: - # MATLAB: scores_as_entries == 1 - # For each triangle side, find the best alternative - alt_ij_jk = c[_ALTS[0][best_i][0]] - if c[_ALTS[1][best_i][0]] < alt_ij_jk: - alt_ij_jk = c[_ALTS[1][best_i][0]] - - alt_ik_jk = c[_ALTS[0][best_i][1]] - if c[_ALTS[1][best_i][1]] < alt_ik_jk: - alt_ik_jk = c[_ALTS[1][best_i][1]] - - alt_ij_ik = c[_ALTS[0][best_i][2]] - if c[_ALTS[1][best_i][2]] < alt_ij_ik: - alt_ij_ik = c[_ALTS[1][best_i][2]] - - # Compute scores - s_ij_jk *= 1 - np.sqrt(best_val / alt_ij_jk) - s_ik_jk *= 1 - np.sqrt(best_val / alt_ik_jk) - s_ij_ik *= 1 - np.sqrt(best_val / alt_ij_ik) - - # Update vector entries - new_vec[ij] += s_ij_jk * vec[jk] + s_ij_ik * vec[ik] - new_vec[jk] += s_ij_jk * vec[ij] + s_ik_jk * vec[ik] - new_vec[ik] += s_ij_ik * vec[ij] + s_ik_jk * vec[jk] - - return new_vec - - -def _init_cupy_module(): - module_code = r""" - -/* from i,j indoces to the common index in the N-choose-2 sized array */ -#define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2 + J-I-1) - - -inline void mult_3x3(double *out, double *R1, double *R2) { - /* 3X3 matrices multiplication: out = R1*R2 - * Note, this differs from the MATLAB mult_3x3. - */ - - int i,j,k; - - for(i=0; i<3; i++){ - for(j=0; j<3; j++){ - out[i*3 + j] = 0; - for (k=0; k<3; k++){ - out[i*3 + j] += R1[i*3+k] * R2[k*3+j]; - } - } - } -} - -inline void JRJ(double *R, double *A) { -/* multiple 3X3 matrix by J from both sizes: A = JRJ */ - A[0]=R[0]; - A[1]=R[1]; - A[2]=-R[2]; - A[3]=R[3]; - A[4]=R[4]; - A[5]=-R[5]; - A[6]=-R[6]; - A[7]=-R[7]; - A[8]=R[8]; -} - -inline double diff_norm_3x3(const double *R1, const double *R2) { -/* difference 2 matrices and return squared norm: ||R1-R2||^2 */ - int i; - double norm = 0; - for (i=0; i<9; i++) {norm += (R1[i]-R2[i])*(R1[i]-R2[i]);} - return norm; -} - - -extern "C" __global__ -void signs_times_v(int n, double* Rijs, const double* vec, double* new_vec, bool J_weighting) -{ - /* thread index (1d), represents "i" index */ - unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; - - /* no-op when out of bounds */ - if(i >= n) return; - - double c[4]; - unsigned int j; - unsigned int k; - for(k=0;k<4;k++){c[k]=0;} - unsigned long ij, jk, ik; - int best_i; - double best_val; - double s_ij_jk, s_ik_jk, s_ij_ik; - double alt_ij_jk, alt_ij_ik, alt_ik_jk; - - double *Rij, *Rjk, *Rik; - double JRijJ[9], JRjkJ[9], JRikJ[9]; - double tmp[9]; - - int signs_confs[4][3]; - for(int a=0; a<4; a++) { for(k=0; k<3; k++) { signs_confs[a][k]=1; } } - signs_confs[1][0]=-1; signs_confs[1][2]=-1; - signs_confs[2][0]=-1; signs_confs[2][1]=-1; - signs_confs[3][1]=-1; signs_confs[3][2]=-1; - - /* initialize alternatives */ - /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. - * this comparison is done for every pair in the triplete independently. to make sure that the - * alternative is indeed different in relation to the pair, we document the differences between - * the configurations in advance: - * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from - * best_conf in relation to pair */ - - int ALTS[2][4][3]; - ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; - ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; - ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; - ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; - ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; - ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; - - - for(j=i+1; j< (n - 1); j++){ - ij = PAIR_IDX(n, i, j); - for(k=j+1; k< n; k++){ - ik = PAIR_IDX(n, i, k); - jk = PAIR_IDX(n, j, k); - - /* compute configurations matches scores */ - Rij = Rijs + 9*ij; - Rjk = Rijs + 9*jk; - Rik = Rijs + 9*ik; - - JRJ(Rij, JRijJ); - JRJ(Rjk, JRjkJ); - JRJ(Rik, JRikJ); - - mult_3x3(tmp, Rij, Rjk); - c[0] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, JRijJ, Rjk); - c[1] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, Rij, JRjkJ); - c[2] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, Rij, Rjk); - c[3] = diff_norm_3x3(tmp, JRikJ); - - /* find best match */ - best_i=0; best_val=c[0]; - if (c[1]= n) return; - - double c[4]; - unsigned int j; - unsigned int k; - for(k=0;k<4;k++){c[k]=0;} - unsigned long ij, jk, ik; - int best_i; - double best_val; - double s_ij_jk, s_ik_jk, s_ij_ik; - double alt_ij_jk, alt_ij_ik, alt_ik_jk; - double f_ij_jk, f_ik_jk, f_ij_ik; - - - double *Rij, *Rjk, *Rik; - double JRijJ[9], JRjkJ[9], JRikJ[9]; - double tmp[9]; - - int signs_confs[4][3]; - for(int a=0; a<4; a++) { for(k=0; k<3; k++) { signs_confs[a][k]=1; } } - signs_confs[1][0]=-1; signs_confs[1][2]=-1; - signs_confs[2][0]=-1; signs_confs[2][1]=-1; - signs_confs[3][1]=-1; signs_confs[3][2]=-1; - - /* initialize alternatives */ - /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. - * this comparison is done for every pair in the triplete independently. to make sure that the - * alternative is indeed different in relation to the pair, we document the differences between - * the configurations in advance: - * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from - * best_conf in relation to pair */ - - int ALTS[2][4][3]; - ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; - ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; - ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; - ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; - ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; - ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; - - - for(j=i+1; j< (n - 1); j++){ - ij = PAIR_IDX(n, i, j); - for(k=j+1; k< n; k++){ - ik = PAIR_IDX(n, i, k); - jk = PAIR_IDX(n, j, k); - - /* compute configurations matches scores */ - Rij = Rijs + 9*ij; - Rjk = Rijs + 9*jk; - Rik = Rijs + 9*ik; - - JRJ(Rij, JRijJ); - JRJ(Rjk, JRjkJ); - JRJ(Rik, JRikJ); - - mult_3x3(tmp, Rij, Rjk); - c[0] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, JRijJ, Rjk); - c[1] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, Rij, JRjkJ); - c[2] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, Rij, Rjk); - c[3] = diff_norm_3x3(tmp, JRikJ); - - /* find best match */ - best_i=0; best_val=c[0]; - if (c[1]= n) return; - - double c[4]; - unsigned int j; - unsigned int k; - for(k=0;k<4;k++){c[k]=0;} - unsigned long ij, jk, ik; - int best_i; - double best_val; - double s_ij_jk, s_ik_jk, s_ij_ik; - double alt_ij_jk, alt_ij_ik, alt_ik_jk; - unsigned int l1,l2,l3; - double threshold; - double h = 1. / n_intervals; - - double *Rij, *Rjk, *Rik; - double JRijJ[9], JRjkJ[9], JRikJ[9]; - double tmp[9]; - - /* initialize alternatives */ - /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. - * this comparison is done for every pair in the triplete independently. to make sure that the - * alternative is indeed different in relation to the pair, we document the differences between - * the configurations in advance: - * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from - * best_conf in relation to pair */ - - int ALTS[2][4][3]; - ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; - ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; - ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; - ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; - ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; - ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; - - - for(j=i+1; j< (n - 1); j++){ - ij = PAIR_IDX(n, i, j); - for(k=j+1; k< n; k++){ - ik = PAIR_IDX(n, i, k); - jk = PAIR_IDX(n, j, k); - - /* compute configurations matches scores */ - Rij = Rijs + 9*ij; - Rjk = Rijs + 9*jk; - Rik = Rijs + 9*ik; - - JRJ(Rij, JRijJ); - JRJ(Rjk, JRjkJ); - JRJ(Rik, JRikJ); - - mult_3x3(tmp, Rij, Rjk); - c[0] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, JRijJ, Rjk); - c[1] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, Rij, JRjkJ); - c[2] = diff_norm_3x3(tmp, Rik); - - mult_3x3(tmp, Rij, Rjk); - c[3] = diff_norm_3x3(tmp, JRikJ); - - /* find best match */ - best_i=0; best_val=c[0]; - if (c[1] Date: Fri, 26 Apr 2024 10:42:49 -0400 Subject: [PATCH 28/60] initial cupy comparison test add --- src/aspire/abinitio/commonline_sync3n.cu | 421 +++++++++++++++++++++++ src/aspire/abinitio/commonline_sync3n.py | 30 +- tests/test_commonline_sync3n_cupy.py | 104 ++++++ 3 files changed, 541 insertions(+), 14 deletions(-) create mode 100644 src/aspire/abinitio/commonline_sync3n.cu create mode 100644 tests/test_commonline_sync3n_cupy.py diff --git a/src/aspire/abinitio/commonline_sync3n.cu b/src/aspire/abinitio/commonline_sync3n.cu new file mode 100644 index 0000000000..3c0b0b9001 --- /dev/null +++ b/src/aspire/abinitio/commonline_sync3n.cu @@ -0,0 +1,421 @@ + +/* from i,j indoces to the common index in the N-choose-2 sized array */ +#define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2 + J-I-1) + + +inline void mult_3x3(double *out, double *R1, double *R2) { + /* 3X3 matrices multiplication: out = R1*R2 + * Note, this differs from the MATLAB mult_3x3. + */ + + int i,j,k; + + for(i=0; i<3; i++){ + for(j=0; j<3; j++){ + out[i*3 + j] = 0; + for (k=0; k<3; k++){ + out[i*3 + j] += R1[i*3+k] * R2[k*3+j]; + } + } + } +} + +inline void JRJ(double *R, double *A) { + /* multiple 3X3 matrix by J from both sizes: A = JRJ */ + A[0]=R[0]; + A[1]=R[1]; + A[2]=-R[2]; + A[3]=R[3]; + A[4]=R[4]; + A[5]=-R[5]; + A[6]=-R[6]; + A[7]=-R[7]; + A[8]=R[8]; +} + +inline double diff_norm_3x3(const double *R1, const double *R2) { + /* difference 2 matrices and return squared norm: ||R1-R2||^2 */ + int i; + double norm = 0; + for (i=0; i<9; i++) {norm += (R1[i]-R2[i])*(R1[i]-R2[i]);} + return norm; +} + + +extern "C" __global__ +void signs_times_v(int n, double* Rijs, const double* vec, double* new_vec, bool J_weighting) +{ + /* thread index (1d), represents "i" index */ + unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; + + /* no-op when out of bounds */ + if(i >= n) return; + + double c[4]; + unsigned int j; + unsigned int k; + for(k=0;k<4;k++){c[k]=0;} + unsigned long ij, jk, ik; + int best_i; + double best_val; + double s_ij_jk, s_ik_jk, s_ij_ik; + double alt_ij_jk, alt_ij_ik, alt_ik_jk; + + double *Rij, *Rjk, *Rik; + double JRijJ[9], JRjkJ[9], JRikJ[9]; + double tmp[9]; + + int signs_confs[4][3]; + for(int a=0; a<4; a++) { for(k=0; k<3; k++) { signs_confs[a][k]=1; } } + signs_confs[1][0]=-1; signs_confs[1][2]=-1; + signs_confs[2][0]=-1; signs_confs[2][1]=-1; + signs_confs[3][1]=-1; signs_confs[3][2]=-1; + + /* initialize alternatives */ + /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. + * this comparison is done for every pair in the triplete independently. to make sure that the + * alternative is indeed different in relation to the pair, we document the differences between + * the configurations in advance: + * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from + * best_conf in relation to pair */ + + int ALTS[2][4][3]; + ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; + ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; + ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; + ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; + ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; + ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; + + + for(j=i+1; j< (n - 1); j++){ + ij = PAIR_IDX(n, i, j); + for(k=j+1; k< n; k++){ + ik = PAIR_IDX(n, i, k); + jk = PAIR_IDX(n, j, k); + + /* compute configurations matches scores */ + Rij = Rijs + 9*ij; + Rjk = Rijs + 9*jk; + Rik = Rijs + 9*ik; + + JRJ(Rij, JRijJ); + JRJ(Rjk, JRjkJ); + JRJ(Rik, JRikJ); + + mult_3x3(tmp, Rij, Rjk); + c[0] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, JRijJ, Rjk); + c[1] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, JRjkJ); + c[2] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, Rjk); + c[3] = diff_norm_3x3(tmp, JRikJ); + + /* find best match */ + best_i=0; best_val=c[0]; + if (c[1]= n) return; + + double c[4]; + unsigned int j; + unsigned int k; + for(k=0;k<4;k++){c[k]=0;} + unsigned long ij, jk, ik; + int best_i; + double best_val; + double s_ij_jk, s_ik_jk, s_ij_ik; + double alt_ij_jk, alt_ij_ik, alt_ik_jk; + double f_ij_jk, f_ik_jk, f_ij_ik; + + + double *Rij, *Rjk, *Rik; + double JRijJ[9], JRjkJ[9], JRikJ[9]; + double tmp[9]; + + int signs_confs[4][3]; + for(int a=0; a<4; a++) { for(k=0; k<3; k++) { signs_confs[a][k]=1; } } + signs_confs[1][0]=-1; signs_confs[1][2]=-1; + signs_confs[2][0]=-1; signs_confs[2][1]=-1; + signs_confs[3][1]=-1; signs_confs[3][2]=-1; + + /* initialize alternatives */ + /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. + * this comparison is done for every pair in the triplete independently. to make sure that the + * alternative is indeed different in relation to the pair, we document the differences between + * the configurations in advance: + * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from + * best_conf in relation to pair */ + + int ALTS[2][4][3]; + ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; + ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; + ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; + ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; + ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; + ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; + + + for(j=i+1; j< (n - 1); j++){ + ij = PAIR_IDX(n, i, j); + for(k=j+1; k< n; k++){ + ik = PAIR_IDX(n, i, k); + jk = PAIR_IDX(n, j, k); + + /* compute configurations matches scores */ + Rij = Rijs + 9*ij; + Rjk = Rijs + 9*jk; + Rik = Rijs + 9*ik; + + JRJ(Rij, JRijJ); + JRJ(Rjk, JRjkJ); + JRJ(Rik, JRikJ); + + mult_3x3(tmp, Rij, Rjk); + c[0] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, JRijJ, Rjk); + c[1] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, JRjkJ); + c[2] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, Rjk); + c[3] = diff_norm_3x3(tmp, JRikJ); + + /* find best match */ + best_i=0; best_val=c[0]; + if (c[1]= n) return; + + double c[4]; + unsigned int j; + unsigned int k; + for(k=0;k<4;k++){c[k]=0;} + unsigned long ij, jk, ik; + int best_i; + double best_val; + double s_ij_jk, s_ik_jk, s_ij_ik; + double alt_ij_jk, alt_ij_ik, alt_ik_jk; + unsigned int l1,l2,l3; + double threshold; + double h = 1. / n_intervals; + + double *Rij, *Rjk, *Rik; + double JRijJ[9], JRjkJ[9], JRikJ[9]; + double tmp[9]; + + /* initialize alternatives */ + /* when we find the best J-configuration, we also compare it to the alternative 2nd best one. + * this comparison is done for every pair in the triplete independently. to make sure that the + * alternative is indeed different in relation to the pair, we document the differences between + * the configurations in advance: + * ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from + * best_conf in relation to pair */ + + int ALTS[2][4][3]; + ALTS[0][0][0]=1; ALTS[0][1][0]=0; ALTS[0][2][0]=0; ALTS[0][3][0]=1; + ALTS[1][0][0]=2; ALTS[1][1][0]=3; ALTS[1][2][0]=3; ALTS[1][3][0]=2; + ALTS[0][0][1]=2; ALTS[0][1][1]=2; ALTS[0][2][1]=0; ALTS[0][3][1]=0; + ALTS[1][0][1]=3; ALTS[1][1][1]=3; ALTS[1][2][1]=1; ALTS[1][3][1]=1; + ALTS[0][0][2]=1; ALTS[0][1][2]=0; ALTS[0][2][2]=1; ALTS[0][3][2]=0; + ALTS[1][0][2]=3; ALTS[1][1][2]=2; ALTS[1][2][2]=3; ALTS[1][3][2]=2; + + + for(j=i+1; j< (n - 1); j++){ + ij = PAIR_IDX(n, i, j); + for(k=j+1; k< n; k++){ + ik = PAIR_IDX(n, i, k); + jk = PAIR_IDX(n, j, k); + + /* compute configurations matches scores */ + Rij = Rijs + 9*ij; + Rjk = Rijs + 9*jk; + Rik = Rijs + 9*ik; + + JRJ(Rij, JRijJ); + JRJ(Rjk, JRjkJ); + JRJ(Rik, JRikJ); + + mult_3x3(tmp, Rij, Rjk); + c[0] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, JRijJ, Rjk); + c[1] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, JRjkJ); + c[2] = diff_norm_3x3(tmp, Rik); + + mult_3x3(tmp, Rij, Rjk); + c[3] = diff_norm_3x3(tmp, JRikJ); + + /* find best match */ + best_i=0; best_val=c[0]; + if (c[1] Date: Fri, 26 Apr 2024 10:47:29 -0400 Subject: [PATCH 29/60] cleanup cl3n compare test a little --- src/aspire/abinitio/commonline_sync3n.py | 6 ++++-- tests/test_commonline_sync3n_cupy.py | 25 +++++++++++++++--------- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 1f957168d0..bc6f7634d8 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -918,7 +918,9 @@ def _signs_times_v_cupy(self, Rijs, vec): blkszx = 512 nblkx = (self.n_img + blkszx - 1) // blkszx signs_times_v( - (nblkx,), (blkszx,), (self.n_img, Rijs_dev, vec_dev, new_vec_dev, self.J_weighting) + (nblkx,), + (blkszx,), + (self.n_img, Rijs_dev, vec_dev, new_vec_dev, self.J_weighting), ) # accumulate, can reuse the vec_dev array now. @@ -939,7 +941,7 @@ def _init_cupy_module(): # Read in contents of file fp = os.path.join(os.path.dirname(__file__), "commonline_sync3n.cu") - with open(fp, 'r') as fh: + with open(fp, "r") as fh: module_code = fh.read() # CUPY compile the CUDA code diff --git a/tests/test_commonline_sync3n_cupy.py b/tests/test_commonline_sync3n_cupy.py index f80f6b12fc..8068266e65 100644 --- a/tests/test_commonline_sync3n_cupy.py +++ b/tests/test_commonline_sync3n_cupy.py @@ -1,35 +1,39 @@ import numpy as np import pytest -from aspire.source import Simulation from aspire.abinitio.commonline_sync3n import CLSync3N +from aspire.source import Simulation -DTYPE = np.float64 -N = 64 +DTYPE = np.float64 # TODO, consider single precision. +N = 64 # Number of images n_pairs = N * (N - 1) // 2 + @pytest.fixture def src_fixture(): src = Simulation(n=N, L=32, C=1, dtype=DTYPE) src = src.cache() return src + @pytest.fixture def cl3n_fixture(src_fixture): cl = CLSync3N(src_fixture) return cl + @pytest.fixture def rijs_fixture(): Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3) Rijs = Rijs.astype(dtype=DTYPE, copy=False) return Rijs + def test_pairs_prob_host_vs_cupy(cl3n_fixture, rijs_fixture): """ Compares pairs_probabilities between host and cupy implementations. """ - + P2, A, a, B, b, x0 = 1, 2, 3, 4, 5, 6 # DTYPE is critical here (manually calling private method @@ -45,6 +49,7 @@ def test_pairs_prob_host_vs_cupy(cl3n_fixture, rijs_fixture): np.testing.assert_allclose(indsh, indscp) np.testing.assert_allclose(arbh, arbcp) + def test_triangle_scores_host_vs_cupy(cl3n_fixture, rijs_fixture): """ Compares triangle_scores between host and cupy implementations. @@ -58,8 +63,9 @@ def test_triangle_scores_host_vs_cupy(cl3n_fixture, rijs_fixture): cuh, hih = cl3n_fixture._triangle_scores_inner_host(rijs_fixture) # Compare host to cupy calls - np.testing.assert_allclose(cucp,cuh) - np.testing.assert_allclose(hicp,hih) + np.testing.assert_allclose(cucp, cuh) + np.testing.assert_allclose(hicp, hih) + def test_stv_host_vs_cupy(cl3n_fixture, rijs_fixture): """ @@ -68,10 +74,10 @@ def test_stv_host_vs_cupy(cl3n_fixture, rijs_fixture): Default J_weighting=False """ # dummy data vector - vec = np.ones(n_pairs, dtype=DTYPE) + vec = np.random.random(n_pairs).astype(dtype=DTYPE, copy=False) # J_weighting=False - assert cl3n_fixture.J_weighting == False + assert cl3n_fixture.J_weighting is False # Execute CUPY new_vec_cp = cl3n_fixture._signs_times_v_cupy(rijs_fixture, vec) @@ -82,6 +88,7 @@ def test_stv_host_vs_cupy(cl3n_fixture, rijs_fixture): # Compare host to cupy calls np.testing.assert_allclose(new_vec_cp, new_vec_h) + def test_stvJwt_host_vs_cupy(cl3n_fixture, rijs_fixture): """ Compares signs_times_v between host and cupy implementations. @@ -89,7 +96,7 @@ def test_stvJwt_host_vs_cupy(cl3n_fixture, rijs_fixture): Force J_weighting=True """ # dummy data vector - vec = np.ones(n_pairs, dtype=DTYPE) + vec = np.random.random(n_pairs).astype(dtype=DTYPE, copy=False) # J_weighting=True cl3n_fixture.J_weighting = True From 41e68c8f8bd56d1e61878557ea34340c1ece2a36 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 26 Apr 2024 10:49:42 -0400 Subject: [PATCH 30/60] rm merged test file --- x.py | 87 ------------------------------------------------------------ 1 file changed, 87 deletions(-) delete mode 100644 x.py diff --git a/x.py b/x.py deleted file mode 100644 index ebc5e6d768..0000000000 --- a/x.py +++ /dev/null @@ -1,87 +0,0 @@ -import pickle -import time -from collections import defaultdict - -import cupy as cp -import matplotlib.pyplot as plt -import numpy as np - -from aspire.abinitio.commonline_sync3n import _signs_times_v_cupy, _signs_times_v_host -from aspire.utils import all_pairs - - -def time_test(n): - n_pairs = n * (n - 1) // 2 - _, _pairs_to_linear = all_pairs(n, return_map=True) - - vec = np.ones(n_pairs, dtype=np.float64) - # Rijs = np.random.randn(n_pairs*3*3).astype(dtype=np.float64) - Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3).astype(dtype=np.float64) - - tic0 = time.perf_counter() - new_vec = _signs_times_v_cupy(n, Rijs, vec, J_weighting=False) - tic1 = time.perf_counter() - gpu_time = tic1 - tic0 - print("gpu\n", new_vec) - - tic2 = time.perf_counter() - new_vec_host = _signs_times_v_host( - n, Rijs, vec, J_weighting=False, _ALTS=None, _pairs_to_linear=_pairs_to_linear - ) - tic3 = time.perf_counter() - host_time = tic3 - tic2 - print("host\n", new_vec_host) - - print(f"\n\n\nSize:\t{n}") - print("Allclose? ", np.allclose(new_vec_host, new_vec)) - print(f"gpu_time: {gpu_time}") - print(f"host_time: {host_time}") - speedup = host_time / gpu_time - print(f"speedup: {speedup}") - - return host_time, gpu_time, speedup - - -def plotit(results): - N = np.array(list(results.keys())) - H = np.array([v["host"] for v in results.values()]) - G = np.array([v["gpu"] for v in results.values()]) - S = np.array([v["speedup"] for v in results.values()]) - - plt.plot(N, H, label="host python") - plt.plot(N, G, label="cuda") - plt.title("Walltimes (s)") - plt.legend() - plt.show() - plt.savefig("walltimes.png") - plt.clf() - - plt.plot(N, S) - plt.title("Speedup Ratio") - plt.show() - plt.savefig("speedups.png") - plt.clf() - - -def main(): - results = defaultdict(dict) - # too long...! for n in [4,16,64,100,128,200,256,512,1024,2048,3000, 4096, 10000]: - # for n in [4,16]: # test - for n in [4, 16, 64, 100, 128, 200, 512]: - h, g, s = time_test(n) - results[n]["host"] = h - results[n]["gpu"] = g - results[n]["speedup"] = s - - # save in case we cancel - with open("saved_results.pkl", "wb") as f: - pickle.dump(results, f) - - print() - print(results) - print() - - plotit(results) - - -time_test(64) From 61d349819423ee113ca585c1fb9f09376f3b2758 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 26 Apr 2024 10:53:52 -0400 Subject: [PATCH 31/60] fixup manifest --- MANIFEST.in | 1 + 1 file changed, 1 insertion(+) diff --git a/MANIFEST.in b/MANIFEST.in index 4477aa87c0..ecc7484b40 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -17,6 +17,7 @@ recursive-include docs *.rst recursive-include docs Makefile recursive-include docs *.sh recursive-include src *.conf +recursive-include src *.cu recursive-include src *.yaml prune docs/build prune docs/source From d571349bc115b551a7578be22a9618c42f6e0c7c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 26 Apr 2024 10:58:00 -0400 Subject: [PATCH 32/60] remove unused cum_scores --- src/aspire/abinitio/commonline_sync3n.cu | 8 +----- src/aspire/abinitio/commonline_sync3n.py | 35 ++++++------------------ tests/test_commonline_sync3n_cupy.py | 10 ++++--- 3 files changed, 15 insertions(+), 38 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.cu b/src/aspire/abinitio/commonline_sync3n.cu index 3c0b0b9001..58ee75a98e 100644 --- a/src/aspire/abinitio/commonline_sync3n.cu +++ b/src/aspire/abinitio/commonline_sync3n.cu @@ -290,7 +290,7 @@ void pairs_probabilities(int n, double* Rijs, double P2, double A, double a, dou extern "C" __global__ -void triangle_scores_inner(int n, double* Rijs, int n_intervals, double* cum_scores, double* scores_hist) +void triangle_scores_inner(int n, double* Rijs, int n_intervals, double* scores_hist) { /* thread index (1d), represents "i" index */ unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; @@ -385,12 +385,6 @@ void triangle_scores_inner(int n, double* Rijs, int n_intervals, double* cum_sco s_ik_jk = 1 - sqrt(best_val / alt_ik_jk); s_ij_ik = 1 - sqrt(best_val / alt_ij_ik); - - /* update cumulated scores */ - cum_scores[ij*n+i] += s_ij_jk + s_ij_ik; - cum_scores[jk*n+i] += s_ij_jk + s_ik_jk; - cum_scores[ik*n+i] += s_ik_jk + s_ij_ik; - /* update scores histogram */ threshold = 0; for (l1=0; l1 Date: Mon, 29 Apr 2024 08:42:52 -0400 Subject: [PATCH 33/60] atomic stv --- src/aspire/abinitio/commonline_sync3n.cu | 6 +++--- src/aspire/abinitio/commonline_sync3n.py | 8 ++------ 2 files changed, 5 insertions(+), 9 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.cu b/src/aspire/abinitio/commonline_sync3n.cu index 58ee75a98e..884e5b44f4 100644 --- a/src/aspire/abinitio/commonline_sync3n.cu +++ b/src/aspire/abinitio/commonline_sync3n.cu @@ -151,9 +151,9 @@ void signs_times_v(int n, double* Rijs, const double* vec, double* new_vec, bool /* update multiplication */ - new_vec[ij*n + i] += s_ij_jk*vec[jk] + s_ij_ik*vec[ik]; - new_vec[jk*n + i] += s_ij_jk*vec[ij] + s_ik_jk*vec[ik]; - new_vec[ik*n + i] += s_ij_ik*vec[ij] + s_ik_jk*vec[jk]; + atomicAdd(&(new_vec[ij]), s_ij_jk*vec[jk] + s_ij_ik*vec[ik]); + atomicAdd(&(new_vec[jk]), s_ij_jk*vec[ij] + s_ik_jk*vec[ik]); + atomicAdd(&(new_vec[ik]), s_ij_ik*vec[ij] + s_ik_jk*vec[jk]); } /* k */ } /* j */ diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 6f09350589..8bc41cc4ca 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -892,8 +892,7 @@ def _signs_times_v_cupy(self, Rijs, vec): Rijs_dev = cp.array(Rijs) vec_dev = cp.array(vec) - # 2d over i then accum to avoid race on i - new_vec_dev = cp.zeros((vec.shape[0], self.n_img)) + new_vec_dev = cp.zeros((vec.shape[0])) # call the kernel blkszx = 512 @@ -904,11 +903,8 @@ def _signs_times_v_cupy(self, Rijs, vec): (self.n_img, Rijs_dev, vec_dev, new_vec_dev, self.J_weighting), ) - # accumulate, can reuse the vec_dev array now. - cp.sum(new_vec_dev, axis=1, out=vec_dev) - # dtoh - new_vec = vec_dev.get() + new_vec = new_vec_dev.get() return new_vec From 9fba485dc6d36230dc6c1211c16c1a74f69114ba Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 29 Apr 2024 10:01:37 -0400 Subject: [PATCH 34/60] convert remaining kernels to use atomics instead of naive array safety --- src/aspire/abinitio/commonline_sync3n.cu | 18 +++++++++--------- src/aspire/abinitio/commonline_sync3n.py | 20 +++++++------------- tests/test_commonline_sync3n_cupy.py | 6 +++--- 3 files changed, 19 insertions(+), 25 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.cu b/src/aspire/abinitio/commonline_sync3n.cu index 884e5b44f4..aaff3d0e76 100644 --- a/src/aspire/abinitio/commonline_sync3n.cu +++ b/src/aspire/abinitio/commonline_sync3n.cu @@ -268,18 +268,18 @@ void pairs_probabilities(int n, double* Rijs, double P2, double A, double a, dou f_ij_jk = log( P2*(B*pow(1-s_ij_jk,b)*exp(-b/(1-x0)*(1-s_ij_jk))) + (1-P2)*A*pow((1-s_ij_jk),a) ); f_ik_jk = log( P2*(B*pow(1-s_ik_jk,b)*exp(-b/(1-x0)*(1-s_ik_jk))) + (1-P2)*A*pow((1-s_ik_jk),a) ); f_ij_ik = log( P2*(B*pow(1-s_ij_ik,b)*exp(-b/(1-x0)*(1-s_ij_ik))) + (1-P2)*A*pow((1-s_ij_ik),a) ); - ln_f_ind[ij*n +i] += f_ij_jk + f_ij_ik; - ln_f_ind[jk*n +i] += f_ij_jk + f_ik_jk; - ln_f_ind[ik*n +i] += f_ik_jk + f_ij_ik; + atomicAdd(&(ln_f_ind[ij]), f_ij_jk + f_ij_ik); + atomicAdd(&(ln_f_ind[jk]), f_ij_jk + f_ik_jk); + atomicAdd(&(ln_f_ind[ik]), f_ik_jk + f_ij_ik); /* the probability of a pair ij to have the observed triangles scores, given it has an arbitrary common line */ f_ij_jk = log( A*pow((1-s_ij_jk),a) ); f_ik_jk = log( A*pow((1-s_ik_jk),a) ); f_ij_ik = log( A*pow((1-s_ij_ik),a) ); - ln_f_arb[ij*n +i] += f_ij_jk + f_ij_ik; - ln_f_arb[jk*n +i] += f_ij_jk + f_ik_jk; - ln_f_arb[ik*n +i] += f_ik_jk + f_ij_ik; + atomicAdd(&(ln_f_arb[ij]), f_ij_jk + f_ij_ik); + atomicAdd(&(ln_f_arb[jk]), f_ij_jk + f_ik_jk); + atomicAdd(&(ln_f_arb[ik]), f_ik_jk + f_ij_ik); } /* k */ @@ -404,9 +404,9 @@ void triangle_scores_inner(int n, double* Rijs, int n_intervals, double* scores_ if (s_ij_ik < threshold) {break;} } - scores_hist[l1*n+i] += 1; - scores_hist[l2*n+i] += 1; - scores_hist[l3*n+i] += 1; + atomicAdd(&(scores_hist[l1]), 1); + atomicAdd(&(scores_hist[l2]), 1); + atomicAdd(&(scores_hist[l3]), 1); } /* k */ } /* j */ diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 8bc41cc4ca..463dc52d9c 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -402,9 +402,7 @@ def _triangle_scores_inner_cupy(self, Rijs): Rijs_dev = cp.array(Rijs) - scores_hist_dev = cp.zeros( - (self.hist_intervals, self.n_img), dtype=np.float64 - ) # n is for thread safety + scores_hist_dev = cp.zeros((self.hist_intervals), dtype=np.float64) # call the kernel blkszx = 512 @@ -420,8 +418,8 @@ def _triangle_scores_inner_cupy(self, Rijs): ), ) - # accumulate over thread results - scores_hist = cp.sum(scores_hist_dev, axis=1).get() + # d2h + scores_hist = scores_hist_dev.get() return scores_hist @@ -540,12 +538,8 @@ def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): pairs_probabilities = self._gpu_module.get_function("pairs_probabilities") Rijs_dev = cp.array(Rijs) - ln_f_ind_dev = cp.zeros( - (self.n_img * (self.n_img - 1) // 2, self.n_img) - ) # second dim is for thread safety - ln_f_arb_dev = cp.zeros( - (self.n_img * (self.n_img - 1) // 2, self.n_img) - ) # second dim is for thread safety + ln_f_ind_dev = cp.zeros((self.n_img * (self.n_img - 1) // 2), dtype=np.float64) + ln_f_arb_dev = cp.zeros((self.n_img * (self.n_img - 1) // 2), dtype=np.float64) # call the kernel blkszx = 512 @@ -557,8 +551,8 @@ def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): ) # accumulate over thread results - ln_f_arb = cp.sum(ln_f_arb_dev, axis=1).get() - ln_f_ind = cp.sum(ln_f_ind_dev, axis=1).get() + ln_f_arb = ln_f_arb_dev.get() + ln_f_ind = ln_f_ind_dev.get() return ln_f_ind, ln_f_arb diff --git a/tests/test_commonline_sync3n_cupy.py b/tests/test_commonline_sync3n_cupy.py index 3cc0245ad7..81d967aa8e 100644 --- a/tests/test_commonline_sync3n_cupy.py +++ b/tests/test_commonline_sync3n_cupy.py @@ -12,20 +12,20 @@ # XXX TODO, conditionally run these only if GPU present. -@pytest.fixture +@pytest.fixture(scope="module") def src_fixture(): src = Simulation(n=N, L=32, C=1, dtype=DTYPE) src = src.cache() return src -@pytest.fixture +@pytest.fixture(scope="module") def cl3n_fixture(src_fixture): cl = CLSync3N(src_fixture) return cl -@pytest.fixture +@pytest.fixture(scope="module") def rijs_fixture(): Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3) Rijs = Rijs.astype(dtype=DTYPE, copy=False) From a834b2f443db629c05cb8b8747b64dd533e0dcc5 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 29 Apr 2024 10:55:05 -0400 Subject: [PATCH 35/60] add some documentation --- src/aspire/abinitio/commonline_sync3n.py | 115 +++++++++++++++++------ 1 file changed, 85 insertions(+), 30 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 463dc52d9c..a7066a4abe 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -21,7 +21,7 @@ class CLSync3N(CLOrient3D, SyncVotingMixin): # Initialize alternatives # # When we find the best J-configuration, we also compare it to the alternative 2nd best one. - # this comparison is done for every pair in the triplete independently. to make sure that the + # this comparison is done for every pair in the triplet independently. to make sure that the # alternative is indeed different in relation to the pair, we document the differences between # the configurations in advance: # ALTS(:,best_conf,pair) = the two configurations in which J-sync differs from best_conf in relation to pair @@ -43,7 +43,6 @@ def __init__( shift_step=1, epsilon=1e-2, max_iters=1000, - degree_res=1, seed=None, mask=True, S_weighting=False, @@ -60,10 +59,15 @@ def __init__( :param shift_step: Resolution of shift estimation in pixels. Default = 1 pixel. :param epsilon: Tolerance for the power method. :param max_iter: Maximum iterations for the power method. - :param degree_res: Degree resolution for estimating in-plane rotations. :param seed: Optional seed for RNG. :param mask: Option to mask `src.images` with a fuzzy mask (boolean). Default, `True`, applies a mask. + :param S_weighting: Optionally apply probabilistic weighting + to the `S` matrix. + :param J_weighting: Optionally use `J` weights instead of + signs when computing `signs_times_v`. + :param hist_intervals: Number of histogram bins used to + compute triangle scores when `S_weighting` enabled. """ super().__init__( @@ -80,7 +84,6 @@ def __init__( self.epsilon = epsilon self.max_iters = max_iters - self.degree_res = degree_res self.seed = seed # Sync3N specific vars @@ -241,11 +244,27 @@ def _syncmatrix_weights( ): """ Given relative rotations matrix `Rij`, - compute and return probability weights for S. + compute and return probability weights `P` for S. + + Default parameters here were taken from those in the MATLAB + code, with the original author noting they were found + empirically. + + :param permitted_inconsistency: Consistency condition is + `mean(Pij)/permitted_inconsistency < P < + mean(Pij)*permitted_inconsistency`. + :param p_domain_limit: Domain of P is [Pmin,Pmax], with + Pmin=p_domain_limit*Pmax + :param max_iterations: Maximum iterations for P estimation. + :param min_p_permitted: Small value at which to stop + attempting to synchronize P. """ logger.info("Computing synchronization matrix weights.") - def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): + def _body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): + """ + Helper function to run and test triangle_scores. + """ # Get inistial estimate for Pij P, sigma, Pij, hist = self._triangle_scores(Rijs, hist, Pmin, Pmax) @@ -287,7 +306,7 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): res = (None,) * 4 inconsistent = True while inconsistent and i < max_iterations: - inconsistent, Pij, res = body(*res) + inconsistent, Pij, res = _body(*res) i += 1 # Pack W @@ -302,6 +321,13 @@ def body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): return W def _triangle_scores_inner(self, Rijs): + """ + Computes histogram of `triangle scores`. + + Wrapper for cpu/gpu dispatch. + + :param Rijs: nchoose2 by 3 by 3 array of rotations. + """ # host/gpu dispatch if self._gpu_module: @@ -312,6 +338,11 @@ def _triangle_scores_inner(self, Rijs): return scores_hist def _triangle_scores_inner_host(self, Rijs): + """ + See _triangle_scores_inner. + + CPU implementation. + """ # The following is adopted from Matlab triangle_scores_mex.c @@ -392,10 +423,11 @@ def _triangle_scores_inner_host(self, Rijs): def _triangle_scores_inner_cupy(self, Rijs): """ - n: n_img - Rijs: nchoose2x3x3 array + See _triangle_scores_inner. + GPU implementation. """ + import cupy as cp triangle_scores = self._gpu_module.get_function("triangle_scores_inner") @@ -424,6 +456,20 @@ def _triangle_scores_inner_cupy(self, Rijs): return scores_hist def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): + """ + This function computes the probability of a pair `ij` having + an observed value of triangles score under two priors. Once + given it has an indicative common line, and again once given + it has an arbitrary common line. + + The probability of the common line to be indicative can then + be derived by Bayes Theorem. + + Wrapper for cpu/gpu dispatch. + + :param Rijs: nchoose2 by 3 by 3 array of rotations. + XXX + """ # dtype is critical for passing into C code... params = np.arary([P2, A, a, B, b, x0], dtype=np.float64) # host/gpu dispatch @@ -435,6 +481,11 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): return ln_f_ind, ln_f_arb def _pairs_probabilities_host(self, Rijs, P2, A, a, B, b, x0): + """ + See _pairs_probabilities. + + CPU implementation. + """ # The following is adopted from Matlab pairs_probabilities_mex.c `looper` # Initialize probability result arrays @@ -529,10 +580,11 @@ def _pairs_probabilities_host(self, Rijs, P2, A, a, B, b, x0): def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): """ - n: n_img - Rijs: nchoose2x3x3 array + See _pairs_probabilities. + GPU implementation. """ + import cupy as cp pairs_probabilities = self._gpu_module.get_function("pairs_probabilities") @@ -569,16 +621,19 @@ def _triangle_scores( x0=0.78, ): """ - Todo + Computes `triangle_scores`, attempts to fit curve to distribution, and uses estimated distribution to compute `pairs_probabilities`. + + Default parameters here were taken from those in the MATLAB + code, with the original author noting they were found + empirically. - :param a: magic number + :param a: :param peak2sigma: empirical relation between the location of the peak of the histigram, and the mean error in the common lines estimations. - AKA, magic number :param P: :param b: - :param x0: + :param x0: Initial guess """ Pmin = Pmin or 0 @@ -757,7 +812,8 @@ def _J_sync_power_method(self, Rijs): residual = 1 itr = 0 - # XXX, I don't like that epsilon>1 (residual) returns signs of random vector + # Todo + # I don't like that epsilon>1 (residual) returns signs of random vector # maybe force to run once? or return vec as zeros in that case? # Seems unintended, but easy to do. @@ -778,7 +834,14 @@ def _J_sync_power_method(self, Rijs): return J_sync def _signs_times_v(self, Rijs, vec): + """ + Multiplication of the J-synchronization matrix by a candidate eigenvector `vec` + Wrapper for cpu/gpu dispatch. + + :param Rijs: An n-choose-2x3x3 array of estimates of relative rotations + :param vec: The current candidate eigenvector of length n-choose-2 from the power method. + """ # host/gpu dispatch if self._gpu_module: new_vec = self._signs_times_v_cupy(Rijs, vec) @@ -789,14 +852,9 @@ def _signs_times_v(self, Rijs, vec): def _signs_times_v_host(self, Rijs, vec): """ - Ported from _signs_times_v_mex.c + See `_signs_times_v`. - n: n_img - Rijs: nchoose2x3x3 array - vec: input array - new_vec: output array - J_weighting: bool - _ALTS= 2x4x3 const lut array + CPU implementation. """ new_vec = np.zeros_like(vec) @@ -872,13 +930,9 @@ def _signs_times_v_host(self, Rijs, vec): def _signs_times_v_cupy(self, Rijs, vec): """ - Ported from _signs_times_v_mex.c + See `_signs_times_v`. - n: n_img - Rijs: nchoose2x3x3 array - vec: input array - new_vec: output array - J_weighting: bool + CPU implementation. """ import cupy as cp @@ -905,7 +959,8 @@ def _signs_times_v_cupy(self, Rijs, vec): @staticmethod def _init_cupy_module(): """ - Private utility method to read in CUDA source and return as compiled CUPY module. + Private utility method to read in CUDA source and return as + compiled CUPY module. """ import cupy as cp From 98aa274164051d3de622542487a6aa2ea44e7877 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 29 Apr 2024 11:18:40 -0400 Subject: [PATCH 36/60] more cleanup --- src/aspire/abinitio/commonline_sync3n.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index a7066a4abe..33cb7a292a 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -451,7 +451,7 @@ def _triangle_scores_inner_cupy(self, Rijs): ) # d2h - scores_hist = scores_hist_dev.get() + scores_hist = scores_hist_dev.get().astype(self.dtype, copy=False) return scores_hist @@ -468,10 +468,17 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): Wrapper for cpu/gpu dispatch. :param Rijs: nchoose2 by 3 by 3 array of rotations. - XXX + :param P2: distribution parameter + :param A: distribution parameter + :param a: distribution parameter + :param B: distribution parameter + :param b: distribution parameter + :param x0: Initial guess + """ - # dtype is critical for passing into C code... - params = np.arary([P2, A, a, B, b, x0], dtype=np.float64) + # These param values are passed to C, force doubles. + params = np.array([P2, A, a, B, b, x0], dtype=np.float64) + # host/gpu dispatch if self._gpu_module: ln_f_ind, ln_f_arb = self._pairs_probabilities_cupy(Rijs, *params) @@ -603,8 +610,8 @@ def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): ) # accumulate over thread results - ln_f_arb = ln_f_arb_dev.get() - ln_f_ind = ln_f_ind_dev.get() + ln_f_arb = ln_f_arb_dev.get().astype(self.dtype, copy=False) + ln_f_ind = ln_f_ind_dev.get().astype(self.dtype, copy=False) return ln_f_ind, ln_f_arb @@ -952,7 +959,7 @@ def _signs_times_v_cupy(self, Rijs, vec): ) # dtoh - new_vec = new_vec_dev.get() + new_vec = new_vec_dev.get().astype(self.dtype, copy=False) return new_vec From 8161a2100ee94652ad8585fa842785cd4f836c56 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 29 Apr 2024 11:31:37 -0400 Subject: [PATCH 37/60] looks like this actually needs double precision. --- src/aspire/abinitio/commonline_sync3n.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 33cb7a292a..4729bf7785 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -451,7 +451,7 @@ def _triangle_scores_inner_cupy(self, Rijs): ) # d2h - scores_hist = scores_hist_dev.get().astype(self.dtype, copy=False) + scores_hist = scores_hist_dev.get() return scores_hist @@ -610,8 +610,8 @@ def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): ) # accumulate over thread results - ln_f_arb = ln_f_arb_dev.get().astype(self.dtype, copy=False) - ln_f_ind = ln_f_ind_dev.get().astype(self.dtype, copy=False) + ln_f_arb = ln_f_arb_dev.get() + ln_f_ind = ln_f_ind_dev.get() return ln_f_ind, ln_f_arb @@ -959,7 +959,7 @@ def _signs_times_v_cupy(self, Rijs, vec): ) # dtoh - new_vec = new_vec_dev.get().astype(self.dtype, copy=False) + new_vec = new_vec_dev.get() return new_vec From accdc935df83460d946df94da379047cea85fc27 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 29 Apr 2024 13:00:03 -0400 Subject: [PATCH 38/60] fix precision bug in CL sync3n power method. --- src/aspire/abinitio/commonline_sync3n.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 4729bf7785..6793c146ee 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -827,7 +827,8 @@ def _J_sync_power_method(self, Rijs): # Power method iterations while itr < max_iters and residual > epsilon: itr += 1 - vec_new = self._signs_times_v(Rijs, vec) + # Todo, this code code actually needs double precision for accuracy... forcing. + vec_new = self._signs_times_v(Rijs, vec).astype(np.float64, copy=False) vec_new = vec_new / norm(vec_new) residual = norm(vec_new - vec) vec = vec_new From 14dfb1c657efd7330233fee87575a4bb30858857 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 29 Apr 2024 13:50:18 -0400 Subject: [PATCH 39/60] fixup some of the dtypes --- src/aspire/abinitio/commonline_sync3n.cu | 2 +- src/aspire/abinitio/commonline_sync3n.py | 15 ++++++++------- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.cu b/src/aspire/abinitio/commonline_sync3n.cu index aaff3d0e76..99582b3f24 100644 --- a/src/aspire/abinitio/commonline_sync3n.cu +++ b/src/aspire/abinitio/commonline_sync3n.cu @@ -290,7 +290,7 @@ void pairs_probabilities(int n, double* Rijs, double P2, double A, double a, dou extern "C" __global__ -void triangle_scores_inner(int n, double* Rijs, int n_intervals, double* scores_hist) +void triangle_scores_inner(int n, double* Rijs, int n_intervals, unsigned int* scores_hist) { /* thread index (1d), represents "i" index */ unsigned int i = blockDim.x * blockIdx.x + threadIdx.x; diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 6793c146ee..886557630f 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -347,7 +347,7 @@ def _triangle_scores_inner_host(self, Rijs): # The following is adopted from Matlab triangle_scores_mex.c # Initialize probability result arrays - scores_hist = np.zeros(self.hist_intervals, dtype=Rijs.dtype) + scores_hist = np.zeros(self.hist_intervals, dtype=np.uint32) h = 1 / self.hist_intervals c = np.empty((4), dtype=Rijs.dtype) @@ -432,9 +432,10 @@ def _triangle_scores_inner_cupy(self, Rijs): triangle_scores = self._gpu_module.get_function("triangle_scores_inner") - Rijs_dev = cp.array(Rijs) + Rijs_dev = cp.array(Rijs, dtype=np.float64) - scores_hist_dev = cp.zeros((self.hist_intervals), dtype=np.float64) + # This holds integer counts + scores_hist_dev = cp.zeros((self.hist_intervals), dtype=np.uint32) # call the kernel blkszx = 512 @@ -596,7 +597,7 @@ def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): pairs_probabilities = self._gpu_module.get_function("pairs_probabilities") - Rijs_dev = cp.array(Rijs) + Rijs_dev = cp.array(Rijs, dtype=np.float64) ln_f_ind_dev = cp.zeros((self.n_img * (self.n_img - 1) // 2), dtype=np.float64) ln_f_arb_dev = cp.zeros((self.n_img * (self.n_img - 1) // 2), dtype=np.float64) @@ -946,9 +947,9 @@ def _signs_times_v_cupy(self, Rijs, vec): signs_times_v = self._gpu_module.get_function("signs_times_v") - Rijs_dev = cp.array(Rijs) - vec_dev = cp.array(vec) - new_vec_dev = cp.zeros((vec.shape[0])) + Rijs_dev = cp.array(Rijs, dtype=np.float64) + vec_dev = cp.array(vec, dtype=np.float64) + new_vec_dev = cp.zeros((vec.shape[0]), dtype=np.float64) # call the kernel blkszx = 512 From 1af210fcf053a0d2a0113a81759ed1aea40b7f4c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 29 Apr 2024 14:22:24 -0400 Subject: [PATCH 40/60] conditionally run host-gpu comparison --- tests/test_commonline_sync3n_cupy.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/tests/test_commonline_sync3n_cupy.py b/tests/test_commonline_sync3n_cupy.py index 81d967aa8e..e13147f6ae 100644 --- a/tests/test_commonline_sync3n_cupy.py +++ b/tests/test_commonline_sync3n_cupy.py @@ -4,14 +4,15 @@ from aspire.abinitio.commonline_sync3n import CLSync3N from aspire.source import Simulation +# If cupy is not available, skip this entire module +pytest.importorskip("cupy") + + DTYPE = np.float64 # TODO, consider single precision. N = 64 # Number of images n_pairs = N * (N - 1) // 2 -# XXX TODO, conditionally run these only if GPU present. - - @pytest.fixture(scope="module") def src_fixture(): src = Simulation(n=N, L=32, C=1, dtype=DTYPE) From a53cd209f3f1f382c41548cd956952070b6b1488 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 7 May 2024 10:39:17 -0400 Subject: [PATCH 41/60] add MATLAB comparison tests --- tests/test_commonline_sync3n_cupy.py | 107 +++++++++++++++++++++++++++ 1 file changed, 107 insertions(+) diff --git a/tests/test_commonline_sync3n_cupy.py b/tests/test_commonline_sync3n_cupy.py index e13147f6ae..7165d368a7 100644 --- a/tests/test_commonline_sync3n_cupy.py +++ b/tests/test_commonline_sync3n_cupy.py @@ -112,3 +112,110 @@ def test_stvJwt_host_vs_cupy(cl3n_fixture, rijs_fixture): # Compare host to cupy calls np.testing.assert_allclose(new_vec_cp, new_vec_h) + + +@pytest.fixture +def matlab_ref_fixture(): + """ + Setup ASPIRE-Python objects using dummy data that is easily + constructed in MATLAB. + """ + DTYPE = np.float64 + n = 5 + n_pairs = n * (n - 1) // 2 + + # Dummy input vector. + Rijs = np.transpose( + np.arange(1, n_pairs * 3 * 3 + 1, dtype=DTYPE).reshape(n_pairs, 3, 3), (0, 2, 1) + ) + # Equivalent MATLAB + # n=5; np=n*(n-1)/2; rijs= reshape([1:np*3*3],[3,3,np]) + + # Create CL object for testing function calls + src = Simulation(L=8, n=n, C=1, dtype=DTYPE) + cl3n = CLSync3N(src, seed=314, S_weighting=False, J_weighting=False) + + return Rijs, cl3n + + +def test_triangles_scores(matlab_ref_fixture): + """ + Compares output of identical dummy data between this + implementation and legacy MATLAB triangles_scores_mex. + """ + Rijs, cl3n = matlab_ref_fixture + + hist = cl3n._triangle_scores_inner(Rijs) + + # Default is 100 histogram intervals, + # so the histogram reference is compressed. + ref_hist = np.zeros(cl3n.hist_intervals) + # Nonzeros, [[indices, ...], [values, ...]] + ref_compressed = np.array( + [[0, 10, 11, 12, 70, 71, 72, 76, 81, 89], [14, 2, 2, 2, 1, 1, 2, 1, 2, 3]] + ) + # Pack the reference histogram + np.put(ref_hist, *ref_compressed) + + np.testing.assert_allclose(hist, ref_hist) + + +def test_pairs_prob_mex(matlab_ref_fixture): + """ + Compares output of identical dummy data between this + implementation and legacy MATLAB pairs_probabilities_mex. + """ + Rijs, cl3n = matlab_ref_fixture + + params = np.arange(1, 7) + + ln_f_ind, ln_f_arb = cl3n._pairs_probabilities_host(Rijs, *params) + + ref_ln_f_ind = [ + -24.1817, + -5.6554, + 4.9117, + 12.7047, + -12.9374, + -5.5158, + 1.5289, + -9.0406, + -2.2067, + -7.3968, + ] + + ref_ln_f_arb = [ + -17.1264, + -6.7218, + -0.8876, + 3.3437, + -10.7251, + -6.7051, + -2.9029, + -8.5061, + -4.8288, + -7.5608, + ] + + np.testing.assert_allclose(ln_f_arb, ref_ln_f_arb, atol=5e-5) + + np.testing.assert_allclose(ln_f_ind, ref_ln_f_ind, atol=5e-5) + + +def test_signs_times_v_mex(matlab_ref_fixture): + """ + Compares output of identical dummy data between this + implementation and legacy MATLAB signs_times_v. + """ + Rijs, cl3n = matlab_ref_fixture + + # Dummy input vector + vec = np.ones(len(Rijs), dtype=DTYPE) + # Equivalent matlab + # vec=ones([1,np]); + + new_vec = cl3n._signs_times_v(Rijs, vec) + + ref_vec = [0, -2, -2, 0, -6, -4, -2, -2, -2, 0] + + np.testing.assert_allclose(new_vec, ref_vec) From 73e3614d9a73a9419841bb4ecf033bbf404643a6 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 7 May 2024 11:45:13 -0400 Subject: [PATCH 42/60] Allow sync3n methods to run in singles via upcasting --- src/aspire/abinitio/commonline_sync3n.py | 8 ++--- tests/test_commonline_sync3n_cupy.py | 42 +++++++++++++++--------- 2 files changed, 31 insertions(+), 19 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 886557630f..36f251510f 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -611,8 +611,8 @@ def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): ) # accumulate over thread results - ln_f_arb = ln_f_arb_dev.get() - ln_f_ind = ln_f_ind_dev.get() + ln_f_arb = ln_f_arb_dev.get().astype(self.dtype, copy=False) + ln_f_ind = ln_f_ind_dev.get().astype(self.dtype, copy=False) return ln_f_ind, ln_f_arb @@ -857,7 +857,7 @@ def _signs_times_v(self, Rijs, vec): else: new_vec = self._signs_times_v_host(Rijs, vec) - return new_vec + return new_vec.astype(vec.dtype, copy=False) def _signs_times_v_host(self, Rijs, vec): """ @@ -961,7 +961,7 @@ def _signs_times_v_cupy(self, Rijs, vec): ) # dtoh - new_vec = new_vec_dev.get() + new_vec = new_vec_dev.get().astype(vec.dtype, copy=False) return new_vec diff --git a/tests/test_commonline_sync3n_cupy.py b/tests/test_commonline_sync3n_cupy.py index 7165d368a7..9bea14c21f 100644 --- a/tests/test_commonline_sync3n_cupy.py +++ b/tests/test_commonline_sync3n_cupy.py @@ -8,14 +8,19 @@ pytest.importorskip("cupy") -DTYPE = np.float64 # TODO, consider single precision. -N = 64 # Number of images +N = 32 # Number of images n_pairs = N * (N - 1) // 2 +DTYPES = [np.float32, np.float64] + + +@pytest.fixture(scope="module", params=DTYPES, ids=lambda x: f"dtype={x}") +def dtype(request): + return request.param @pytest.fixture(scope="module") -def src_fixture(): - src = Simulation(n=N, L=32, C=1, dtype=DTYPE) +def src_fixture(dtype): + src = Simulation(n=N, L=32, C=1, dtype=dtype) src = src.cache() return src @@ -27,9 +32,8 @@ def cl3n_fixture(src_fixture): @pytest.fixture(scope="module") -def rijs_fixture(): - Rijs = np.arange(n_pairs * 3 * 3).reshape(n_pairs, 3, 3) - Rijs = Rijs.astype(dtype=DTYPE, copy=False) +def rijs_fixture(dtype): + Rijs = np.arange(n_pairs * 3 * 3, dtype=dtype).reshape(n_pairs, 3, 3) return Rijs @@ -50,15 +54,17 @@ def test_pairs_prob_host_vs_cupy(cl3n_fixture, rijs_fixture): indsh, arbh = cl3n_fixture._pairs_probabilities_host(rijs_fixture, *params) # Compare host to cupy calls - np.testing.assert_allclose(indsh, indscp) - np.testing.assert_allclose(arbh, arbcp) + rtol = 1e-07 # np testing default + if rijs_fixture.dtype != np.float64: + rtol = 2e-5 + np.testing.assert_allclose(indsh, indscp, rtol=rtol) + np.testing.assert_allclose(arbh, arbcp, rtol=rtol) def test_triangle_scores_host_vs_cupy(cl3n_fixture, rijs_fixture): """ Compares triangle_scores between host and cupy implementations. """ - # DTYPE is critical here (manually calling private method # Execute CUPY hist_cp = cl3n_fixture._triangle_scores_inner_cupy(rijs_fixture) @@ -77,7 +83,7 @@ def test_stv_host_vs_cupy(cl3n_fixture, rijs_fixture): Default J_weighting=False """ # dummy data vector - vec = np.random.random(n_pairs).astype(dtype=DTYPE, copy=False) + vec = np.ones(n_pairs, dtype=rijs_fixture.dtype) # J_weighting=False assert cl3n_fixture.J_weighting is False @@ -99,7 +105,7 @@ def test_stvJwt_host_vs_cupy(cl3n_fixture, rijs_fixture): Force J_weighting=True """ # dummy data vector - vec = np.random.random(n_pairs).astype(dtype=DTYPE, copy=False) + vec = np.ones(n_pairs, dtype=rijs_fixture.dtype) # J_weighting=True cl3n_fixture.J_weighting = True @@ -111,7 +117,13 @@ def test_stvJwt_host_vs_cupy(cl3n_fixture, rijs_fixture): new_vec_h = cl3n_fixture._signs_times_v_host(rijs_fixture, vec) # Compare host to cupy calls - np.testing.assert_allclose(new_vec_cp, new_vec_h) + rtol = 1e-7 # np testing default + if vec.dtype != np.float64: + rtol = 3e-07 + np.testing.assert_allclose(new_vec_cp, new_vec_h, rtol=rtol) + + +# The following fixture and tests compare against the legacy MATLAB implementation @pytest.fixture @@ -120,7 +132,7 @@ def matlab_ref_fixture(): Setup ASPIRE-Python objects using dummy data that is easily constructed in MATLAB. """ - DTYPE = np.float64 + DTYPE = np.float64 # MATLAB code is doubles only n = 5 n_pairs = n * (n - 1) // 2 @@ -210,7 +222,7 @@ def test_signs_times_v_mex(matlab_ref_fixture): Rijs, cl3n = matlab_ref_fixture # Dummy input vector - vec = np.ones(len(Rijs), dtype=DTYPE) + vec = np.ones(len(Rijs), dtype=Rijs.dtype) # Equivalent matlab # vec=ones([1,np]); From 0a91eece68b003a3696dd30a0a368270d4b13087 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 3 Jul 2024 08:58:34 -0400 Subject: [PATCH 43/60] Update some docstrings --- src/aspire/abinitio/commonline_sync3n.py | 54 ++++++++++++++++++++---- 1 file changed, 45 insertions(+), 9 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 36f251510f..32db9d23e7 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -16,6 +16,15 @@ class CLSync3N(CLOrient3D, SyncVotingMixin): """ Define a class to estimate 3D orientations using common lines Sync3N methods (2017). + + Ido Greenberg, Yoel Shkolnisky, + Common lines modeling for reference free Ab-initio reconstruction in cryo-EM, + Journal of Structural Biology, + Volume 200, Issue 2, + 2017, + Pages 106-117, + ISSN 1047-8477, + https://doi.org/10.1016/j.jsb.2017.09.007. """ # Initialize alternatives @@ -136,13 +145,17 @@ def estimate_rotations(self): # Yield rotations from S self.rotations = self._sync3n_S_to_rot(S, W) - ########################################### - # The hackberries taste like hackberries # - ########################################### + ####################### + # Main Sync3N Methods # + ####################### def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): """ Use eigen decomposition of S to estimate transforms, then project transforms to nearest rotations. + + :param S: Numpy array represeting Synchronization matrix. + :param W: Optional weights array, default `None` is equal weighting of `S`. + :param n_eigs: Optional, number of eigenvalues to compute (min 3). """ if n_eigs < 3: @@ -214,6 +227,9 @@ def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): def _construct_sync3n_matrix(self, Rij): """ Construct sync3n matrix from estimated rotations Rij. + + :param Rij: Numpy array of estimated rotations (all pairs). + :return: Synchronization matrix S, (3*N, 3*N). """ # Initialize S with diag identity blocks @@ -258,6 +274,7 @@ def _syncmatrix_weights( :param max_iterations: Maximum iterations for P estimation. :param min_p_permitted: Small value at which to stop attempting to synchronize P. + :return: Synchronization matrix weights `W`. """ logger.info("Computing synchronization matrix weights.") @@ -327,6 +344,7 @@ def _triangle_scores_inner(self, Rijs): Wrapper for cpu/gpu dispatch. :param Rijs: nchoose2 by 3 by 3 array of rotations. + :return: Histogram of triangle scores. """ # host/gpu dispatch @@ -475,7 +493,7 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): :param B: distribution parameter :param b: distribution parameter :param x0: Initial guess - + :return: (log indicative probabilities, log arbitrary probabilities) """ # These param values are passed to C, force doubles. params = np.array([P2, A, a, B, b, x0], dtype=np.float64) @@ -629,19 +647,23 @@ def _triangle_scores( x0=0.78, ): """ - Computes `triangle_scores`, attempts to fit curve to distribution, and uses estimated distribution to compute `pairs_probabilities`. + Computes `triangle_scores`, attempts to fit curve to + distribution, and uses estimated distribution to compute + `pairs_probabilities`. Default parameters here were taken from those in the MATLAB code, with the original author noting they were found empirically. - :param a: + :param a: distribution parameter :param peak2sigma: empirical relation between the location of the peak of the histigram, and the mean error in the common lines estimations. - :param P: - :param b: + :param P: distribution parameter + :param b: distribution parameter :param x0: Initial guess + :return: Tuple of pairs probabilty Pij and related terms + (P, sigma, Pij, scores_hist) """ Pmin = Pmin or 0 @@ -731,7 +753,17 @@ def _estimate_relative_viewing_directions(self): return Rijs def _global_J_sync(self, Rijs): - """ """ + """ + Apply global J-synchronization. + + Given all pairs of estimated rotation matrices `Rijs` with + arbitrary handedness (J conjugation), attempt to detect and + conjugate entries of `Rijs` such that all rotations have same + handedness. + + :param Rijs: Array of all pairs of rotation matrices + :return: Array of all pairs of J synchronized rotation matrices + """ # Determine relative handedness of Rijs. sign_ij_J = self._J_sync_power_method(Rijs) @@ -746,6 +778,9 @@ def _global_J_sync(self, Rijs): def _estimate_all_Rijs(self, clmatrix): """ Estimate Rijs using the voting method. + + :param clmatrix: Common lines matrix + :return: Estimated rotations """ n_img = self.n_img n_theta = self.n_theta @@ -850,6 +885,7 @@ def _signs_times_v(self, Rijs, vec): :param Rijs: An n-choose-2x3x3 array of estimates of relative rotations :param vec: The current candidate eigenvector of length n-choose-2 from the power method. + :return: New candidate eigenvector. """ # host/gpu dispatch if self._gpu_module: From 51ffdacad72c0dbf3aa539a78b0eaf71674302c8 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 3 Jul 2024 09:14:42 -0400 Subject: [PATCH 44/60] initial add cl sync3n test --- tests/test_commonline_sync3n.py | 101 ++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) create mode 100644 tests/test_commonline_sync3n.py diff --git a/tests/test_commonline_sync3n.py b/tests/test_commonline_sync3n.py new file mode 100644 index 0000000000..f9be7a103d --- /dev/null +++ b/tests/test_commonline_sync3n.py @@ -0,0 +1,101 @@ +import os + +import numpy as np +import pytest + +from aspire.abinitio import CLSync3N +from aspire.source import Simulation +from aspire.utils import mean_aligned_angular_distance, rots_to_clmatrix +from aspire.volume import AsymmetricVolume + +DATA_DIR = os.path.join(os.path.dirname(__file__), "saved_test_data") + +RESOLUTION = [ + 40, + 41, +] + +# `None` defaults to random offsets. +OFFSETS = [ + 0, + # pytest.param(None, marks=pytest.mark.expensive), +] + +DTYPES = [ + # np.float32, + # pytest.param(np.float64, marks=pytest.mark.expensive), + np.float64, +] + + +@pytest.fixture(params=RESOLUTION, ids=lambda x: f"resolution={x}") +def resolution(request): + return request.param + + +@pytest.fixture(params=OFFSETS, ids=lambda x: f"offsets={x}") +def offsets(request): + return request.param + + +@pytest.fixture(params=DTYPES, ids=lambda x: f"dtype={x}") +def dtype(request): + return request.param + + +@pytest.fixture +def source_orientation_objs(resolution, offsets, dtype): + src = Simulation( + n=50, + L=resolution, + vols=AsymmetricVolume(L=resolution, C=1, K=100, seed=0).generate(), + offsets=offsets, + amplitudes=1, + seed=0, + ).cache() + + # # Search for common lines over less shifts for 0 offsets. + # max_shift = 1 / resolution + # shift_step = 1 + # if src.offsets.all() != 0: + # max_shift = 0.20 + # shift_step = 0.25 # Reduce shift steps for non-integer offsets of Simulation. + # orient_est = CLSync3N( + # src, max_shift=max_shift, shift_step=shift_step, mask=False + + # ) + orient_est = CLSync3N(src) + + return src, orient_est + + +def test_build_clmatrix(source_orientation_objs): + src, orient_est = source_orientation_objs + + # Build clmatrix estimate. + orient_est.build_clmatrix() + + gt_clmatrix = rots_to_clmatrix(src.rotations, orient_est.n_theta) + + angle_diffs = abs(orient_est.clmatrix - gt_clmatrix) * 360 / orient_est.n_theta + + # Count number of estimates within 5 degrees of ground truth. + within_5 = np.sum((angle_diffs - 360) % 360 < 5) + + # Check that at least 98% of estimates are within 5 degrees. + tol = 0.98 + if src.offsets.all() != 0: + # Set tolerance to 95% when using nonzero offsets. + tol = 0.95 + assert within_5 / angle_diffs.size > tol + + +def test_estimate_rotations(source_orientation_objs): + src, orient_est = source_orientation_objs + + orient_est.estimate_rotations() + + # Register estimates to ground truth rotations and compute the + # mean angular distance between them (in degrees). + # Assert that mean angular distance is less than 1 degree. + mean_aligned_angular_distance(orient_est.rotations, src.rotations, degree_tol=1) From 4c5102ce745e1882dada18d9658f6d1dfc19fefc Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 3 Jul 2024 10:13:14 -0400 Subject: [PATCH 45/60] add minimal test --- src/aspire/abinitio/commonline_sync3n.py | 8 ++++++- tests/test_commonline_sync3n.py | 29 ++++++++---------------- 2 files changed, 16 insertions(+), 21 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 32db9d23e7..45456f8a70 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -99,7 +99,13 @@ def __init__( self.S_weighting = S_weighting self.J_weighting = J_weighting self._D_null = 1e-13 - self.hist_intervals = hist_intervals + self.hist_intervals = int(hist_intervals) + # Warn if histogram may be too sparse for curve fitting + if self.S_weighting and (src.n < hist_intervals): + logger.warning( + f"`hist_intervals` {hist_intervals} > src.n {src.n}." + " Consider reducing if curve fitting is infeasable." + ) # Auto configure GPU self._gpu_module = None diff --git a/tests/test_commonline_sync3n.py b/tests/test_commonline_sync3n.py index f9be7a103d..119eec2ae1 100644 --- a/tests/test_commonline_sync3n.py +++ b/tests/test_commonline_sync3n.py @@ -18,53 +18,42 @@ # `None` defaults to random offsets. OFFSETS = [ 0, - # pytest.param(None, marks=pytest.mark.expensive), ] DTYPES = [ - # np.float32, + np.float32, # pytest.param(np.float64, marks=pytest.mark.expensive), np.float64, ] -@pytest.fixture(params=RESOLUTION, ids=lambda x: f"resolution={x}") +@pytest.fixture(params=RESOLUTION, ids=lambda x: f"resolution={x}", scope="module") def resolution(request): return request.param -@pytest.fixture(params=OFFSETS, ids=lambda x: f"offsets={x}") +@pytest.fixture(params=OFFSETS, ids=lambda x: f"offsets={x}", scope="module") def offsets(request): return request.param -@pytest.fixture(params=DTYPES, ids=lambda x: f"dtype={x}") +@pytest.fixture(params=DTYPES, ids=lambda x: f"dtype={x}", scope="module") def dtype(request): return request.param -@pytest.fixture +@pytest.fixture(scope="module") def source_orientation_objs(resolution, offsets, dtype): src = Simulation( - n=50, + n=100, L=resolution, - vols=AsymmetricVolume(L=resolution, C=1, K=100, seed=0).generate(), + vols=AsymmetricVolume(L=resolution, C=1, K=100, seed=123).generate(), offsets=offsets, amplitudes=1, - seed=0, + seed=456, ).cache() - # # Search for common lines over less shifts for 0 offsets. - # max_shift = 1 / resolution - # shift_step = 1 - # if src.offsets.all() != 0: - # max_shift = 0.20 - # shift_step = 0.25 # Reduce shift steps for non-integer offsets of Simulation. - # orient_est = CLSync3N( - # src, max_shift=max_shift, shift_step=shift_step, mask=False - - # ) - orient_est = CLSync3N(src) + orient_est = CLSync3N(src, S_weighting=True, seed=789) return src, orient_est From d0c2c0d48331af04f0f0f45bcaec1931adddda51 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 3 Jul 2024 10:20:52 -0400 Subject: [PATCH 46/60] actually test the different dtypes --- tests/test_orient_sync_voting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_orient_sync_voting.py b/tests/test_orient_sync_voting.py index 31d6b20e94..3e875e9467 100644 --- a/tests/test_orient_sync_voting.py +++ b/tests/test_orient_sync_voting.py @@ -52,7 +52,7 @@ def source_orientation_objs(resolution, offsets, dtype): src = Simulation( n=50, L=resolution, - vols=AsymmetricVolume(L=resolution, C=1, K=100, seed=0).generate(), + vols=AsymmetricVolume(L=resolution, C=1, K=100, seed=0, dtype=dtype).generate(), offsets=offsets, amplitudes=1, seed=0, From 52a099e2a91c0a5ad87da14a4d0448e8ac5e8eda Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 3 Jul 2024 10:24:05 -0400 Subject: [PATCH 47/60] mark float64 and odd sync3n as expensive --- tests/test_commonline_sync3n.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/test_commonline_sync3n.py b/tests/test_commonline_sync3n.py index 119eec2ae1..6640fa871f 100644 --- a/tests/test_commonline_sync3n.py +++ b/tests/test_commonline_sync3n.py @@ -12,18 +12,16 @@ RESOLUTION = [ 40, - 41, + pytest.param(41, marks=pytest.mark.expensive), ] -# `None` defaults to random offsets. OFFSETS = [ 0, ] DTYPES = [ np.float32, - # pytest.param(np.float64, marks=pytest.mark.expensive), - np.float64, + pytest.param(np.float64, marks=pytest.mark.expensive), ] @@ -47,7 +45,9 @@ def source_orientation_objs(resolution, offsets, dtype): src = Simulation( n=100, L=resolution, - vols=AsymmetricVolume(L=resolution, C=1, K=100, seed=123).generate(), + vols=AsymmetricVolume( + L=resolution, C=1, K=100, seed=123, dtype=dtype + ).generate(), offsets=offsets, amplitudes=1, seed=456, From 1124033c904e9c769f364aa60ab5dc330372cbfe Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 19 Jul 2024 08:45:47 -0400 Subject: [PATCH 48/60] first pass addressing review remarks --- src/aspire/abinitio/commonline_sync3n.py | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 45456f8a70..4c06d882d9 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -122,7 +122,7 @@ def __init__( logger.info("GPU not found, defaulting to numpy.") except ModuleNotFoundError: - logger.info("cupy not found, defaulting numpy.") + logger.info("cupy not found, defaulting to numpy.") ########################################### # High level algorithm steps # @@ -159,7 +159,7 @@ def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): Use eigen decomposition of S to estimate transforms, then project transforms to nearest rotations. - :param S: Numpy array represeting Synchronization matrix. + :param S: Numpy array representing Synchronization matrix. :param W: Optional weights array, default `None` is equal weighting of `S`. :param n_eigs: Optional, number of eigenvalues to compute (min 3). """ @@ -177,7 +177,7 @@ def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): f" Received {W.shape}." ) # Initialize D - D = np.mean(W, axis=1) # D, check axis + D = np.mean(W, axis=1) Dhalf = D # Compute mask of trouble D values @@ -197,10 +197,10 @@ def _sync3n_S_to_rot(self, S, W=None, n_eigs=4): logger.warning(f"Large Weights Matrix Normalization Error: {err}") # Make W of size 3Nx3N - W = np.kron(W, np.ones((3, 3))) + W = np.kron(W, np.ones((3, 3), dtype=self.dtype)) # Make Dhalf of size 3Nx3N - Dhalf = np.diag(np.kron(np.diag(Dhalf), np.ones((1, 3)))[0]) + Dhalf = np.diag(np.kron(np.diag(Dhalf), np.ones(3, dtype=self.dtype))) # Apply weights to S S = Dhalf @ (W * S) @ Dhalf @@ -333,7 +333,7 @@ def _body(prev_too_low, Pmin, Pmax, hist, p_domain_limit=p_domain_limit): i += 1 # Pack W - W = np.zeros((self.n_img, self.n_img)) + W = np.zeros((self.n_img, self.n_img), dtype=self.dtype) idx = 0 for i in range(self.n_img): for j in range(i + 1, self.n_img): @@ -375,7 +375,7 @@ def _triangle_scores_inner_host(self, Rijs): h = 1 / self.hist_intervals c = np.empty((4), dtype=Rijs.dtype) - for i in trange(self.n_img, desc="Computing triangle scores"): + for i in trange(self.n_img - 2, desc="Computing triangle scores"): for j in range( i + 1, self.n_img - 1 ): # check bound (taken from MATLAB mex) @@ -525,7 +525,7 @@ def _pairs_probabilities_host(self, Rijs, P2, A, a, B, b, x0): ln_f_arb = np.zeros(len(Rijs), dtype=Rijs.dtype) c = np.empty((4), dtype=Rijs.dtype) - for i in trange(self.n_img, desc="Computing pair probabilities"): + for i in trange(self.n_img - 2, desc="Computing pair probabilities"): for j in range(i + 1, self.n_img - 1): ij = self._pairs_to_linear[i, j] Rij = Rijs[ij] @@ -715,8 +715,7 @@ def fun(x, B, P, b, x0, A=A, a=a): # Derive P and sigma P = P ** (1 / 3) - peak = x0 # can rm later - sigma = (1 - peak) / peak2sigma + sigma = (1 - x0) / peak2sigma # Initialize probability computations # Local histograms analysis @@ -918,7 +917,7 @@ def _signs_times_v_host(self, Rijs, vec): desc = "Computing signs_times_v" if self.J_weighting: desc += " with J_weighting" - for i in trange(self.n_img, desc=desc): + for i in trange(self.n_img - 2, desc=desc): for j in range( i + 1, self.n_img - 1 ): # check bound (taken from MATLAB mex) From 61c6a89ef1f246ee78242ea57bc5f3f15f7a8307 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Fri, 19 Jul 2024 08:51:42 -0400 Subject: [PATCH 49/60] move initial rotation estimate lines into estimate_rotations --- src/aspire/abinitio/commonline_sync3n.py | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 4c06d882d9..77f964c523 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -134,8 +134,14 @@ def estimate_rotations(self): :return: Array of rotation matrices, size n_imgx3x3. """ + logger.info(f"Estimating relative viewing directions for {self.n_img} images.") + + # Detect a single pair of common-lines between each pair of images + self.build_clmatrix() + # Initial estimate of viewing directions - Rijs0 = self._estimate_relative_viewing_directions() + # Calculate relative rotations + Rijs0 = self._estimate_all_Rijs(self.clmatrix) # Compute and apply global handedness Rijs = self._global_J_sync(Rijs0) @@ -743,20 +749,6 @@ def fun(x, B, P, b, x0, A=A, a=a): # Primary Methods # ########################################### - def _estimate_relative_viewing_directions(self): - """ - Estimate the relative viewing directions vij = vi*vj^T, i Date: Fri, 9 Aug 2024 14:32:26 -0400 Subject: [PATCH 50/60] important progress bar --- src/aspire/abinitio/commonline_sync3n.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 77f964c523..8369d50045 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -6,7 +6,7 @@ from scipy.optimize import curve_fit from aspire.abinitio import CLOrient3D, SyncVotingMixin -from aspire.utils import J_conjugate, all_pairs, nearest_rotations, trange +from aspire.utils import J_conjugate, all_pairs, nearest_rotations, tqdm, trange from aspire.utils.matlab_compat import stable_eigsh from aspire.utils.random import randn @@ -783,7 +783,7 @@ def _estimate_all_Rijs(self, clmatrix): n_theta = self.n_theta Rijs = np.zeros((len(self._pairs), 3, 3)) - for idx, (i, j) in enumerate(self._pairs): + for idx, (i, j) in enumerate(tqdm(self._pairs, desc="Estimate Rijs")): Rijs[idx] = self._syncmatrix_ij_vote_3n( clmatrix, i, j, np.arange(n_img), n_theta ) From d4bf0bb2af02d66f38265da40904976b42388890 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 13 Aug 2024 15:37:49 -0400 Subject: [PATCH 51/60] Use trust region method for S weight least squares --- src/aspire/abinitio/commonline_sync3n.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 8369d50045..5c730e7aa2 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -716,6 +716,7 @@ def fun(x, B, P, b, x0, A=A, a=a): scores_hist.astype(np.float64, copy=False), p0=start_values, bounds=(lower_bounds, upper_bounds), + method="trf", # MATLAB used method "LAR" with algo "Trust-Region" ) B, P, b, x0 = popt From e1774869ed195a92359c3493f7f28de3cbc9986e Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 19 Aug 2024 09:10:18 -0400 Subject: [PATCH 52/60] use class mangled names for gpu methods --- src/aspire/abinitio/commonline_sync3n.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 5c730e7aa2..e8149a4921 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -108,7 +108,7 @@ def __init__( ) # Auto configure GPU - self._gpu_module = None + self.__gpu_module = None try: import cupy as cp @@ -117,7 +117,7 @@ def __init__( logger.info( f"cupy and GPU {gpu_id} found by cuda runtime; enabling cupy." ) - self._gpu_module = self._init_cupy_module() + self.__gpu_module = self.__init_cupy_module() else: logger.info("GPU not found, defaulting to numpy.") @@ -360,7 +360,7 @@ def _triangle_scores_inner(self, Rijs): """ # host/gpu dispatch - if self._gpu_module: + if self.__gpu_module: scores_hist = self._triangle_scores_inner_cupy(Rijs) else: scores_hist = self._triangle_scores_inner_host(Rijs) @@ -460,7 +460,7 @@ def _triangle_scores_inner_cupy(self, Rijs): import cupy as cp - triangle_scores = self._gpu_module.get_function("triangle_scores_inner") + triangle_scores = self.__gpu_module.get_function("triangle_scores_inner") Rijs_dev = cp.array(Rijs, dtype=np.float64) @@ -511,7 +511,7 @@ def _pairs_probabilities(self, Rijs, P2, A, a, B, b, x0): params = np.array([P2, A, a, B, b, x0], dtype=np.float64) # host/gpu dispatch - if self._gpu_module: + if self.__gpu_module: ln_f_ind, ln_f_arb = self._pairs_probabilities_cupy(Rijs, *params) else: ln_f_ind, ln_f_arb = self._pairs_probabilities_host(Rijs, *params) @@ -625,7 +625,7 @@ def _pairs_probabilities_cupy(self, Rijs, P2, A, a, B, b, x0): import cupy as cp - pairs_probabilities = self._gpu_module.get_function("pairs_probabilities") + pairs_probabilities = self.__gpu_module.get_function("pairs_probabilities") Rijs_dev = cp.array(Rijs, dtype=np.float64) ln_f_ind_dev = cp.zeros((self.n_img * (self.n_img - 1) // 2), dtype=np.float64) @@ -886,7 +886,7 @@ def _signs_times_v(self, Rijs, vec): :return: New candidate eigenvector. """ # host/gpu dispatch - if self._gpu_module: + if self.__gpu_module: new_vec = self._signs_times_v_cupy(Rijs, vec) else: new_vec = self._signs_times_v_host(Rijs, vec) @@ -979,7 +979,7 @@ def _signs_times_v_cupy(self, Rijs, vec): """ import cupy as cp - signs_times_v = self._gpu_module.get_function("signs_times_v") + signs_times_v = self.__gpu_module.get_function("signs_times_v") Rijs_dev = cp.array(Rijs, dtype=np.float64) vec_dev = cp.array(vec, dtype=np.float64) @@ -1000,7 +1000,7 @@ def _signs_times_v_cupy(self, Rijs, vec): return new_vec @staticmethod - def _init_cupy_module(): + def __init_cupy_module(): """ Private utility method to read in CUDA source and return as compiled CUPY module. From 8a2cd5315f33f4b0dfda41cbb2e9bb1b8c3c8477 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Mon, 19 Aug 2024 09:11:32 -0400 Subject: [PATCH 53/60] typo --- src/aspire/abinitio/commonline_sync3n.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aspire/abinitio/commonline_sync3n.cu b/src/aspire/abinitio/commonline_sync3n.cu index 99582b3f24..eeaee723b9 100644 --- a/src/aspire/abinitio/commonline_sync3n.cu +++ b/src/aspire/abinitio/commonline_sync3n.cu @@ -1,5 +1,5 @@ -/* from i,j indoces to the common index in the N-choose-2 sized array */ +/* from i,j indices to the common index in the N-choose-2 sized array */ #define PAIR_IDX(N,I,J) ((2*N-I-1)*I/2 + J-I-1) From a89fb8f22678b637564c6d9d959ddc4fe0bfa847 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 27 Aug 2024 09:53:55 -0400 Subject: [PATCH 54/60] Add disable_gpu sync3n flag --- src/aspire/abinitio/commonline_sync3n.py | 31 ++++++++++++++---------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index e8149a4921..5e34491d42 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -57,6 +57,7 @@ def __init__( S_weighting=False, J_weighting=False, hist_intervals=100, + disable_gpu=False, ): """ Initialize object for estimating 3D orientations. @@ -77,6 +78,9 @@ def __init__( signs when computing `signs_times_v`. :param hist_intervals: Number of histogram bins used to compute triangle scores when `S_weighting` enabled. + :param disable_gpu: Disables GPU acceleration; + forces CPU only code for this module. + Defaults to automatically using GPU when available. """ super().__init__( @@ -109,20 +113,21 @@ def __init__( # Auto configure GPU self.__gpu_module = None - try: - import cupy as cp - - if cp.cuda.runtime.getDeviceCount() >= 1: - gpu_id = cp.cuda.runtime.getDevice() - logger.info( - f"cupy and GPU {gpu_id} found by cuda runtime; enabling cupy." - ) - self.__gpu_module = self.__init_cupy_module() - else: - logger.info("GPU not found, defaulting to numpy.") + if not disable_gpu: + try: + import cupy as cp + + if cp.cuda.runtime.getDeviceCount() >= 1: + gpu_id = cp.cuda.runtime.getDevice() + logger.info( + f"cupy and GPU {gpu_id} found by cuda runtime; enabling cupy." + ) + self.__gpu_module = self.__init_cupy_module() + else: + logger.info("GPU not found, defaulting to numpy.") - except ModuleNotFoundError: - logger.info("cupy not found, defaulting to numpy.") + except ModuleNotFoundError: + logger.info("cupy not found, defaulting to numpy.") ########################################### # High level algorithm steps # From 3d8da44468b2b11f3a3d78d2deb69124daaa676c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 27 Aug 2024 10:02:34 -0400 Subject: [PATCH 55/60] P->W typo --- src/aspire/abinitio/commonline_sync3n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 5e34491d42..e2186a5bc6 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -277,7 +277,7 @@ def _syncmatrix_weights( ): """ Given relative rotations matrix `Rij`, - compute and return probability weights `P` for S. + compute and return probability weights `W` for S. Default parameters here were taken from those in the MATLAB code, with the original author noting they were found From ff56876b5040f53205b8aa08867101ec2b69fac3 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 27 Aug 2024 10:06:14 -0400 Subject: [PATCH 56/60] use more specific language instead of resolution --- src/aspire/abinitio/commonline_sync3n.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index e2186a5bc6..3b02bb8d2d 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -65,8 +65,8 @@ def __init__( :param src: The source object of 2D denoised or class-averaged images with metadata :param n_rad: The number of points in the radial direction :param n_theta: The number of points in the theta direction - :param max_shift: Maximum range for shifts as a proportion of resolution. Default = 0.15. - :param shift_step: Resolution of shift estimation in pixels. Default = 1 pixel. + :param max_shift: Maximum range for shifts as a proportion of box size. Default = 0.15. + :param shift_step: Step size of shift estimation in pixels. Default = 1 pixel. :param epsilon: Tolerance for the power method. :param max_iter: Maximum iterations for the power method. :param seed: Optional seed for RNG. From a7f77b9d43641a24706298a80a50882d890a446c Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 27 Aug 2024 10:53:40 -0400 Subject: [PATCH 57/60] Replace histogram logic --- src/aspire/abinitio/commonline_sync3n.py | 29 +++++++----------------- 1 file changed, 8 insertions(+), 21 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 3b02bb8d2d..e3b1c50edc 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -383,9 +383,9 @@ def _triangle_scores_inner_host(self, Rijs): # Initialize probability result arrays scores_hist = np.zeros(self.hist_intervals, dtype=np.uint32) - h = 1 / self.hist_intervals c = np.empty((4), dtype=Rijs.dtype) + s = np.empty((3), dtype=Rijs.dtype) for i in trange(self.n_img - 2, desc="Computing triangle scores"): for j in range( i + 1, self.n_img - 1 @@ -427,28 +427,15 @@ def _triangle_scores_inner_host(self, Rijs): alt_ij_ik = c[self._ALTS[1][best_i][2]] # Compute scores - s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) - s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) - s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) + s[0] = 1 - np.sqrt(best_val / alt_ij_jk) # s_ij_jk + s[1] = 1 - np.sqrt(best_val / alt_ik_jk) # s_ik_jk + s[2] = 1 - np.sqrt(best_val / alt_ij_ik) # s_ij_ik # Update histogram - threshold = 0 - for _l1 in range(self.hist_intervals - 1): - threshold += h - if s_ij_jk < threshold: - break - - threshold = 0 - for _l2 in range(self.hist_intervals - 1): - threshold += h - if s_ik_jk < threshold: - break - - threshold = 0 - for _l3 in range(self.hist_intervals - 1): - threshold += h - if s_ij_ik < threshold: - break + # Find integer bin [0,self.hist_intervals) + _l1, _l2, _l3 = np.minimum( + (self.hist_intervals * s).astype(int), # implicit floor + self.hist_intervals-1) # clamp upper bound scores_hist[_l1] += 1 scores_hist[_l2] += 1 From bd34d3d7dd3bdd9d1c432046437395124432e483 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 27 Aug 2024 11:27:51 -0400 Subject: [PATCH 58/60] factor out sync3n score body --- src/aspire/abinitio/commonline_sync3n.py | 126 ++++++++++------------- 1 file changed, 56 insertions(+), 70 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index e3b1c50edc..e0ed1514e0 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -372,6 +372,54 @@ def _triangle_scores_inner(self, Rijs): return scores_hist + def _scores_inner_body(self, Rijs, c, s, i, j, k): + """ + Private method to compute scores `s` + given rotations `Rijs` and indices `i`, `j`, `k`. + + Note arrays `Rijs`, `c`, and `s` are passed by reference from caller. + """ + + ij = self._pairs_to_linear[i, j] + ik = self._pairs_to_linear[i, k] + jk = self._pairs_to_linear[j, k] + Rij = Rijs[ij] + Rik = Rijs[ik] + Rjk = Rijs[jk] + + # Compute conjugated rots + Rij_J = J_conjugate(Rij) + Rik_J = J_conjugate(Rik) + Rjk_J = J_conjugate(Rjk) + + # Compute R muls and norms + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + + # Find best match + best_i = np.argmin(c) + best_val = c[best_i] + + # For each triangle side, find the best alternative + alt_ij_jk = c[self._ALTS[0][best_i][0]] + if c[self._ALTS[1][best_i][0]] < alt_ij_jk: + alt_ij_jk = c[self._ALTS[1][best_i][0]] + + alt_ik_jk = c[self._ALTS[0][best_i][1]] + if c[self._ALTS[1][best_i][1]] < alt_ik_jk: + alt_ik_jk = c[self._ALTS[1][best_i][1]] + + alt_ij_ik = c[self._ALTS[0][best_i][2]] + if c[self._ALTS[1][best_i][2]] < alt_ij_ik: + alt_ij_ik = c[self._ALTS[1][best_i][2]] + + # Compute scores + s[0] = 1 - np.sqrt(best_val / alt_ij_jk) # s_ij_jk + s[1] = 1 - np.sqrt(best_val / alt_ik_jk) # s_ik_jk + s[2] = 1 - np.sqrt(best_val / alt_ij_ik) # s_ij_ik + def _triangle_scores_inner_host(self, Rijs): """ See _triangle_scores_inner. @@ -390,52 +438,17 @@ def _triangle_scores_inner_host(self, Rijs): for j in range( i + 1, self.n_img - 1 ): # check bound (taken from MATLAB mex) - ij = self._pairs_to_linear[i, j] - Rij = Rijs[ij] for k in range(j + 1, self.n_img): - ik = self._pairs_to_linear[i, k] - jk = self._pairs_to_linear[j, k] - Rik = Rijs[ik] - Rjk = Rijs[jk] - - # Compute conjugated rotats - Rij_J = J_conjugate(Rij) - Rik_J = J_conjugate(Rik) - Rjk_J = J_conjugate(Rjk) - - # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) - - # Find best match - best_i = np.argmin(c) - best_val = c[best_i] - - # For each triangle side, find the best alternative - alt_ij_jk = c[self._ALTS[0][best_i][0]] - if c[self._ALTS[1][best_i][0]] < alt_ij_jk: - alt_ij_jk = c[self._ALTS[1][best_i][0]] - - alt_ik_jk = c[self._ALTS[0][best_i][1]] - if c[self._ALTS[1][best_i][1]] < alt_ik_jk: - alt_ik_jk = c[self._ALTS[1][best_i][1]] - - alt_ij_ik = c[self._ALTS[0][best_i][2]] - if c[self._ALTS[1][best_i][2]] < alt_ij_ik: - alt_ij_ik = c[self._ALTS[1][best_i][2]] # Compute scores - s[0] = 1 - np.sqrt(best_val / alt_ij_jk) # s_ij_jk - s[1] = 1 - np.sqrt(best_val / alt_ik_jk) # s_ik_jk - s[2] = 1 - np.sqrt(best_val / alt_ij_ik) # s_ij_ik + self._scores_inner_body(Rijs, c, s, i, j, k) # Update histogram # Find integer bin [0,self.hist_intervals) _l1, _l2, _l3 = np.minimum( (self.hist_intervals * s).astype(int), # implicit floor - self.hist_intervals-1) # clamp upper bound + self.hist_intervals - 1, + ) # clamp upper bound scores_hist[_l1] += 1 scores_hist[_l2] += 1 @@ -523,46 +536,19 @@ def _pairs_probabilities_host(self, Rijs, P2, A, a, B, b, x0): ln_f_arb = np.zeros(len(Rijs), dtype=Rijs.dtype) c = np.empty((4), dtype=Rijs.dtype) + s = np.empty((3), dtype=Rijs.dtype) for i in trange(self.n_img - 2, desc="Computing pair probabilities"): for j in range(i + 1, self.n_img - 1): ij = self._pairs_to_linear[i, j] - Rij = Rijs[ij] for k in range(j + 1, self.n_img): ik = self._pairs_to_linear[i, k] jk = self._pairs_to_linear[j, k] - Rik = Rijs[ik] - Rjk = Rijs[jk] - - # Compute conjugated rotats - Rij_J = J_conjugate(Rij) - Rik_J = J_conjugate(Rik) - Rjk_J = J_conjugate(Rjk) - - # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) - - # Find best match - best_i = np.argmin(c) - best_val = c[best_i] - - # For each triangle side, find the best alternative - alt_ij_jk = c[self._ALTS[0][best_i][0]] - if c[self._ALTS[1][best_i][0]] < alt_ij_jk: - alt_ij_jk = c[self._ALTS[1][best_i][0]] - alt_ik_jk = c[self._ALTS[0][best_i][1]] - if c[self._ALTS[1][best_i][1]] < alt_ik_jk: - alt_ik_jk = c[self._ALTS[1][best_i][1]] - alt_ij_ik = c[self._ALTS[0][best_i][2]] - if c[self._ALTS[1][best_i][2]] < alt_ij_ik: - alt_ij_ik = c[self._ALTS[1][best_i][2]] # Compute scores - s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) - s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) - s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) + self._scores_inner_body(Rijs, c, s, i, j, k) + + # Unpack scores to local formula vars + s_ij_jk, s_ik_jk, s_ij_ik = s # Update probabilities # # Probability of pair ij having score given indicicative common line From 068ec9afb58370c1c3649b8bdcb7cf446ada021e Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 28 Aug 2024 08:01:31 -0400 Subject: [PATCH 59/60] Revert "factor out sync3n score body" This reverts commit bd34d3d7dd3bdd9d1c432046437395124432e483. --- src/aspire/abinitio/commonline_sync3n.py | 126 +++++++++++++---------- 1 file changed, 70 insertions(+), 56 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index e0ed1514e0..e3b1c50edc 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -372,54 +372,6 @@ def _triangle_scores_inner(self, Rijs): return scores_hist - def _scores_inner_body(self, Rijs, c, s, i, j, k): - """ - Private method to compute scores `s` - given rotations `Rijs` and indices `i`, `j`, `k`. - - Note arrays `Rijs`, `c`, and `s` are passed by reference from caller. - """ - - ij = self._pairs_to_linear[i, j] - ik = self._pairs_to_linear[i, k] - jk = self._pairs_to_linear[j, k] - Rij = Rijs[ij] - Rik = Rijs[ik] - Rjk = Rijs[jk] - - # Compute conjugated rots - Rij_J = J_conjugate(Rij) - Rik_J = J_conjugate(Rik) - Rjk_J = J_conjugate(Rjk) - - # Compute R muls and norms - c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) - c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) - c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) - c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) - - # Find best match - best_i = np.argmin(c) - best_val = c[best_i] - - # For each triangle side, find the best alternative - alt_ij_jk = c[self._ALTS[0][best_i][0]] - if c[self._ALTS[1][best_i][0]] < alt_ij_jk: - alt_ij_jk = c[self._ALTS[1][best_i][0]] - - alt_ik_jk = c[self._ALTS[0][best_i][1]] - if c[self._ALTS[1][best_i][1]] < alt_ik_jk: - alt_ik_jk = c[self._ALTS[1][best_i][1]] - - alt_ij_ik = c[self._ALTS[0][best_i][2]] - if c[self._ALTS[1][best_i][2]] < alt_ij_ik: - alt_ij_ik = c[self._ALTS[1][best_i][2]] - - # Compute scores - s[0] = 1 - np.sqrt(best_val / alt_ij_jk) # s_ij_jk - s[1] = 1 - np.sqrt(best_val / alt_ik_jk) # s_ik_jk - s[2] = 1 - np.sqrt(best_val / alt_ij_ik) # s_ij_ik - def _triangle_scores_inner_host(self, Rijs): """ See _triangle_scores_inner. @@ -438,17 +390,52 @@ def _triangle_scores_inner_host(self, Rijs): for j in range( i + 1, self.n_img - 1 ): # check bound (taken from MATLAB mex) + ij = self._pairs_to_linear[i, j] + Rij = Rijs[ij] for k in range(j + 1, self.n_img): + ik = self._pairs_to_linear[i, k] + jk = self._pairs_to_linear[j, k] + Rik = Rijs[ik] + Rjk = Rijs[jk] + + # Compute conjugated rotats + Rij_J = J_conjugate(Rij) + Rik_J = J_conjugate(Rik) + Rjk_J = J_conjugate(Rjk) + + # Compute R muls and norms + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) + + # Find best match + best_i = np.argmin(c) + best_val = c[best_i] + + # For each triangle side, find the best alternative + alt_ij_jk = c[self._ALTS[0][best_i][0]] + if c[self._ALTS[1][best_i][0]] < alt_ij_jk: + alt_ij_jk = c[self._ALTS[1][best_i][0]] + + alt_ik_jk = c[self._ALTS[0][best_i][1]] + if c[self._ALTS[1][best_i][1]] < alt_ik_jk: + alt_ik_jk = c[self._ALTS[1][best_i][1]] + + alt_ij_ik = c[self._ALTS[0][best_i][2]] + if c[self._ALTS[1][best_i][2]] < alt_ij_ik: + alt_ij_ik = c[self._ALTS[1][best_i][2]] # Compute scores - self._scores_inner_body(Rijs, c, s, i, j, k) + s[0] = 1 - np.sqrt(best_val / alt_ij_jk) # s_ij_jk + s[1] = 1 - np.sqrt(best_val / alt_ik_jk) # s_ik_jk + s[2] = 1 - np.sqrt(best_val / alt_ij_ik) # s_ij_ik # Update histogram # Find integer bin [0,self.hist_intervals) _l1, _l2, _l3 = np.minimum( (self.hist_intervals * s).astype(int), # implicit floor - self.hist_intervals - 1, - ) # clamp upper bound + self.hist_intervals-1) # clamp upper bound scores_hist[_l1] += 1 scores_hist[_l2] += 1 @@ -536,19 +523,46 @@ def _pairs_probabilities_host(self, Rijs, P2, A, a, B, b, x0): ln_f_arb = np.zeros(len(Rijs), dtype=Rijs.dtype) c = np.empty((4), dtype=Rijs.dtype) - s = np.empty((3), dtype=Rijs.dtype) for i in trange(self.n_img - 2, desc="Computing pair probabilities"): for j in range(i + 1, self.n_img - 1): ij = self._pairs_to_linear[i, j] + Rij = Rijs[ij] for k in range(j + 1, self.n_img): ik = self._pairs_to_linear[i, k] jk = self._pairs_to_linear[j, k] + Rik = Rijs[ik] + Rjk = Rijs[jk] - # Compute scores - self._scores_inner_body(Rijs, c, s, i, j, k) + # Compute conjugated rotats + Rij_J = J_conjugate(Rij) + Rik_J = J_conjugate(Rik) + Rjk_J = J_conjugate(Rjk) + + # Compute R muls and norms + c[0] = np.sum(((Rij @ Rjk) - Rik) ** 2) + c[1] = np.sum(((Rij_J @ Rjk) - Rik) ** 2) + c[2] = np.sum(((Rij @ Rjk_J) - Rik) ** 2) + c[3] = np.sum(((Rij @ Rjk) - Rik_J) ** 2) - # Unpack scores to local formula vars - s_ij_jk, s_ik_jk, s_ij_ik = s + # Find best match + best_i = np.argmin(c) + best_val = c[best_i] + + # For each triangle side, find the best alternative + alt_ij_jk = c[self._ALTS[0][best_i][0]] + if c[self._ALTS[1][best_i][0]] < alt_ij_jk: + alt_ij_jk = c[self._ALTS[1][best_i][0]] + alt_ik_jk = c[self._ALTS[0][best_i][1]] + if c[self._ALTS[1][best_i][1]] < alt_ik_jk: + alt_ik_jk = c[self._ALTS[1][best_i][1]] + alt_ij_ik = c[self._ALTS[0][best_i][2]] + if c[self._ALTS[1][best_i][2]] < alt_ij_ik: + alt_ij_ik = c[self._ALTS[1][best_i][2]] + + # Compute scores + s_ij_jk = 1 - np.sqrt(best_val / alt_ij_jk) + s_ik_jk = 1 - np.sqrt(best_val / alt_ik_jk) + s_ij_ik = 1 - np.sqrt(best_val / alt_ij_ik) # Update probabilities # # Probability of pair ij having score given indicicative common line From f094f03fbc63c6cc7be6cb1de2313c0d65e50002 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 28 Aug 2024 08:04:05 -0400 Subject: [PATCH 60/60] black style --- src/aspire/abinitio/commonline_sync3n.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index e3b1c50edc..3c40eb3ac5 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -435,7 +435,8 @@ def _triangle_scores_inner_host(self, Rijs): # Find integer bin [0,self.hist_intervals) _l1, _l2, _l3 = np.minimum( (self.hist_intervals * s).astype(int), # implicit floor - self.hist_intervals-1) # clamp upper bound + self.hist_intervals - 1, + ) # clamp upper bound scores_hist[_l1] += 1 scores_hist[_l2] += 1