From 8ca54f15955245e58fb4ff082d7b12604e476246 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 1 Oct 2024 12:04:31 -0400 Subject: [PATCH 1/5] typo in B0 formula --- src/aspire/abinitio/commonline_sync3n.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index db1dc69c12..7ac94b1eb2 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -714,7 +714,7 @@ def _triangle_scores( * (a + 1) ) # normalization of 2nd component: B = P*N_delta/sum(f), where f is the component formula - B0 = P ** (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) / np.sum( + B0 = P * (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) / np.sum( ((1 - hist_x) ** b) * np.exp(-b / (1 - x0) * (1 - hist_x)) ) start_values = np.array([B0, P, b, x0], dtype=np.float64) From 5837e4a3fe1fb2163c87e15e680560102ae7c9ff Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 1 Oct 2024 12:09:04 -0400 Subject: [PATCH 2/5] log diagnotics --- src/aspire/abinitio/commonline_sync3n.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 7ac94b1eb2..3c36f28339 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -714,8 +714,10 @@ def _triangle_scores( * (a + 1) ) # normalization of 2nd component: B = P*N_delta/sum(f), where f is the component formula - B0 = P * (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) / np.sum( - ((1 - hist_x) ** b) * np.exp(-b / (1 - x0) * (1 - hist_x)) + B0 = ( + P + * (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) + / np.sum(((1 - hist_x) ** b) * np.exp(-b / (1 - x0) * (1 - hist_x))) ) start_values = np.array([B0, P, b, x0], dtype=np.float64) lower_bounds = np.array([0, Pmin**3, 2, 0], dtype=np.float64) @@ -742,6 +744,8 @@ def fun(x, B, P, b, x0, A=A, a=a): P = P ** (1 / 3) sigma = (1 - x0) / peak2sigma + logger.info(f"Estimated CL Errors P,STD:\t{100*P}%\t{sigma}") + # Initialize probability computations # Local histograms analysis A = a + 1 # distribution 1st component normalization factor @@ -768,6 +772,10 @@ def fun(x, B, P, b, x0, A=A, a=a): ) Pij = np.nan_to_num(Pij) + logger.info( + f"Common lines probabilities to be indicative Pij={100*np.mean(Pij)}%" + ) + return P, sigma, Pij, scores_hist ########################################### From b8d4b8911434dd5cfce6407e719efa9c9a76cffe Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Tue, 1 Oct 2024 16:25:24 -0400 Subject: [PATCH 3/5] sanity check curve_fit start values better diagnostics --- src/aspire/abinitio/commonline_sync3n.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 3c36f28339..7d1bd79cbc 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -719,10 +719,18 @@ def _triangle_scores( * (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) / np.sum(((1 - hist_x) ** b) * np.exp(-b / (1 - x0) * (1 - hist_x))) ) - start_values = np.array([B0, P, b, x0], dtype=np.float64) + # P must be in lower and upper bounds or `curve_fit` will error + # This was not the case for MATLAB... + P0 = np.clip(P, Pmin**3, Pmax**3) + start_values = np.array([B0, P0, b, x0], dtype=np.float64) lower_bounds = np.array([0, Pmin**3, 2, 0], dtype=np.float64) upper_bounds = np.array([np.inf, Pmax**3, np.inf, 1], dtype=np.float64) + with np.printoptions(precision=2): + logger.info(f"curve_fit lower_bounds:{lower_bounds}") + logger.info(f"curve_fit start_values:{start_values}") + logger.info(f"curve_fit upper_bounds:{upper_bounds}") + # Fit distribution def fun(x, B, P, b, x0, A=A, a=a): """Function to fit. x is data vector.""" @@ -744,7 +752,7 @@ def fun(x, B, P, b, x0, A=A, a=a): P = P ** (1 / 3) sigma = (1 - x0) / peak2sigma - logger.info(f"Estimated CL Errors P,STD:\t{100*P}%\t{sigma}") + logger.info(f"Estimated CL Errors P,STD:\t{100*P:.2f}%\t{sigma:.2f}") # Initialize probability computations # Local histograms analysis @@ -773,7 +781,7 @@ def fun(x, B, P, b, x0, A=A, a=a): Pij = np.nan_to_num(Pij) logger.info( - f"Common lines probabilities to be indicative Pij={100*np.mean(Pij)}%" + f"Common lines probabilities to be indicative Pij={100*np.mean(Pij):.2f}%" ) return P, sigma, Pij, scores_hist From b10cfce72ef2f7a2054821df4872b0802e3755f2 Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 2 Oct 2024 10:13:03 -0400 Subject: [PATCH 4/5] For now use default initial soln for Swt curve fitting --- src/aspire/abinitio/commonline_sync3n.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 7d1bd79cbc..434f638e62 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -722,7 +722,10 @@ def _triangle_scores( # P must be in lower and upper bounds or `curve_fit` will error # This was not the case for MATLAB... P0 = np.clip(P, Pmin**3, Pmax**3) - start_values = np.array([B0, P0, b, x0], dtype=np.float64) + # Note, MATLAB suggests the following, but I feel it is a bug. + # Will discuss with Yoel about the original code's intent. + # np.array([B0, P0, b, x0], dtype=np.float64) + start_values = None lower_bounds = np.array([0, Pmin**3, 2, 0], dtype=np.float64) upper_bounds = np.array([np.inf, Pmax**3, np.inf, 1], dtype=np.float64) From 1b556a0603f3e311cc9eaa66cc16be8d2d44729e Mon Sep 17 00:00:00 2001 From: Garrett Wright Date: Wed, 2 Oct 2024 10:14:19 -0400 Subject: [PATCH 5/5] leave matlab based code as comment for now --- src/aspire/abinitio/commonline_sync3n.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/aspire/abinitio/commonline_sync3n.py b/src/aspire/abinitio/commonline_sync3n.py index 434f638e62..6a7e5390d3 100644 --- a/src/aspire/abinitio/commonline_sync3n.py +++ b/src/aspire/abinitio/commonline_sync3n.py @@ -714,14 +714,14 @@ def _triangle_scores( * (a + 1) ) # normalization of 2nd component: B = P*N_delta/sum(f), where f is the component formula - B0 = ( - P - * (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) - / np.sum(((1 - hist_x) ** b) * np.exp(-b / (1 - x0) * (1 - hist_x))) - ) + # B0 = ( + # P + # * (self.n_img * (self.n_img - 1) * (self.n_img - 2) / 2) + # / np.sum(((1 - hist_x) ** b) * np.exp(-b / (1 - x0) * (1 - hist_x))) + # ) # P must be in lower and upper bounds or `curve_fit` will error # This was not the case for MATLAB... - P0 = np.clip(P, Pmin**3, Pmax**3) + # P0 = np.clip(P, Pmin**3, Pmax**3) # Note, MATLAB suggests the following, but I feel it is a bug. # Will discuss with Yoel about the original code's intent. # np.array([B0, P0, b, x0], dtype=np.float64)