Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 47 additions & 0 deletions gallery/experiments/legacy_preprocessing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
"""
Legacy Preprocessing Pipeline
=============================

This notebook demonstrates reproducing preprocessing results from the
legacy ASPIRE MATLAB workflow system for the EMPIAR 10028 ribosome
dataset.

https://www.ebi.ac.uk/empiar/EMPIAR-10028
"""

# %%
# Source data and Preprocessing
# -----------------------------
#
# Load the data and apply preprocessing steps corresponding to the MATLAB prompts:
#
# >>> Phaseflip projections? (Y/N)? [Y] Y
# >>> Enter pixel size in Angstrom (-1 to read from STAR file): [-1.000000]
# >>> Number of projections to read? [105247]
# >>> Crop? (Y/N)? [Y] Y
# >>> Crop to size? [360]359
# >>> Downsample? (Y/N)? [Y] Y
# >>> Downsample to size? [360]179
# >>> Normalize background of images to variance 1? (Y/N)? [Y]
# >>> Prewhiten? (Y/N)? [Y]
# >>> Split data into groups? (Y/N)? [Y] N

from aspire.source import RelionSource

# Input STAR file.
# Note the published ``shiny_2sets.star`` requires removal of a stray '9' character on line 5476.
starfile_in = "10028/data/shiny_2sets_fixed9.star"

# Express the full preprocessing pipeline as one chained expression.
# Each stage mirrors a MATLAB prompt above; caching after every stage is
# optional, trading increased memory usage for speed.
src = (
    RelionSource(starfile_in)
    .cache()
    .phase_flip()
    .cache()
    .crop_pad(359)
    .cache()
    .legacy_downsample(179)
    .cache()
    .legacy_normalize_background()
    .cache()
    .legacy_whiten()
    .cache()
    .invert_contrast()
    .cache()
)

# Save the preprocessed images.
# `save_mode=single` will save a STAR file and single mrcs holding the image stack.
src.save("10028_legacy_preprocessed_179px.star", save_mode="single", overwrite=True)
28 changes: 14 additions & 14 deletions src/aspire/noise/noise.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,9 @@ class LegacyNoiseEstimator(NoiseEstimator):
Isotropic noise estimator ported from MATLAB `cryo_noise_estimation`.
"""

def __init__(self, src, bg_radius=None, max_d=None, batch_size=512):
def __init__(
self, src, bg_radius=None, max_d=None, batch_size=512, normalize_psd=False
):
"""
Given an `ImageSource`, prepares for estimation of noise spectrum.

Expand All @@ -375,6 +377,8 @@ def __init__(self, src, bg_radius=None, max_d=None, batch_size=512):
:param max_d: Max computed correlation distance as a proportion of `src.L`.
Default of `None` uses `np.floor(src.L/3) / L`.
:param batch_size: The size of the batches in which to compute the variance estimate.
:param normalize_psd: Optionally normalize PSD in way that reproduces MATLAB intermediates.
Only useful if utilizing the `PSD` for applications outside of built-in legacy whitening.
"""

if bg_radius is None:
Expand All @@ -386,6 +390,8 @@ def __init__(self, src, bg_radius=None, max_d=None, batch_size=512):
if self.max_d is None:
self.max_d = np.floor(src.L / 3) / src.L

self.normalize_psd = bool(normalize_psd)

def estimate(self):
"""
:return: The estimated noise variance of the images.
Expand Down Expand Up @@ -414,6 +420,7 @@ def _estimate_noise_psd(self):
samples_idx,
max_d_pixels,
batch_size=self.batch_size,
normalize_psd=self.normalize_psd,
)[0]

return psd
Expand Down Expand Up @@ -552,7 +559,7 @@ def _estimate_power_spectrum_distribution_2d(
Default of `None` yields `np.floor(L / 3)`.
:param batch_size: The size of the batches in which to compute the variance estimate.
:param normalize_psd: Optionally normalize returned PSD.
Disabled by default because it will typiccally be
Disabled by default because it will typically be
renormalized later in preparation for the convolution
application in `Image.legacy_whiten`.
Enable to reproduce legacy PSD.
Expand All @@ -567,9 +574,6 @@ def _estimate_power_spectrum_distribution_2d(
L = samples_idx.shape[-1]
batch_size = min(batch_size, n_img)

# Migrate mask to GPU as needed
_samples_idx = xp.asarray(samples_idx)

# Correlations more than `max_d` pixels apart are not computed.
if max_d is None:
max_d = np.floor(L / 3)
Expand Down Expand Up @@ -612,25 +616,21 @@ def _estimate_power_spectrum_distribution_2d(
# to estimate it.

E = 0.0 # Total energy of the noise samples used to estimate the power spectrum.
samples = xp.zeros((batch_size, L, L), dtype=np.float64)
n_samples_per_img = int(np.count_nonzero(samples_idx))
samples = xp.zeros((batch_size, n_samples_per_img), dtype=np.float64)
for start in trange(
0, n_img, batch_size, desc="Estimating image noise energy"
):
end = min(n_img, start + batch_size)
cnt = end - start

samples[:cnt, _samples_idx] = images[start:end].asnumpy()[0][
samples_idx
]
samples[:cnt] = xp.asarray(images[start:end].asnumpy()[:, samples_idx])
E += xp.sum(
(
samples[:cnt]
- xp.mean(samples[:cnt], axis=(1, 2)).reshape(cnt, 1, 1)
)
(samples[:cnt] - xp.mean(samples[:cnt], axis=-1).reshape(cnt, 1))
** 2
)

# Mean energy of the noise samples
n_samples_per_img = xp.count_nonzero(_samples_idx)
meanE = E / (n_samples_per_img * n_img)

# Normalize P2 such that its mean energy is preserved and is equal to
Expand Down
Loading