# Making 3D slab projections for 2DTM results

This notebook holds the necessary code for making slab volumes with template structures placed at their identified orientations & locations.
These can be extended to much larger image regions (such plots are included as later figures), but this case focuses on the detections in Figure 1 subregions

In [1]:
import numpy as np
import pandas as pd
import mrcfile
import torch
import matplotlib.pyplot as plt
from scipy.ndimage import rotate
from scipy.spatial.transform import Rotation

# import napari
import torch_fourier_rescale

## Downloading, importing, and loading data

### Load in the simulated templates

Each of these volumes have been simulated and used to search a micrograph using 2DTM.
We load the MRC files into numpy arrays and additionally find their simulated pixel sizes.
These data will be used to downsample the volumes before placing them in the slab volumes.

In [2]:
template_60S_path = "/data/papers/Leopard-EM_paper_data/maps/60S_map_px0.936_bscale0.5.mrc"
template_40S_path = "/data/papers/Leopard-EM_paper_data/maps/SSU-body_map_px0.936_bscale0.5.mrc"
template_20S_path = "/shared/1ryp_2dtm/1ryp_sim.mrc"

In [3]:
# Load template files and print their shapes
with mrcfile.open(template_60S_path, mode="r") as f:
    template_60S = f.data.copy()
    px_60S = f.voxel_size.x.item()
    print(f"60S template shape: {template_60S.shape}")
    print(f"60S template voxel shape: {px_60S:.3f}")
    print()

with mrcfile.open(template_40S_path, mode="r") as f:
    template_40S = f.data.copy()
    px_40S = f.voxel_size.x.item()
    print(f"40S template shape: {template_40S.shape}")
    print(f"40S template voxel shape: {px_40S:.3f}")
    print()

with mrcfile.open(template_20S_path, mode="r") as f:
    template_20S = f.data.copy()
    px_20S = f.voxel_size.x.item()
    print(f"20S template shape: {template_20S.shape}")
    print(f"20S template voxel shape: {px_20S:.3f}")

60S template shape: (512, 512, 512)
60S template voxel shape: 0.936

40S template shape: (512, 512, 512)
40S template voxel shape: 0.936

20S template shape: (384, 384, 384)
20S template voxel shape: 1.060


### Load in the full micrographs

The 40S and 60S ribosome micrographs are the same while the 20S is a different image.
Again, these are MRC file which get loaded into numpy arrays.

In [4]:
img_60S_path = "/data/papers/Leopard-EM_paper_data/xe30kv/all_mgraphs/xenon_252_000_0.0_DWS.mrc"
img_40S_path = "/data/papers/Leopard-EM_paper_data/xe30kv/all_mgraphs/xenon_252_000_0.0_DWS.mrc"
img_proteasome_path = "150_Mar12_12.28.45_165_0.mrc"

In [5]:
# Load micrograph files and print their shapes
with mrcfile.open(img_60S_path, mode="r") as f:
    img_60S = f.data.copy()
    print(f"60S micrograph shape: {img_60S.shape}")
    print()

with mrcfile.open(img_40S_path, mode="r") as f:
    img_40S = f.data.copy()
    print(f"40S micrograph shape: {img_40S.shape}")
    print()

with mrcfile.open(img_proteasome_path, mode="r") as f:
    img_proteasome = f.data.copy()
    img_proteasome = img_proteasome[0]
    print(f"Proteasome micrograph shape: {img_proteasome.shape}")

60S micrograph shape: (4096, 4096)

40S micrograph shape: (4096, 4096)

Proteasome micrograph shape: (4092, 5760)


### Match template results files for each run

These are csv files parsed into a DataFrame.
We will use these later to get the orientations and depth (defocus) of each particle to plot the results of.

Also, we will filter results that are close to each other (within 10 pixels) to avoid selecting the same particle multiple times.

In [6]:
# # Match template results
# results_60S_df_path = "/data/papers/Leopard-EM_paper_data/xe30kv/results_match_tm_60S/xenon_252_000_0.0_DWS_results.csv"
# results_40S_df_path = "/data/papers/Leopard-EM_paper_data/xe30kv/results_match_tm_40S-body/xenon_252_000_0.0_DWS_results.csv"
# results_20S_df_path = "/shared/1ryp_2dtm/results_def/150_def_match_template_results.csv"

# Refine template results
results_60S_df_path = "/data/papers/Leopard-EM_paper_data/xe30kv/results_refine_tm_60S_2/xenon_252_000_0.0_DWS_refined_results.csv"
results_40S_df_path = "/home/mgiammar/git_repositories/tt2DTM/scratch_files/results_refine_tm_40S-body/xenon_252_000_0.0_DWS_refine_results.csv"
results_20S_df_path = "/shared/1ryp_2dtm/results_def/150_def_match_template_results.csv"

In [7]:
results_60S_df = pd.read_csv(results_60S_df_path)
results_40S_df = pd.read_csv(results_40S_df_path)
results_20S_df = pd.read_csv(results_20S_df_path)

In [8]:
def filter_close_results(df: pd.DataFrame, threshold: float = 10.0) -> pd.DataFrame:
    """Filters rows in a DataFrame based on proximity of 'pos_x' and 'pos_y' values.

    NOTE: This function give preference to rows with a higher 'scaled_mip' value
    when filtering out close results.

    Parameters:
    -----------
    df : pd.DataFrame
        DataFrame containing 'pos_x' and 'pos_y' columns.
    threshold : float
        Distance threshold for filtering. Rows with 'pos_x' and 'pos_y' values
        within this distance of each other will be filtered out.

    Returns:
    --------
    pd.DataFrame
        Filtered DataFrame containing only rows that are not close to each other
        based on the specified threshold.
    """
    filtered_df = df.copy()
    filtered_df = filtered_df.sort_values(by="scaled_mip", ascending=False)
    filtered_df = filtered_df.reset_index(drop=True)

    # List to store indices of rows to keep
    indices_to_keep = []

    for i, row in filtered_df.iterrows():
        # Check if this row is close to any already kept row
        is_close = False
        for kept_idx in indices_to_keep:
            kept_row = filtered_df.loc[kept_idx]
            if (
                abs(row["pos_x"] - kept_row["pos_x"]) < threshold
                and abs(row["pos_y"] - kept_row["pos_y"]) < threshold
            ):
                is_close = True
                break

        # If not close to any kept row, keep this one
        if not is_close:
            indices_to_keep.append(i)

    # Return filtered dataframe with only the kept indices
    return filtered_df.loc[indices_to_keep].sort_index()


results_60S_df = filter_close_results(results_60S_df, threshold=10)
results_40S_df = filter_close_results(results_40S_df, threshold=10)
results_20S_df = filter_close_results(results_20S_df, threshold=10)

## Helper functions for slab creation and visualization

### Creating slab with translations and rotations

Here, the slab is created to be the same size as the original micrograph if it were down-sampled.
This means the slab can be directly overlaid on the micrograph to visualize structure position

In [9]:
def create_empty_slab(
    image_shape: tuple[int, int],
    image_pixel_size: float,  # Angstroms
    slab_pixel_size: float,  # Angstroms
    slab_thickness: float,  # Angstroms
) -> np.ndarray:
    """Create an empty slab with the specified pixel size and thickness.

    Parameters
    ----------
    image_shape : tuple[int, int]
        Shape of the image (height, width).
    image_pixel_size : float
        Pixel size of the image, in Angstroms.
    slab_pixel_size : float
        Desired voxel size of the slab, in Angstroms (isotropic).
    slab_thickness : float
        Thickness of the slab, in Angstroms.
    """
    # Slab spans the same physical area as the image, but will have a different
    # voxel pitch.
    height = image_shape[0] * image_pixel_size
    width = image_shape[1] * image_pixel_size

    slab_height = int(height / slab_pixel_size)
    slab_width = int(width / slab_pixel_size)
    slab_depth = int(slab_thickness / slab_pixel_size)

    slab = np.zeros((slab_height, slab_width, slab_depth), dtype=np.float32)

    return slab

### Helper function to place a smaller volume into a larger volume

Note that this operates on integer coordinates (no sub-voxel accuracy), nor does this handle complex bounds checking.
It will just raise an error if the slab bounds are exceeded.

Also note that the rotation format is 'xyz' rather than the typical 'ZYZ' Euler angle format used by Leopard-EM

In [10]:
def place_into_larger_volume(
    small_volume: np.ndarray,
    large_volume: np.ndarray,
    position: tuple[int, int, int],
    orientation: tuple[float, float, float]
) -> np.ndarray:
    """Transform a small volume and place it into a larger volume.

    NOTE: The orientations are in ZYZ format, volumes are in ZYX format

    Parameters
    ----------
    small_volume : np.ndarray
        The small volume to be placed, shape (depth, height, width) in ZYX format.
    large_volume : np.ndarray
        The larger volume into which the small volume will be placed, shape
        (depth, height, width) in ZYX format.
    position : tuple[int, int, int]
        The (z, y, x) position in the larger volume where the small volume will be placed.
    orientation : tuple[float, float, float]
        The rotation angles (in degrees) for ZYZ Euler angles.
    
    Returns
    -------
    np.ndarray
        The larger volume with the small volume placed and rotated inside it.
    """
    # Calculate the position in the larger volume (only integer coordinates)
    z, y, x = position
    z_end = z + small_volume.shape[0]
    y_end = y + small_volume.shape[1]
    x_end = x + small_volume.shape[2]

    # Check if the small volume fits into the larger volume
    if (
        z < 0
        or y < 0
        or x < 0
        or z_end > large_volume.shape[0]
        or y_end > large_volume.shape[1]
        or x_end > large_volume.shape[2]
    ):
        print(f"Position: {position}")
        print(f"Small volume shape: {small_volume.shape}")
        print(f"Larger volume shape: {large_volume.shape}")
        raise ValueError("Volume is out of bounds!")

    # Sequentially rotate the small volume (ZYZ Euler angles)
    # For ZYX format: axes (0,1,2) correspond to (Z,Y,X)
    phi, theta, psi = orientation
    volume_rotated = rotate(small_volume, -phi, axes=(1, 2), reshape=False, order=2)  # Rotation around Z
    volume_rotated = rotate(volume_rotated, theta, axes=(0, 2), reshape=False, order=2)  # Rotation around Y
    volume_rotated = rotate(volume_rotated, -psi, axes=(0, 1), reshape=False, order=2)  # Rotation around Z

    # Place the rotated small volume into the larger volume
    large_volume[z:z_end, y:y_end, x:x_end] += volume_rotated

    return large_volume

### Helper function to render volume from results DataFrame

In [11]:
def construct_slab_from_results_df(
    template_volume: np.ndarray,
    results_df: pd.DataFrame,
    image_shape: tuple[int, int],
    image_pixel_size: float,
    slab_pixel_size: float,
    slab_thickness: float = None,  # Default None means thickness is inferred
    use_refined_columns: bool = True,
) -> np.ndarray:
    """Takes in a results_df DataFrame and constructs a slab for the volume.

    Parameters
    ----------
    template_volume : np.ndarray
        The template volume to be placed into the slab, shape (depth, height, width).
    results_df : pd.DataFrame
        DataFrame containing the results with columns 'pos_x', 'pos_y', 'relative_defocus',
        'phi', 'theta', and 'psi'.
    image_shape : tuple[int, int]
        Shape of the image (height, width) that the slab will cover.
    image_pixel_size : float
        Pixel size of the image, in Angstroms.
    slab_pixel_size : float
        Desired voxel size of the slab, in Angstroms (isotropic).
    slab_thickness : float, optional
        Thickness of the slab, in Angstroms. If None, it will be inferred from the
        defocus values in the results_df.
    use_refined_columns : bool, optional
        If True, then preference is given to refined columns in the results_df.
        If False, the original columns are used. Falls back to original columns
        if the refined columns are not present.

    Returns
    -------
    np.ndarray
        The constructed slab containing the placed template volumes, shape
        (slab_height, slab_width, slab_depth).
    """
    pos_x_col = "pos_x"
    pos_y_col = "pos_y"
    phi_col = "phi"
    theta_col = "theta"
    psi_col = "psi"
    relative_defocus_col = "relative_defocus"
    if use_refined_columns:
        if (
            "refined_pos_x" in results_df.columns
            and "refined_pos_y" in results_df.columns
            and "refined_phi" in results_df.columns
            and "refined_theta" in results_df.columns
            and "refined_psi" in results_df.columns
            and "refined_relative_defocus" in results_df.columns
        ):
            pos_x_col = "refined_pos_x"
            pos_y_col = "refined_pos_y"
            phi_col = "refined_phi"
            theta_col = "refined_theta"
            psi_col = "refined_psi"
            relative_defocus_col = "refined_relative_defocus"
        else:
            print("Refined columns not found, using original columns.")

    # Find minimum/maximum defocus values to set zero position
    min_defocus = results_df[relative_defocus_col].min()
    max_defocus = results_df[relative_defocus_col].max()

    # Use defocus range as slab thickness if not provided
    if slab_thickness is None:
        slab_thickness = max_defocus - min_defocus
        slab_thickness += (template_volume.shape[0] + 1) * slab_pixel_size

    slab = create_empty_slab(
        image_shape=image_shape,
        image_pixel_size=image_pixel_size,
        slab_pixel_size=slab_pixel_size,
        slab_thickness=slab_thickness,
    )

    for i, row in results_df.iterrows():
        print(f"Placing volume {i + 1} of {len(results_df)}")
        # Get full image position and convert to slab coordinates
        position = [
            row[pos_x_col] * image_pixel_size,
            row[pos_y_col] * image_pixel_size,
            float(row[relative_defocus_col]) - min_defocus,
        ]
        position = np.array(position)
        position = np.round(position / slab_pixel_size).astype(int)
        position = tuple(position.tolist())

        orientation = [row[phi_col], row[theta_col], row[psi_col]]

        slab = place_into_larger_volume(
            small_volume=template_volume,
            large_volume=slab,
            position=position,
            orientation=orientation,
        )

    # # Slab is in XYZ format, but MRC are generally defined in ZYX format.
    # slab = np.transpose(slab, (2, 0, 1))  # Change to ZYX format

    return slab

## Filtering results based on regions of interest

These regions are defined in the previous notebook for plotting the 2D images: [01_plot_2dtm_results.ipynb](01_plot_2dtm_results.ipynb).

In [12]:
# These are the regions in the results file for the 60S and 40S results
# NOTE: This definition is backwards based on the results file...

x1_region = (100, 780)
y1_region = (600, 1280)

In [13]:
# Filter pos_x and pos_y to be within the specified regions
results_60S_df_filtered = results_60S_df[
    (results_60S_df["pos_x"].between(*x1_region)) &
    (results_60S_df["pos_y"].between(*y1_region))
].copy()
results_40S_df_filtered = results_40S_df[
    (results_40S_df["pos_x"].between(*x1_region)) &
    (results_40S_df["pos_y"].between(*y1_region))
].copy()

# Translate the x and y positions to the new region
results_60S_df_filtered["pos_x"] -= x1_region[0]
results_60S_df_filtered["pos_y"] -= y1_region[0]
if "refined_pos_x" in results_60S_df_filtered.columns:
    results_60S_df_filtered["refined_pos_x"] -= x1_region[0]
    results_60S_df_filtered["refined_pos_y"] -= y1_region[0]

results_40S_df_filtered["pos_x"] -= x1_region[0]
results_40S_df_filtered["pos_y"] -= y1_region[0]
if "refined_pos_x" in results_40S_df_filtered.columns:
    results_40S_df_filtered["refined_pos_x"] -= x1_region[0]
    results_40S_df_filtered["refined_pos_y"] -= y1_region[0]

In [14]:
# This is the region (in the other micrograph) for the 20S results
x3_region = (3370, 3970)
y3_region = (1815, 2415)

In [15]:
# Translate the x and y positions to the new region
results_20S_df_filtered = results_20S_df.copy()
results_20S_df_filtered["pos_x"] -= x3_region[0]
results_20S_df_filtered["pos_y"] -= y3_region[0]
if "refined_pos_x" in results_20S_df_filtered.columns:
    results_20S_df_filtered["refined_pos_x"] -= x3_region[0]
    results_20S_df_filtered["refined_pos_y"] -= y3_region[0]

## Generating slab for 60S ribosome results

The results DataFrame is filtered to only include particles within a specified region of interest which corresponds to to the other notebook used to generate figure 1.

In [16]:
template_60S_rescaled, px_60S_rescaled = torch_fourier_rescale.fourier_rescale_3d(
    image=torch.from_numpy(template_60S),
    source_spacing=px_60S,
    target_spacing=5.0,
)
px_60S_rescaled = px_60S_rescaled[0].item()

print(f"Rescaled 60S template voxel shape: {px_60S_rescaled:.3f} Angstroms")

Rescaled 60S template voxel shape: 4.992 Angstroms


In [17]:
slab_60S = construct_slab_from_results_df(
    template_volume=template_60S_rescaled,
    results_df=results_60S_df_filtered,
    image_shape=(
        x1_region[1] - x1_region[0] + template_60S.shape[0],
        y1_region[1] - y1_region[0] + template_60S.shape[1],
    ),
    image_pixel_size=px_60S,
    slab_pixel_size=px_60S_rescaled,
    slab_thickness=1200.0,  # Angstroms
)
slab_60S.shape

Placing volume 14 of 7
Placing volume 33 of 7
Placing volume 41 of 7
Placing volume 62 of 7
Placing volume 85 of 7
Placing volume 91 of 7
Placing volume 97 of 7


(223, 223, 240)

In [18]:
# Save the slab to a MRC file
slab_60S_mrc_path = "slab_60S_refined.mrc"
with mrcfile.new(slab_60S_mrc_path, overwrite=True) as mrc:
    mrc.set_data(slab_60S.astype(np.float32))
    mrc.voxel_size = (px_60S_rescaled, px_60S_rescaled, px_60S_rescaled)
    mrc.update_header_from_data()
    print(f"Slab saved to {slab_60S_mrc_path}")

Slab saved to slab_60S_refined.mrc


## Generating slab volume for 40S

In [19]:
template_40S_rescaled, px_40S_rescaled = torch_fourier_rescale.fourier_rescale_3d(
    image=torch.from_numpy(template_40S),
    source_spacing=px_40S,
    target_spacing=5.0,
)
px_40S_rescaled = px_40S_rescaled[0].item()

print(f"Rescaled 40S template voxel shape: {px_40S_rescaled:.3f} Angstroms")

Rescaled 40S template voxel shape: 4.992 Angstroms


In [20]:
slab_40S = construct_slab_from_results_df(
    template_volume=template_40S_rescaled,
    results_df=results_40S_df_filtered,
    image_shape=(
        x1_region[1] - x1_region[0] + template_40S.shape[0],
        y1_region[1] - y1_region[0] + template_40S.shape[1],
    ),
    image_pixel_size=px_40S,
    slab_pixel_size=px_40S_rescaled,
    slab_thickness=1200.0,  # Angstroms
)
slab_40S.shape

Placing volume 4 of 4
Placing volume 10 of 4
Placing volume 12 of 4
Placing volume 21 of 4


(223, 223, 240)

In [21]:
# Save the slab to a MRC file
slab_40S_mrc_path = "slab_40S_refined.mrc"
with mrcfile.new(slab_40S_mrc_path, overwrite=True) as mrc:
    mrc.set_data(slab_40S.astype(np.float32))
    mrc.voxel_size = (px_40S_rescaled, px_40S_rescaled, px_40S_rescaled)
    mrc.update_header_from_data()
    print(f"Slab saved to {slab_40S_mrc_path}")

Slab saved to slab_40S_refined.mrc


## Generating Slab for 20S Proteasome

In [22]:
template_20S_rescaled, px_20S_rescaled = torch_fourier_rescale.fourier_rescale_3d(
    image=torch.from_numpy(template_20S),
    source_spacing=px_20S,
    target_spacing=5.0,
)
px_20S_rescaled = px_20S_rescaled[0].item()

print(f"Rescaled 20S template voxel shape: {px_20S_rescaled:.3f} Angstroms")

Rescaled 20S template voxel shape: 4.964 Angstroms


In [23]:
slab_20S = construct_slab_from_results_df(
    template_volume=template_20S_rescaled,
    results_df=results_20S_df_filtered,
    image_shape=(
        x3_region[1] - x3_region[0] + template_20S.shape[0],
        y3_region[1] - y3_region[0] + template_20S.shape[1],
    ),
    image_pixel_size=px_20S,
    slab_pixel_size=px_20S_rescaled,
    # slab_thickness=1200.0,  # Angstroms
)

Refined columns not found, using original columns.
Placing volume 1 of 1


In [24]:
# Save the slab to a MRC file
slab_20S_mrc_path = "slab_20S_refined.mrc"
with mrcfile.new(slab_20S_mrc_path, overwrite=True) as mrc:
    mrc.set_data(slab_20S.astype(np.float32))
    mrc.voxel_size = (px_20S_rescaled, px_20S_rescaled, px_20S_rescaled)
    mrc.update_header_from_data()
    print(f"Slab saved to {slab_20S_mrc_path}")

Slab saved to slab_20S_refined.mrc
