# Examining the Ground Truth LBM Dataset

Input: mh89_hemisphere_00001.tif -> mh89_hemisphere_00010.tif
- [25320, 5104, 145]
- [Zt, y, x]

## 1) Pre-Processing / Motion Correction

Output: Fig2_dataset_plane_n.mat, where n = 1:30, containing the following fields:

- Y: motion-corrected single plane imaging data [nx,ny,nt]
- pixelResolution: pixel sampling [um]
- sizY: size of Y [nx,ny,nt]
- volumeRate: volume rate [Hz]

## 2) Segmentation

Output: Fig2_collated_caiman_output.mat, containing the following fields:

- T_all: raw neuronal traces [K,nt]
- nx: neuronal coordinate in the x direction [K,1], [um]
- ny: neuronal coordinate in the y direction [K,1], [um]
- nz: neuronal coordinate in the z direction [K,1], [um]

In [99]:
import os
import sys
import copy
import time
import cv2
import datetime
import glob
import h5py
import json
import logging
import matplotlib.pyplot as plt
import numpy as np
import scipy.signal
import skimage
import dask.array as da
import napari
import cv2
from pprint import pprint
from skimage.io import imread
import tifffile
from pathlib import Path

# NOTE(review): `argumentToString` is not imported in the visible import cell;
# presumably it comes from icecream -- confirm and import it before this cell.
@argumentToString.register(np.ndarray)
def _(obj):
    """Summarise an ndarray by shape and dtype for the icecream debug output."""
    shape, dtype = obj.shape, obj.dtype
    return f"ndarray, shape={shape}, dtype={dtype}"

def init_params():
    """
    Build the default parameter dictionary for preprocessing, reconstruction,
    visualization, and saving of imaging data.

    A new dictionary is created on every call, so callers may mutate the result freely.

    Returns
    -------
    params : dict
        Keys, in insertion order, grouped by theme:

        General
        - debug (bool): Enable debug messages.
        - chans_order_{n}planes (np.ndarray): Channel/plane reordering array used to arrange
          data by tissue depth for n planes (n = 1, 15, 30). The defaults are the identity order.

        Input / output locations
        - raw_data_dirs (list of str): Absolute paths to folders containing data.
        - output_dir (str): Directory where output files should be saved.
        - fname_must_contain (str): Filenames must contain this string to be analysed.
        - fname_must_NOT_contain (str): Filenames must not contain this string to be analysed.

        Reconstruction
        - make_template_seams_and_plane_alignment (bool): Whether to start reconstruction.
        - reconstruct_all_files (bool): If True, iterate over all files.
        - list_files_for_template (list): Indices of files used to create a template.
        - seams_overlap (str or int or list): "calculate" to determine the overlap dynamically,
          an int for a fixed overlap, or a list giving the overlap for each plane.

        Saving
        - save_output (bool): If True, saves each plane in a separate folder; otherwise saves
          the full volume.
        - save_as_volume_or_planes (str): Saving mode, either "volume" or "planes".
        - concatenate_all_h5_to_tif (bool): If True, concatenate all .h5 files into one .tif.

        Seam overlap
        - n_ignored_pixels_sides (int): Pixels ignored on each side of the MROI when
          calculating overlap.
        - min_seam_overlap (int), max_seam_overlap (int): Bounds, in pixels, for the dynamic
          overlap calculation.
        - alignment_plot_checks (bool): If True, generate plots to check alignment.

        Visualization
        - gaps_columns (int), gaps_rows (int): Gap sizes in pixels.
        - intensity_percentiles (list): Percentiles for intensity scaling.
        - save_mp4 (bool), save_meanf_png (bool): Output toggles; presumably enable the video
          and mean-frame PNG outputs (confirm against the code that reads them).
        - meanf_png_only_first_file (bool), video_only_first_file (bool): Limit those outputs
          to the first file processed.
        - video_play_speed (int), rolling_average_frames (int), video_duration_secs (int):
          Video parameters.

        Volume post-processing
        - lateral_align_planes (bool): If True, perform lateral alignment across planes.
        - make_nonan_volume (bool): If True, ensure the volume contains no NaNs by trimming
          or padding.
        - add_1000_for_nonegative_volume (bool): If True, add 1000 to pixel values to keep
          volumes non-negative.

        Logging
        - json_logging (bool): Enable JSON format for logging debug and process information.

    Notes
    -----
    'reconstruct_until_this_ifile' (the file count used when 'reconstruct_all_files' is
    False) is NOT set here; add it before disabling 'reconstruct_all_files'.

    TODO: Implement checks with clear warnings or errors for parameter inconsistencies.
          Detect the number of planes based on file metadata instead of relying on filename conventions.
    """
    params = {"debug": True}

    # Plane ordering: identity permutations for 1, 15 and 30 planes.
    params.update({
        "chans_order_1plane": np.array([0]),
        "chans_order_15planes": np.arange(15),
        "chans_order_30planes": np.arange(30),
    })

    # Where to read from / write to, and which filenames qualify.
    params.update({
        "raw_data_dirs": ["/v-data4/foconnell/data/lbm/raw"],
        "output_dir": "preprocessed_4",
        "fname_must_contain": "",
        "fname_must_NOT_contain": "",
    })

    # Reconstruction control.
    params.update({
        "make_template_seams_and_plane_alignment": True,
        "reconstruct_all_files": True,
        "list_files_for_template": [0],
        "seams_overlap": "calculate",
    })

    # Output format.
    params.update({
        "save_output": True,
        "save_as_volume_or_planes": "planes",
        "concatenate_all_h5_to_tif": False,
    })

    # Seam-overlap search.
    params.update({
        "n_ignored_pixels_sides": 5,
        "min_seam_overlap": 5,
        "max_seam_overlap": 20,
        "alignment_plot_checks": False,
    })

    # Visualization outputs.
    params.update({
        "gaps_columns": 5,
        "gaps_rows": 5,
        "intensity_percentiles": [15, 99.5],
        "save_mp4": True,
        "save_meanf_png": True,
        "meanf_png_only_first_file": True,
        "video_only_first_file": True,
        "video_play_speed": 1,
        "rolling_average_frames": 1,
        "video_duration_secs": 20,
    })

    # Volume post-processing and logging.
    params.update({
        "lateral_align_planes": False,
        "make_nonan_volume": True,
        "add_1000_for_nonegative_volume": True,
        "json_logging": False,
    })

    return params

# Instantiate the default pipeline parameters at notebook scope.
params = init_params()


### Explore the ScanImage metadata


In [100]:
# Parse the ScanImage header of the first raw file.
# NOTE(review): `raw_fnames` is not defined in any visible cell -- confirm where it is built.
with open(raw_fnames[0], 'rb') as fh:
    metadata = tifffile.read_scanimage_metadata(fh)

# Element 0 is kept as the static metadata; element 1 holds the ROI group definitions.
static_metadata = metadata[0]
frame_metadata = metadata[1]['RoiGroups']['imagingRoiGroup']['rois']

# Collect the 'scanfields' entry of every imaging ROI.
rois = [roi['scanfields'] for roi in frame_metadata]

print(f'Number of roi: {len(rois)}')

Number of roi: 5


### Pull out some metadata needed for restitching

In [98]:
# Scanfield geometry used for restitching.
# NOTE(review): `first` is not defined in any visible cell -- presumably the first
# entry of `rois` (e.g. `first = rois[0]`); confirm before running on a fresh kernel.
centerXY = first['centerXY']
sizeXY = first['sizeXY']
pixel_resolution_xy = first['pixelResolutionXY']

num_frames = static_metadata['SI.hStackManager.framesPerSlice']
num_planes = len(static_metadata['SI.hChannels.channelsActive'])
# scanVolumeRate/scanFrameRate are the same now, but may not always be.
# `scan_volume_rate` is printed below, so it has to be bound here; `frame_rate`
# is kept as an alias for any code that already uses that name.
scan_volume_rate = static_metadata['SI.hRoiManager.scanVolumeRate']
frame_rate = scan_volume_rate
objective_resolution = static_metadata['SI.objectiveResolution']  # 157.5

# Explore:
lines_per_frame = static_metadata['SI.hRoiManager.linesPerFrame']
print(f'{lines_per_frame} lines/frame')
px_per_line = static_metadata['SI.hRoiManager.pixelsPerLine']
print(f'{px_per_line} pixels/line')
print(f'The difference gives {lines_per_frame - px_per_line} extra px on each strip')
print('---')

scan_frame_rate = static_metadata['SI.hRoiManager.scanFrameRate']
print(f'Volume/frame rate: {scan_volume_rate} / {scan_frame_rate} Hz')
scan_zoom_factor = static_metadata['SI.hRoiManager.scanZoomFactor']
frames_per_tiff = static_metadata['SI.hScan2D.logFramesPerFile']
print(f'{frames_per_tiff} frames/tiff')

# Field of view per strip: objective resolution times the scanfield size,
# truncated to whole units by the int cast.
fov = np.multiply(objective_resolution, sizeXY).astype(int)
print(f'objective resolution: {objective_resolution} micron/angle')
print(f'sizeXY: {sizeXY} (angle, angle)')
print(f'FOV: {fov[0]} um x {fov[1]} um for each strip')
print(f'pixelResolutionXY: { pixel_resolution_xy } px, which is really the number of pixels, though called "resolution"')


# Open question: confirm the units -- is objective_resolution in micron/angle and
# sizeXY in (angle, angle), so that their product `fov` is in microns?

144 lines/frame
128 pixels/line
The difference gives 16 extra px on each strip
---
Volume/frame rate: 4.68692 / 4.68692 Hz
844 frames/tiff
objective resolution: 157.5 micron/angle
sizeXY: [3.80952381, 31.74603175] (angle, angle)
FOV: 600 um x 5000 um for each strip
pixelResolutionXY: [144, 1000] px, which is really the number of pixels, though called "resolution"


In [88]:
# Root folder of the dataset and the folder holding the CaImAn results.
datapath = Path('/data2/fpo/lbm/')
caiman_path = Path('/data2/fpo/lbm/output/')

# Per-plane CaImAn outputs: .mat files whose name contains 'caiman_output_plane'.
caiman_files = [
    mat for mat in caiman_path.glob('*.mat')
    if 'caiman_output_plane' in mat.name
]

# Everything directly under the data root, by extension.
raw_tiff = list(datapath.glob('*.tif'))
mat_files = list(datapath.glob('*.mat'))

# .mat files with 'plane' in the name (presumably the motion-corrected planes -- confirm).
mc_files = [mat for mat in mat_files if 'plane' in mat.name]

caiman_files


[PosixPath('/data2/fpo/lbm/output/caiman_output_plane_16.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_17.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_6.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_10.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_9.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_11.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_20.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_4.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_14.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_19.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_15.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_23.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_26.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_8.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_output_plane_1.mat'),
 PosixPath('/data2/fpo/lbm/output/caiman_outp

In [49]:
# Open every TIFF matching the pattern as a zarr store (aszarr=True) instead of
# loading the pixel data into memory.
store = tifffile.imread("/data2/fpo/lbm/*.tif", aszarr=True)

# Wrap the store in a dask array. The file imports `dask.array as da`, which does
# not bind the name `dask`, so the call must go through `da`.
arr = da.from_zarr(store)

arr.shape

<tifffile.TiffFile 'mh89_hemisphere…_00001_00008.tif'> asarray failed to reshape (25320, 5104, 145) to (8440, 30, 5104, 145)


(10, 25320, 5104, 145)

In [48]:
# Report the array's total size in decimal gigabytes (nbytes / 1e9), then show its repr.
print('Data Array size: {} GB'.format(arr.nbytes / 1e9))
arr

Data Array size: 374.776512 GB


Unnamed: 0,Array,Chunk
Bytes,349.04 GiB,34.90 GiB
Shape,"(10, 25320, 5104, 145)","(1, 25320, 5104, 145)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int16 numpy.ndarray,int16 numpy.ndarray
"Array Chunk Bytes 349.04 GiB 34.90 GiB Shape (10, 25320, 5104, 145) (1, 25320, 5104, 145) Dask graph 10 chunks in 2 graph layers Data type int16 numpy.ndarray",10  1  145  5104  25320,

Unnamed: 0,Array,Chunk
Bytes,349.04 GiB,34.90 GiB
Shape,"(10, 25320, 5104, 145)","(1, 25320, 5104, 145)"
Dask graph,10 chunks in 2 graph layers,10 chunks in 2 graph layers
Data type,int16 numpy.ndarray,int16 numpy.ndarray
