# Examining the Ground Truth LBM Dataset

Input: mh89_hemisphere_00001.tif -> mh89_hemisphere_00010.tif
- [25320, 5104, 145] 
- [Zt, y, x]

## 1) Pre-Processing / Motion Correction

Output: Fig2_dataset_plane_n.mat, where n = 1:30, containing the following fields:

- Y: motion-corrected single plane imaging data [nx,ny,nt]
- pixelResolution: pixel sampling [um]
- sizY: size of Y [nx,ny,nt]
- volumeRate: volume rate [Hz]

## 2) Segmentation

Output: Fig2_collated_caiman_output.mat, containing the following fields:

- T_all: raw neuronal traces [K,nt]
- nx: neuronal coordinate in the x direction [K,1], [um]
- ny: neuronal coordinate in the y direction [K,1], [um]
- nz: neuronal coordinate in the z direction [K,1], [um]

In [7]:
import time
import json
from pathlib import Path
import numpy as np
import tifffile
import scipy.io
import dask
import napari
from pprint import pprint
from skimage.io import imread
from dask import delayed

# icecream is an optional debugging aid: use it when available, otherwise
# fall back to a silent stand-in so the rest of the notebook still runs.
try:
    from icecream import ic, install, argumentToString
except ImportError:
    def ic(*args):
        """Silent stand-in for ``icecream.ic``.

        Returns ``None`` when called without arguments, the argument itself
        when called with exactly one, and the tuple of arguments otherwise.
        Nothing is printed.
        """
        if not args:
            return None
        return args[0] if len(args) == 1 else args
else:
    # Only call install() when the import succeeded; calling it
    # unconditionally raises NameError on the fallback path.
    install()

In [2]:
# Wrap tifffile.imread with dask.delayed so that calling it only records a
# task instead of running the read immediately.
lazy_imread = delayed(tifffile.imread)

# `reader` is a delayed object for the glob below; nothing is read here.
reader = lazy_imread('/data2/fpo/lbm/*.tif')  # doesn't actually read the file

In [None]:
# Lazily stack every TIFF matching the glob along a new leading axis.
# dask's image-stack reader lives in dask.array.image (it is not exposed as
# `da.imread`) and takes no `chunks` argument: each file becomes exactly one
# chunk, i.e. the (1, 25320, 5104, 145) chunking originally requested.
# Alternative route: tifffile.imread(fname, aszarr=True) -> zarr store -> dask.
from dask.array.image import imread as dask_imread

arr = dask_imread("/data2/fpo/lbm/*.tif")
arr

# Switch for dumping the serialized metadata to disk; off by default.
do_metadata = False

if do_metadata:
    # NOTE(review): `serialized_metadata` is not defined anywhere in the
    # visible notebook; it must be created by an earlier cell before this
    # flag is enabled.
    json_filename = "/v-data4/foconnell/data.json"
    try:
        with open(json_filename, 'w', encoding='utf-8') as f:
            f.write(serialized_metadata)
    except OSError as e:
        # Only filesystem failures are reported here; programming errors
        # (e.g. a missing `serialized_metadata`) are left to surface.
        print(f"An error occured: {e}")

In [None]:
# The chunk shape must be passed as ONE tuple. Passed positionally, the four
# numbers bind to rechunk(chunks=1, threshold=30, block_size_limit=5104,
# balance=145), which asks for single-element chunks along every axis.
arr = arr.rechunk((1, 30, 5104, 145))

IOStream.flush timed out


In [30]:
import matplotlib.pylab as plt
# Output path for the figure; nothing below writes to it unless a savefig
# call is enabled.
figname = "/v-data4/foconnell/figA.png"

# Sanity check of the stacked array's dimensions.
print(arr.shape)
# Disabled: eagerly materialise part of one frame.
#slice = arr[1, 1, :1021, :].compute()

(10, 25320, 5104, 145)


In [None]:
# Draw one 2-D slice of `arr` (index 1 on each of the two leading axes) on a
# single axes, show it, then release the figure.
# presumably the leading axes are (file, Zt) -- confirm against the loader.
fig, ax = plt.subplots()
ax.imshow(arr[1, 1])
plt.gcf()
# plt.savefig(figname)  # enable to also write the figure to disk
plt.show()
plt.close()

# Metadata

ScanImage metadata, read from the TIFF tags of the first page using `tifffile.TiffFile`.


In [None]:
# Collect every TIFF tag on the first page of the first file as a
# {tag name: tag value} mapping.
# NOTE(review): `p` is not defined in the visible notebook -- presumably a
# sequence of TIFF paths created in an earlier cell; confirm.
with tifffile.TiffFile(p[0]) as tif:
    metadata = {tag.name: tag.value for tag in tif.pages[0].tags.values()}
        
# Candidate dimension values to look for among the tags, as ints and as
# their string forms. 5104 and 145 are the y/x sizes listed at the top of
# the notebook; 144 is presumably an off-by-one variant of the x size.
dim_values = {144, 145, 5104, '144', '145', '5104'}

# Keep only the tags whose value equals one of the candidate dimensions.
metadims = {}
for k, v in metadata.items():
    try:
        is_dim = v in dim_values
    except TypeError:
        # Unhashable tag values (lists, dicts, numpy arrays) cannot be a
        # scalar dimension. A plain list-membership test would instead raise
        # ValueError on multi-element arrays.
        is_dim = False
    if is_dim:
        metadims[k] = v

pprint(metadims)

In [None]:
# Number of planes; equals the number of entries in `chans_order`.
n_planes = 30

# Ordering table, written with 1-based entries and shifted to 0-based indices.
chans_order = np.subtract(
    [
        1, 5, 6, 7, 8, 9,
        2, 10, 11, 12, 13, 14, 15, 16, 17,
        3, 18, 19, 20, 21, 22, 23,
        4, 24, 25, 26, 27, 28, 29, 30,
    ],
    1,
)

In [None]:
# NOTE(review): unfinished assignment -- this cell is a SyntaxError as
# written. Supply the right-hand side or delete the cell.
r_rarr = 

In [None]:
import os
import time
import h5py

# Benchmark gzip-compressed HDF5 storage of `data` across several chunk sizes,
# recording creation time, file size, and sequential/random read times.
# NOTE(review): `data` is not defined in the visible notebook; the code below
# assumes a 2-D array exposing `.shape` and `.dtype` -- confirm.
print(data.shape)

# Define chunk sizes to test
chunk_sizes = [32 * 1024, 64 * 1024, 128 * 1024, 256 * 1024, 512 * 1024]  # in bytes

# Metrics to collect (one entry per chunk size, in the same order)
creation_times = []
file_sizes = []
sequential_read_times = []
random_read_times = []

for chunk_size in chunk_sizes:
    # Calculate chunk dimensions: as many full rows as fit in `chunk_size`
    # bytes, clamped to at least one row and at most the whole dataset so
    # that h5py never receives a zero-sized or oversized chunk shape.
    rows_per_chunk = chunk_size // data.dtype.itemsize // data.shape[1]
    rows_per_chunk = max(1, min(rows_per_chunk, data.shape[0]))
    chunk_dims = (rows_per_chunk, data.shape[1])

    h5_file_path = f'temp_{chunk_size}.h5'
    try:
        # Create HDF5 file with specified chunk size and compression.
        # perf_counter is monotonic and high-resolution, unlike time.time.
        start_time = time.perf_counter()
        with h5py.File(h5_file_path, 'w') as f:
            f.create_dataset('data', data=data, compression='gzip', chunks=chunk_dims)
        creation_times.append(time.perf_counter() - start_time)

        # Measure file size
        file_sizes.append(os.path.getsize(h5_file_path) / (1024 * 1024))  # Convert to MB

        # Sequential read: load the whole dataset in one go
        start_time = time.perf_counter()
        with h5py.File(h5_file_path, 'r') as f:
            _ = f['data'][:]
        sequential_read_times.append(time.perf_counter() - start_time)

        # Random read: 100 single-row reads at uniformly random row indices
        start_time = time.perf_counter()
        with h5py.File(h5_file_path, 'r') as f:
            dset = f['data']
            for _ in range(100):
                index = np.random.randint(0, data.shape[0])
                dset[index, :]
        random_read_times.append(time.perf_counter() - start_time)
    finally:
        # Cleanup, even when a step above fails, so temp files do not pile up
        if os.path.exists(h5_file_path):
            os.remove(h5_file_path)

# Plotting results: one panel per metric on a 2x2 grid, filled row by row,
# all sharing the chunk size on the x axis.
fig, axs = plt.subplots(2, 2, figsize=(15, 10))

panels = [
    (creation_times, 'Creation Time vs. Chunk Size', 'Time (s)'),
    (file_sizes, 'File Size vs. Chunk Size', 'Size (MB)'),
    (sequential_read_times, 'Sequential Read Time vs. Chunk Size', 'Time (s)'),
    (random_read_times, 'Random Read Time vs. Chunk Size', 'Time (s)'),
]

for panel_ax, (values, title, y_label) in zip(axs.flat, panels):
    panel_ax.plot(chunk_sizes, values, marker='o')
    panel_ax.set_title(title)
    panel_ax.set_xlabel('Chunk Size (bytes)')
    panel_ax.set_ylabel(y_label)

plt.tight_layout()
plt.show()