In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from aicsimageio import AICSImage
import time
import zarr

In [2]:
# Location of the raw movie (TIFF) to analyse.
# NOTE(review): absolute, user-specific path — the notebook cannot be re-run on another
# machine until this points at a local copy (ideally built from a configurable data dir).
input_file_path = '/Users/apple/Desktop/Akamatsu_Lab/Data/Channel1_full.tif'

# Wrap the file in an AICSImage object; later cells access its pixels through
# img.xarray_dask_data and img.get_image_dask_data.
img = AICSImage(input_file_path)

In [12]:
# Pickled DataFrame that is later passed to the extraction functions as `dataframe`
# (presumably the cleaned tracking output — confirm against the file's producer).
# NOTE(review): pd.read_pickle can execute arbitrary code; only load pickles you trust.
# NOTE(review): absolute, user-specific path.
file_path = "/Users/apple/Desktop/Akamatsu_Lab/Data/full_movie_output/track_df_c3_cleaned.pkl"
track_df = pd.read_pickle(file_path)

In [3]:
# Show which aicsimageio reader class was selected for this file.
print(type(img.reader))

<class 'aicsimageio.readers.tiff_reader.TiffReader'>


In [4]:
# Dask-backed xarray view of the whole movie (the summary displayed below reports
# 130 chunks, one per time point); assigning it does not display or print anything.
dask_array = img.xarray_dask_data 

    >>> with dask.config.set(**{'array.slicing.split_large_chunks': False}):
    ...     array.reshape(shape)

To avoid creating the large chunks, set the option
    >>> with dask.config.set(**{'array.slicing.split_large_chunks': True}):
    >>> array.reshape(shape, limit='128 MiB')
  dask_array = img.xarray_dask_data


In [5]:
# Display the array summary (bytes, shape, chunking, dtype).
dask_array

Unnamed: 0,Array,Chunk
Bytes,19.60 GiB,154.36 MiB
Shape,"(130, 1, 116, 2052, 340)","(1, 1, 116, 2052, 340)"
Dask graph,130 chunks in 394 graph layers,130 chunks in 394 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray
"Array Chunk Bytes 19.60 GiB 154.36 MiB Shape (130, 1, 116, 2052, 340) (1, 1, 116, 2052, 340) Dask graph 130 chunks in 394 graph layers Data type uint16 numpy.ndarray",1  130  340  2052  116,

Unnamed: 0,Array,Chunk
Bytes,19.60 GiB,154.36 MiB
Shape,"(130, 1, 116, 2052, 340)","(1, 1, 116, 2052, 340)"
Dask graph,130 chunks in 394 graph layers,130 chunks in 394 graph layers
Data type,uint16 numpy.ndarray,uint16 numpy.ndarray


In [6]:
# Metadata as exposed by the AICSImage object (displayed in the next cell).
metadata= img.metadata

In [7]:
# Display the metadata value.
metadata

'ImageJ=1.54f\nimages=15080\nslices=116\nframes=130\nhyperstack=true\nunit=micron\nfinterval=2.320209302325581\nspacing=0.14499219272808386\nloop=false\nmin=95.0\nmax=268.0'

In [8]:
# Drop all attributes before the array is written out.
# `attrs` is a mapping, so reset it with an empty dict rather than an empty list.
# NOTE(review): presumably cleared so the to_zarr export does not have to serialize the
# image metadata — confirm. This mutates `dask_array` in place.
dask_array.attrs = {}

In [9]:
# Write the whole movie to a zarr store on disk. compute=True requests the write to run
# now; mode='w' requests write mode (presumably replacing anything already stored at
# that path — confirm before re-running).
# NOTE(review): the array is not given an explicit name here; the path opened later
# ('.../zarr_data/transpose-<hash>') suggests it is stored under an auto-generated name.
# Consider naming it explicitly so the store layout is stable across runs — confirm.
# NOTE(review): absolute, user-specific path.
dask_array.to_zarr(store = '/Users/apple/Desktop/Akamatsu_Lab/Data/zarr_data', mode = 'w', compute = True)  

<xarray.backends.zarr.ZarrStore at 0x7f78dd5a0640>

In [6]:
# Open the exported pixel array read-only. The last path component is the array's name
# inside the store written by the to_zarr cell.
# NOTE(review): the 'transpose-<hash>' name looks auto-generated and may differ if the
# export is re-run — verify the path after re-exporting.
z2 = zarr.open('/Users/apple/Desktop/Akamatsu_Lab/Data/zarr_data/transpose-d124a28f31ae405a0278047b912cb6bd', mode='r')

In [7]:
# Display the zarr array's info report (shape, chunk shape, compressor, stored size).
z2.info

0,1
Type,zarr.core.Array
Data type,uint16
Shape,"(130, 1, 116, 2052, 340)"
Chunk shape,"(1, 1, 116, 2052, 340)"
Order,C
Read-only,True
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,21042028800 (19.6G)
No. bytes stored,4566338052 (4.3G)


In [9]:
# Read the first entry along the leading axis of the zarr array (the time axis,
# assuming the (T, C, Z, Y, X) layout reported for the source array — confirm).
x = z2[0]
# Take index 0 on the next axis, leaving a 3-D volume.
x_new = x[0,:,:,:]
# Display the resulting shape as a sanity check.
x_new.shape

(116, 2052, 340)

In [15]:
# Variable-radii (sigma values) version for images that are too large to fit in memory.
def extract_pixels_data_variable_bd(raw_image, mean_col_names: list, dataframe: pd.DataFrame
                                 ,radi_col_names: list, frame_col_name: str, offset: list = [0,0]):
    '''
    Measure voxel statistics in a box around every row of ``dataframe``, loading one
    time frame of ``raw_image`` at a time.

    For each frame ``t`` in ``range(raw_image.dims.T)`` the rows whose ``frame_col_name``
    value equals ``t`` are selected. If there are any, the ZYX volume of channel 0 is read
    with ``raw_image.get_image_dask_data("ZYX", T=t, C=0).compute()`` and, for every row,
    the box ``centre - radius .. centre + radius`` (bounds truncated to integers and
    clipped to the image extent) is cut out. Zero-valued voxels are excluded from the
    mean / max / min.

    **Inputs**:
    1. raw_image: object exposing ``dims.T/Z/Y/X`` and ``get_image_dask_data`` (e.g. an
       AICSImage). Only one time frame is held in memory at a time.
    2. mean_col_names: list of column names holding the box centre, in the order [z, y, x].
    3. dataframe: DataFrame holding the frame number, centre and radius columns.
    4. radi_col_names: list of column names holding the box half-widths, in the order
       [sigma_z, sigma_y, sigma_x].
    5. frame_col_name: name of the column holding the frame number.
    6. offset: optional [y, x] shift subtracted from the y and x centres (clamped at 0);
       z is never shifted. The default list is only read, never modified.

    **Outputs** (five lists with one entry per processed row):
    1. mean: mean of the non-zero voxels in the box (NaN if there are none).
    2. maximum: max of the non-zero voxels (NaN if there are none).
    3. minimum: min of the non-zero voxels (NaN if there are none).
    4. pixel_values: the extracted box itself (zeros included), as an independent copy.
    5. max_loc: (z, y, x) image coordinates of the first maximum voxel in the box, or
       (NaN, NaN, NaN) if the box has no non-zero voxel.

    **Notes**:
    - Entries are ordered frame by frame (rows keep their dataframe order within a frame),
      NOT in the original row order of ``dataframe``. Sort the dataframe by frame first if
      the lists are to be attached back to it as columns.
    - Rows whose frame value is not in ``range(raw_image.dims.T)`` produce no entry.
    - One progress line is printed per frame.
    '''
    mean = []
    maximum = []
    minimum = []
    pixel_values = []
    max_loc = []

    dims = raw_image.dims
    max_z, max_y, max_x = dims.Z, dims.Y, dims.X
    # list(...) so that tuples of column names select several columns instead of one key.
    center_cols = list(mean_col_names)
    radius_cols = list(radi_col_names)

    for frame in range(dims.T):
        print(f'current frame number is {frame}')
        current_df = dataframe[dataframe[frame_col_name] == frame]
        if current_df.empty:
            # Nothing to measure in this frame: skip the expensive volume read.
            continue

        current_image = raw_image.get_image_dask_data("ZYX", T=frame, C=0).compute()
        centers = current_df[center_cols].to_numpy()
        radii = current_df[radius_cols].to_numpy()

        for (z, y, x), (radius_z, radius_y, radius_x) in zip(centers, radii):
            # Apply the channel offset in y/x only, never moving below the image origin.
            y = max(0, y - offset[0])
            x = max(0, x - offset[1])

            # Ensure lower bounds
            z_start = int(max(0, z - radius_z))
            y_start = int(max(0, y - radius_y))
            x_start = int(max(0, x - radius_x))

            # Ensure upper bounds (exclusive, hence the + 1)
            z_end = int(min(max_z, z + radius_z + 1))
            y_end = int(min(max_y, y + radius_y + 1))
            x_end = int(min(max_x, x + radius_x + 1))

            # Copy the box: a plain slice is a view that would keep the entire frame
            # alive for as long as it sits in pixel_values, i.e. the whole movie in RAM.
            extracted_pixels = current_image[z_start:z_end, y_start:y_end, x_start:x_end].copy()

            # Exclude pixels with value 0 before calculating statistics
            non_zero_pixels = extracted_pixels[extracted_pixels != 0]

            if non_zero_pixels.size > 0:
                # Position of the (first) maximum inside the box, shifted back to
                # whole-image coordinates.
                max_index = np.unravel_index(np.argmax(extracted_pixels), extracted_pixels.shape)
                max_coords = (z_start + max_index[0], y_start + max_index[1], x_start + max_index[2])

                mean.append(np.mean(non_zero_pixels))
                maximum.append(np.max(non_zero_pixels))
                minimum.append(np.min(non_zero_pixels))
                max_loc.append(max_coords)
            else:
                # Empty box or all-zero box: keep the lists aligned with NaN placeholders.
                mean.append(np.nan)
                maximum.append(np.nan)
                minimum.append(np.nan)
                max_loc.append((np.nan, np.nan, np.nan))
            pixel_values.append(extracted_pixels)

    return mean,maximum,minimum,pixel_values,max_loc

In [13]:
# Run the extraction over the whole movie and report the wall-clock time.
# Requires `img`, `track_df` and extract_pixels_data_variable_bd to be defined already.
offset = [0,0]  # [y, x] shift between channels; now actually forwarded to the function
col_names = ['mu_z', 'mu_y', 'mu_x']
radi_list = ['sigma_z', 'sigma_y', 'sigma_x']

# perf_counter is monotonic, so the elapsed time cannot be skewed by system clock changes.
start_time = time.perf_counter()
mean,maximum,minimum,pixel_values,max_loc = extract_pixels_data_variable_bd(raw_image = img, 
        mean_col_names = col_names, dataframe = track_df, radi_col_names = radi_list, frame_col_name = 'frame',
        offset = offset)

end_time = time.perf_counter()
print('time taken (seconds)', end_time - start_time)

current frame number is 0
current frame number is 1
current frame number is 2
current frame number is 3
current frame number is 4
current frame number is 5
current frame number is 6
current frame number is 7
current frame number is 8
current frame number is 9
current frame number is 10
current frame number is 11
current frame number is 12
current frame number is 13
current frame number is 14
current frame number is 15
current frame number is 16
current frame number is 17
current frame number is 18
current frame number is 19
current frame number is 20
current frame number is 21
current frame number is 22
current frame number is 23
current frame number is 24
current frame number is 25
current frame number is 26
current frame number is 27
current frame number is 28
current frame number is 29
current frame number is 30
current frame number is 31
current frame number is 32
current frame number is 33
current frame number is 34
current frame number is 35
current frame number is 36
current fra

In [14]:
# Keep the mean list from the run above under a run-specific name. This is a second
# reference to the same list object, not a copy.
mean_aics_channel_1 = mean 

In [None]:
# Variable-radii (sigma values) version that can also read frames straight from a
# 5-D array store (e.g. a zarr array) instead of an AICSImage.
def extract_pixels_data_variable_bd_zarr(raw_image, mean_col_names: list, dataframe: pd.DataFrame
                                 ,radi_col_names: list, frame_col_name: str, offset: list = [0,0]):
    '''
    Measure voxel statistics in a box around every row of ``dataframe``, loading one
    time frame of ``raw_image`` at a time.

    For each frame ``t`` the rows whose ``frame_col_name`` value equals ``t`` are selected.
    If there are any, the ZYX volume of channel 0 is loaded and, for every row, the box
    ``centre - radius .. centre + radius`` (bounds truncated to integers and clipped to the
    image extent) is cut out. Zero-valued voxels are excluded from the mean / max / min.

    **Inputs**:
    1. raw_image: either
       - an object exposing ``dims.T/Z/Y/X`` and ``get_image_dask_data`` (e.g. an
         AICSImage), read with ``get_image_dask_data("ZYX", T=t, C=0).compute()``; or
       - a 5-D array-like indexed as (T, C, Z, Y, X) with a ``shape`` attribute (e.g. a
         zarr array or NumPy array), read with ``raw_image[t, 0]``.
    2. mean_col_names: list of column names holding the box centre, in the order [z, y, x].
    3. dataframe: DataFrame holding the frame number, centre and radius columns.
    4. radi_col_names: list of column names holding the box half-widths, in the order
       [sigma_z, sigma_y, sigma_x].
    5. frame_col_name: name of the column holding the frame number.
    6. offset: optional [y, x] shift subtracted from the y and x centres (clamped at 0);
       z is never shifted. The default list is only read, never modified.

    **Outputs** (five lists with one entry per processed row):
    1. mean: mean of the non-zero voxels in the box (NaN if there are none).
    2. maximum: max of the non-zero voxels (NaN if there are none).
    3. minimum: min of the non-zero voxels (NaN if there are none).
    4. pixel_values: the extracted box itself (zeros included), as an independent copy.
    5. max_loc: (z, y, x) image coordinates of the first maximum voxel in the box, or
       (NaN, NaN, NaN) if the box has no non-zero voxel.

    **Raises**:
    ValueError if an array-like ``raw_image`` is not 5-dimensional.

    **Notes**:
    - Entries are ordered frame by frame (rows keep their dataframe order within a frame),
      NOT in the original row order of ``dataframe``.
    - Rows whose frame value is outside the image's frame range produce no entry.
    - One progress line is printed per frame.
    '''
    if hasattr(raw_image, "get_image_dask_data"):
        # AICSImage-style input: sizes come from .dims, frames from the dask reader.
        dims = raw_image.dims
        frames, max_z, max_y, max_x = dims.T, dims.Z, dims.Y, dims.X

        def load_frame(frame):
            return raw_image.get_image_dask_data("ZYX", T=frame, C=0).compute()
    else:
        # Array-style input (zarr / NumPy / dask): expect a (T, C, Z, Y, X) layout.
        shape = tuple(raw_image.shape)
        if len(shape) != 5:
            raise ValueError(f"array input must be 5-D (T, C, Z, Y, X), got shape {shape}")
        frames, _, max_z, max_y, max_x = shape

        def load_frame(frame):
            # Integer indexing on T and C leaves the ZYX volume of channel 0.
            return np.asarray(raw_image[frame, 0])

    mean = []
    maximum = []
    minimum = []
    pixel_values = []
    max_loc = []

    # list(...) so that tuples of column names select several columns instead of one key.
    center_cols = list(mean_col_names)
    radius_cols = list(radi_col_names)

    for frame in range(frames):
        print(f'current frame number is {frame}')
        current_df = dataframe[dataframe[frame_col_name] == frame]
        if current_df.empty:
            # Nothing to measure in this frame: skip the expensive volume read.
            continue

        current_image = load_frame(frame)
        centers = current_df[center_cols].to_numpy()
        radii = current_df[radius_cols].to_numpy()

        for (z, y, x), (radius_z, radius_y, radius_x) in zip(centers, radii):
            # Apply the channel offset in y/x only, never moving below the image origin.
            y = max(0, y - offset[0])
            x = max(0, x - offset[1])

            # Ensure lower bounds
            z_start = int(max(0, z - radius_z))
            y_start = int(max(0, y - radius_y))
            x_start = int(max(0, x - radius_x))

            # Ensure upper bounds (exclusive, hence the + 1)
            z_end = int(min(max_z, z + radius_z + 1))
            y_end = int(min(max_y, y + radius_y + 1))
            x_end = int(min(max_x, x + radius_x + 1))

            # Copy the box: a plain slice is a view that would keep the entire frame
            # alive for as long as it sits in pixel_values, i.e. the whole movie in RAM.
            extracted_pixels = current_image[z_start:z_end, y_start:y_end, x_start:x_end].copy()

            # Exclude pixels with value 0 before calculating statistics
            non_zero_pixels = extracted_pixels[extracted_pixels != 0]

            if non_zero_pixels.size > 0:
                # Position of the (first) maximum inside the box, shifted back to
                # whole-image coordinates.
                max_index = np.unravel_index(np.argmax(extracted_pixels), extracted_pixels.shape)
                max_coords = (z_start + max_index[0], y_start + max_index[1], x_start + max_index[2])

                mean.append(np.mean(non_zero_pixels))
                maximum.append(np.max(non_zero_pixels))
                minimum.append(np.min(non_zero_pixels))
                max_loc.append(max_coords)
            else:
                # Empty box or all-zero box: keep the lists aligned with NaN placeholders.
                mean.append(np.nan)
                maximum.append(np.nan)
                minimum.append(np.nan)
                max_loc.append((np.nan, np.nan, np.nan))
            pixel_values.append(extracted_pixels)

    return mean,maximum,minimum,pixel_values,max_loc