# This notebook takes the dataframe on which tracking has been performed and uses the spot coordinates to extract activity from the other channels.

In [1]:
from os import path
import pandas as pd
from IPython.display import display
import numpy as np 
import sys 
import time 
import zarr

sys.path.append('../src/')

from extract_pixel_data import Extractor

#plt.rcParams["font.family"] = ""

In [2]:
# Path of the tracking dataframe returned from notebook 04.
# NOTE(review): this is an absolute, machine-specific path; update it before running on another machine.
# NOTE(review): pd.read_pickle unpickles the file, which can execute arbitrary code; only load files from a trusted source.
file_path = "/Users/apple/Desktop/Akamatsu_Lab/Data/full_movie_output/track_df_c3_cleaned.pkl"
track_df = pd.read_pickle(file_path)

In [3]:
track_df

Unnamed: 0,frame,index,amplitude,mu_x,mu_y,mu_z,sigma_x,sigma_y,sigma_z,frame_y,tree_id,track_id
0,0,0,175.000000,19.0,1813.0,3.0,2.0,1.0,2.0,0,0,0
1,0,1,188.000000,24.0,1804.0,4.0,2.0,2.0,3.0,0,1,1
2,0,2,182.305173,49.0,569.0,4.0,2.0,1.0,2.0,0,2,2
3,0,3,176.000000,134.0,739.0,4.0,2.0,2.0,3.0,0,3,3
4,0,4,178.000000,39.0,1124.0,4.0,3.0,2.0,3.0,0,4,4
...,...,...,...,...,...,...,...,...,...,...,...,...
515930,129,3059,227.867919,231.0,1921.0,110.0,2.0,2.0,2.0,129,198325,198325
515931,129,3060,269.333333,258.0,1272.0,110.0,2.0,1.0,2.0,129,198326,198326
515932,129,3061,186.475768,259.0,1339.0,110.0,3.0,2.0,2.0,129,198327,198327
515933,129,3062,184.892993,275.0,1325.0,110.0,2.0,2.0,2.0,129,195783,195783


In [4]:
# Open (read-only) the zarr array which contains the data of all three channels.
# NOTE(review): this is an absolute, machine-specific path; update it before running on another machine.
z = zarr.open('/Users/apple/Desktop/Akamatsu_Lab/Data/Zarr_file_new/all_channels_data', mode='r')

In [5]:
z.info

0,1
Type,zarr.core.Array
Data type,uint16
Shape,"(130, 3, 116, 2052, 340)"
Chunk shape,"(1, 1, 116, 2052, 340)"
Order,C
Read-only,True
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.DirectoryStore
No. bytes,63126086400 (58.8G)
No. bytes stored,13548767504 (12.6G)


In [6]:
# Sanity check: number of distinct frames present in the tracking dataframe.
# Compare with the first axis of z reported by z.info (presumably the frame axis; TODO confirm).
track_df['frame'].nunique()

130

## Create the Extractor object

In [7]:
# Create an Extractor object with the parameters required by the methods used in the next steps.
# radii: the expected radius of the CME sites (used for calculations which need a fixed radius)
# radi_col_name: list of the sigma columns estimated by Gaussian fitting (note: the convention for the list is z, y, x)
# n_jobs: number of cores to use (Gaussian fitting on the other two channels can run in parallel to speed up the process)
extractor = Extractor(z, dataframe = track_df, radii=[4,2,2], frame_col_name = 'frame', 
                      radi_col_name = ['sigma_z', 'sigma_y', 'sigma_x'], n_jobs =2)

# Extract Information for Channel 3 (Clathrin)

## Extracting voxel sum

In [None]:
# Voxel sum for channel 3, calculated around the tracked spot coordinates.
# center_col_names: coordinate columns, in [z, y, x] order.
# channel: 1 for channel 1, 2 for channel 2 and so on; pass the number of
# whichever channel the data should be extracted for.
# time.perf_counter() is a monotonic clock, so the measured interval is not
# affected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
voxel_sum_array, _ = extractor.voxel_sum_fixed_bd(center_col_names = ['mu_z', 'mu_y', 'mu_x'], channel = 3)
end_time = time.perf_counter()
print('time taken (seconds)', end_time - start_time)

In [10]:
# Background-adjusted voxel sum for channel 3 using the "v2" method.
# The call prints diagnostic lines (patch areas, patch sums, background adjusted sum).
# NOTE: this rebinds voxel_sum_array, which a later cell stores as track_df['c3_voxel_sum'].
voxel_sum_array, _, adj_voxel_sum_1 = extractor.voxel_sum_fixed_background_v2(center_col_names = ['mu_z', 'mu_y', 'mu_x'], channel = 3)

current frame is 0
area_small 96
area_large 252
voxel sum small patch 12927
voxel sum larger patch 33821
background adjusted sum 69.15384615384664
area_small 112
area_large 288
voxel sum small patch 15894
voxel sum larger patch 41080
background adjusted sum -133.4545454545446
area_small 112
area_large 288
voxel sum small patch 14697
voxel sum larger patch 36356
background adjusted sum 914.0
area_small 112
area_large 288
voxel sum small patch 15905
voxel sum larger patch 40803
background adjusted sum 60.81818181818153
area_small 112
area_large 288
voxel sum small patch 14972
voxel sum larger patch 38479
background adjusted sum 13.0
area_small 128
area_large 324
voxel sum small patch 17522
voxel sum larger patch 43952
background adjusted sum 261.59183673469306
area_small 112
area_large 288
voxel sum small patch 15730
voxel sum larger patch 40574
background adjusted sum -79.81818181818153
area_small 128
area_large 324
voxel sum small patch 17743
voxel sum larger patch 44890
background adj

In [11]:
# Background-adjusted voxel sum for channel 3 using the original (non-v2) method.
# The call prints diagnostic lines (patch areas, patch sums, background adjusted sum).
# NOTE: this rebinds voxel_sum_array, which a later cell stores as track_df['c3_voxel_sum'].
voxel_sum_array, _, adj_voxel_sum_2 = extractor.voxel_sum_fixed_background(center_col_names = ['mu_z', 'mu_y', 'mu_x'], channel = 3)

current frame is 0
area_small 96
area_large 252
voxel sum small patch 12927
voxel sum larger patch 33821
background adjusted sum 69.15384615384573
area_small 112
area_large 288
voxel sum small patch 15894
voxel sum larger patch 41080
background adjusted sum -133.45454545454595
area_small 112
area_large 288
voxel sum small patch 14697
voxel sum larger patch 36356
background adjusted sum 914.0
area_small 112
area_large 288
voxel sum small patch 15905
voxel sum larger patch 40803
background adjusted sum 60.81818181818198
area_small 112
area_large 288
voxel sum small patch 14972
voxel sum larger patch 38479
background adjusted sum 13.0
area_small 128
area_large 324
voxel sum small patch 17522
voxel sum larger patch 43952
background adjusted sum 261.5918367346967
area_small 112
area_large 288
voxel sum small patch 15730
voxel sum larger patch 40574
background adjusted sum -79.81818181818198
area_small 128
area_large 324
voxel sum small patch 17743
voxel sum larger patch 44890
background adj

In [None]:
# Pixel statistics (mean, maximum, minimum, pixel values, location of the maximum)
# for channel 3 around the tracked spot coordinates.
# col_names: coordinate columns, in [z, y, x] order.
# channel: 1 for channel 1, 2 for channel 2 and so on.
col_names = ['mu_z', 'mu_y', 'mu_x']

# time.perf_counter() is a monotonic clock, so the measured interval is not
# affected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
mean,maximum,minimum,pixel_values,max_loc = extractor.extract_pixels_data_variable_bd(center_col_names = col_names, channel = 3)
end_time = time.perf_counter()
print('time taken (seconds)', end_time - start_time)

In [None]:
# Convert to an ndarray so the columns can be sliced as max_loc[:, i] in the next cell.
max_loc = np.array(max_loc)

In [None]:
# Store the channel 3 results on the tracking dataframe.
# NOTE(review): voxel_sum_array holds whatever the most recently executed voxel-sum cell assigned
# (voxel_sum_fixed_bd or one of the background variants); confirm the intended cell ran last.
track_df['c3_mean_amp'] = mean
track_df['c3_voxel_sum'] = voxel_sum_array
track_df['c3_peak_amp'] = maximum 
# max_loc columns 0, 1, 2 are stored as z, y, x respectively.
track_df['c3_peak_x'] = max_loc[:,2]
track_df['c3_peak_y'] = max_loc[:,1]
track_df['c3_peak_z'] = max_loc[:,0]

In [None]:
track_df

# Extract information for Channel 2 

In [None]:
# Pixel statistics (mean, maximum, minimum, pixel values, location of the maximum)
# for channel 2 around the tracked spot coordinates.
# col_names: coordinate columns, in [z, y, x] order.
# channel: 1 for channel 1, 2 for channel 2 and so on.
col_names = ['mu_z', 'mu_y', 'mu_x']

# time.perf_counter() is a monotonic clock, so the measured interval is not
# affected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
mean,maximum,minimum,pixel_values,max_loc = extractor.extract_pixels_data_variable_bd(center_col_names = col_names, channel = 2)
end_time = time.perf_counter()
print('time taken (seconds)', end_time - start_time)

In [None]:
# Convert to an ndarray so the columns can be sliced as max_loc[:, i] when storing the channel 2 peak coordinates.
max_loc = np.array(max_loc)

## Finding the peak pixel for Channel 2, because the offset between Channel 3 and Channel 2 is not known
**The offset arises because the two channels are imaged with different cameras**

In [None]:
# Store the channel 2 results on the tracking dataframe.
# c2_amp is the mean and c2_peak the maximum returned by extract_pixels_data_variable_bd for channel 2.
track_df['c2_amp'] = mean
track_df['c2_peak'] = maximum
# max_loc columns 0, 1, 2 are stored as z, y, x respectively.
track_df['c2_peak_x'] = max_loc[:,2]
track_df['c2_peak_y'] = max_loc[:,1]
track_df['c2_peak_z'] = max_loc[:,0]

In [None]:
track_df

## Finding mean value around the peak pixel value for channel 2 

In [None]:
# Re-run the pixel extraction for channel 2, this time centred on the
# channel 2 peak coordinates stored in track_df (columns in [z, y, x] order).
col_names = ['c2_peak_z', 'c2_peak_y', 'c2_peak_x']

# time.perf_counter() is a monotonic clock, so the measured interval is not
# affected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
peak_mean,maxima,_,_,_ = extractor.extract_pixels_data_variable_bd(center_col_names = col_names, 
                                                     channel = 2)
end_time = time.perf_counter()
print('time taken (seconds)', end_time - start_time)

In [None]:
# Mean returned by the extraction centred on the channel 2 peak coordinates.
track_df['c2_peak_mean'] = peak_mean

In [None]:
track_df

## Finding voxel sum around peak for channel 2 

In [None]:
# Voxel sum for channel 2, calculated around the channel 2 peak coordinates.
# center_col_names: coordinate columns, in [z, y, x] order.
# channel: 1 for channel 1, 2 for channel 2 and so on.
# time.perf_counter() is a monotonic clock, so the measured interval is not
# affected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
voxel_sum_array, _ = extractor.voxel_sum_fixed_bd(center_col_names = ['c2_peak_z', 'c2_peak_y', 'c2_peak_x'],
                                       channel = 2)
end_time = time.perf_counter()
print('time taken (seconds)', end_time - start_time)

In [None]:
# Channel 2 voxel sum, calculated around the channel 2 peak coordinates (c2_peak_z, c2_peak_y, c2_peak_x).
track_df['c2_voxel_sum'] = voxel_sum_array

In [None]:
track_df

### Gaussian Fitting for Channel 2 around peak values 

In [None]:
# Gaussian fitting estimates for channel 2 around the channel 2 peak coordinates.
# expected_sigma: expected sigma values needed for the Gaussian fitting
# all_frames: set to False to process only a limited number of frames
# max_frames: number of frames to process when all_frames is False
# time.perf_counter() is a monotonic clock, so the measured interval is not
# affected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
channel2_gaussians_df = extractor.run_parallel_frame_processing(
    expected_sigma = [4,2,2],
    center_col_name = ['c2_peak_z', 'c2_peak_y', 'c2_peak_x'],
    dist_between_spots = 10, channel = 2, max_frames = 2, all_frames = False)
end_time = time.perf_counter()
print('time taken (seconds)', end_time - start_time)

In [None]:
# Copy the channel 2 Gaussian fit estimates onto the tracking dataframe.
# NOTE(review): assigning a pandas Series aligns on index, so this presumably relies on
# channel2_gaussians_df sharing track_df's index; TODO confirm.
# NOTE(review): the fitting cell above is called with max_frames = 2 and all_frames = False, so rows
# from frames that were not processed will presumably be NaN here; verify.
track_df['c2_gaussian_amp'] = channel2_gaussians_df['amplitude']
track_df['c2_mu_x'] = channel2_gaussians_df['mu_x']
track_df['c2_mu_y'] = channel2_gaussians_df['mu_y']
track_df['c2_mu_z'] = channel2_gaussians_df['mu_z']
track_df['c2_sigma_x'] = channel2_gaussians_df['sigma_x']
track_df['c2_sigma_y'] = channel2_gaussians_df['sigma_y']
track_df['c2_sigma_z'] = channel2_gaussians_df['sigma_z']

In [None]:
track_df

# Extract information for channel 1

In [None]:
# Pixel statistics (mean, maximum, minimum, pixel values, location of the maximum)
# for channel 1 around the tracked spot coordinates.
# col_names: coordinate columns, in [z, y, x] order.
# channel: 1 for channel 1, 2 for channel 2 and so on.
col_names = ['mu_z', 'mu_y', 'mu_x']
mean,maximum,minimum,pixel_values,max_loc = extractor.extract_pixels_data_variable_bd(center_col_names = col_names, channel = 1)

In [None]:
# Convert to an ndarray so the columns can be sliced as max_loc[:, i] in the next cell.
max_loc = np.array(max_loc)

In [None]:
# Store the channel 1 results on the tracking dataframe.
# c1_amp is the mean and c1_peak the maximum returned by extract_pixels_data_variable_bd for channel 1.
track_df['c1_amp'] = mean
track_df['c1_peak'] = maximum
# max_loc columns 0, 1, 2 are stored as z, y, x respectively.
track_df['c1_peak_x'] = max_loc[:,2]
track_df['c1_peak_y'] = max_loc[:,1]
track_df['c1_peak_z'] = max_loc[:,0]

In [None]:
# Voxel sum for channel 1, calculated around the tracked coordinates (mu_z, mu_y, mu_x),
# not around the channel 1 peak coordinates.
voxel_sum_array, _ = extractor.voxel_sum_fixed_bd(center_col_names = ['mu_z', 'mu_y', 'mu_x'], channel = 1)


In [None]:
# Channel 1 voxel sum, calculated around the tracked coordinates (mu_z, mu_y, mu_x) used in the
# voxel_sum_fixed_bd call above, not around the channel 1 peak coordinates.
track_df['c1_voxel_sum'] = voxel_sum_array

In [None]:
track_df

In [None]:
track_df.shape

### Perform Gaussian Fitting for channel 1

In [None]:
# Gaussian fitting estimates for channel 1 around the channel 1 peak coordinates.
# expected_sigma: expected sigma values needed for the Gaussian fitting
# max_frames / all_frames: same call shape as the channel 2 fitting; here
# max_frames = 2 with all_frames = False.
# time.perf_counter() is a monotonic clock, so the measured interval is not
# affected by system clock adjustments (unlike time.time()).
start_time = time.perf_counter()
channel1_gaussians_df = extractor.run_parallel_frame_processing(
    expected_sigma = [4,2,2],
    center_col_name = ['c1_peak_z', 'c1_peak_y', 'c1_peak_x'],
    dist_between_spots = 10, channel = 1, max_frames = 2, all_frames = False)
end_time = time.perf_counter()
print('time taken (seconds)', end_time - start_time)

In [None]:
channel1_gaussians_df

In [None]:
# Copy the channel 1 Gaussian fit estimates onto the tracking dataframe.
# NOTE(review): assigning a pandas Series aligns on index, so this presumably relies on
# channel1_gaussians_df sharing track_df's index; TODO confirm.
# NOTE(review): the fitting cell above is called with max_frames = 2 and all_frames = False, so rows
# from frames that were not processed will presumably be NaN here; verify.
track_df['c1_gaussian_amp'] = channel1_gaussians_df['amplitude']
track_df['c1_mu_x'] = channel1_gaussians_df['mu_x']
track_df['c1_mu_y'] = channel1_gaussians_df['mu_y']
track_df['c1_mu_z'] = channel1_gaussians_df['mu_z']
track_df['c1_sigma_x'] = channel1_gaussians_df['sigma_x']
track_df['c1_sigma_y'] = channel1_gaussians_df['sigma_y']
track_df['c1_sigma_z'] = channel1_gaussians_df['sigma_z']

In [None]:
track_df

In [None]:
# Uncomment the two lines below to save the final dataframe as a pickle.
# NOTE(review): the path is absolute and machine-specific; update it before saving.
#file_path = "/Users/apple/Desktop/Akamatsu_Lab/Data/full_movie_output_zarr/track_df_cleaned_final_full.pkl"
#track_df.to_pickle(file_path)

# To test the following functions, change the markdown cells below to code cells
1. Voxel sum using the variable sigma/radi values directly inferred from the dataset 
2. Extracting pixel values mean,max etc using fixed radi
3. Performing Gaussian Fitting on peak coords for single frame

### Function 1 (Voxel Sum)
start_time = time.time()
voxel_sum_array_variable, _ = extractor.voxel_sum_variable_bd(dataframe = track_df,center_col_names = ['mu_z', 'mu_y', 'mu_x'],
                                                  radi_col_name = ['sigma_z','sigma_y','sigma_x'], frame_col_name = 'frame', channel = 3)
end_time = time.time()
print('time taken (seconds)', end_time - start_time)

### Function 2 (Extracting Pixel Values)
start_time = time.time()
offset = [0,0]
col_names = ['mu_z', 'mu_y', 'mu_x']
radii = [4, 2, 2]
mean_v,maximum_v,minimum_v,pixel_values_v,max_loc_v = extractor.extract_pixels_data_fixed_bd(center_col_names = col_names,
                dataframe = track_df, radii = radii, frame_col_name = 'frame', channel = 3)

end_time = time.time()
print('time taken (seconds)', end_time - start_time)

### Function 3 (Gaussian fitting on one frame for one channel)
df = extractor.gaussian_fitting_single_frame(expected_sigma = [4,2,2], 
                              center_col_names = ['c2_peak_z', 'c2_peak_y', 'c2_peak_x'],
                                      frame = 0, channel = 2, dist_between_spots = 10)