In [54]:
import pandas as pd
import numpy as np
import os
import helper

In [55]:
cellprofiler_path = "/home/krarm/AutomatedCiliaMeasurements/sample_csvs"

In [56]:
# Targeted fields to read from cellprofiler csvs
fields = ["ImageNumber", "ObjectNumber", "Location_Center_X", "Location_Center_Y"]

In [57]:
nucleus_df = pd.read_csv(
    os.path.join(cellprofiler_path, "MyExpt_Nucleus.csv"), 
    skipinitialspace=True, 
    usecols=fields
)
nucleus_df.rename(columns={"ObjectNumber": "Nucleus"}, inplace=True)
nucleus_df

Unnamed: 0,ImageNumber,Nucleus,Location_Center_X,Location_Center_Y
0,1,1,364.820513,12.092593
1,1,2,814.714777,19.994845
2,1,3,782.897168,22.812221
3,1,4,670.829710,36.637681
4,1,5,422.262814,38.510360
...,...,...,...,...
7068,25,334,441.176606,997.100917
7069,25,335,704.082414,991.824140
7070,25,336,38.092421,1007.036969
7071,25,337,989.922353,1008.974118


In [58]:
centriole_df = pd.read_csv(
    os.path.join(cellprofiler_path, "MyExpt_Centriole.csv"), 
    skipinitialspace=True, 
    usecols=fields
)
centriole_df.rename(columns={"ObjectNumber": "Centriole"}, inplace=True)
centriole_df

Unnamed: 0,ImageNumber,Centriole,Location_Center_X,Location_Center_Y
0,1,1,741.066667,14.466667
1,1,2,955.210526,19.157895
2,1,3,998.350000,30.150000
3,1,4,1018.076923,35.538462
4,1,5,32.793103,41.517241
...,...,...,...,...
6838,25,319,291.500000,1006.200000
6839,25,320,2.125000,1007.875000
6840,25,321,466.222222,1016.666667
6841,25,322,654.666667,1017.266667


In [59]:
cilia_df = pd.read_csv(
    os.path.join(cellprofiler_path, "MyExpt_Cilia.csv"), 
    skipinitialspace=True, 
    usecols=fields
)
cilia_df.rename(columns={"ObjectNumber": "Cilia"}, inplace=True)
cilia_df

Unnamed: 0,ImageNumber,Cilia,Location_Center_X,Location_Center_Y
0,1,1,129.636364,3.090909
1,1,2,873.000000,4.000000
2,1,3,645.333333,6.222222
3,1,4,427.333333,6.888889
4,1,5,256.000000,9.000000
...,...,...,...,...
8071,25,223,821.000000,1003.000000
8072,25,224,641.661765,1008.397059
8073,25,225,532.093750,1016.625000
8074,25,226,905.607843,1017.568627


In [60]:
grouped_nucleus = nucleus_df.groupby("ImageNumber")
grouped_centriole = centriole_df.groupby("ImageNumber")
grouped_cilia = cilia_df.groupby("ImageNumber")

In [61]:

# Iterate over groups 
# Note: groups in grouped_nucleus, grouped_centriole and grouped_cilia are expected to be aligned
for key in grouped_nucleus.groups.keys():
    
    # Fetch respective group
    nucleus_group = grouped_nucleus.get_group(key)
    centriole_group = grouped_centriole.get_group(key)
    cilia_group = grouped_cilia.get_group(key)

coord_fields = ["Location_Center_X", "Location_Center_Y"]

nucleus_group


Unnamed: 0,ImageNumber,Nucleus,Location_Center_X,Location_Center_Y
6735,25,1,774.465686,22.617647
6736,25,2,503.384615,24.955335
6737,25,3,299.553191,24.944681
6738,25,4,825.792053,25.543046
6739,25,5,342.576592,27.784854
...,...,...,...,...
7068,25,334,441.176606,997.100917
7069,25,335,704.082414,991.824140
7070,25,336,38.092421,1007.036969
7071,25,337,989.922353,1008.974118


In [62]:
coord_fields = ["Location_Center_X", "Location_Center_Y"]

# Match nucleus (child) with closest 2 centrioles (parent) 
dist_arr, idx_arr = helper.match(
    data=centriole_group.loc[:, coord_fields].values, 
    lookups=nucleus_group.loc[:, coord_fields].values, 
    arity=2
)


In [63]:
# Convert np arrays from matching to dataframes and merge
dist_df = pd.DataFrame(dist_arr, columns=['NCent1', 'NCent2'])
idx_df = pd.DataFrame(idx_arr, columns=['Centriole_1', 'Centriole_2'])
nucleus_centriole_df = pd.merge(idx_df, dist_df, left_index=True, right_index=True)

# Rearranging index to match Nucleus number
nucleus_centriole_df.index = np.arange(1, len(nucleus_centriole_df) + 1)

# Merge nucleus-centriole matching dataframe with nucleus_group dataframe
nucleus_group = pd.merge(nucleus_group, nucleus_centriole_df, left_on="Nucleus", right_index=True)

nucleus_group

Unnamed: 0,ImageNumber,Nucleus,Location_Center_X,Location_Center_Y,Centriole_1,Centriole_2,NCent1,NCent2
6735,25,1,774.465686,22.617647,3,15,54.555093,61.897576
6736,25,2,503.384615,24.955335,6,13,63.004055,64.483832
6737,25,3,299.553191,24.944681,1,22,43.506656,81.298632
6738,25,4,825.792053,25.543046,16,7,64.852594,68.626516
6739,25,5,342.576592,27.784854,1,22,19.261070,105.368943
...,...,...,...,...,...,...,...,...
7068,25,334,441.176606,997.100917,311,320,10.152629,31.782093
7069,25,335,704.082414,991.824140,309,321,13.628816,55.580916
7070,25,336,38.092421,1007.036969,319,313,35.977183,40.178886
7071,25,337,989.922353,1008.974118,310,315,27.019827,66.645354
