In [18]:
import pandas as pd
import numpy as np
import os
from scipy.spatial import KDTree
from bisect import insort
from collections import defaultdict
import math

In [19]:
def match(parents, childs, arity, thresholds=None):

    kd_tree = KDTree(data=parents, leafsize=10)

    child_to_parent = {}
    visited_child = {}
    parent_to_child = defaultdict(list)

    num_parents = len(parents) + 1 # 1-based for KDTree query

    for child_idx, child_coords in enumerate(childs):

        # Initialize child_to_parent record in dictionary
        child_to_parent[child_idx] = {}
        child_to_parent[child_idx]["path_length"] = float("inf") # The length of the shortest path
        child_to_parent[child_idx]["parent"] = None # The index of the cell to which the shortest path corresponds

        # Record coordinates in visited_child dictionary
        visited_child[child_idx] = child_coords

        # Target child_idx to lookup in KDTree
        lookup_child_idx = child_idx

        for k in range(1, num_parents+1):

            # Query closest parent
            dist_arr, parent_idx_arr = kd_tree.query(x=visited_child[lookup_child_idx], k=[k], workers=1)
            dist = float(dist_arr)
            parent_idx = int(parent_idx_arr)

            # Get threshold
            if thresholds:
                threshold = 2.5 * thresholds[parent_idx]
            else:
                threshold = float("inf")

            # If closest parent distance is greater than threshold, child is automatically invalidated
            if dist > threshold:
                child_to_parent[lookup_child_idx]["path_length"] = -1
                child_to_parent[lookup_child_idx]["parent"] = -1
                break

            # Add parent information to child_to_parent dictionary
            child_to_parent[lookup_child_idx]["path_length"] = dist
            child_to_parent[lookup_child_idx]["parent"] = parent_idx

            # Add child information to parent_to_child dictionary
            insort(parent_to_child[parent_idx], (dist, lookup_child_idx))

            # Check if parent has number of childs exceeding arity
            if len(parent_to_child[parent_idx]) > arity:

                # Remove the furthest child
                _, child_to_remove = parent_to_child[parent_idx].pop()

                # Case of no possible match - no more parents left
                if k == num_parents:
                    child_to_parent[child_to_remove]["path_length"] = -1
                    child_to_parent[child_to_remove]["parent"] = -1
                else: 
                # Proceed to match with next possible closest parent

                    # Reintialize child_to_parent record in dictionary
                    child_to_parent[child_to_remove]["path_length"] = float("inf")
                    child_to_parent[child_to_remove]["parent"] = None

                    # Set removed child as lookup target to match with next available neighbor
                    lookup_child_idx = child_to_remove
            else:
                # If insertion suceed, proceed to next child in list
                break

    return child_to_parent      


In [20]:
cellprofiler_path = "/home/krarm/AutomatedCiliaMeasurements/sample_csvs"

In [21]:
# Targeted fields to read from cellprofiler csvs
fields = ["ImageNumber", "ObjectNumber", "Location_Center_X", "Location_Center_Y", "AreaShape_MeanRadius"]

In [22]:
nucleus_df = pd.read_csv(
    os.path.join(cellprofiler_path, "MyExpt_Nucleus.csv"), 
    skipinitialspace=True
)
nucleus_df = nucleus_df.astype({"ImageNumber": pd.Int64Dtype(), "ObjectNumber": pd.Int64Dtype()})
nucleus_df.rename(columns={"ObjectNumber": "Nucleus"}, inplace=True)
nucleus_df

Unnamed: 0,ImageNumber,Nucleus,AreaShape_Area,AreaShape_BoundingBoxArea,AreaShape_BoundingBoxMaximum_X,AreaShape_BoundingBoxMaximum_Y,AreaShape_BoundingBoxMinimum_X,AreaShape_BoundingBoxMinimum_Y,AreaShape_Center_X,AreaShape_Center_Y,...,AreaShape_MeanRadius,AreaShape_MedianRadius,AreaShape_MinFeretDiameter,AreaShape_MinorAxisLength,AreaShape_Orientation,AreaShape_Perimeter,AreaShape_Solidity,Location_Center_X,Location_Center_Y,Location_Center_Z
0,1,1,702,988,393,22,341,3,364.820513,12.092593,...,3.816239,3.605551,17.029272,17.360803,1.453188,121.840620,0.937250,364.820513,12.092593,0
1,1,2,582,896,830,36,798,8,814.714777,19.994845,...,4.293767,4.000000,22.235372,22.420547,1.031736,96.911688,0.941748,814.714777,19.994845,0
2,1,3,671,1020,799,41,765,11,782.897168,22.812221,...,4.409043,4.000000,27.347945,26.584813,1.314761,109.254834,0.899464,782.897168,22.812221,0
3,1,4,552,864,685,53,658,21,670.829710,36.637681,...,4.164916,3.605551,21.693046,20.995950,-0.652995,94.083261,0.950086,670.829710,36.637681,0
4,1,5,917,1218,443,54,401,25,422.262814,38.510360,...,5.487574,5.000000,27.574534,28.135618,-1.440286,116.325902,0.975532,422.262814,38.510360,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7068,25,334,436,744,456,1009,425,985,441.176606,997.100917,...,3.597257,3.162278,17.473056,18.204215,1.060228,86.568542,0.931624,441.176606,997.100917,0
7069,25,335,1541,2150,732,1015,682,972,704.082414,991.824140,...,6.336996,5.830952,41.350911,43.340953,-1.546355,163.438600,0.935073,704.082414,991.824140,0
7070,25,336,541,756,52,1022,24,995,38.092421,1007.036969,...,4.316920,4.000000,21.628155,22.275149,0.960773,88.426407,0.962633,38.092421,1007.036969,0
7071,25,337,425,576,1007,1018,975,1000,989.922353,1008.974118,...,3.565770,3.162278,16.570395,16.653563,1.477893,83.941125,0.946548,989.922353,1008.974118,0


In [23]:
centriole_df = pd.read_csv(
    os.path.join(cellprofiler_path, "MyExpt_Centriole.csv"), 
    skipinitialspace=True
)
centriole_df = centriole_df.astype({"ImageNumber": pd.Int64Dtype(), "ObjectNumber": pd.Int64Dtype()})
centriole_df.rename(columns={"ObjectNumber": "Centriole"}, inplace=True)
centriole_df

Unnamed: 0,ImageNumber,Centriole,AreaShape_Area,AreaShape_BoundingBoxArea,AreaShape_BoundingBoxMaximum_X,AreaShape_BoundingBoxMaximum_Y,AreaShape_BoundingBoxMinimum_X,AreaShape_BoundingBoxMinimum_Y,AreaShape_Center_X,AreaShape_Center_Y,...,AreaShape_MeanRadius,AreaShape_MedianRadius,AreaShape_MinFeretDiameter,AreaShape_MinorAxisLength,AreaShape_Orientation,AreaShape_Perimeter,AreaShape_Solidity,Location_Center_X,Location_Center_Y,Location_Center_Z
0,1,1,15,28,745,17,738,13,741.066667,14.466667,...,1.027614,1.0,2.84605,4.030074,-1.425595,9.621320,0.714286,741.066667,14.466667,0
1,1,2,19,35,959,22,952,17,955.210526,19.157895,...,1.096233,1.0,4.00000,4.835651,-1.364923,14.449747,0.730769,955.210526,19.157895,0
2,1,3,20,30,1002,33,996,28,998.350000,30.150000,...,1.112132,1.0,4.00000,5.073696,-0.945033,12.449747,0.769231,998.350000,30.150000,0
3,1,4,13,28,1022,38,1015,34,1018.076923,35.538462,...,1.031863,1.0,2.84605,3.664182,-1.444637,8.621320,0.722222,1018.076923,35.538462,0
4,1,5,29,49,37,46,30,39,32.793103,41.517241,...,1.140382,1.0,6.00000,6.162250,-0.610874,19.278175,0.743590,32.793103,41.517241,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6838,25,319,10,12,294,1008,290,1005,291.500000,1006.200000,...,1.082843,1.0,2.00000,2.993326,1.570796,8.828427,1.000000,291.500000,1006.200000,0
6839,25,320,8,9,4,1010,1,1007,2.125000,1007.875000,...,1.051777,1.0,2.00000,2.738613,-0.785398,7.414214,1.000000,2.125000,1007.875000,0
6840,25,321,9,12,468,1019,465,1015,466.222222,1016.666667,...,1.046024,1.0,2.00000,3.130045,-0.133126,7.621320,0.900000,466.222222,1016.666667,0
6841,25,322,15,20,658,1020,653,1016,654.666667,1017.266667,...,1.216176,1.0,3.00000,3.981909,-1.468099,11.656854,1.000000,654.666667,1017.266667,0


In [24]:
cilia_df = pd.read_csv(
    os.path.join(cellprofiler_path, "MyExpt_Cilia.csv"), 
    skipinitialspace=True
)
cilia_df = cilia_df.astype({"ImageNumber": pd.Int64Dtype(), "ObjectNumber": pd.Int64Dtype()})
cilia_df.rename(columns={"ObjectNumber": "Cilia"}, inplace=True)
cilia_df

Unnamed: 0,ImageNumber,Cilia,AreaShape_Area,AreaShape_BoundingBoxArea,AreaShape_BoundingBoxMaximum_X,AreaShape_BoundingBoxMaximum_Y,AreaShape_BoundingBoxMinimum_X,AreaShape_BoundingBoxMinimum_Y,AreaShape_Center_X,AreaShape_Center_Y,...,AreaShape_MeanRadius,AreaShape_MedianRadius,AreaShape_MinFeretDiameter,AreaShape_MinorAxisLength,AreaShape_Orientation,AreaShape_Perimeter,AreaShape_Solidity,Location_Center_X,Location_Center_Y,Location_Center_Z
0,1,1,11,12,132,5,128,2,129.636364,3.090909,...,1.128565,1.0,2.000000,3.067356,-1.307763,9.414214,1.000000,129.636364,3.090909,0
1,1,2,1,1,874,5,873,4,873.000000,4.000000,...,1.000000,1.0,0.000000,0.000000,0.785398,0.000000,1.000000,873.000000,4.000000,0
2,1,3,9,16,648,9,644,5,645.333333,6.222222,...,1.000000,1.0,1.788854,2.331612,-0.717197,7.414214,0.900000,645.333333,6.222222,0
3,1,4,9,12,430,9,426,6,427.333333,6.888889,...,1.092047,1.0,2.000000,2.795443,1.216483,8.242641,1.000000,427.333333,6.888889,0
4,1,5,1,1,257,10,256,9,256.000000,9.000000,...,1.000000,1.0,0.000000,0.000000,0.785398,0.000000,1.000000,256.000000,9.000000,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8071,25,223,1,1,822,1004,821,1003,821.000000,1003.000000,...,1.000000,1.0,0.000000,0.000000,0.785398,0.000000,1.000000,821.000000,1003.000000,0
8072,25,224,68,126,654,1012,633,1006,641.661765,1008.397059,...,1.316908,1.0,4.638124,4.378861,-1.456749,44.727922,0.772727,641.661765,1008.397059,0
8073,25,225,32,84,539,1021,527,1014,532.093750,1016.625000,...,1.176777,1.0,3.545274,3.647015,-1.162154,25.278175,0.727273,532.093750,1016.625000,0
8074,25,226,51,120,914,1022,899,1014,905.607843,1017.568627,...,1.287819,1.0,3.772969,3.960346,1.094184,33.763456,0.879310,905.607843,1017.568627,0


In [25]:
# Create location dictionary (for easy distance calculation)
nucleus_loc_dict = nucleus_df.groupby("ImageNumber")[["Nucleus", "Location_Center_X", "Location_Center_Y"]].apply(lambda x : x.set_index("Nucleus").to_dict(orient="index")).to_dict()
centriole_loc_dict = centriole_df.groupby("ImageNumber")[["Centriole", "Location_Center_X", "Location_Center_Y"]].apply(lambda x : x.set_index("Centriole").to_dict(orient="index")).to_dict()
cilia_loc_dict = cilia_df.groupby("ImageNumber")[["Cilia", "Location_Center_X", "Location_Center_Y"]].apply(lambda x : x.set_index("Cilia").to_dict(orient="index")).to_dict()

In [26]:
grouped_nucleus = nucleus_df.groupby("ImageNumber")
grouped_centriole = centriole_df.groupby("ImageNumber")
grouped_cilia = cilia_df.groupby("ImageNumber")

In [27]:
# Initialize c2c_df
c2c_df = pd.DataFrame(columns=['ImageNumber', 'Nucleus', 'Centriole1', 'Centriole2', 'Cilia', 'Nuc_Cent1', 'Nuc_Cent2', 'Nuc_Cil'])

# Iterate over groups 
# Note: groups in grouped_nucleus, grouped_centriole and grouped_cilia are expected to be aligned
for key in grouped_nucleus.groups.keys():
    
    # Fetch respective group
    nucleus_group = grouped_nucleus.get_group(key)
    centriole_group = grouped_centriole.get_group(key)
    cilia_group = grouped_cilia.get_group(key)

    coord_fields = ["Location_Center_X", "Location_Center_Y"]
    threshold_field = "AreaShape_MeanRadius"

    #region : Nucleus - Centriole Matching

    # Match nucleus (parent) with closest 2 centrioles (child) 
    nucleus_centriole_match_dict = match(
        parents=nucleus_group.loc[:, coord_fields].values, 
        childs=centriole_group.loc[:, coord_fields].values, 
        arity=2,
        thresholds=nucleus_group[threshold_field].to_list()
    )

    # Make df from dict and rename columns
    nucleus_centriole_match_df = pd.DataFrame.from_dict(nucleus_centriole_match_dict, orient='index')
    nucleus_centriole_match_df.rename(columns={"path_length":"Nuc_Cent", "parent":"Nucleus"}, inplace=True)
    nucleus_centriole_match_df.reset_index(inplace=True, names="Centriole") 

    # Drop unmatched/invalid centriole
    nucleus_centriole_match_df.drop(nucleus_centriole_match_df[nucleus_centriole_match_df.Nucleus == -1].index, inplace=True)   
    
    # Increment Centriole and Nucleus number since they are 1-based
    nucleus_centriole_match_df["Centriole"] += 1
    nucleus_centriole_match_df["Nucleus"] += 1

    # Sort values by nucleus number and distance from nucleus
    nucleus_centriole_match_df = nucleus_centriole_match_df.sort_values(by=['Nucleus', 'Nuc_Cent']).groupby(['Nucleus'], as_index=False).agg(list)

    # Split Centriole number and distances from nucleus
    try:
        nucleus_centriole_split_centriole_df = pd.DataFrame(nucleus_centriole_match_df['Centriole'].to_list(), columns = ['Centriole1', 'Centriole2'], dtype=pd.Int64Dtype())
    except ValueError:
        nucleus_centriole_split_centriole_df = pd.DataFrame(nucleus_centriole_match_df['Centriole'].to_list(), columns = ['Centriole1'], dtype=pd.Int64Dtype())
        nucleus_centriole_split_centriole_df['Centriole2'] = pd.NA

    try:
        nucleus_centriole_split_nc_df = pd.DataFrame(nucleus_centriole_match_df['Nuc_Cent'].to_list(), columns = ['Nuc_Cent1', 'Nuc_Cent2'])
    except ValueError:
        nucleus_centriole_split_nc_df = pd.DataFrame(nucleus_centriole_match_df['Nuc_Cent'].to_list(), columns = ['Nuc_Cent1'])
        nucleus_centriole_split_nc_df['Nuc_Cent2'] = np.nan

    nucleus_centriole_match_df = pd.concat([nucleus_centriole_match_df, nucleus_centriole_split_centriole_df, nucleus_centriole_split_nc_df], axis=1)
    nucleus_centriole_match_df.drop(['Centriole', 'Nuc_Cent'], axis=1, inplace=True)
    nucleus_centriole_match_df.drop_duplicates(inplace=True)

    #endregion

    #region : Nucleus - Cilia Matching
    
    # Match cilia (child) with closest nucleus (parent) 
    nucleus_cilia_match_dict = match(
        parents=nucleus_group.loc[:, coord_fields].values, 
        childs=cilia_group.loc[:, coord_fields].values, 
        arity=1
    )

    # Make df from dict and rename columns
    nucleus_cilia_match_df = pd.DataFrame.from_dict(nucleus_cilia_match_dict, orient='index')
    nucleus_cilia_match_df.rename(columns={"path_length":"Nuc_Cil", "parent":"Nucleus"}, inplace=True)
    nucleus_cilia_match_df.reset_index(inplace=True, names="Cilia")

    # Drop unmatched/invalid cilia
    nucleus_cilia_match_df.drop(nucleus_cilia_match_df[nucleus_cilia_match_df.Nucleus == -1].index, inplace=True)   
    
    # Increment Cilia and Nucleus number since they are 1-based
    nucleus_cilia_match_df["Cilia"] += 1
    nucleus_cilia_match_df["Nucleus"] += 1

    #endregion

    # Merge two matching dataframes
    nucleus_centriole_cilia_df = nucleus_centriole_match_df.merge(right=nucleus_cilia_match_df, how='outer', on=['Nucleus'])

    # Set ImageNumber 
    nucleus_centriole_cilia_df["ImageNumber"] = key

    # Concat in c2c output
    c2c_df = pd.concat([c2c_df, nucleus_centriole_cilia_df], ignore_index=True)

# Ensure all columns are in appropriate datatypes
c2c_type_dict = {'ImageNumber': pd.Int64Dtype(), 'Nucleus': pd.Int64Dtype(), 'Centriole1': pd.Int64Dtype(), 'Centriole2': pd.Int64Dtype(), 'Cilia': pd.Int64Dtype()}
c2c_df = c2c_df.astype(c2c_type_dict)

c2c_df

## Should we drop incomplete matching (no cilia, or no cent2) ????


ValueError: 1 columns passed, passed data had 194 columns

In [None]:
c2c_df["Cent1_Cil"] = c2c_df.apply(lambda x : math.dist(
    [centriole_loc_dict[x["ImageNumber"]][x["Centriole1"]]["Location_Center_X"], centriole_loc_dict[x["ImageNumber"]][x["Centriole1"]]["Location_Center_Y"]], 
    [cilia_loc_dict[x["ImageNumber"]][x["Cilia"]]["Location_Center_X"], cilia_loc_dict[x["ImageNumber"]][x["Cilia"]]["Location_Center_Y"]]
    ) if pd.notna(x["Centriole1"]) and pd.notna(x["Cilia"]) else np.NaN, axis=1)
c2c_df["Cent2_Cil"] = c2c_df.apply(lambda x : math.dist(
    [centriole_loc_dict[x["ImageNumber"]][x["Centriole2"]]["Location_Center_X"], centriole_loc_dict[x["ImageNumber"]][x["Centriole2"]]["Location_Center_Y"]], 
    [cilia_loc_dict[x["ImageNumber"]][x["Cilia"]]["Location_Center_X"], cilia_loc_dict[x["ImageNumber"]][x["Cilia"]]["Location_Center_Y"]]
    ) if pd.notna(x["Centriole2"]) and pd.notna(x["Cilia"]) else np.NaN, axis=1)
c2c_df["Cent1_Cent2"] = c2c_df.apply(lambda x : math.dist(
    [centriole_loc_dict[x["ImageNumber"]][x["Centriole1"]]["Location_Center_X"], centriole_loc_dict[x["ImageNumber"]][x["Centriole1"]]["Location_Center_Y"]], 
    [centriole_loc_dict[x["ImageNumber"]][x["Centriole2"]]["Location_Center_X"], centriole_loc_dict[x["ImageNumber"]][x["Centriole2"]]["Location_Center_Y"]]
    ) if pd.notna(x["Centriole1"]) and pd.notna(x["Centriole2"]) else np.NaN, axis=1)
c2c_df

Unnamed: 0,ImageNumber,Nucleus,Centriole1,Centriole2,Cilia,Nuc_Cent1,Nuc_Cent2,Nuc_Cil,Cent1_Cil,Cent2_Cil,Cent1_Cent2
0,1,11,9,,20,3.759711,,17.476649,20.905221,,
1,1,17,12,,27,7.812985,,33.773248,40.159522,,
2,1,22,15,18,39,6.353778,10.841737,95.237758,101.577928,86.661025,16.252835
3,1,30,22,,58,11.429326,,32.988965,41.304164,,
4,1,43,30,,72,4.197301,,29.567108,25.391808,,
...,...,...,...,...,...,...,...,...,...,...,...
6742,25,322,,,223,,,46.681912,,,
6743,25,302,,,224,,,213.363249,,,
6744,25,326,,,225,,,42.911489,,,
6745,25,332,,,226,,,36.719345,,,


In [None]:
features_df = c2c_df

In [None]:
features_df = features_df.merge(right=nucleus_df.drop(columns=["Location_Center_X", "Location_Center_Y"]).add_prefix("Nucleus_"), how='left', left_on=['ImageNumber', 'Nucleus'], right_on=['Nucleus_ImageNumber', 'Nucleus_Nucleus'])
features_df.drop(columns=['Nucleus_ImageNumber', 'Nucleus_Nucleus'], inplace=True)
features_df

Unnamed: 0,ImageNumber,Nucleus,Centriole1,Centriole2,Cilia,Nuc_Cent1,Nuc_Cent2,Nuc_Cil,Cent1_Cil,Cent2_Cil,...,Nucleus_AreaShape_MaxFeretDiameter,Nucleus_AreaShape_MaximumRadius,Nucleus_AreaShape_MeanRadius,Nucleus_AreaShape_MedianRadius,Nucleus_AreaShape_MinFeretDiameter,Nucleus_AreaShape_MinorAxisLength,Nucleus_AreaShape_Orientation,Nucleus_AreaShape_Perimeter,Nucleus_AreaShape_Solidity,Nucleus_Location_Center_Z
0,1,11,9,,20,3.759711,,17.476649,20.905221,,...,57.384667,18.973666,7.437027,6.708204,37.565942,37.703203,-1.232934,158.124892,0.986175,0.0
1,1,17,12,,27,7.812985,,33.773248,40.159522,,...,49.396356,15.132746,6.312313,5.830952,31.000000,32.636786,1.509435,139.254834,0.972871,0.0
2,1,22,15,18,39,6.353778,10.841737,95.237758,101.577928,86.661025,...,34.014703,12.000000,4.763507,4.123106,23.264749,24.756173,-1.434177,98.526912,0.958512,0.0
3,1,30,22,,58,11.429326,,32.988965,41.304164,,...,65.000000,18.384776,7.000375,6.324555,42.000000,39.295307,0.143990,191.237590,0.920168,0.0
4,1,43,30,,72,4.197301,,29.567108,25.391808,,...,33.286634,13.152946,4.938908,4.242641,27.649061,28.098656,-1.138962,100.426407,0.954733,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6742,25,322,,,223,,,46.681912,,,...,45.880279,14.317821,5.859323,5.242092,28.846154,29.817702,-1.200372,128.468037,0.969314,0.0
6743,25,302,,,224,,,213.363249,,,...,41.340053,13.892444,5.561427,5.000000,27.727243,28.081400,0.517340,115.154329,0.974495,0.0
6744,25,326,,,225,,,42.911489,,,...,53.935146,13.038405,5.585982,5.000000,25.975040,26.539428,1.372512,136.811183,0.970364,0.0
6745,25,332,,,226,,,36.719345,,,...,35.014283,9.848858,3.835752,3.605551,19.435949,20.429599,1.382711,93.840620,0.932961,0.0


In [None]:
features_df = features_df.merge(right=centriole_df.drop(columns=["Location_Center_X", "Location_Center_Y"]).add_prefix("Centriole1_"), how='left', left_on=['ImageNumber', 'Centriole1'], right_on=['Centriole1_ImageNumber', 'Centriole1_Centriole'])
features_df.drop(columns=['Centriole1_ImageNumber', 'Centriole1_Centriole'], inplace=True)
features_df

Unnamed: 0,ImageNumber,Nucleus,Centriole1,Centriole2,Cilia,Nuc_Cent1,Nuc_Cent2,Nuc_Cil,Cent1_Cil,Cent2_Cil,...,Centriole1_AreaShape_MaxFeretDiameter,Centriole1_AreaShape_MaximumRadius,Centriole1_AreaShape_MeanRadius,Centriole1_AreaShape_MedianRadius,Centriole1_AreaShape_MinFeretDiameter,Centriole1_AreaShape_MinorAxisLength,Centriole1_AreaShape_Orientation,Centriole1_AreaShape_Perimeter,Centriole1_AreaShape_Solidity,Centriole1_Location_Center_Z
0,1,11,9,,20,3.759711,,17.476649,20.905221,,...,5.099020,1.000000,1.000000,1.0,2.000000,2.936186,-1.369636,7.207107,0.833333,0.0
1,1,17,12,,27,7.812985,,33.773248,40.159522,,...,7.071068,1.414214,1.018828,1.0,4.608177,5.726535,-1.567955,13.621320,0.628571,0.0
2,1,22,15,18,39,6.353778,10.841737,95.237758,101.577928,86.661025,...,7.071068,2.000000,1.093443,1.0,4.919350,5.337563,1.330679,14.242641,0.666667,0.0
3,1,30,22,,58,11.429326,,32.988965,41.304164,,...,6.708204,1.000000,1.000000,1.0,4.608177,5.329474,1.097910,10.207107,0.620690,0.0
4,1,43,30,,72,4.197301,,29.567108,25.391808,,...,6.082763,1.000000,1.000000,1.0,4.000000,5.067557,-1.361322,11.207107,0.615385,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6742,25,322,,,223,,,46.681912,,,...,,,,,,,,,,
6743,25,302,,,224,,,213.363249,,,...,,,,,,,,,,
6744,25,326,,,225,,,42.911489,,,...,,,,,,,,,,
6745,25,332,,,226,,,36.719345,,,...,,,,,,,,,,


In [None]:
features_df = features_df.merge(right=centriole_df.drop(columns=["Location_Center_X", "Location_Center_Y"]).add_prefix("Centriole2_"), how='left', left_on=['ImageNumber', 'Centriole2'], right_on=['Centriole2_ImageNumber', 'Centriole2_Centriole'])
features_df.drop(columns=['Centriole2_ImageNumber', 'Centriole2_Centriole'], inplace=True)
features_df

Unnamed: 0,ImageNumber,Nucleus,Centriole1,Centriole2,Cilia,Nuc_Cent1,Nuc_Cent2,Nuc_Cil,Cent1_Cil,Cent2_Cil,...,Centriole2_AreaShape_MaxFeretDiameter,Centriole2_AreaShape_MaximumRadius,Centriole2_AreaShape_MeanRadius,Centriole2_AreaShape_MedianRadius,Centriole2_AreaShape_MinFeretDiameter,Centriole2_AreaShape_MinorAxisLength,Centriole2_AreaShape_Orientation,Centriole2_AreaShape_Perimeter,Centriole2_AreaShape_Solidity,Centriole2_Location_Center_Z
0,1,11,9,,20,3.759711,,17.476649,20.905221,,...,,,,,,,,,,
1,1,17,12,,27,7.812985,,33.773248,40.159522,,...,,,,,,,,,,
2,1,22,15,18,39,6.353778,10.841737,95.237758,101.577928,86.661025,...,8.602325,2.236068,1.259718,1.0,6.379052,6.59097,-1.10689,23.449747,0.735849,0.0
3,1,30,22,,58,11.429326,,32.988965,41.304164,,...,,,,,,,,,,
4,1,43,30,,72,4.197301,,29.567108,25.391808,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6742,25,322,,,223,,,46.681912,,,...,,,,,,,,,,
6743,25,302,,,224,,,213.363249,,,...,,,,,,,,,,
6744,25,326,,,225,,,42.911489,,,...,,,,,,,,,,
6745,25,332,,,226,,,36.719345,,,...,,,,,,,,,,


In [None]:
features_df = features_df.merge(right=cilia_df.drop(columns=["Location_Center_X", "Location_Center_Y"]).add_prefix("Cilia_"), how='left', left_on=['ImageNumber', 'Cilia'], right_on=['Cilia_ImageNumber', 'Cilia_Cilia'])
features_df.drop(columns=['Cilia_ImageNumber', 'Cilia_Cilia'], inplace=True)
features_df

Unnamed: 0,ImageNumber,Nucleus,Centriole1,Centriole2,Cilia,Nuc_Cent1,Nuc_Cent2,Nuc_Cil,Cent1_Cil,Cent2_Cil,...,Cilia_AreaShape_MaxFeretDiameter,Cilia_AreaShape_MaximumRadius,Cilia_AreaShape_MeanRadius,Cilia_AreaShape_MedianRadius,Cilia_AreaShape_MinFeretDiameter,Cilia_AreaShape_MinorAxisLength,Cilia_AreaShape_Orientation,Cilia_AreaShape_Perimeter,Cilia_AreaShape_Solidity,Cilia_Location_Center_Z
0,1,11,9,,20,3.759711,,17.476649,20.905221,,...,6.082763,2.828427,1.399476,1.0,4.000000,5.028315,1.570796,16.485281,1.000000,0.0
1,1,17,12,,27,7.812985,,33.773248,40.159522,,...,2.236068,1.000000,1.000000,1.0,1.000000,2.000000,1.570796,6.000000,1.000000,0.0
2,1,22,15,18,39,6.353778,10.841737,95.237758,101.577928,86.661025,...,8.602325,2.236068,1.289387,1.0,4.242641,4.471413,-0.897869,22.727922,0.868421,0.0
3,1,30,22,,58,11.429326,,32.988965,41.304164,,...,9.433981,1.414214,1.043601,1.0,2.200000,2.380541,0.570593,17.863961,0.791667,0.0
4,1,43,30,,72,4.197301,,29.567108,25.391808,,...,3.000000,1.000000,1.000000,1.0,1.000000,1.885618,1.570796,6.414214,1.000000,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6742,25,322,,,223,,,46.681912,,,...,0.000000,1.000000,1.000000,1.0,0.000000,0.000000,0.785398,0.000000,1.000000,0.0
6743,25,302,,,224,,,213.363249,,,...,20.396078,2.236068,1.316908,1.0,4.638124,4.378861,-1.456749,44.727922,0.772727,0.0
6744,25,326,,,225,,,42.911489,,,...,11.704700,2.000000,1.176777,1.0,3.545274,3.647015,-1.162154,25.278175,0.727273,0.0
6745,25,332,,,226,,,36.719345,,,...,15.652476,2.236068,1.287819,1.0,3.772969,3.960346,1.094184,33.763456,0.879310,0.0


In [None]:
features_df.sort_values(by=["ImageNumber", "Cilia"], inplace=True, ignore_index=True)
features_df

Unnamed: 0,ImageNumber,Nucleus,Centriole1,Centriole2,Cilia,Nuc_Cent1,Nuc_Cent2,Nuc_Cil,Cent1_Cil,Cent2_Cil,...,Cilia_AreaShape_MaxFeretDiameter,Cilia_AreaShape_MaximumRadius,Cilia_AreaShape_MeanRadius,Cilia_AreaShape_MedianRadius,Cilia_AreaShape_MinFeretDiameter,Cilia_AreaShape_MinorAxisLength,Cilia_AreaShape_Orientation,Cilia_AreaShape_Perimeter,Cilia_AreaShape_Solidity,Cilia_Location_Center_Z
0,1,345,,,1,,,inf,,,...,3.605551,2.000000,1.128565,1.0,2.000000,3.067356,-1.307763,9.414214,1.000000,0.0
1,1,9,,,2,,,45.523807,,,...,0.000000,1.000000,1.000000,1.0,0.000000,0.000000,0.785398,0.000000,1.000000,0.0
2,1,6,,,3,,,39.185023,,,...,4.242641,1.000000,1.000000,1.0,1.788854,2.331612,-0.717197,7.414214,0.900000,0.0
3,1,10,,,9,,,29.764514,,,...,4.472136,1.414214,1.075312,1.0,2.000000,2.565289,-0.424765,10.242641,1.000000,0.0
4,1,4,,,12,,,32.062528,,,...,2.236068,1.000000,1.000000,1.0,1.788854,2.666667,0.463648,6.414214,0.857143,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6742,25,186,183,191,,4.523332,9.044593,,,,...,,,,,,,,,,
6743,25,202,213,,,4.246551,,,,,...,,,,,,,,,,
6744,25,203,224,,,10.578341,,,,,...,,,,,,,,,,
6745,25,231,242,,,7.338615,,,,,...,,,,,,,,,,
