# Making Stacked Raw Data
In this notebook we transform the data so that, instead of having a separate set of feature columns for each timepoint, the timepoints are concatenated and a column is added that records the hour from which each measurement was extracted. The resulting stacked table can be used for visualisation and for investigating dynamics over time. First we load the data:

In [1]:
import os

import numpy as np
import pandas as pd

import organoid_prediction_python as opp

# Root folder of the project data. The default is the original author's local
# path; set the ORGANOID_HOME_DIRECTORY environment variable to run the
# notebook on another machine without editing this cell.
home_directory = os.environ.get(
    "ORGANOID_HOME_DIRECTORY",
    r"C:\Users\savill\OneDrive\Documents\PhD Jesse\Embryonic_organoid_prediction\Processing dataset for Paper\TLS_2D_morphostate_investigation/",
)
# os.path.join copes with an override that lacks the trailing separator; with
# the default value it yields the same string as plain concatenation.
# table_location stays a plain str because later cells build paths from it
# with string concatenation and f-strings.
table_location = os.path.join(home_directory, "tabular_data")

# Wide feature table indexed by (Run, Plate, ID); the feature columns carry an
# hour prefix such as "048h_" / "096h_".
raw_features = pd.read_csv(f"{table_location}/All_Samples_Feature_Space_All_Features_48h,72h,96h.csv").set_index(["Run","Plate","ID"])
raw_features

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 0,048h_raw_BF_AreaShape_area,048h_raw_BF_AreaShape_aspect_ratio,048h_raw_BF_AreaShape_axis_major_length,048h_raw_BF_AreaShape_axis_minor_length,048h_raw_BF_AreaShape_eccentricity,048h_raw_BF_AreaShape_feret_diameter_max,048h_raw_BF_AreaShape_perimeter,048h_raw_BF_AreaShape_solidity,048h_str_BF_AreaShape_area,...,096h_str_BF_AreaShape_moments_hu_1,096h_str_BF_AreaShape_moments_hu_2,096h_str_BF_AreaShape_moments_hu_3,096h_str_BF_AreaShape_moments_hu_4,096h_str_BF_AreaShape_moments_hu_5,096h_str_BF_AreaShape_moments_hu_6,096h_str_BF_AreaShape_orientation,096h_str_BF_AreaShape_perimeter,096h_str_CH_AreaShape_Bra_MajorAxis_Polarisation,096h_str_CH_AreaShape_Bra_MinorAxis_Polarisation
Run,Plate,ID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
RR,1,A1,0,51429.057934,1.036110,260.694837,251.609209,0.261703,266.724612,848.528995,0.985088,51519.761740,...,0.000012,0.000019,4.824832e-09,-1.412213e-15,-1.643041e-11,-3.074749e-16,0.052614,1338.362215,0.027743,0.018767
RR,1,A2,1,46107.767989,1.018480,244.753205,240.312256,0.189631,252.890770,807.094108,0.984030,46032.181484,...,0.000523,0.000235,1.227808e-06,2.059185e-12,8.695410e-10,2.075727e-11,-0.311135,1265.903481,0.013417,0.031259
RR,1,A3,2,54482.752732,1.070369,272.945953,255.001802,0.356599,280.334245,885.017919,0.983893,54898.478509,...,0.000140,0.000034,6.502767e-08,-8.891570e-14,-7.558533e-10,-3.658872e-14,-0.151310,1293.544459,0.041057,0.014838
RR,1,A4,3,46999.688747,1.054378,251.337713,238.375438,0.316996,256.319703,817.033443,0.985420,46886.308989,...,0.002951,0.000093,5.961453e-06,1.366487e-10,3.202327e-07,3.202046e-11,0.086875,1354.224446,0.053157,0.007887
RR,1,A5,4,50680.751535,1.067224,262.580886,246.040977,0.349302,267.361474,844.917216,0.985450,50348.170914,...,0.003706,0.000031,7.229710e-07,-1.487676e-12,-1.642923e-08,3.067576e-12,-0.076661,1188.613239,0.038555,0.006555
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TR,2,H8,762,47634.615388,1.074280,255.443622,237.781139,0.365387,260.429973,822.255724,0.984534,46878.750339,...,0.001501,0.000359,4.130417e-06,7.295109e-11,4.233366e-08,1.412304e-10,-0.125910,1381.212217,0.010821,0.026928
TR,2,H9,763,45616.455707,1.141554,257.800200,225.832628,0.482314,262.626514,798.260048,0.984502,44943.735814,...,0.002145,0.000292,4.471516e-06,-6.053277e-11,-1.001351e-07,1.499027e-10,0.088982,1479.048220,0.035616,0.026081
TR,2,H10,764,42336.001394,1.017190,234.372899,230.412135,0.183066,242.141388,769.823637,0.985571,42358.677346,...,0.000897,0.000324,1.865052e-06,-2.186069e-11,-2.439511e-08,4.027224e-11,0.042767,1474.998259,0.009338,0.004076
TR,2,H11,765,40484.132024,1.034860,231.114030,223.328768,0.257365,237.301383,758.159345,0.983294,39803.853480,...,0.006455,0.000210,9.167153e-06,-3.484705e-10,-6.793602e-07,2.008960e-10,-0.080494,1449.277625,0.012343,0.042325


### Adding 48h Intensity information for comparison (not included in main analysis due to the extremely low signal)

In [2]:
def rename_columns(df,hour,raw_str_prefix,channel_prefix,cellprof_remove = "__72h_brachyury_original"):
    """Return a copy of ``df`` whose column names carry a common prefix.

    Each column name first has every occurrence of ``cellprof_remove``
    stripped out and is then prefixed with
    ``"{hour}_{raw_str_prefix}_{channel_prefix}_"``.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose column labels are strings.
    hour, raw_str_prefix, channel_prefix
        Parts of the prefix, joined with underscores in this order.
    cellprof_remove : str
        Substring deleted from every column name before prefixing.

    Returns
    -------
    pandas.DataFrame
        A renamed table; ``df`` itself is left untouched.
    """
    prefix = f"{hour}_{raw_str_prefix}_{channel_prefix}_"
    renamed = {
        column: prefix + column.replace(cellprof_remove, "")
        for column in df.keys()
    }
    return df.rename(columns=renamed)

# 48h CellProfiler measurements (per the file name: BRA intensity measured on
# the BF object), reshaped by the project helper and indexed like the other
# tables. The "ObjectNumber" column is dropped.
data_bf_mask_48 = (
    opp.reform_cellprofiler_table(
        pd.read_csv(f"{table_location}/Cellprofiler_Data/20230609_BRA_intensity_measures_BF_Object_48h.csv"),
        spit_out=True,
    )
    .set_index(["Run","Plate","ID"])
    .drop(columns=["ObjectNumber"])
)

# Prefix the columns so they follow the "<hour>_<raw|str>_<channel>_<feature>" naming scheme.
renamed_bf_mask_48 = rename_columns(
    data_bf_mask_48,
    hour="048h",
    raw_str_prefix="raw",
    channel_prefix="CH__bf_mask",
)

def _read_polarisation_table(hour):
    """Load the polarisation CSV of one timepoint, indexed by (Run, Plate, ID).

    The two polarisation columns are renamed to hour-prefixed names
    (first axis -> MajorAxis, second axis -> MinorAxis).
    """
    table = pd.read_csv(
        f"{table_location}/{hour}h/straight_morgana_brachyury_polarisation_{hour}h.csv"
    )
    column_names = {
        "first_axis_polarisation":f"0{hour}h_str_CH_AreaShape_Bra_MajorAxis_Polarisation",
        "second_axis_polarisation":f"0{hour}h_str_CH_AreaShape_Bra_MinorAxis_Polarisation",
    }
    return table.set_index(["Run","Plate","ID"]).rename(columns=column_names)


# One table per timepoint, in the order 48h, 72h, 96h.
df_polarisations = [_read_polarisation_table(hour) for hour in (48, 72, 96)]
df_polarisations[0]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,048h_str_CH_AreaShape_Bra_MajorAxis_Polarisation,048h_str_CH_AreaShape_Bra_MinorAxis_Polarisation
Run,Plate,ID,Unnamed: 3_level_1,Unnamed: 4_level_1
RR,1,A1,0.033602,0.009469
RR,1,A2,0.006087,0.002021
RR,1,A3,0.043632,0.003480
RR,1,A4,0.003000,0.005707
RR,1,A5,0.026882,0.017541
...,...,...,...,...
TR,2,H8,0.005869,0.005420
TR,2,H9,0.011629,0.020696
TR,2,H10,0.007411,0.010388
TR,2,H11,0.002839,0.002295


In [3]:
# Align the three tables on their shared (Run, Plate, ID) index and place their
# columns side by side; only the 48h polarisation table is appended here.
# dropna(thresh=2) keeps a row only if it has at least two non-missing values.
# NOTE(review): raw_features still carries the "Unnamed: 0" column read from the
# CSV, so it ends up in all_props as well — confirm whether that is intended.
all_props = (
    pd.concat(
        [raw_features, renamed_bf_mask_48, df_polarisations[0]],
        axis="columns",
    )
    .dropna(thresh=2)
)
all_props

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 0,048h_raw_BF_AreaShape_area,048h_raw_BF_AreaShape_aspect_ratio,048h_raw_BF_AreaShape_axis_major_length,048h_raw_BF_AreaShape_axis_minor_length,048h_raw_BF_AreaShape_eccentricity,048h_raw_BF_AreaShape_feret_diameter_max,048h_raw_BF_AreaShape_perimeter,048h_raw_BF_AreaShape_solidity,048h_str_BF_AreaShape_area,...,048h_raw_CH__bf_mask_RadialDistribution_RadialCV_2of9,048h_raw_CH__bf_mask_RadialDistribution_RadialCV_3of9,048h_raw_CH__bf_mask_RadialDistribution_RadialCV_4of9,048h_raw_CH__bf_mask_RadialDistribution_RadialCV_5of9,048h_raw_CH__bf_mask_RadialDistribution_RadialCV_6of9,048h_raw_CH__bf_mask_RadialDistribution_RadialCV_7of9,048h_raw_CH__bf_mask_RadialDistribution_RadialCV_8of9,048h_raw_CH__bf_mask_RadialDistribution_RadialCV_9of9,048h_str_CH_AreaShape_Bra_MajorAxis_Polarisation,048h_str_CH_AreaShape_Bra_MinorAxis_Polarisation
Run,Plate,ID,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
RR,1,A1,0.0,51429.057934,1.036110,260.694837,251.609209,0.261703,266.724612,848.528995,0.985088,51519.761740,...,0.004932,0.006158,0.005967,0.004276,0.003854,0.006812,0.005774,0.007652,0.033602,0.009469
RR,1,A2,1.0,46107.767989,1.018480,244.753205,240.312256,0.189631,252.890770,807.094108,0.984030,46032.181484,...,0.004867,0.005225,0.006738,0.004694,0.003055,0.009238,0.007633,0.008265,0.006087,0.002021
RR,1,A3,2.0,54482.752732,1.070369,272.945953,255.001802,0.356599,280.334245,885.017919,0.983893,54898.478509,...,0.008475,0.008893,0.013924,0.005426,0.004544,0.006895,0.010967,0.012468,0.043632,0.003480
RR,1,A4,3.0,46999.688747,1.054378,251.337713,238.375438,0.316996,256.319703,817.033443,0.985420,46886.308989,...,0.006409,0.005854,0.008474,0.004658,0.004575,0.007493,0.007832,0.006192,0.003000,0.005707
RR,1,A5,4.0,50680.751535,1.067224,262.580886,246.040977,0.349302,267.361474,844.917216,0.985450,50348.170914,...,0.011102,0.008453,0.004818,0.007611,0.002931,0.004113,0.009191,0.005434,0.026882,0.017541
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
TR,2,H8,762.0,47634.615388,1.074280,255.443622,237.781139,0.365387,260.429973,822.255724,0.984534,46878.750339,...,0.006284,0.004163,0.008859,0.003569,0.004913,0.005814,0.004934,0.006323,0.005869,0.005420
TR,2,H9,763.0,45616.455707,1.141554,257.800200,225.832628,0.482314,262.626514,798.260048,0.984502,44943.735814,...,0.010501,0.005135,0.009122,0.009625,0.008506,0.005900,0.005898,0.014392,0.011629,0.020696
TR,2,H10,764.0,42336.001394,1.017190,234.372899,230.412135,0.183066,242.141388,769.823637,0.985571,42358.677346,...,0.008601,0.011960,0.005931,0.012458,0.005735,0.008507,0.006295,0.010104,0.007411,0.010388
TR,2,H11,765.0,40484.132024,1.034860,231.114030,223.328768,0.257365,237.301383,758.159345,0.983294,39803.853480,...,0.013089,0.005111,0.009048,0.003876,0.005534,0.007856,0.013335,0.011711,0.002839,0.002295


Using a function implemented in organoid_prediction_python to do the stacking, then saving the result:

In [4]:
# The project helper receives the table with Run/Plate/ID turned back into
# ordinary columns; its result is then written next to the other tables.
stacked_raw_data = opp.stack_time_data(all_props.reset_index())
stacked_raw_data.to_csv(f"{table_location}/raw_data_stacked_timepoints.csv")

In [5]:
# Show the keys of the stacked table as a NumPy array (presumably its column
# labels, assuming stack_time_data returns a DataFrame — TODO confirm).
stacked_raw_data.keys().to_numpy()

array(['raw_BF_AreaShape_area', 'raw_BF_AreaShape_aspect_ratio',
       'raw_BF_AreaShape_axis_major_length',
       'raw_BF_AreaShape_axis_minor_length',
       'raw_BF_AreaShape_eccentricity',
       'raw_BF_AreaShape_feret_diameter_max',
       'raw_BF_AreaShape_perimeter', 'raw_BF_AreaShape_solidity',
       'str_BF_AreaShape_area', 'str_BF_AreaShape_aspect_ratio',
       'str_BF_AreaShape_eccentricity',
       'str_BF_AreaShape_equivalent_diameter', 'str_BF_AreaShape_extent',
       'str_BF_AreaShape_form_factor',
       'str_BF_AreaShape_inertia_tensor_0_0',
       'str_BF_AreaShape_inertia_tensor_0_1',
       'str_BF_AreaShape_inertia_tensor_1_0',
       'str_BF_AreaShape_inertia_tensor_1_1',
       'str_BF_AreaShape_inertia_tensor_eigvals_0',
       'str_BF_AreaShape_inertia_tensor_eigvals_1',
       'str_BF_AreaShape_locoefa_PC_1', 'str_BF_AreaShape_locoefa_PC_2',
       'str_BF_AreaShape_locoefa_PC_3', 'str_BF_AreaShape_locoefa_PC_4',
       'str_BF_AreaShape_locoefa_PC_5', '