In [1]:
import pandas as pd
# `display` and `HTML` are part of the public IPython.display module; importing
# them from IPython.core.display is a deprecated path.
from IPython.display import display, HTML

# Display larger dataframes: show up to 500 columns and up to 100 characters
# per cell before truncating.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', 100)

# Inject a CSS rule that sets elements of class "container" to 80% width
# (widens the notebook page in front-ends that use that class).
display(HTML("<style>.container { width:80% !important; }</style>"))



Integrate XY and time coordinates and derive differentiation times

In [2]:
# Folder holding the master dataframes; the trailing "/" lets file names be
# appended by plain string concatenation.
# NOTE(review): absolute path to a mounted volume - adjust when running elsewhere.
root = "/Volumes/TOB_WD2/Data_Analysis/DataFrames" + "/"

# Coordinates table (one row per Cell_ID) and the timing table it is merged
# with further down.
space_df, time_df = (
    pd.read_csv(root + "MasterDataFrame_Coordinates.csv"),
    pd.read_csv(root + "MasterDataFrame_Times.csv"),
)

In [3]:
def get_LowZoom_ID(x):
    """Build the LowZoom acquisition ID that corresponds to a cell ID.

    The ID is used as the key when merging the coordinate and time tables.

    `x` is split at its single "_" into a date string and a stem. The stem's
    first "--"-separated field and its last "--"-separated field are dropped,
    and what remains is appended to ``<date>_LowZoom--``.

    Raises
    ------
    ValueError
        If `x` does not contain exactly one "_".
    IndexError
        If the stem contains no "--".
    """
    acquisition_date, remainder = x.split("_")
    # Indexing with [1] (instead of unpacking) keeps the IndexError for stems
    # that lack a "--" separator.
    trailing_fields = remainder.split("--", 1)[1]
    parent_fields = trailing_fields.rsplit("--", 1)[0]
    return acquisition_date + "_LowZoom--" + parent_fields

# Key every coordinate row by its LowZoom acquisition so that it can be joined
# to the timing table.
space_df["LowZoom_ID"] = space_df["Cell_ID"].apply(get_LowZoom_ID)

# Join on the shared key (merge's default join type), then drop every column
# whose name starts with "Unnamed".
df = (
    space_df
    .merge(time_df, on="LowZoom_ID")
    .loc[:, lambda merged: ~merged.columns.str.contains('^Unnamed')]
)
df.shape

(24726, 12)

In [4]:
# Differentiation time (in minutes) at the onset of imaging, keyed by experiment ID.
# IMPORTANT - the dataset-internal global start times were already considered
# in the previous notebook (06), so get_Diff_time only has to look at these values.
# Values are spelled out as arithmetic so that their derivation stays visible.

diff_times = {
    "20190227-182447": 7 * 60,  # seeded and NI 7h before imaging

    "20190618-conc": 7 * 60,  # seeded and NI 7h before imaging

    "20190827-181922": 53 * 60,  # started imaging after 53h post NI

    "20200724-201611": 9 * 60,  # seeded and NI 9h before imaging
    # NOTE(review): this entry stores a summed offset, whereas the 20200731 and
    # 20200807 follow-up runs below reuse their first run's value - confirm
    # this difference is intended.
    "20200725-142832": 9 * 60 + 1093,  # 540 min + 1093 min

    "20200728-174144": 5 * 60,  # seeded and NI 5h before imaging

    "20200730-190931": 5 * 60 + 24 * 60 + 1528,  # 300 min + 24h + 1528 min

    "20200731-175845": 55 * 60,  # started imaging after 55h post NI
    # The two follow-up runs keep the 3300 min start value; the summed values
    # (previously used, now disabled) are kept for reference only.
    "20200802-104739": 55 * 60,  # not 5749 (= 3300 min + 40h + 49min)
    "20200803-181830": 55 * 60,  # not 7640 (= 5749 min + 24h + 7h31min)

    "20200806-164803": 124 * 60,  # started imaging after 124h post NI

    "20200807-174159": 53 * 60,  # started imaging after 53h post NI
    # Same convention as above: the 3180 min start value is kept.
    "20200809-114236": 53 * 60,  # not 5701 (= 3180 min + 378 min + 1440 min + 702 min + 1 min)
    "20200810-225504": 53 * 60,  # not 7813 (= 5701 min + 737 min + 1375 min; =130h and goes on until max 142.5h)
}

def get_Diff_time(x, dictionary):
    """Return the differentiation time of one row.

    Parameters
    ----------
    x : row-like object exposing ``Experiment`` and ``Experiment_Time_mins``.
    dictionary : mapping from experiment ID to the differentiation time at the
        onset of imaging of that experiment.

    Returns
    -------
    ``dictionary[x.Experiment] + x.Experiment_Time_mins``.

    Raises
    ------
    KeyError
        If the row's experiment is not a key of `dictionary`.
    """
    return dictionary[x.Experiment] + x.Experiment_Time_mins

# Differentiation time of every row: the experiment's start value from
# diff_times plus the row's Experiment_Time_mins (see get_Diff_time).
df["Differentiation_mins"] = df.apply(
    lambda row: get_Diff_time(row, dictionary=diff_times),
    axis=1,
)

# Bin the differentiation time into 48h (2880 min) intervals and keep the
# interval labels as strings.
# NOTE(review): 9000 is not a multiple of 2880 - confirm where the last bin
# ends and that no value can fall outside the bins (pd.cut leaves such values
# missing, which astype(str) would turn into the text "nan").
Intervals_diff = pd.interval_range(start=0, end=9000, freq=2880)
df["Differentiation_bins"] = pd.cut(
    df["Differentiation_mins"],
    bins=Intervals_diff,
).astype(str)

In [5]:
# Identify experimental setup (condition)
# based on Position integer

def getCondition(x):
    """Return the experimental condition label of one row.

    A row is labelled "1_Pluripotent" only when its ``Position`` equals 1 and
    its ``Experiment`` is not "20200806-164803"; every other row (including all
    positions of that experiment) is labelled "2_Differentiation".
    """
    # Read both fields up front so a row missing either attribute fails
    # regardless of which branch would be taken.
    position, experiment = x.Position, x.Experiment
    if experiment != "20200806-164803" and position == 1:
        return "1_Pluripotent"
    return "2_Differentiation"
    
# Label every row with its condition; applied row-wise because getCondition
# reads both the Position and the Experiment column.
df["Condition"] = df.apply(getCondition, axis=1)

In [6]:
# annotate parent dataset (important for 
# interrupted experiments)

def getDataset(x):
    """Return the parent dataset label for one row.

    Experiments listed in the lookup table below are assigned the label of
    their parent dataset; any other experiment is labelled with the part of
    its own ID that precedes the first "-".
    """
    # Follow-up experiment -> parent dataset label.
    parent_of = {
        "20200725-142832": "20200724",
        "20200802-104739": "20200731",
        "20200803-181830": "20200731",
        "20200809-114236": "20200807",
        "20200810-225504": "20200807",
    }
    experiment = x.Experiment
    if experiment in parent_of:
        return parent_of[experiment]
    return experiment.split("-")[0]
    
# Annotate every row with its parent dataset (see getDataset).
df["Dataset"] = df.apply(getDataset, axis=1)

20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802-104739
20200802

20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803-181830
20200803

In [7]:
# Keep only the columns needed downstream, in this order.
# Selecting by an explicit list raises a KeyError if any of them is missing.

subselection = [
    # identifiers
    "Cell_ID",
    "LowZoom_ID",
    "Dataset",
    "Experiment",
    "Position",
    "Condition",
    "Has_duplicate",
    # space
    "X_px",
    "Y_px",
    # time
    "Timepoint",
    "Time",
    "Differentiation_mins",
    "Differentiation_bins",
    "Experiment_Time_mins",
]
df = df.loc[:, subselection]

In [8]:
# Write the final table next to the input files and report where it went.
# NOTE(review): the index is written as well (to_csv default), so readers of
# this file will get an extra unnamed first column unless they handle it.
output_csv = root + "MasterDataFrame_SpaceTime.csv"
df.to_csv(output_csv)
print("Finished analysis and saved final dataframe: {}".format(output_csv))

Finished analysis and saved final dataframe: /Volumes/TOB_WD2/Data_Analysis/DataFrames/MasterDataFrame_SpaceTime.csv
