In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
import re
import utils
import functools

%load_ext autoreload
%autoreload 2

In [None]:
def read_data(directory, regex=None, debug_regex=False,
              images=('plasmid_star', 'endogenous_28S')):
    """Build one combined result from the CSV tables found in ``directory``.

    The tables ``Image.csv``, ``Cell.csv`` (read twice), ``Nuclei.csv`` and
    ``Cytoplasm.csv`` are passed to ``utils.analyze`` in that order; each call
    after the first receives the previous call's result as
    ``previous_result``.

    Parameters
    ----------
    directory : str or path-like
        Folder containing the CSV files listed above.
    regex : str, optional
        Pattern handed to ``utils.ImageParser`` to parse information from the
        file name.  NOTE(review): whether ``None`` is accepted depends on
        ``utils.ImageParser`` -- confirm before relying on the default.
    debug_regex : bool, optional
        Forwarded unchanged to ``utils.ImageParser``.
    images : str or iterable of str, optional
        Image names given to ``utils.IntensityParser`` for the Cell, Nuclei
        and Cytoplasm tables.  A single string is treated as one name.  The
        default reproduces the two names that used to be hard-coded.

    Returns
    -------
    The result of the final ``utils.analyze`` call (presumably a pandas
    DataFrame, since ``.rename(columns=...)`` is called on it along the way).
    """
    directory = Path(directory)

    # A bare string would otherwise be exploded into characters by list().
    if isinstance(images, str):
        images = [images]
    else:
        images = list(images)

    def intensity_parser():
        # Fresh parser and fresh list per table, exactly as before the
        # image names were hoisted into a parameter.
        return utils.IntensityParser(images=list(images))

    # Keys shared by the Nuclei and Cytoplasm merges below.
    extra_columns = ['ImageNumber', 'Parent_Cell']
    extras = {
        'extra_columns': extra_columns,
        'merge_fcn': functools.partial(utils.merge_result, merge_on=extra_columns),
    }

    # Parse information from filename using the provided regex
    result, _ = utils.analyze(
        directory / 'Image.csv',
        parsers=[
            utils.ImageParser(regex, debug_regex=debug_regex),
        ],
    )

    # Left-merge Cell.csv onto the image-level result by ImageNumber, asking
    # BlankParser for ObjectNumber so later tables can be merged per cell.
    result, _ = utils.analyze(
        directory / 'Cell.csv',
        previous_result=result,
        parsers=[
            utils.BlankParser(['ObjectNumber']),
        ],
        extra_columns=['ImageNumber'],
        merge_fcn=functools.partial(utils.merge_result, merge_on=['ImageNumber'], how='left'),
    )

    # Whole-cell intensity measurements, keyed on (ImageNumber, ObjectNumber).
    result, _ = utils.analyze(
        directory / 'Cell.csv',
        previous_result=result,
        parsers=[
            intensity_parser(),
        ],
        region='Cell',
        extra_columns=['ImageNumber', 'ObjectNumber'],
        merge_fcn=functools.partial(utils.merge_result, merge_on=['ImageNumber', 'ObjectNumber']),
    )

    # The Nuclei/Cytoplasm merges key on 'Parent_Cell', so expose the cell's
    # ObjectNumber under that name before reading them.
    result = result.rename(columns={'ObjectNumber': 'Parent_Cell'})

    # Measure features from objects
    for region in ('Nuclei', 'Cytoplasm'):
        result, _ = utils.analyze(
            directory / f'{region}.csv',
            previous_result=result,
            parsers=[
                intensity_parser(),
            ],
            region=region,
            **extras
        )
    return result

# Load the measurements. The regex matches paths ending in "nd2" whose file
# name starts with a letter A-G plus digits and an underscore; the text up to
# the next underscore is captured as the named group `treatment`.
data = read_data(
    directory='/scratch/gpfs/tcomi/cp_paper_redo/exogenous_cytoplasm/testing/outputs',
    regex=r'/[A-G]\d+_(?P<treatment>[^_]+).*nd2$',
)
# Debugging aid -- uncomment to list the file locations of rows with any NaN:
# data.loc[data.isna().any(axis=1), 'Metadata_FileLocation'].unique()
data

In [None]:
# Cytoplasm vs. whole-cell mean plasmid_star intensity, one facet row per treatment.
sns.relplot(
    data=data,
    x='Cytoplasm_MeanIntensity_plasmid_star',
    y='Cell_MeanIntensity_plasmid_star',
    row='treatment',
)

In [None]:
# ECDF of cytoplasm mean plasmid_star intensity, coloured by treatment.
sns.displot(
    data=data,
    x='Cytoplasm_MeanIntensity_plasmid_star',
    hue='treatment',
    kind='ecdf',
)