In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
import re
import utils
import functools

%load_ext autoreload
%autoreload 2

In [None]:
# defined for each pipeline to simplify data parsing
def read_data(directory, regex=None, dfc=True, bins=4, debug_regex=False):
    """Run the pipeline's CSV outputs through ``utils.analyze`` and combine them.

    Four files inside ``directory`` are processed in a fixed order, each call
    receiving the previous call's result as ``previous_result``:
    ``Image.csv`` -> ``DilatedGC.csv`` -> ``InitialGC.csv`` -> ``InitialFC.csv``.

    Parameters
    ----------
    directory : str or Path
        Folder containing the CSV files listed above.
    regex : str, optional
        Pattern handed to ``utils.ImageParser`` for the ``Image.csv`` step.
    dfc, bins
        Accepted for signature compatibility; not used inside this function.
    debug_regex : bool
        Forwarded unchanged to ``utils.ImageParser``.

    Returns
    -------
    tuple
        ``(result, extra)`` where ``result`` is the first value returned by
        the final (``InitialFC.csv``) ``utils.analyze`` call and ``extra`` is
        the second value returned by the ``InitialGC.csv`` call.
    """
    base = Path(directory)

    # One list object is deliberately shared by every option dict and partial
    # below, exactly as a single variable was shared before.
    parent_keys = ['ImageNumber', 'Parent_DilatedGC']

    def reduced_options(**merge_kwargs):
        # Keyword bundle for the object-level steps; merge_kwargs are added to
        # the merge_reduced partial (e.g. how='left').
        return {
            'extra_columns': parent_keys,
            'reduce': True,
            'merge_fcn': functools.partial(utils.merge_reduced,
                                           merge_on=parent_keys,
                                           **merge_kwargs),
        }

    gc_options = reduced_options()
    fc_options = reduced_options(how='left')

    # Step 1: image-level table, with the filename parsed by the given regex.
    image_parsers = [utils.ImageParser(regex, debug_regex=debug_regex)]
    combined, _ = utils.analyze(base / 'Image.csv', parsers=image_parsers)

    # Step 2: left-merge DilatedGC rows on ImageNumber, then expose their
    # ObjectNumber under the name the later steps merge on.
    image_merge = functools.partial(utils.merge_result,
                                    merge_on=['ImageNumber'],
                                    how='left')
    combined, _ = utils.analyze(
        base / 'DilatedGC.csv',
        previous_result=combined,
        parsers=[utils.BlankParser(['ObjectNumber'])],
        extra_columns=['ImageNumber', ],
        merge_fcn=image_merge,
    )
    combined = combined.rename(columns={'ObjectNumber': 'Parent_DilatedGC'})

    # Step 3: GC objects. The second return value of this call is what the
    # function hands back as ``extra``.
    gc_parsers = [
        utils.CountingParser(),
        utils.ShapeParser(),
        utils.RDFParser(id_vars=['ImageNumber', 'ObjectNumber', 'Parent_DilatedGC']),
    ]
    combined, gc_extra = utils.analyze(
        base / 'InitialGC.csv',
        previous_result=combined,
        parsers=gc_parsers,
        region='GC',
        **gc_options
    )

    # Step 4: FC objects, merged with how='left'.
    fc_parsers = [utils.CountingParser(), utils.ShapeParser()]
    combined, _ = utils.analyze(
        base / 'InitialFC.csv',
        previous_result=combined,
        parsers=fc_parsers,
        region='FC',
        **fc_options
    )

    return combined, gc_extra

# Load one experiment. The named groups in the filename regex (treatment, time)
# are what the later cells read from full_data as columns.
# Further experiments can be loaded the same way and combined, provided they
# are unique and share a subset of these variables (e.g. time or treatment).
full_data, extra_data = read_data(
    '/scratch/gpfs/tcomi/cp_paper_redo/rdf/testing/outputs',
    r'/[A-G]\d+_(?P<treatment>SCR|RPL5KD)_15p(?P<time>\d+)c.*nd2',
    bins=4,
)
# extra_data must hold exactly one item; unpacking fails otherwise.
(rdf,) = extra_data
full_data

In [None]:
# Pool the rows of rdf that share the same parent object: for every
# (image, parent, channel, radius) combination keep the summed counts and the
# counts-weighted mean intensity.
groups = ['ImageNumber', 'Parent_DilatedGC', 'channel', 'radius']
records = []
for key, block in rdf.groupby(groups):
    weights = block['counts']
    total_counts = weights.sum()
    # Missing intensity*counts products contribute 0 to the weighted sum.
    weighted_sum = (block['intensity'] * weights).fillna(0).sum()

    row = dict(zip(groups, key))
    row['intensity'] = weighted_sum / total_counts
    row['counts'] = total_counts
    records.append(row)

rdf_avg = pd.DataFrame(records)
rdf_avg

In [None]:
# Attach the time / treatment columns of full_data to every per-parent profile.
cell_info = full_data[['ImageNumber', 'Parent_DilatedGC', 'time', 'treatment']]
merged = rdf_avg.merge(cell_info, on=['ImageNumber', 'Parent_DilatedGC'])

# Counts-weighted mean intensity for each (time, treatment, channel, radius).
groups = ['time', 'treatment', 'channel', 'radius']
# Display names looked up by the integer channel value found in the data.
channels = ['', 'EU', 'DFC', 'FC', 'GC']
records = []
for key, block in merged.groupby(groups):
    weights = block['counts']
    weighted_sum = (block['intensity'] * weights).fillna(0).sum()

    row = dict(zip(groups, key))
    row['intensity'] = weighted_sum / weights.sum()
    # Overwrite the numeric channel with its name (key[2] is the channel key).
    row['channel'] = channels[key[2]]
    records.append(row)

rdf_data = pd.DataFrame(records)
# Cast time to int so that it is numeric downstream.
rdf_data['time'] = rdf_data.time.astype(int)

In [None]:
# Intensity against radius, one column of panels per channel; time is mapped
# to hue and treatment to line style. The x axis is shared, y axes are not.
sns.relplot(
    data=rdf_data,
    kind='line',
    x='radius',
    y='intensity',
    hue='time',
    style='treatment',
    col='channel',
    facet_kws={'sharex': True, 'sharey': False},
)

In [None]:
def _min_max(values):
    # Rescale so the smallest value maps to 0 and the largest to 1.
    low = values.min()
    high = values.max()
    return (values - low) / (high - low)


# Normalise each (channel, treatment, time) curve independently, then redraw
# the per-channel panels with the rescaled intensities.
normalized = rdf_data.copy()
curves = normalized.groupby(['channel', 'treatment', 'time'])
normalized['normalized_intensity'] = curves['intensity'].transform(_min_max)
sns.relplot(
    data=normalized,
    kind='line',
    x='radius',
    y='normalized_intensity',
    hue='time',
    style='treatment',
    col='channel',
    facet_kws={'sharex': True, 'sharey': False},
)

In [None]:
# Same normalised curves, arranged the other way round: one panel per time
# value (wrapping after three columns) with channel mapped to hue.
sns.relplot(
    data=normalized,
    kind='line',
    x='radius',
    y='normalized_intensity',
    hue='channel',
    style='treatment',
    col='time',
    col_wrap=3,
    facet_kws={'sharex': True, 'sharey': False},
)