In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import sklearn
import sklearn.preprocessing
import sklearn.decomposition
import scipy

from pathlib import Path
import re

In [None]:
# helper function to add total intensity by multiplying the area with the intensity
def add_total_intens(df, channels, name):
    """Add a ``total_intens_<channel>_<name>`` column for each channel.

    Every new column is ``Intensity_MeanIntensity_<channel>`` multiplied by
    ``AreaShape_Area``.

    Parameters
    ----------
    df : DataFrame holding the mean-intensity columns and ``AreaShape_Area``.
    channels : a single channel name, or an iterable of channel names.
    name : suffix used in the generated column names.

    Returns
    -------
    The same ``df`` object, which is modified in place.
    """
    channel_names = [channels] if isinstance(channels, str) else channels
    for ch in channel_names:
        mean_col = f'Intensity_MeanIntensity_{ch}'
        total_col = f'total_intens_{ch}_{name}'
        df[total_col] = df[mean_col] * df['AreaShape_Area']
    return df

# helper function to merge incoming dfs with the results by image number and cell number.  Can change reduction to be count, sum, etc.
# map cols renames the columns from the input to output DFs
def merge_reduced(result_df, df, map_cols, reduction, how='inner'):
    """Aggregate ``df`` per (ImageNumber, Parent_DilatedGC) and merge into ``result_df``.

    Parameters
    ----------
    result_df : frame to merge into; must contain both key columns.
    df : object-level frame containing the key columns and the keys of ``map_cols``.
    map_cols : mapping of input column name -> output column name.
    reduction : anything accepted by ``GroupBy.aggregate`` (e.g. 'sum', 'mean', 'count').
    how : merge type passed to ``DataFrame.merge``.

    Returns
    -------
    A new merged DataFrame.
    """
    keys = ['ImageNumber', 'Parent_DilatedGC']
    source_cols = list(map_cols.keys())
    reduced = df.groupby(keys)[source_cols].aggregate(reduction)
    reduced = reduced.reset_index().rename(columns=map_cols)
    return result_df.merge(reduced, on=keys, how=how)

def build_initial_data(directory, common, regex):
    """Create the per-cell result table from ``InitialGC.csv`` and ``Image.csv``.

    Parameters
    ----------
    directory : ``pathlib.Path`` of the folder holding the CSV files.
    common : column names to read from ``InitialGC.csv``.
    regex : pattern applied to ``Metadata_FileLocation`` with ``str.extract``;
        its capture groups become extra columns. Skipped when falsy.

    Returns
    -------
    DataFrame with one row per (ImageNumber, CellNumber), the image metadata,
    ``Count_GC`` (number of GC rows per cell) and a ``Parent_DilatedGC``
    column that duplicates ``CellNumber``.
    """
    gc = pd.read_csv(directory / 'InitialGC.csv', usecols=common)

    # Image-level metadata, mainly the file location.
    images = pd.read_csv(
        directory / 'Image.csv',
        usecols=['Metadata_FileLocation', 'ImageNumber', 'Metadata_Series'],
    )
    if regex:
        # Capture groups of the filename regex (time, treatment, etc.).
        parsed = images['Metadata_FileLocation'].str.extract(regex)
        images = images.join(parsed)

    # One row per cell; the parent GC number is exposed as CellNumber.
    cell_keys = gc[['ImageNumber', 'Parent_DilatedGC']].drop_duplicates()
    result = cell_keys.rename(columns={'Parent_DilatedGC': 'CellNumber'})

    # Attach the file location and the regex-derived columns.
    result = result.merge(images, on='ImageNumber', how="left")

    # Number of GC objects per cell. Merged with explicit left/right keys
    # because the result's key is now called CellNumber.
    gc_counts = (
        gc.groupby(['ImageNumber', 'Parent_DilatedGC'])['AreaShape_Area']
        .count()
        .rename('Count_GC')
        .reset_index()
    )
    return result.merge(
        gc_counts,
        left_on=['ImageNumber', 'CellNumber'],
        right_on=['ImageNumber', 'Parent_DilatedGC'],
    )

def add_initial_gc(result, directory, common, dfc_intens=False):
    """Merge per-cell GC shape, area and intensity summaries into ``result``.

    Reads ``InitialGC.csv`` and adds, per (ImageNumber, Parent_DilatedGC):

    * means of eccentricity, circularity and the intensity-weighted centre
      (``mean_GC_eccentricity``, ``mean_GC_circularity``, ``center_x``, ``center_y``);
    * sums of area and total intensities (``GC_area``, ``total_GC_intens``,
      ``total_Probe_intens_GC`` and, with ``dfc_intens``, ``total_DFC_intens_GC``);
    * area-normalised intensities (``mean_mean_*_intens``).

    Parameters
    ----------
    result : per-cell table containing ``ImageNumber`` and ``Parent_DilatedGC``.
    directory : ``pathlib.Path`` of the folder holding ``InitialGC.csv``.
    common : column names read in addition to the GC-specific ones.
    dfc_intens : also read and summarise the DFC channel intensity.

    Returns
    -------
    A new DataFrame; both merges are inner joins.
    """
    dfc_columns = ['Intensity_MeanIntensity_DFC',] if dfc_intens else []
    dfc_channels = ['DFC',] if dfc_intens else []

    gc_columns = common + [
        'Intensity_MeanIntensity_GC', 'Intensity_MeanIntensity_Probe',
        'AreaShape_Eccentricity', 'AreaShape_Perimeter',
        'Location_CenterMassIntensity_X_GC', 'Location_CenterMassIntensity_Y_GC',
    ] + dfc_columns
    gc = pd.read_csv(directory / 'InitialGC.csv', usecols=gc_columns)

    # Total intensity of each channel inside every GC object.
    # NOTE(review): the original author flags that GC intensity may need
    # normalising to the control in each folder — not done here.
    gc = add_total_intens(gc, ['GC', 'Probe'] + dfc_channels, 'GC')

    # Circularity = 4 * pi * area / perimeter ** 2.
    area = gc['AreaShape_Area']
    perimeter = gc['AreaShape_Perimeter']
    gc['AreaShape_Cicularity'] = 4 * np.pi * area / perimeter ** 2

    # Shape and position: averaged over the GC objects of a cell.
    shape_cols = {
        'AreaShape_Eccentricity': 'mean_GC_eccentricity',
        'AreaShape_Cicularity': 'mean_GC_circularity',
        'Location_CenterMassIntensity_X_GC': 'center_x',
        'Location_CenterMassIntensity_Y_GC': 'center_y',
    }
    result = merge_reduced(result, gc, shape_cols, 'mean')

    # Area and total intensities: summed over the GC objects of a cell.
    total_cols = {
        'total_intens_GC_GC': 'total_GC_intens',
        'total_intens_Probe_GC': 'total_Probe_intens_GC',
        'AreaShape_Area': 'GC_area',
    }
    if dfc_intens:
        total_cols['total_intens_DFC_GC'] = 'total_DFC_intens_GC'
    result = merge_reduced(result, gc, total_cols, 'sum')

    # Area-normalised intensity per cell.
    normalised = [
        ('mean_mean_GC_intens', 'total_GC_intens'),
        ('mean_mean_Probe_intens', 'total_Probe_intens_GC'),
    ]
    if dfc_intens:
        normalised.append(('mean_mean_DFC_intens', 'total_DFC_intens_GC'))
    for out_col, total_col in normalised:
        result[out_col] = result[total_col] / result['GC_area']

    return result

def add_initial_fc(result, directory, common):
    """Merge per-cell FC counts, area and intensity totals into ``result``.

    Adds ``Count_FC``, ``total_FC_intens``, ``total_Probe_intens_FC``,
    ``FC_area``, ``FC_density`` (``Count_FC / GC_area``) and
    ``Count_Nucleoplasmic_FC`` (FCs listed in ``ExtraNucleolarFCs.csv``;
    0 for cells with none).

    Parameters
    ----------
    result : per-cell table with ``ImageNumber``, ``Parent_DilatedGC`` and
        ``GC_area`` (``GC_area`` is needed for ``FC_density``).
    directory : ``pathlib.Path`` of the folder holding the CSV files.
    common : column names read from both CSV files.

    Returns
    -------
    A new DataFrame. FC merges are inner joins; the nucleoplasmic-FC merge is
    a left join so cells without extra-nucleolar FCs are kept.
    """
    # FC intensities and positions (positions are read but not used yet).
    fc = pd.read_csv(
        directory / 'InitialFC.csv',
        usecols=common + ['Intensity_MeanIntensity_FC', 'Intensity_MeanIntensity_Probe',
                          'Location_CenterMassIntensity_X_FC', 'Location_CenterMassIntensity_Y_FC',],
    )

    # Total FC and Probe intensity inside every FC object.
    fc = add_total_intens(fc, ['FC', 'Probe'], 'FC')

    # Number of FCs per cell.
    result = merge_reduced(result, fc, {"AreaShape_Area": "Count_FC"}, 'count')

    # Each output needs its own input key: repeating 'total_intens_FC_FC'
    # would drop 'total_FC_intens' and put the FC-channel total under the
    # Probe name.
    map_cols = {
        'total_intens_FC_FC': 'total_FC_intens',
        'total_intens_Probe_FC': 'total_Probe_intens_FC',
        'AreaShape_Area': 'FC_area',
    }
    result = merge_reduced(result, fc, map_cols, 'sum')
    result['FC_density'] = result['Count_FC'] / result['GC_area']

    # FCs outside of the GC; cells without any get a count of 0.
    extra_fc = pd.read_csv(directory / 'ExtraNucleolarFCs.csv', usecols=common)
    result = merge_reduced(
        result, extra_fc, {"AreaShape_Area": "Count_Nucleoplasmic_FC"}, 'count', how='left',
    )
    result['Count_Nucleoplasmic_FC'] = result['Count_Nucleoplasmic_FC'].fillna(0)

    return result


def add_rim(result, directory, common, dfc, bins=1, total=10):
    """Merge per-cell rim-enrichment scores into ``result``.

    For every GC object in ``InitialGC.csv`` the ``RadialDistribution_FracAtD``
    fractions of the ``bins`` highest-numbered of ``total`` radial bins
    (presumably the outermost rim — confirm against the CellProfiler setup)
    are summed. The FC (and optionally DFC) sum is divided by the
    ``GCObjectImage`` sum, and the ratio is averaged per
    (ImageNumber, Parent_DilatedGC).

    Parameters
    ----------
    result : per-cell table with ``ImageNumber`` and ``Parent_DilatedGC``.
    directory : ``pathlib.Path`` of the folder holding ``InitialGC.csv``.
    common : column names read in addition to the radial-distribution ones.
    dfc : when true also compute ``dfc_rim_enrichment``; when false, columns
        containing 'DFC' are not read.
    bins : number of bins, counted down from ``total``, that form the rim.
    total : number of radial bins in the column names (``<bin>of<total>``).

    Returns
    -------
    A new DataFrame with ``fc_rim_enrichment`` (and ``dfc_rim_enrichment``),
    inner-joined onto ``result``.
    """
    csv_path = directory / 'InitialGC.csv'
    # Read only the header line. The context manager closes the handle, and
    # the line terminator is stripped so the last column name can match too.
    with open(csv_path) as handle:
        header = handle.readline().rstrip('\r\n').split(',')
    frac_cols = [c for c in header if c.startswith('RadialDistribution_FracAtD')]
    if not dfc:
        frac_cols = [c for c in frac_cols if 'DFC' not in c]
    distributions = pd.read_csv(csv_path, usecols=common + frac_cols)

    # Bin numbers total, total-1, ..., total-bins+1.
    rim_bins = list(range(total, total - bins, -1))

    def rim_fraction(channel):
        """Sum of the rim-bin fractions of one channel, per GC object."""
        cols = [f'RadialDistribution_FracAtD_{channel}_{b}of{total}' for b in rim_bins]
        return distributions[cols].sum(axis=1)

    relative_areas = rim_fraction('GCObjectImage')
    distributions['fc_rim_enrichment'] = rim_fraction('FC') / relative_areas
    map_cols = {'fc_rim_enrichment': 'fc_rim_enrichment'}
    if dfc:
        distributions['dfc_rim_enrichment'] = rim_fraction('DFC') / relative_areas
        map_cols['dfc_rim_enrichment'] = 'dfc_rim_enrichment'

    return merge_reduced(result, distributions, map_cols, 'mean')

def add_correlation(result, directory, common):
    """Merge area-weighted mean correlation/overlap measures into ``result``.

    Every ``Correlation_Correlation*`` / ``Correlation_Overlap*`` column of
    ``CombinedObjects.csv`` is multiplied by the object's area, summed per
    (ImageNumber, Parent_DilatedGC) and divided by the summed area. Output
    columns drop the leading ``Correlation_`` prefix; the summed area is
    stored as ``combined_area``.

    Parameters
    ----------
    result : per-cell table with ``ImageNumber`` and ``Parent_DilatedGC``.
    directory : ``pathlib.Path`` of the folder holding ``CombinedObjects.csv``.
    common : column names read in addition to the correlation ones; must
        include ``AreaShape_Area``.

    Returns
    -------
    A new DataFrame (inner join on the two key columns).
    """
    csv_path = directory / 'CombinedObjects.csv'
    # Read only the header line. The context manager closes the handle, and
    # the line terminator is stripped so the last column name can match too.
    with open(csv_path) as handle:
        header = handle.readline().rstrip('\r\n').split(',')
    cols = [c for c in header
            if c.startswith('Correlation_Correlation')
            or c.startswith('Correlation_Overlap')
           ]
    corr = pd.read_csv(csv_path, usecols=common + cols)

    # Weight every measure by the object's area.
    corr[cols] *= corr['AreaShape_Area'].to_numpy()[:, None]

    # Sum the weighted measures and the area per cell.
    prefix_len = len('Correlation_')
    map_cols = {c: c[prefix_len:] for c in cols}
    map_cols['AreaShape_Area'] = 'combined_area'
    result = merge_reduced(result, corr, map_cols, 'sum')

    # Divide by the total area to obtain the area-weighted mean.
    measure_cols = [c for c in map_cols.values() if c != 'combined_area']
    result[measure_cols] /= result['combined_area'].to_numpy()[:, None]
    return result
    
def read_data(directory, regex=None, dfc=True, bins=4, dfc_intens=False):
    """Build the per-cell feature table for one output directory.

    Runs ``build_initial_data``, ``add_initial_gc``, ``add_initial_fc``,
    ``add_rim`` (always with ``total=20``) and ``add_correlation`` in that
    order.

    Parameters
    ----------
    directory : path (str or Path) of the folder holding the CSV files.
    regex : optional pattern whose capture groups, extracted from
        ``Metadata_FileLocation``, become extra columns.
    dfc : forwarded to ``add_rim``.
    bins : forwarded to ``add_rim``.
    dfc_intens : forwarded to ``add_initial_gc``.

    Returns
    -------
    DataFrame with the helper ``Parent_DilatedGC`` column dropped.
    """
    root = Path(directory)
    # Image and object number are unique identifiers. Area is used a lot and
    # Parent_DilatedGC should correspond to a single cell.
    shared_cols = ['ImageNumber', 'ObjectNumber', 'AreaShape_Area', 'Parent_DilatedGC']

    table = build_initial_data(root, shared_cols, regex)
    table = add_initial_gc(table, root, shared_cols, dfc_intens)
    table = add_initial_fc(table, root, shared_cols)
    table = add_rim(table, root, shared_cols, dfc, bins, total=20)
    table = add_correlation(table, root, shared_cols)

    # TODO: fc position (stringyness?, graph morphology of fcs)
    return table.drop(columns='Parent_DilatedGC')

# Build the per-cell table from morphology/240820_FISH/outputs; the regex
# captures the probe name from each image's file location.
full_data = read_data('morphology/240820_FISH/outputs', r'/.*_10A_(?P<probe>[^_0]+)(?:\d{3}\d?)?.nd2', bins=4)
# Files that contribute at least one row with a missing value. Printed
# explicitly because a bare expression that is not the last line of a cell
# is evaluated and then silently discarded.
print(full_data.loc[full_data.isna().any(axis=1), 'Metadata_FileLocation'].unique())
full_data.probe.unique()

In [None]:
# CX_EU experiment. Copy so the dtype change below does not touch full_data.
df = full_data[full_data.exp == 'CX_EU'].copy()
df['time'] = df['time'].astype(int)
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a median, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'median' not in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
print(df.groupby(['treatment', 'time']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column: line style encodes treatment, hue encodes time.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    for treatment, style in zip(('FibKD', 'ctl'), ('-', '--')):
        sns.ecdfplot(data=df[df.treatment == treatment], x=col, hue='time', linestyle=style, ax=ax, label=treatment)

# Probe correlation with each compartment over time, on an explicit axes.
fig, axes = plt.subplots()
# sns.lineplot(data=df, x='time', y='RWC_FC_Probe', label='FC_Probe', style='treatment', legend=False)
# sns.lineplot(data=df, x='time', y='RWC_DFC_Probe', label='DFC_Probe', style='treatment', legend=False)
# sns.lineplot(data=df, x='time', y='RWC_GC_Probe', label='GC_Probe', style='treatment', legend=False)
sns.lineplot(data=df, x='time', y='Correlation_FC_Probe', label='FC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_DFC_Probe', label='DFC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_GC_Probe', label='GC_Probe', style='treatment', ax=axes)
fig.savefig('EU_CX.pdf')

In [None]:
# Fib experiment. Copy so the dtype change below does not touch full_data.
df = full_data[full_data.exp == 'Fib'].copy()
df['time'] = df['time'].astype(int)
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a median, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'median' not in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
print(df.groupby(['treatment', 'time']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column: line style encodes treatment, hue encodes time.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    for treatment, style in zip(('FibKD', 'ctl'), ('-', '--')):
        sns.ecdfplot(data=df[df.treatment == treatment], x=col, hue='time', linestyle=style, ax=ax, label=treatment)

# Probe correlation with each compartment over time, on an explicit axes.
fig, axes = plt.subplots()
# sns.lineplot(data=df, x='time', y='RWC_FC_Probe', label='FC_Probe', style='treatment', legend=False)
# sns.lineplot(data=df, x='time', y='RWC_DFC_Probe', label='DFC_Probe', style='treatment', legend=False)
# sns.lineplot(data=df, x='time', y='RWC_GC_Probe', label='GC_Probe', style='treatment', legend=False)
sns.lineplot(data=df, x='time', y='Correlation_FC_Probe', label='FC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_DFC_Probe', label='DFC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_GC_Probe', label='GC_Probe', style='treatment', ax=axes)
fig.savefig('EU_fib.pdf')

In [None]:
# FVP_EU experiment. Copy so the changes below do not touch full_data.
df = full_data[full_data.exp == 'FVP_EU'].copy()
# drop some data from another experiment (rows without a parsed time)
df = df[~df.time.isna()]
df['time'] = df['time'].astype(int)
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a median, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'median' not in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
print(df.groupby(['exp', 'treatment', 'time']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column: line style encodes treatment, hue encodes time.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    for treatment, style in zip(('FVP', 'DMSO'), ('-', 'dotted')):
        sns.ecdfplot(data=df[df.treatment == treatment], x=col, hue='time', linestyle=style, ax=ax, label=treatment)

# Probe correlation with each compartment over time, on an explicit axes.
fig, axes = plt.subplots()
sns.lineplot(data=df, x='time', y='Correlation_FC_Probe', label='FC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_DFC_Probe', label='DFC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_GC_Probe', label='GC_Probe', style='treatment', ax=axes)
fig.savefig('EU_fvp.pdf')

In [None]:
# FVP_nodfc experiment. Copy so the dtype change below does not touch full_data.
df = full_data[full_data.exp == 'FVP_nodfc'].copy()
df['time'] = df['time'].astype(int)
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a DFC measure, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'DFC' not in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
print(df.groupby(['exp', 'treatment', 'time']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column: line style encodes treatment, hue encodes time.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    for treatment, style in zip(('FVP2uM', 'DMSO'), ('-', 'dotted')):
        sns.ecdfplot(data=df[df.treatment == treatment], x=col, hue='time', linestyle=style, ax=ax, label=treatment)

# Probe correlation with FC and GC over time, on an explicit axes.
fig, axes = plt.subplots()
sns.lineplot(data=df, x='time', y='Correlation_FC_Probe', label='FC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_GC_Probe', label='GC_Probe', style='treatment', ax=axes)
fig.savefig('EU_fvp_nodfc.pdf')

In [None]:
# FVP_FISH experiment. Copy so the dtype change below does not touch full_data.
df = full_data[full_data.exp == 'FVP_FISH'].copy()
df['time'] = df['time'].astype(int)
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a median, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'median' not in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
print(df.groupby(['exp', 'treatment', 'time', 'probe']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column: line style encodes treatment, hue encodes time.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    for treatment, style in zip(('FVP', 'DMSO'), ('-', 'dotted')):
        sns.ecdfplot(data=df[df.treatment == treatment], x=col, hue='time', linestyle=style, ax=ax, label=treatment)

# Probe correlation with each compartment over time; line style encodes probe.
fig, axes = plt.subplots()
sns.lineplot(data=df, x='time', y='Correlation_FC_Probe', label='FC_Probe', style='probe', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_DFC_Probe', label='DFC_Probe', style='probe', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_GC_Probe', label='GC_Probe', style='probe', ax=axes)

In [None]:
# RPL5 experiment. Copy so the dtype change below does not touch full_data.
df = full_data[full_data.exp == 'RPL5'].copy()
df['time'] = df['time'].astype(int)
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a median, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'median' not in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
print(df.groupby(['treatment', 'time']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column: line style encodes treatment, hue encodes time.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    for treatment, style in zip(('RPL5KD', 'SCR'), ('-', '--')):
        sns.ecdfplot(data=df[df.treatment == treatment], x=col, hue='time', linestyle=style, ax=ax, label=treatment)

# Probe correlation with each compartment over time, on an explicit axes.
fig, axes = plt.subplots()
# sns.lineplot(data=df, x='time', y='RWC_FC_Probe', label='FC_Probe', style='treatment', legend=False)
# sns.lineplot(data=df, x='time', y='RWC_DFC_Probe', label='DFC_Probe', style='treatment', legend=False)
# sns.lineplot(data=df, x='time', y='RWC_GC_Probe', label='GC_Probe', style='treatment', legend=False)
sns.lineplot(data=df, x='time', y='Correlation_FC_Probe', label='FC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_DFC_Probe', label='DFC_Probe', style='treatment', ax=axes)
sns.lineplot(data=df, x='time', y='Correlation_GC_Probe', label='GC_Probe', style='treatment', ax=axes)
fig.savefig('EU_rpl5.pdf')

In [None]:
# RPL5_FISH experiment (no time axis: cells are compared by probe).
df = full_data[full_data.exp == 'RPL5_FISH'].copy()
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a median, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'median' not in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
print(df.groupby(['treatment', 'probe']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column: line style encodes treatment, hue encodes probe.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    for treatment, style in zip(('RPL5KD', 'SCR'), ('-', '--')):
        sns.ecdfplot(data=df[df.treatment == treatment], x=col, hue='probe', linestyle=style, ax=ax, label=treatment)

# One bar chart per compartment: probe correlation by probe, split by treatment.
fig, axes = plt.subplots()
sns.barplot(data=df, x='probe', y='Correlation_FC_Probe', hue='treatment', ax=axes)
plt.xticks(rotation=60)
fig, axes = plt.subplots()
sns.barplot(data=df, x='probe', y='Correlation_DFC_Probe', hue='treatment', ax=axes)
plt.xticks(rotation=60)
fig, axes = plt.subplots()
sns.barplot(data=df, x='probe', y='Correlation_GC_Probe', hue='treatment', ax=axes)
plt.xticks(rotation=60)

In [None]:
# All cells imaged with the U8FISH probe.
df = full_data[full_data.probe == 'U8FISH'].copy()
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a median, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'median' not in c
          # and not 'Probe' in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
# Keep every SCR cell; keep other cells only when their mean probe intensity
# is below 0.002.
# NOTE(review): the 0.002 cut-off is a hand-picked constant — confirm its origin.
df = df[(df.mean_mean_Probe_intens < 0.002) | (df.treatment == 'SCR')]
print(df.groupby(['exp', 'treatment', 'probe']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column, coloured by treatment.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    sns.ecdfplot(data=df, x=col, hue='treatment', ax=ax)

# Probe intensity of the non-SCR cells that passed the filter.
fig, axes = plt.subplots()
sns.ecdfplot(data=df[df.treatment != 'SCR'], x='mean_mean_Probe_intens', hue='treatment', ax=axes)
axes.set_xlim(0, 0.0025)
fig, axes = plt.subplots()
# sns.kdeplot(data=df, x='dfc_rim_enrichment', hue='treatment', ax=axes)
sns.ecdfplot(data=df, x='dfc_rim_enrichment', hue='treatment', ax=axes)

In [None]:
# All cells imaged with the U3FISH probe.
df = full_data[full_data.probe == 'U3FISH'].copy()
# Columns to plot: everything after the first six columns that is not a total
# intensity, a centre coordinate, a median, or a grouping variable.
# ('treatment' was misspelled 'treatement', so it was never excluded.)
toplot = [c for c in df.columns[6:]
          if not c.startswith('total')
          and not c.startswith('center')
          and 'median' not in c
          # and not 'Probe' in c
          and c not in ('treatment', 'time', 'probe', 'exp')
         ]
# Keep every SCR cell; keep other cells only when their mean probe intensity
# is below 0.005.
# NOTE(review): the 0.005 cut-off is a hand-picked constant — confirm its origin.
df = df[(df.mean_mean_Probe_intens < 0.005) | (df.treatment == 'SCR')]
print(df.groupby(['exp', 'treatment', 'probe']).CellNumber.count())

n_cols = 4
n_rows = int(np.ceil(len(toplot) / n_cols))

figsize = (5*n_cols+5, 5*n_rows)

# One ECDF panel per column, coloured by treatment.
fig, axes = plt.subplots(n_rows, n_cols, figsize=figsize)
for col, ax in zip(toplot, axes.flatten()):
    sns.ecdfplot(data=df, x=col, hue='treatment', ax=ax)

# Probe intensity of the non-SCR cells that passed the filter.
fig, axes = plt.subplots()
sns.ecdfplot(data=df[df.treatment != 'SCR'], x='mean_mean_Probe_intens', hue='treatment', ax=axes)
fig, axes = plt.subplots()
sns.ecdfplot(data=df, x='fc_rim_enrichment', hue='treatment', ax=axes)

# CellProfiler (CP) radial distribution function (RDF) data

In [None]:
def add_rdf(result, directory, common):
    """Read the ``RDF_*`` columns of ``InitialGC.csv`` into a long-format table.

    Parameters
    ----------
    result : per-cell table; returned unchanged so the function can be called
        like the other pipeline steps.
    directory : ``pathlib.Path`` of the folder holding ``InitialGC.csv``.
    common : unused; accepted for signature parity with the other steps.

    Returns
    -------
    (result, rdf) where ``rdf`` has one row per GC object, channel and radius
    with columns ``ImageNumber``, ``ObjectNumber``, ``Parent_DilatedGC``,
    ``intensity``, ``channel``, ``radius`` and ``counts``.
    """
    id_cols = ['ImageNumber', 'ObjectNumber', 'Parent_DilatedGC']
    csv_path = directory / 'InitialGC.csv'

    # Read only the header line; the context manager closes the file handle.
    with open(csv_path, 'r') as handle:
        header = handle.readline().split(',')
    rdf_cols = [c.strip() for c in header if c.startswith('RDF_')]

    nucl = pd.read_csv(csv_path, usecols=id_cols + rdf_cols)
    # Long format: one row per (object, RDF column).
    nucl = nucl.melt(id_vars=id_cols)

    # Intensity columns carry the channel and the radius in their name.
    rdf = nucl[nucl.variable.str.startswith('RDF_Intensity')].reset_index(drop=True)
    extract = rdf.variable.str.extract(r'RDF_Intensity_C(\d)_R([-0-9]+)')
    rdf = rdf.assign(
        channel=extract[0].astype(int),
        radius=extract[1].astype(int),
    ).rename(columns={'value': 'intensity'}).drop(columns='variable')

    # Count columns carry only the radius.
    counts = nucl[nucl.variable.str.startswith('RDF_Count')].reset_index(drop=True)
    extract = counts.variable.str.extract(r'RDF_Counts_R([-0-9]+)')
    counts = counts.assign(
        radius=extract[0].astype(int)
    ).rename(columns={'value': 'counts'}).drop(columns='variable')

    # Pair every intensity with the count at the same object and radius.
    rdf = rdf.merge(counts, on=id_cols + ['radius'])

    return result, rdf

def read_data(directory, regex=None, dfc=True, bins=4, dfc_intens=False):
    """Build the per-cell feature table and the long-format RDF table.

    Runs ``build_initial_data``, ``add_initial_gc``, ``add_initial_fc``,
    ``add_rim`` (always with ``total=20``), ``add_correlation`` and
    ``add_rdf`` in that order.

    Parameters
    ----------
    directory : path (str or Path) of the folder holding the CSV files.
    regex : optional pattern whose capture groups, extracted from
        ``Metadata_FileLocation``, become extra columns.
    dfc : forwarded to ``add_rim``.
    bins : forwarded to ``add_rim``.
    dfc_intens : forwarded to ``add_initial_gc``.

    Returns
    -------
    (table, rdf): the per-cell DataFrame with the helper ``Parent_DilatedGC``
    column dropped, and the RDF DataFrame produced by ``add_rdf``.
    """
    root = Path(directory)
    # Image and object number are unique identifiers. Area is used a lot and
    # Parent_DilatedGC should correspond to a single cell.
    shared_cols = ['ImageNumber', 'ObjectNumber', 'AreaShape_Area', 'Parent_DilatedGC']

    table = build_initial_data(root, shared_cols, regex)
    table = add_initial_gc(table, root, shared_cols, dfc_intens)
    table = add_initial_fc(table, root, shared_cols)
    table = add_rim(table, root, shared_cols, dfc, bins, total=20)
    table = add_correlation(table, root, shared_cols)
    table, rdf = add_rdf(table, root, shared_cols)

    return table.drop(columns='Parent_DilatedGC'), rdf

# Load the RPL5 RDF outputs; the regex captures treatment (SCR or RPL5KD)
# and time from each image's file location.
data, rdf = read_data(
    'morphology_rdf/RPL5/outputs/',
    r'/[A-G]\d+_(?P<treatment>SCR|RPL5KD)_15p(?P<time>\d+)c.*nd2',
)
data

In [None]:
# Display the long-format RDF table returned by read_data.
rdf

In [None]:
# need to average GCs from each parent
# Count-weighted mean intensity per (image, parent cell, channel, radius):
# sum(intensity * counts) / sum(counts), with NaN products counted as 0.
# Done with one vectorized groupby instead of a Python loop over the groups.
groups = ['ImageNumber', 'Parent_DilatedGC', 'channel', 'radius']
rdf_weighted = rdf.assign(
    weighted_intensity=(rdf['intensity'] * rdf['counts']).fillna(0)
)
rdf_sums = rdf_weighted.groupby(groups)[['weighted_intensity', 'counts']].sum()
rdf_avg = (
    rdf_sums
    .assign(intensity=rdf_sums['weighted_intensity'] / rdf_sums['counts'])
    [['intensity', 'counts']]
    .reset_index()
)
rdf_avg

In [None]:

# NOTE(review): when the cells run in order rdf_avg is already a DataFrame at
# this point, so this re-wrap looks redundant — confirm before removing.
rdf_avg = pd.DataFrame(rdf_avg)
rdf_avg

In [None]:
# get cell information
# Attach per-cell metadata (time, treatment) to every averaged RDF row.
cell_info = data[['ImageNumber', 'CellNumber', 'time', 'treatment']]
merged = rdf_avg.merge(
    cell_info,
    left_on=['ImageNumber', 'Parent_DilatedGC'],
    right_on=['ImageNumber', 'CellNumber'],
)
# Count-weighted mean intensity over all cells that share time, treatment,
# channel and radius; the numeric channel index is replaced by its name.
groups = ['time', 'treatment', 'channel', 'radius']
channels = ['', 'EU', 'DFC', 'FC', 'GC']
rdf_data = pd.DataFrame([
    {
        **dict(zip(groups, name)),
        'intensity': ((dat['intensity'] * dat['counts']).fillna(0).sum()) / dat['counts'].sum(),
        'channel': channels[name[2]],
    }
    for name, dat in merged.groupby(groups)
])
rdf_data['time'] = rdf_data.time.astype(int)

In [None]:
# Raw intensity vs. radius: one panel per channel, line style encodes
# treatment and hue encodes time; y axes are independent per panel.
sns.relplot(data=rdf_data, x='radius', y='intensity', col='channel', 
            kind='line', style='treatment', hue='time', facet_kws=dict(sharex=True, sharey=False))

In [None]:
def _min_max_scale(values):
    """Rescale a Series linearly so its minimum maps to 0 and its maximum to 1."""
    return (values - values.min()) / (values.max() - values.min())


# Min-max normalise each (channel, treatment, time) curve separately.
normalized = rdf_data.assign(
    normalized_intensity=(
        rdf_data
        .groupby(['channel', 'treatment', 'time'])
        .intensity
        .transform(_min_max_scale)
    )
)
sns.relplot(
    data=normalized, x='radius', y='normalized_intensity', col='channel',
    kind='line', style='treatment', hue='time',
    facet_kws=dict(sharex=True, sharey=False),
)

In [None]:
# Normalised intensity vs. radius: one panel per time point (wrapped after
# three columns), line style encodes treatment and hue encodes channel.
sns.relplot(data=normalized, x='radius', y='normalized_intensity', col='time', col_wrap=3,
            kind='line', style='treatment', hue='channel', facet_kws=dict(sharex=True, sharey=False))

In [None]:
# Write the normalised RDF table (including its index) to the working directory.
normalized.to_csv('rpl5_rdf_cp.csv')