In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
import re
import utils
import functools

%load_ext autoreload
%autoreload 2

In [None]:
def read_data(directory, regex=None, debug_regex=False):
    """Build the per-object measurement table and the RDF table for one run.

    Chains several ``utils.analyze`` calls over the CSV files found in
    ``directory`` (``Image.csv``, ``InitialNucleoli.csv``,
    ``NucleoplasmRim.csv`` and ``DilatedNucleoli.csv``), handing each call's
    result to the next one through ``previous_result``.

    Parameters
    ----------
    directory : str or Path
        Folder that contains the CSV files listed above.
    regex : str, optional
        Pattern passed to ``utils.ImageParser`` to parse information from
        the image filenames.
    debug_regex : bool, optional
        Forwarded unchanged to ``utils.ImageParser``.

    Returns
    -------
    tuple
        The ``result`` returned by the final (RDF) ``utils.analyze`` call and
        the first element of that call's second return value.
        NOTE(review): presumably the merged per-object table and the RDF
        table respectively -- confirm against ``utils.analyze``.
    """
    base_dir = Path(directory)

    # Keys shared by the per-region steps; the same list object is used for
    # both ``extra_columns`` and the merge, exactly as a single source of truth.
    merge_keys = ['ImageNumber', 'Parent_DilatedNucleoli']
    region_kwargs = dict(
        extra_columns=merge_keys,
        reduce=False,
        merge_fcn=functools.partial(utils.merge_result, merge_on=merge_keys),
    )
    # Images measured by every IntensityParser below (a fresh list is passed
    # to each parser so none of them shares state with another).
    stains = ('NOP56', 'NPM1', 'starPlasmid', 'PF')

    # Step 1: parse information from the filename using the provided regex.
    merged, _ = utils.analyze(
        base_dir / 'Image.csv',
        parsers=[utils.ImageParser(regex, debug_regex=debug_regex)],
    )

    # Step 2: bring in Parent_DilatedNucleoli so later steps can merge on it.
    merged, _ = utils.analyze(
        base_dir / 'InitialNucleoli.csv',
        previous_result=merged,
        parsers=[utils.BlankParser(['Parent_DilatedNucleoli'])],
        extra_columns=['ImageNumber', ],
        merge_fcn=functools.partial(
            utils.merge_result, merge_on=['ImageNumber'], how='left'
        ),
    )

    # Step 3: shape and intensity features of the GC objects.
    merged, _ = utils.analyze(
        base_dir / 'InitialNucleoli.csv',
        previous_result=merged,
        parsers=[
            utils.ShapeParser(),
            utils.IntensityParser(images=list(stains)),
        ],
        region='GC',
        **region_kwargs
    )

    # Step 4: intensity features of the nucleoplasm rim.
    merged, _ = utils.analyze(
        base_dir / 'NucleoplasmRim.csv',
        previous_result=merged,
        parsers=[utils.IntensityParser(images=list(stains))],
        region='Rim',
        **region_kwargs
    )

    # Step 5: radial distribution function; the merge function is the
    # identity, so this step's output is not merged by ``utils.merge_result``.
    merged, rdf_outputs = utils.analyze(
        base_dir / 'DilatedNucleoli.csv',
        previous_result=merged,
        parsers=[utils.RDFParser(id_vars=['ImageNumber', 'ObjectNumber'])],
        merge_fcn=lambda x: x,
    )

    return merged, rdf_outputs[0]
    
# Folder holding the CSV outputs to load, and the filename pattern whose named
# groups (`treatment`, `target`) are handed to read_data as `regex`.
output_dir = '/scratch/gpfs/tcomi/cp_paper_redo/rim_rdf/testing/outputs'
filename_regex = r'/[A-G]\d+_+(?P<treatment>[^_]+).*_(?P<target>RPS6|surf6|RRP1|DDX21|nucleolin|EXOSC10|NAT1).*nd2'

data, rdf = read_data(output_dir, regex=filename_regex)
# Transposed view so the columns are listed as rows in the cell output.
data.T

## RDF curves

In [None]:
# Work on a copy so the table returned by read_data stays untouched.
to_keep = data.copy()

# Attach the treatment/target labels to every RDF row: an RDF row's
# (ImageNumber, ObjectNumber) is matched against the measurement table's
# (ImageNumber, Parent_DilatedNucleoli). Uses the merge's default join type.
label_columns = ['ImageNumber', 'Parent_DilatedNucleoli', 'treatment', 'target']
de_novo = rdf.merge(
    to_keep[label_columns],
    left_on=['ImageNumber', 'ObjectNumber'],
    right_on=['ImageNumber', 'Parent_DilatedNucleoli'],
)

In [None]:
# Summarise the RDF per (treatment, target, channel): one normalised average
# curve plus an error estimate at every radius.
groups = ['treatment', 'target', 'channel']
# The `channel` values in de_novo are used as indices into this list.
channels = ['', 'target', 'NOP56', '18S', 'NPM1']

rdf_records = []
for group_key, group_rows in de_novo.groupby(groups):
    # Rows: one per (ImageNumber, ObjectNumber); columns: (value, radius).
    per_object = group_rows.pivot_table(
        index=['ImageNumber', 'ObjectNumber'],
        columns='radius',
        values=['intensity', 'counts'],
    )
    intensity = per_object['intensity']
    counts = per_object['counts']
    total_counts = counts.sum()

    # Count-weighted mean intensity at each radius (missing products count as 0).
    weighted_mean = (intensity * counts).fillna(0).sum() / total_counts

    # Min-max scale every radius column across objects, then take the
    # count-weighted spread around the plain column mean, divided by the
    # square root of the number of objects.
    column_min = intensity.min()
    column_span = intensity.max() - column_min
    scaled = (intensity - column_min) / column_span
    squared_dev = (scaled - scaled.mean()) ** 2
    sem = np.sqrt((squared_dev * counts).sum() / total_counts) / np.sqrt(len(scaled))

    # Min-max scale the averaged curve across radii.
    curve_min = weighted_mean.min()
    curve_span = weighted_mean.max() - curve_min
    norm_intens = (weighted_mean - curve_min) / curve_span

    # Column 0 is the normalised intensity, column 1 the error estimate.
    per_radius = pd.concat([norm_intens, sem], axis=1)
    for radius, row in per_radius.iterrows():
        record = dict(zip(groups, group_key))
        record.update(
            norm_intensity=row.iloc[0],
            sem=row.iloc[1],
            # Replace the numeric channel index with its display name.
            channel=channels[group_key[2]],
            radius=radius,
        )
        rdf_records.append(record)

rdf_data = pd.DataFrame(rdf_records)

In [None]:
# One facet per (target, channel); treatments share a facet and are told
# apart by line style.
g = sns.relplot(
    data=rdf_data, x='radius', y='norm_intensity',
    col='channel', row='target', kind='line', style='treatment',
    facet_kws=dict(sharex=True, sharey=False),
)

# Overlay a +/- SEM band on every curve.
for (target, channel), ax in g.axes_dict.items():
    in_facet = (rdf_data.channel == channel) & (rdf_data.target == target)
    sub_dat = rdf_data[in_facet]

    # Draw the bands in the same colour as the facet's lines; when the facet
    # has no line, fall back to matplotlib's default fill colour.
    band_kws = dict(alpha=0.3)
    if ax.lines:
        band_kws['color'] = ax.lines[0].get_color()

    # A facet holds one curve per treatment. Each needs its own band:
    # passing all treatments to a single fill_between call makes x jump back
    # to the start for every treatment and yields one polygon that spuriously
    # joins the curves.
    for _, curve in sub_dat.groupby('treatment'):
        curve = curve.sort_values('radius')
        ax.fill_between(
            curve.radius,
            curve.norm_intensity - curve['sem'],
            curve.norm_intensity + curve['sem'],
            **band_kws
        )