In [None]:
import pandas as pd
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

from pathlib import Path
import re
import utils
import functools

%load_ext autoreload
%autoreload 2

In [None]:
def read_data(directory, regex=None, bins=4):
    """Build one combined result from the CSV tables found in ``directory``.

    Four ``utils.analyze`` passes are chained, each one receiving the result
    of the previous pass via ``previous_result``:

    1. ``Image.csv``     - ``utils.ImageParser`` driven by ``regex``.
    2. ``GC.csv``        - ``utils.BlankParser(['ObjectNumber'])``, merged on
       ``ImageNumber`` with ``how='left'``.
    3. ``GC.csv``        - shape, intensity and rim-enrichment parsers, merged
       on ``ImageNumber`` + ``ObjectNumber``.
    4. ``DilatedGC.csv`` - correlation parser, merged on ``ImageNumber`` +
       ``ObjectNumber``.

    Parameters
    ----------
    directory : str or pathlib.Path
        Folder that contains ``Image.csv``, ``GC.csv`` and ``DilatedGC.csv``.
    regex : str, optional
        Handed unchanged to ``utils.ImageParser``; used to parse information
        out of the image filename.
    bins : int, optional
        Handed to ``utils.RimEnrichmentParser`` as ``bins`` (``total_bins`` is
        fixed at 20).

    Returns
    -------
    The first element of the pair returned by the final ``utils.analyze`` call.
    NOTE(review): presumably a pandas DataFrame - confirm against ``utils``.
    """
    root = Path(directory)

    # Pass 1: parse information from the filename using the provided regex.
    image_parsers = [utils.ImageParser(regex, debug_regex=False)]
    combined, _ = utils.analyze(root / 'Image.csv', parsers=image_parsers)

    # Pass 2: bring in the GC object numbers, keyed on the image only.
    object_parsers = [utils.BlankParser(['ObjectNumber'])]
    merge_per_image = functools.partial(
        utils.merge_result, merge_on=['ImageNumber'], how='left'
    )
    combined, _ = utils.analyze(
        root / 'GC.csv',
        previous_result=combined,
        parsers=object_parsers,
        extra_columns=['ImageNumber'],
        merge_fcn=merge_per_image,
    )

    # Pass 3: measure features from the GC objects themselves.
    feature_parsers = [
        utils.ShapeParser(),
        utils.IntensityParser(
            images=['FiveETS', 'plasmidStar'],
            measures=['UpperQuartile', 'Mean', 'Max'],
        ),
        utils.RimEnrichmentParser(
            images=['NOP56'],
            area_normalization='GCObjectImage',
            bins=bins,
            total_bins=20,
        ),
    ]
    merge_gc_objects = functools.partial(
        utils.merge_result, merge_on=['ImageNumber', 'ObjectNumber']
    )
    combined, _ = utils.analyze(
        root / 'GC.csv',
        previous_result=combined,
        parsers=feature_parsers,
        region='GC',
        extra_columns=['ImageNumber', 'ObjectNumber'],
        merge_fcn=merge_gc_objects,
    )

    # Pass 4: get correlation over the combined (dilated) objects.
    correlation_parsers = [
        utils.CorrelationParser(measures=['Correlation', 'Overlap']),
    ]
    merge_dilated_objects = functools.partial(
        utils.merge_result, merge_on=['ImageNumber', 'ObjectNumber']
    )
    combined, _ = utils.analyze(
        root / 'DilatedGC.csv',
        parsers=correlation_parsers,
        previous_result=combined,
        region='DilatedGC',
        extra_columns=['ImageNumber', 'ObjectNumber'],
        merge_fcn=merge_dilated_objects,
    )

    return combined
    
# Load the tables for this experiment.
# The filename regex expects: '/', a letter A-G followed by digits, '_', a
# `treatment` token containing no underscores, '_', a `star` token that is
# either 18S or 28S, the literal 'plasmid488', and a name ending in 'nd2'.
# NOTE(review): the named groups presumably become the `treatment` and `star`
# columns used for plotting - confirm in utils.ImageParser.
data = read_data(
    directory='/scratch/gpfs/tcomi/cp_paper_redo/endo_exo_rRNA/testing/outputs',
    regex=r'/[A-G]\d+_(?P<treatment>[^_]+)_(?P<star>18S|28S)plasmid488.*nd2$',
)

In [None]:
# Scatter (relplot's default kind) of GC_MeanIntensity_FiveETS against
# GC_MaxIntensity_plasmidStar: one panel per `star` value, marker style
# keyed on `treatment`.
sns.relplot(
    data=data,
    x='GC_MeanIntensity_FiveETS',
    y='GC_MaxIntensity_plasmidStar',
    col='star',
    style='treatment',
)