# Validation of Cue4 relocalization by imaging

Comparison of the protein abundances at the marker between the WT and the deletion backgrounds.

In [1]:
## logging functions
import logging
## system functions
import sys
from os.path import dirname
from pathlib import Path
## system functions from roux
from roux.lib.io import backup
from roux.lib.io import read_dict
from roux.lib.io import read_table
from roux.lib.sys import read_ps
## notebook display
from IPython.display import Markdown as info_nb
## workflow functions from roux
from roux.workflow.io import read_metadata
## data functions
import pandas as pd
## data functions from roux
import roux.lib.dfs as rd # attributes
## visualization functions
import matplotlib.pyplot as plt
## visualization functions from roux
from roux.viz.io import begin_plot
from roux.viz.io import to_plot
## make modules in the parent directory importable
sys.path.append('..')

In [15]:
## parameters
# identifiers of the runs to process; each one is used as a sub-directory name
# when the per-run input and output paths are built
run_ids=['20231011','20231012','20231013']
# path of the metadata file read by `read_metadata`
metadata_path='../config/metadata.yaml'
# if True, the existing output directory is backed up before it is overwritten
force=False
# if True, the metadata is read with the version number set to 'test'
test=True


In [19]:
## inferred parameters
# in test mode `{'version':{'number':'test'}}` is passed as `inputs`, otherwise `None`
metadata=read_metadata(
    metadata_path,
    inputs={'version':{'number':'test'}} if test else None,
    )
# the dataset configuration is read from the path given in the metadata and
# attached under the 'dataset' key
metadata['dataset']=read_metadata(
    metadata['dataset_config_path'],
    config_base=dict(
        species_name=metadata['species_name'],
        path=metadata['dataset_path'],
        ),
    )
### output
output_dir_path=metadata['validations']['CUE1_CUE4']['GFP_intensity']['path']
logging.info(f"Output directory: {output_dir_path}")
## backup old files if overwriting (force is True)
if force:
    # `test` was `not force`, which is always False inside this branch
    backup(output_dir_path,dirname(output_dir_path),test=False,)

In [20]:
## directories of the input and of the pre-processed validation images
input_dir_path,pre_processed_dir_path=(
    metadata[stage]['validation']['images'] for stage in ('data','pre_processed')
    )

## Pre-processing

In [25]:
## setting parameters
# one parameter set per run: the run's input directory, the path of the
# segmentation output, and the paths of the two notebooks passed along
parameters_list=[
    dict(
        input_path=f'{input_dir_path}/{run_id}',
        output_path=f'{pre_processed_dir_path}/{run_id}/segmentation.json',
        separate_channels_nb_path=metadata['scripts']['io_read_nd2'],
        segmentation_nb_path='8100_segmentation.ipynb',
    )
    for run_id in run_ids
]
# number of parameter sets (shown as the cell output)
len(parameters_list)

In [22]:
## run the pre-processing notebook with the parameter sets defined above
# presumably one execution per entry of `parameters_list` — confirm against `run_tasks`
from roux.workflow.task import run_tasks
_outputs=run_tasks(
    input_notebook_path='811_pre_processing.ipynb',
    kernel='parlocred',
    parameters_list=parameters_list,
)

## Post-processing

In [4]:
## setting parameters
# one parameter set per run: the run's `00_paths.tsv` from the pre-processing
# directory as input and the combined table under the output directory as output;
# the same channel ('epi_gfp') is given for segmentation and for intensity
parameters_list=[
    dict(
        input_path=f'{pre_processed_dir_path}/{run_id}/00_paths.tsv',
        output_path=f'{output_dir_path}/{run_id}//relocalization/01_combined.tsv',
        channel_segmentation='epi_gfp',
        channel_intensity='epi_gfp',
        script_protein_abundance_by_single_cell_path=metadata['scripts']['protein_abundance_and_normalization'],
        script_protein_abundance_by_marker_location_path=metadata['scripts']['protein_abundance_by_marker_location'],
        force=force,
    )
    for run_id in run_ids
]
# number of parameter sets (shown as the cell output)
len(parameters_list)

In [6]:
## run the abundance notebook with the parameter sets defined above
# presumably one execution per entry of `parameters_list` — confirm against `run_tasks`
from roux.workflow.task import run_tasks
_outputs=run_tasks(
    input_notebook_path='811_abundance.ipynb',
    kernel='parlocred',
    parameters_list=parameters_list,
)

## Sample information combined

In [4]:
## remove the variables whose names start with `df` from the namespace;
## `-f` skips the confirmation prompt
%reset_selective -f "^df.*"

In [5]:
## sample information of all the runs, labelled by the name of the run directory
_tables_by_replicate={}
for _path in read_ps(f'{output_dir_path}/2023101*/00_paths.tsv'):
    # the directory that contains the table provides the replicate label
    _tables_by_replicate[Path(_path).parent.stem]=read_table(_path)
df00=(
    pd.concat(_tables_by_replicate,axis=0,names=['replicate'])
    .reset_index(level=0)  # replicate label: index level -> column
    .iloc[:,:6]            # keep the first six columns only
    )
df00.head()

## Combined plots

### Filter by background intensity variation

In [10]:
## calculation of the mean and std. of the background intensity by replicate
def _read_background_intensity(row):
    """Return the 'background_intensity' entry of the image's `01_gfpby_cell_stats.json`."""
    return read_dict(f"{output_dir_path}/{row['replicate']}/{row['image id']}_channels/01_gfpby_cell_stats.json")['background_intensity']

df0=(
    df00
    # keep the two CUE4-GFP constructs and drop the rows with a 'cytoplasm' red channel
    .log.query(expr="`construct` == ['CUE4-GFP CUE1-WT','CUE4-GFP CUE1-DELTA']")
    .log.query(expr="`red_channel` != 'cytoplasm'")
    # one background intensity per row, read from the per-image stats file
    .assign(**{'background_intensity': lambda df: df.apply(_read_background_intensity,axis=1)})
    # per-replicate mean and std., and the band mean +/- std.
    .assign(
        **{
            'mean': lambda df: df.groupby(['replicate'])['background_intensity'].transform('mean'),
            'std': lambda df: df.groupby(['replicate'])['background_intensity'].transform('std'),
        }
    )
    .assign(
        **{
            'mean+std': lambda df: df['mean']+df['std'],
            'mean-std': lambda df: df['mean']-df['std'],
        }
    )
    # outlier: background intensity strictly outside the band of its replicate
    .assign(
        **{
            'outlier': lambda df: (df['background_intensity']>df['mean+std']) | (df['background_intensity']<df['mean-std']),
        }
    )
    .astype({'replicate':str})
    )
df0.head()

#### Plot of the variation and outliers

In [10]:
from roux.viz.ax_ import set_legend_custom

# NOTE(review): `sns` (seaborn) is not imported in the import cell of this
# notebook — confirm that it is in scope before this cell runs.
data=df0.copy()
# constructs in the reverse of their order of appearance
hue_order=data['construct'].unique()[::-1]
fig,ax=plt.subplots(figsize=[2.5,2.5])
## background intensity of each row, grouped by replicate and split by construct
ax=sns.swarmplot(
    data=data,
    x='background_intensity',
    y='replicate',
    hue='construct',
    hue_order=hue_order,
    dodge=True,
    ax=ax
)
ax.legend(bbox_to_anchor=[1,1],title='Construct')
## red '|' markers at mean+std and at mean-std of each replicate
for sign in '+-':
    threshold=f'mean{sign}std'
    sns.pointplot(
        data=data.loc[:,['replicate',threshold]].drop_duplicates(),
        x=threshold,
        y='replicate',
        join=False,
        markers=['|'],
        color='r',
        ax=ax,
    )
ax.set(xlabel='background intensity')
## twin axis that only carries the legend of the thresholds; its own axis is hidden
ax1=ax.twinx()
set_legend_custom(
    ax=ax1,
    legend2param={'mu+/-sigma\nby replicate':'r'},
    marker='|',
    loc=0,
    bbox_to_anchor=[1.9,0.5],
    title="Threshold"
    )
ax1.axis(False)
to_plot(f'{output_dir_path}/plots/dists_filtering_by_backkground_intensity.png')

In [11]:
## summary of the `outlier` flags; presumably the percentage of flagged rows — confirm against `perc_label`
# NOTE(review): `perc_label` is not imported in the import cell of this notebook — confirm where it comes from.
perc_label(data['outlier'])

### Abundance at the marker

In [26]:
## input data
df03=read_table(
    f"{output_dir_path}/*/relocalization/01_combined.tsv",
    drop_index=False,
    colindex='replicate',
    replaces_index=lambda x: Path(x).parent.parent.stem,
    )
df03.head(1)

In [15]:
## keep only the images that were not flagged as background-intensity outliers
_images_kept=df0.log.query(expr="`outlier` == False").loc[:,['replicate','image id']]
df3=(
    df03
    .log.merge(
        right=_images_kept,
        on=['replicate','image id'],
        how='inner',
        )
    # readable labels for the boolean marker-location values
    .replace({'marker location':{False:'non-ER',True:'ER'}})
    )
df3.head(1)

In [52]:
from roux.viz.annot import set_label
from roux.viz.dist import plot_dists

# NOTE(review): `sns` (seaborn) is not imported in the import cell of this
# notebook — confirm that it is in scope before this cell runs.

## plotting parameters; passed to `plot_dists` and, with the data, to `to_plot`
kws_plot=dict(
    x='Abundance',
    hue="Construct",
    y="marker location",
    palette=["#FF6121",'#FF9D6C'],
    colindex=['image id','cell id'],
    axis_cont_lim=[1,2],
)
## table to plot: the identifier columns plus the renamed x, y and hue columns
# NOTE(review): this cell used to read `df3_`, which is never defined in this
# notebook and raised a NameError on a fresh kernel. `df3` (outlier-filtered,
# with ER/non-ER labels) is used instead — confirm that no extra filtering
# step was lost with `df3_`.
data=(
    df3
    .assign(
        **{
           kws_plot['x']: lambda df: df['abundance normalized'],
           kws_plot['hue']: lambda df: df["construct"].map(
                {'CUE4-GFP CUE1-WT':'Cue4-GFP wild-type',
                'CUE4-GFP CUE1-DELTA':r'Cue4-GFP $cue1\Delta$',
                }),
        }
        )
    .loc[:,kws_plot['colindex']+[kws_plot['x'],kws_plot['y'],kws_plot['hue']]]
    )
# category orders follow the order of appearance in the data
kws_plot['order']=data[kws_plot['y']].unique().tolist()
kws_plot['hue_order']=data[kws_plot['hue']].unique().tolist()

begin_plot()
fig,ax=plt.subplots(figsize=[2.25,2.2])
plot_dists(
    data,
    kind='bar',
    ax=ax,
    show_n=False,
    **kws_plot,
)
# sample size: number of unique (image id, cell id) pairs
set_label(
    ax=ax,
    x=1,y=-0.125,
    s=f"n={len(data.loc[:,kws_plot['colindex']].drop_duplicates())}",
)
ax.set(ylabel=None)
ax.legend(bbox_to_anchor=[1,1],title='Construct')
sns.despine(trim=False)
to_plot(
    f'{output_dir_path}/plots/barplot_protein_abundance_comparison_between_constructs_for_ER_nonER_localizations_replicates_combined.pdf',
    data=data,
    kws_plot=kws_plot,
)

In [56]:
## number of distinct cells in each image, averaged over the images
_cells_per_image=data.groupby('image id')['cell id'].nunique()
info_nb(f"Average number of cells in each image {_cells_per_image.mean():.1f}")