# Calculation of the single-cell abundances

Single-cell abundances and the abundances at the marker locations were quantified using `htsimaging` [@Dandage2023-sm].

In [1]:
## logging functions
import logging
## system functions
import sys
from os.path import basename
from os.path import dirname
from os.path import exists
from os.path import splitext
from pathlib import Path
## data functions
import numpy as np
import pandas as pd
## system functions from roux
from roux.lib.io import backup
from roux.lib.io import read_dict
from roux.lib.io import read_table
from roux.lib.io import to_table
from roux.lib.sys import basenamenoext
## workflow functions from roux
from roux.workflow.log import print_parameters
from roux.workflow.task import run_tasks
## visualization functions from roux
from roux.viz.diagram import diagram_nb
## data functions from roux
import roux.lib.dfs as rd # attributes
sys.path.append('..')

In [3]:
## parameters of the notebook; the `None` values have to be provided before running
input_path=None # path of the input table, read below with `read_table`
output_path=None # path of the final combined table; the output directory is inferred as its grandparent

channel_segmentation=None # channel name used to build the `{channel}.npy.segmented.tif` paths
channel_intensity=None # channel name used to build the `{channel}.npy` paths

## notebooks that are run as tasks; example locations are given in the trailing comments
script_protein_abundance_by_single_cell_path=None #'htsimaging/examples/protein_abundance_and_normalization.ipynb'
script_protein_abundance_by_marker_location_path=None #'htsimaging/examples/protein_abundance_by_marker_location.ipynb'


## settings for the abundance-by-marker-location step
marker_intensity_min_quantile=0.975
pixels_per_cell_min=100 # presumably the minimum number of pixels per cell -- TODO confirm
non_marker_intensity_quantile_off=0.02

kernel='parlocred' # default kernel

force=False # if True, the output directory is backed up before the run
test=True # NOTE(review): not referenced in the visible cells

In [91]:
## inferred parameters
# the input directory is the directory that contains the input table
input_dir_path=dirname(input_path)
# the output directory is two levels above the final output table
output_dir_path=str(Path(output_path).parent.parent)
# the input and the output directories are expected to carry the same name
assert basename(output_dir_path)==basename(input_dir_path)

### output
logging.info(f"Output directory: {output_dir_path}")
## backup old files if overwriting (force is True)
if force:
    backup(
        output_dir_path,
        dirname(output_dir_path),
        test=not force,
    )

In [5]:
## overview of the workflow, drawn from the graph definition passed to `diagram_nb`:
## raw images -> preprocessing -> images without/with markers -> abundance/localization change
diagram_nb(
"""
    graph LR;
        i1(["Raw images"])
        --> p1[["Preprocessing\n1. Separate the channels\n2. Segmentation by the DIC images"]] 
        --> image1["Images without\nmarkers"] & image2["Images with\nmarkers"]
        image1 --> o1(["Protein abundance change"])
        image2 --> o1 & o2(["Localization change"])
    classDef ends fill:#fff,stroke:#fff
"""
)

## Protein abundance by single cells 

In [6]:
## read the input table; its `path`, `channels path`, `construct`, `red_channel` and `z` columns are used below
df0=read_table(input_path)
df0.head(1)

In [7]:
## derive the image ids and the paths of the pre-processed files of each image.
## The columns are derived column-wise (`Series.map`) rather than with a row-wise
## `DataFrame.apply(axis=1)`, which returns a frame instead of a column on empty input.
df1=(df0
    ## sorted by the path, so that the images are numbered in a stable order later
    .sort_values('path')
    .assign(
        **{
            'image id': lambda df: df['path'].map(basenamenoext),
            ## literal replacement of the directory name, hence `regex=False`
            'channels path': lambda df: df['channels path'].str.replace('/data/','/pre_processed/',regex=False),
            'gfp path' : lambda df: df['channels path'].map(lambda p: f"{p}/{channel_intensity}.npy"),
            'segmented path' : lambda df: df['channels path'].map(lambda p: f"{p}/{channel_segmentation}.npy.segmented.tif"),
            ## flags used in the next step to keep the images that have both files
            'gfp path exists' : lambda df: df['gfp path'].map(exists),
            'segmented path exists' : lambda df: df['segmented path'].map(exists),
          },
    )
     .log()
)
df1.head(1)

In [10]:
## save the table of the derived paths, including the flags of the missing files
to_table(df1,f'{output_dir_path}/00_paths.tsv')

### Set Image #

In [15]:
## keep the images for which both the files exist, and number them from 1 within each construct
df1=(
    df1
    .log.query(expr="`gfp path exists` == True & `segmented path exists` == True ")
    .assign(
        **{
            "image #": lambda frame: (
                frame
                .groupby('construct')['path']
                .transform(lambda paths: np.arange(1,len(paths)+1))
            ),
        }
    )
    .rd.clean()
    .log('gfp path')
)
df1.head(1)

In [18]:
## one set of parameters per image, for the single-cell abundance notebook.
## Built directly from the columns: one entry per row, also if the index
## contains duplicates (a round trip through `Series.to_dict` would collapse those).
parameters_list=[
    dict(
        input_path=gfp_path,
        segmented_image_path=segmented_path,
        output_path=f"{output_dir_path}/{image_id}_channels/01_gfpby_cell.pqt",
    )
    for gfp_path,segmented_path,image_id in zip(
        df1['gfp path'],df1['segmented path'],df1['image id'],
    )
]
## preview the first set of parameters
print_parameters(parameters_list[0])

In [24]:
## submit the parameter sets to the single-cell abundance notebook
## (`run_tasks` is imported in the import cell at the top of the notebook)
## NOTE(review): the kernel is hard-coded here, whereas the marker-location run
## further below uses the `kernel` and `force` parameters -- confirm that this is intended
outputs=run_tasks(
    input_notebook_path=script_protein_abundance_by_single_cell_path,
    kernel='htsimaging',
    parameters_list=parameters_list,
    fast=True,
)

### Filter images based on possible misalignments between channels

In [25]:
## the stats saved next to each output (`*_stats.json`), keyed by the name of the
## output's directory without the `_channels` suffix
misaligned_fractions={
    basename(dirname(params['output_path'])).replace('_channels',''): read_dict(
        f"{splitext(params['output_path'])[0]}_stats.json"
    )
    for params in parameters_list
}

In [27]:
## table of the stats with one row per image, flagging the images to be dropped
df_=(
    pd.DataFrame(misaligned_fractions)
    .T
    ## intensity of the misaligned area relative to the background
    .assign(
        misaligned_area_intensity_norm=lambda stats: stats['misaligned_area_intensity']/stats['background_intensity'],
    )
    ## an image is dropped if either of the two measures exceeds its cutoff
    .assign(
        drop=lambda stats: (stats['misaligned_area_intensity_norm']>1.1) | (stats['misaligned_fraction']>0.9),
    )
    .sort_values(
        by=['misaligned_fraction','misaligned_area_intensity_norm'],
        ascending=False,
    )
    .rename_axis(['image id'],axis=0)
    .reset_index()
)
df_.head(1)

In [28]:
## the two misalignment measures of each image, colored by the `drop` flag
## NOTE(review): `sns` is not imported in the import cell -- presumably `import seaborn as sns`; confirm
ax=sns.scatterplot(
    data=df_,
    x='misaligned_fraction',
    y='misaligned_area_intensity_norm',
    hue='drop',
)
ax.legend(
    title='drop image',
    bbox_to_anchor=[1,1],
)

In [29]:
## filter
## keep only the images that were not flagged in the `drop` column
df_=df_.log.query(expr="`drop` == False")

## NOTE(review): this previews `df0`, not the just-filtered `df_` -- confirm which one was intended
df0.head(1)

In [32]:
## get the metainfo of the filtered images
df0_=(df0
    .assign(
        **{
            'image id': lambda df: df['path'].apply(basenamenoext),
            ## images are numbered from 1 within each construct and marker.
            ## `dropna=False` keeps the images with a missing `red_channel` (the images
            ## without markers) in a group of their own; by default such rows are
            ## excluded from the groups and would be left without a number.
            ## NOTE(review): unlike the earlier numbering, the rows are not sorted by `path` here -- confirm
            "image #": lambda df: df.groupby(['construct','red_channel'],dropna=False).cumcount()+1,
        }
    )
    .loc[:,['image id','construct','red_channel','image #','z']]
    ## keep the images that passed the misalignment filter
    .log.merge(
        right=df_.loc[:,['image id']],
        how='inner',
        on='image id',
        validate="1:1",
    )
)
df0_.head()

In [33]:
## save the metainfo of the images retained after the misalignment filter
to_table(
    df0_,
    f'{output_dir_path}/01_filteredby_misalign.tsv',
)

## Combining protein abundances

In [34]:
## read the single cell wise protein abundances of the retained images;
## the `image id` is recovered from the name of the directory of each file
df01=read_table(
    [
        f"{output_dir_path}/{image_id}_channels/01_gfpby_cell.pqt"
        for image_id in df0_['image id']
    ],
    colindex='image id',
    replaces_index=lambda path: basename(dirname(path)).replace('_channels',''),
    drop_index=False,
)
df01.head(1)

In [35]:
## merge with metainfo: one row of metainfo per image, many rows of abundances per image
df2=pd.merge(
    left=df0_,
    right=df01,
    on='image id',
    how='inner',
    validate="1:m",
)
df2.head(1)

In [36]:
## remove cells without signal
## i.e. keep the rows with `abundance normalized` above 1
df3=df2.log.query(expr='`abundance normalized` > 1')

In [38]:
## save the combined single-cell abundances
to_table(
    df3,
    f'{output_dir_path}/relative_abundance_change/01_combined.tsv',
)

## Protein abundance at the marker (~relocalization)

In [41]:
## the table of the images retained after the misalignment filter, saved above
_input_path=f'{output_dir_path}/01_filteredby_misalign.tsv'

In [42]:
## annotations
## i.e. the metainfo of the retained images (`image id`, `construct`, `red_channel`, `image #`, `z`)
df00=read_table(_input_path)
df00.head(1)

### Protein abundance by marker's localizations

In [48]:
## get the images with markers
## i.e. the rows with a non-missing `red_channel`, with the directory of their channels
df0=(
    df00
    .log.query(expr='~(`red_channel`.isnull())')
    .assign(
        **{
            'channels path': lambda frame: frame['image id'].map(
                lambda image_id: f'{input_dir_path}/{image_id}_channels/'
            ),
        }
    )
)
df0.head(1)

In [49]:
## add the inputs, the settings and the output path of the marker-location step, per image.
## The columns are derived column-wise (`Series.map`) rather than with a row-wise
## `DataFrame.apply(axis=1)`, which returns a frame instead of a column on empty input.
df1=(df0
    .assign(
        ## input files
        input_path=lambda df: df['channels path'].map(lambda p: p+'/epi_red.npy'),
        image_intensity_path=lambda df: df['channels path'].map(lambda p: f"{p}/{channel_intensity}.npy"),
        regions_path=lambda df: df['image id'].map(lambda i: f"{output_dir_path}/{i}_channels/01_regions_filtered.npy"),

        ## settings, same for all the images
        marker_intensity_min_quantile=marker_intensity_min_quantile,
        pixels_per_cell_min=pixels_per_cell_min,
        non_marker_intensity_quantile_off=non_marker_intensity_quantile_off,

        ## background intensity, read from the stats saved by the single-cell step
        background_intensity=lambda df: df['image id'].map(
            lambda i: read_dict(f"{output_dir_path}/{i}_channels/01_gfpby_cell_stats.json")['background_intensity']
        ),
        output_path=lambda df: df['image id'].map(lambda i: f"{output_dir_path}/{i}_channels/02_gfpby_marker.pqt"),
    )
)
df1.head(1)

In [50]:
## make parameters
## one dictionary per image. `non_marker_intensity_quantile_off` is a parameter of this
## notebook and a column of `df1`; it is forwarded here, so that a non-default value
## is not silently ignored by the task.
parameters_list=(df1
    .loc[:,[
        'input_path','image_intensity_path','regions_path',
        'marker_intensity_min_quantile','pixels_per_cell_min','non_marker_intensity_quantile_off',
        'background_intensity',
        'output_path',
    ]]
    .to_dict(orient='records')
)
## filter parameters
## i.e. keep the images for which the file of the regions exists; the counts are logged before and after
logging.info(len(parameters_list))
parameters_list=[d for d in parameters_list if exists(d['regions_path'])]
logging.info(len(parameters_list))
parameters_list[:1]

In [52]:
## submit the parameter sets to the abundance-by-marker-location notebook
## (`run_tasks` is imported in the import cell at the top of the notebook)
outputs=run_tasks(
    input_notebook_path=script_protein_abundance_by_marker_location_path,
    parameters_list=parameters_list,
    kernel=kernel,
    force=force,
    fast=True,
)

In [53]:
## read the outputs of the marker-location step.
## Only the outputs that were submitted are read: the rows of `df1` without a file of
## regions were filtered out of `parameters_list`, so no output was requested for them.
df01=(df1
    .loc[df1['output_path'].isin([d['output_path'] for d in parameters_list]),:]
    .groupby('output_path')
    ## `df.name` is the key of the group, i.e. the path of the output
    .apply(lambda df: read_table(df.name))
    .reset_index(0)
    .rd.clean()
)
df01.head(1)

In [54]:
## mapping it to the metainfo
## one row of metainfo per output path, many rows of results per output path
df2=df1.log.merge(
    right=df01,
    how='inner',
    on='output_path',
    validate="1:m",
    )
df2.head(1)

In [55]:
## save the combined table at the path given by the `output_path` parameter
to_table(
    df2,
    output_path,
)

In [56]:
## remove the variables whose names start with `df` from the namespace, without asking for confirmation
%reset_selective -f "^df.*"

In [57]:
## read back the table that was saved above, to check that it can be loaded.
## The `output_path` parameter is used, instead of repeating a hard-coded file name
## that would fail if the output was saved under a different name.
df2=read_table(output_path)
df2.head(1)