# Pre-processing the raw images

The channels of the raw images are separated, and the cells are segmented using the DIC images.

In [1]:
## system functions
import sys
from glob import glob
from os.path import basename
from os.path import exists
from os.path import splitext
from pathlib import Path
## data functions
import numpy as np
import pandas as pd
## system functions from roux
from roux.lib.sys import basenamenoext
## visualization functions from roux
from roux.viz.diagram import diagram_nb
## data functions from roux
import roux.lib.dfs as rd # attributes
from roux.lib.io import to_table

sys.path.append('..')

In [2]:
# Notebook parameters. The values below are placeholders that are expected to
# be overridden by whoever executes the notebook.
input_path=None  # directory that contains one sub-directory of raw images per run
output_path=None  # output path of the segmentation step
# Identifier of the imaging run. It is parsed with `int()` to select the file
# layout and is also the name of the run's sub-directory, so it must be
# supplied (e.g. 20231011); `None` is only a placeholder.
run_id=None

separate_channels_nb_path=None  # notebook that separates the channels of an image
segmentation_nb_path=None  # notebook that segments the cells

force=True  # re-run the tasks even if their outputs exist
test=True  # not used by the code visible in this notebook

In [None]:
# inferred parameters
# All outputs are written next to the segmentation output, so the output
# directory is derived from `output_path` (the name `output_dir_path` does not
# exist before this line).
output_dir_path=Path(output_path).parent
# `input_path` is a directory; keep it under an explicit name only.
input_dir_path=input_path; del input_path

In [6]:
# Show an overview of the workflow: raw images -> preprocessing -> images
# without/with markers -> downstream analyses.
# The definition is a regular (non-raw) string, so each `\n` below is turned
# into a real line break inside the node label.
# NOTE(review): the class `ends` is defined on the last line of the definition
# but no node is assigned to it here.
diagram_nb(
"""
    graph LR;
        i1(["Raw images"])
        --> p1[["Preprocessing\n1. Separate the channels\n2. Segmentation by the DIC images"]] 
        --> image1["Images without\nmarkers"] & image2["Images with\nmarkers"]
        image1 --> o1(["Protein abundance change"])
        image2 --> o1 & o2(["Localization change"])
    classDef ends fill:#fff,stroke:#fff
"""
)

## Raw images

### Grouped by the construct ID

In [8]:
# Collect the paths of the raw `.nd2` images of each construct.
# The directory layout and the file names differ between imaging runs, hence
# the branches on the date-like run ID.
run_date=int(run_id)
run_dir_path=f"{input_dir_path}/{run_id}"
if run_date<20231011:
    # one sub-directory per construct
    paths_by_construct={
        "CUE4-GFP CUE1-WT":glob(f"{run_dir_path}/cue4cue1/ER reporter/*.nd2"),
        "CUE4-GFP CUE1-DELTA":glob(f"{run_dir_path}/cue4cue1del//ER reporter/*.nd2"),
    }
elif run_date==20231011:
    # flat directory; the WT pattern also matches the deletion files, so
    # those are filtered out by their file name
    paths_by_construct={
        "CUE4-GFP CUE1-WT":[p for p in glob(f"{run_dir_path}/*cue4cue1*.nd2") if 'del' not in basename(p)],
        "CUE4-GFP CUE1-DELTA":glob(f"{run_dir_path}/*cue4cue1del*.nd2"),
    }
elif run_date in (20231012,20231013):
    # NOTE(review): the WT pattern uses `cue1cue4` while the deletion pattern
    # uses `cue4cue1del` -- confirm that this matches the actual file names.
    paths_by_construct={
        "CUE4-GFP CUE1-WT":[p for p in glob(f"{run_dir_path}/*cue1cue4*.nd2") if 'del' not in basename(p)],
        "CUE4-GFP CUE1-DELTA":glob(f"{run_dir_path}/*cue4cue1del*.nd2"),
    }
else:
    # Fail here instead of leaving `df0` undefined (or stale from an earlier
    # run of the cell) for the cells that follow.
    raise ValueError(f"no file layout is defined for run_id={run_id!r}")

# Long format: one row per image, with the columns `construct` and `path`.
df0=(
    pd.Series(paths_by_construct)
    .to_frame('path')
    .explode('path')
    .rename_axis('construct')
    .reset_index()
)

In [10]:
# Save the table of raw image paths grouped by construct.
# NOTE(review): this file is named `{run_id}_00_paths.tsv`, whereas the
# annotated table is saved later as `{run_id}/00_paths.tsv` -- confirm that
# the two different locations are intended.
to_table(df0,f'{output_dir_path}/{run_id}_00_paths.tsv')

## Identify markers from the file names

In [13]:
def _red_channel(path):
    """Infer the red-channel marker from substrings of the image path."""
    if 'ER' in path:
        return 'ER'
    if 'cyto' in path:
        return 'cytoplasm'
    return np.nan


def _channels_dir_path(path,z):
    """Return the directory for the separated channels of one image.

    The z-slice, when set, is added to the directory name as a
    zero-padded suffix.
    """
    z_suffix=f"_z{z:03}" if z is not None else ''
    return f"{output_dir_path}/{run_id}/{splitext(basename(path))[0]}{z_suffix}_channels/"


# Keep the `.nd2` files, excluding the paths that contain `_wt_` or that match
# `dic.` (a regular expression here, so `.` matches any character).
# The new columns are computed element-wise from `path` (and `z`): a row-wise
# `df.apply(..., axis=1)` does not return a Series when the table is empty,
# which makes `assign` fail.
df0=(df0
    .log.query(expr="~(`path`.str.contains('_wt_')) & ~(`path`.str.contains('dic.')) & `path`.str.endswith('.nd2')")
    .assign(
    **{
        'image id': lambda df: df['path'].apply(basenamenoext),
        'dic_path':None,
        'red_channel':lambda df: df['path'].map(_red_channel),
        # no z-slice is selected; the later columns are evaluated after this
        # one, so `channels path` sees the value set here
        'z': None,
        'output_dir_path':output_dir_path,
        'channels path':lambda df: [_channels_dir_path(p,z) for p,z in zip(df['path'],df['z'])],
      }
    )
)
# The image IDs are used to identify the images, so they must be unique.
df0.loc[:,['image id']].rd.assert_no_dups()
# Every image must belong to a construct.
assert not df0['construct'].isnull().any(), df0['construct'].isnull().sum()

In [15]:
# Save the annotated table of images inside the output directory of the run.
to_table(df0,f'{output_dir_path}/{run_id}/00_paths.tsv')

## Separate the channels from the nd2 files

In [16]:
# One set of parameters per image for the channel-separation notebook:
# the raw image, the output file inside the image's channels directory and
# the z-slice (`None` if it is missing).
parameters_list=[
    dict(
        input_path=image_path,
        output_path=f"{channels_path}/epi_gfp.npy",
        zstack=None if pd.isnull(z) else z,
    )
    for image_path,channels_path,z in zip(df0['path'],df0['channels path'],df0['z'])
]
# number of images to process
len(parameters_list)

In [18]:
# Run the channel-separation notebook once per set of parameters in
# `parameters_list`, i.e. once per image.
from roux.workflow.task import run_tasks
_=run_tasks(
    input_notebook_path=separate_channels_nb_path,
    parameters_list=parameters_list,
    kernel='imaging_nd2',  # kernel name handed to `run_tasks`
    fast=True,  # NOTE(review): presumably runs the tasks in parallel -- confirm
    force=force,
)

## Run segmentation

In [20]:
from roux.workflow.task import run_tasks
# Segment all the separated channel files (`.npy`) of the run in one task.
# The task is skipped if its output exists, unless a re-run is forced.
if not exists(output_path) or force:
    outputs=run_tasks(
        input_notebook_path=segmentation_nb_path,
        parameters_list=[dict(
            input_path=f'{output_dir_path}/{run_id}/*/*.npy',
            output_path=output_path,
        )],
        kernel='yeast_segmentation',
        # forwarded like in the channel-separation step, so that a forced
        # re-run is not skipped by `run_tasks` itself
        force=force,
    )