In [2]:
# import sys

# from pathlib import Path

# !{sys.executable} -m pip install -e {Path.cwd().parent}

In [47]:
import shutil
from pathlib import Path #Pathlib treats path as object, posix path uses forward slashes
from tempfile import TemporaryDirectory
from typing import List

import pandas as pd

import imcsegpipe #import the imcsegpipe package
from imcsegpipe.utils import sort_channels_by_mass #import the sort_channels_by_mass function from the utils module of imcsegpipe package


# Preprocessing of IMC data for image segmentation


This script presents the first step of the IMC segmentation pipeline.
 
To get started, please refer to the [Get started guide](https://bodenmillergroup.github.io/ImcSegmentationPipeline/) and to download example data you can run the script `scripts/download_examples.ipynb`.
 
**Requirements for the input data:**
 
We recommend to supply the raw data in form of **one zip archive per acquisition session**.
This zip archive should contain the `.mcd` file and all `.txt` files corresponding to individual acquisitions.
 
To understand the output format, please refer to the [Output](https://bodenmillergroup.github.io/ImcSegmentationPipeline/output.html) documentation.
 
Please raise an issue [here](https://github.com/BodenmillerGroup/ImcSegmentationPipeline/issues) for feedback, bug fixes and feature requests.

# Visualize Files (self-added)
Look at the data structure of panel.csv and text files corresponding to MCD. <br><br>
Text files are generated for `EACH REGION OF INTEREST`. <br><br>
`MCD` files are generated for each TMA slide, where many regions of interest can be defined on a single slide. A single MCD file can hold raw acquisition data for multiple regions of interest<br><br>

`Acquisition` means data corresponding to each region of interest (txt file)

## TXT file
Text files are generated for `EACH REGION OF INTEREST`. <br><br>
Start_push and End_push and pushes duration is related to the laser ablation procedure.<br><br>
`X,Y,Z coordinates` of the pixel being ablated.<br><br>
Measurements for each metal-conjugated antibody.<br><br>
`TXT files can be thought of as backups to extraction acquisition information to generate omeTIFF`

In [48]:
txt1 = pd.read_csv('/Users/bonocheong/Desktop/PhD_Pre-readings/Imaging_Based Mass_Cytometry/raw_MCD/Patient1/Patient1_pos1_1_1.txt', sep="\t")
txt1.head()


Unnamed: 0,Start_push,End_push,Pushes_duration,X,Y,Z,80ArAr(ArAr80Di),Myelope_276((2967))Y89(Y89Di),Histone_126((2979))In113(In113Di),SMA_174((2780))In115(In115Di),...,CD4_2293((2943))Yb171(Yb171Di),CD14_2275((2958))Yb172(Yb172Di),E-Cadhe_103((2959))Yb173(Yb173Di),CD303_2313((2952))Yb174(Yb174Di),CD206(M_324((2960))Lu175(Lu175Di),cleaved_198((2944))Yb176(Yb176Di),DNA1(Ir191Di),DNA2(Ir193Di),196Pt(Pt196Di),206Pb(Pb206Di)
0,3110,3301,192,0,0,0,8067.781,0.0,21.807,0.0,...,5.252,10.804,32.527,6.101,5.404,0.0,210.031,400.832,0.842,1.828
1,3302,3493,192,1,0,1,7814.213,0.0,36.018,2.0,...,11.583,13.933,47.026,10.102,7.078,0.0,191.442,325.969,1.0,0.0
2,3494,3685,192,2,0,2,8095.52,0.0,21.466,0.0,...,12.08,33.977,105.607,5.602,1.136,3.183,76.944,132.338,0.0,0.0
3,3686,3877,192,3,0,3,8109.541,0.0,11.26,1.0,...,12.397,49.825,161.894,12.069,6.71,2.039,58.817,114.123,0.0,0.0
4,3878,4069,192,4,0,4,7973.865,1.764,2.515,0.0,...,23.239,63.409,135.412,6.834,24.27,0.0,63.15,126.677,0.0,1.0


# Panel file
Panel file contains information of `antibodies` used in the experiment.

In [49]:
panel_view = pd.read_csv("/Users/bonocheong/Desktop/PhD_Pre-readings/Imaging_Based Mass_Cytometry/raw_MCD/panel.csv")
panel_view.head()


Unnamed: 0,Tube Number,Metal Tag,Target,Clean_Target,Antibody Clone,Stock Concentration,Final Concentration / Dilution,uL to add,full,ilastik,deepcell
0,0,Ir191,Iridium,DNA1,Ir,,,,1,1,1.0
1,0,Ir193,Iridium,DNA2,Ir,,,,1,1,1.0
2,2101,Y89,Myeloperoxidase MPO,MPO,Polyclonal MPO,500.0,4 ug/mL,0.8,1,0,
3,2113,In113,Histone H3,HistoneH3,D1H2,500.0,1 ug/mL,0.2,1,1,1.0
4,1914,In115,SMA,SMA,1A4,500.0,0.25 ug/mL,0.05,1,0,


`Markers used for training of a machine learning classifier in Ilastik.`<br><br>
H3, DNA1, DNA2, CD163, CD20, CD8a, Ecad, CD3.<br><br>
These are used to generate a probability map of pixels as either being nucleus, cytoplasm, or background compartment.

In [50]:
panel_view[panel_view['ilastik']==1]

Unnamed: 0,Tube Number,Metal Tag,Target,Clean_Target,Antibody Clone,Stock Concentration,Final Concentration / Dilution,uL to add,full,ilastik,deepcell
0,0,Ir191,Iridium,DNA1,Ir,,,,1,1,1.0
1,0,Ir193,Iridium,DNA2,Ir,,,,1,1,1.0
3,2113,In113,Histone H3,HistoneH3,D1H2,500.0,1 ug/mL,0.2,1,1,1.0
12,2081,Sm147,CD163,CD163,EDHu-1,500.0,4.5 ug/mL,0.9,1,1,2.0
15,2114,Sm149,CD20,CD20,L26,500.0,4 ug/mL,0.8,1,1,2.0
34,2091,Ho165,CD8a,CD8a,C8/144B,500.0,2 ug/mL,0.4,1,1,2.0
44,2093,Yb173,E-Cadherin / P-Cadherin,Ecad,36/E-Cadherin,500.0,1.5 ug/mL,0.3,1,1,2.0
51,2075,Sm152,CD3,CD3,polyclonal_A0452,500.0,5 ug/mL,1.0,1,1,2.0


`The proteins detected by the antibody panel. And the antibody concentration for each target.`

In [51]:
panel_view[panel_view['full']==0]

Unnamed: 0,Tube Number,Metal Tag,Target,Clean_Target,Antibody Clone,Stock Concentration,Final Concentration / Dilution,uL to add,full,ilastik,deepcell
9,1893,Nd145,CD15,CD15,HI98,500.0,4 ug/mL,0.8,0,0,
11,1651,Nd146,CD45RA,CD45RA,HI100,500.0,4 ug/mL,0.8,0,0,
14,1890,Nd148,Beta-2 Microglobulin,B2M,D8P1H,500.0,5 ug/mL,1,0,0,
16,1813,Sm149,CD20,CD20,L26,500.0,4 ug/mL,0.8,0,0,
20,1865,Eu153,LAG-3,LAG33,D2G4O,500.0,7 ug/mL,14.000.000.000.000.000,0,0,
22,2098,Sm154,CD11c,CD11c,D3V1E,500.0,5 ug/mL,1,0,0,
29,1563,Dy161,TCF1/TCF7,TCF7,C63D9,500.0,2.5 ug/mL,0.5,0,0,
33,1922,Dy164,CD278 (ICOS),ICOS,D1K2T,500.0,5 ug/mL,1,0,0,
36,1796,Er166,Carbonic Anhydrase IX,CarbonicAnhydrase,polyclonal_CA9_AF2188,500.0,3 ug/mL,0.6,0,0,
43,1556,Yb172,CD14,CD14,SP192,500.0,2 ug/mL,0.4,0,0,


`The metal tags used to conjugate antibodies and their concentrations`

In [52]:
panel_view[panel_view['Metal Tag'].duplicated(keep="first")][['Metal Tag','Target','Clean_Target','Final Concentration / Dilution']]

Unnamed: 0,Metal Tag,Target,Clean_Target,Final Concentration / Dilution
9,Nd145,CD15,CD15,4 ug/mL
11,Nd146,CD45RA,CD45RA,4 ug/mL
14,Nd148,Beta-2 Microglobulin,B2M,5 ug/mL
16,Sm149,CD20,CD20,4 ug/mL
20,Eu153,LAG-3,LAG33,7 ug/mL
22,Sm154,CD11c,CD11c,5 ug/mL
29,Dy161,TCF1/TCF7,TCF7,2.5 ug/mL
33,Dy164,CD278 (ICOS),ICOS,5 ug/mL
36,Er166,Carbonic Anhydrase IX,CarbonicAnhydrase,3 ug/mL
43,Yb172,CD14,CD14,2 ug/mL


## Specify the inputs

Here, you will need to specify where the IMC raw data (in form of `.zip` archives) are stored.
The `raw_dirs` variable describes the path (one or multiple) where the `.zip` archives are located.
Here, we use the example data (located in the `raw` folder) to run the pre-processing part of the pipeline.
The `file_regex` variable specifies a [glob](https://towardsdatascience.com/the-python-glob-module-47d82f4cbd2d) entry to select all files of interest from the input directory.
As an example: if you want to select all files that contain the word "Patient", you would use the glob expression `"*Patient*.zip"`.
 
You will also need to specify the location of the panel file (`panel_file`) that contains information regarding the column that contains the metal/channel name (`panel_channel_col`), the column that contains an identifier if the channel should be used for ilastik training (`panel_ilastik_col`), and the column that contains an identifier if the channel should be used to generate the final stack of channels (`panel_keep_col`). The latter two arguments specify columns which contain 0s or 1s, 1 meaning the indicated channel is used and 0 meaning the channel is not used.

In [65]:
# the paths with the ziped acquisition files
raw_dirs = ["/Users/bonocheong/Desktop/PhD_Pre-readings/Imaging_Based Mass_Cytometry/raw_MCD"]
raw_dirs = [Path(raw_dir) for raw_dir in raw_dirs]

# regular expression to select files
file_regex = "*Patient*.zip"

# panel information
panel_file = "/Users/bonocheong/Desktop/PhD_Pre-readings/Imaging_Based Mass_Cytometry/raw_MCD/panel.csv"
panel_channel_col = "Metal Tag"
panel_keep_col = "full"
panel_ilastik_col = "ilastik"

## Specify the outputs

You will need to specify a single folder where the output files of the pipeline are written out to (`work_dir`).
Within the working directory, the following sub-folder will be created:

* `acquisitions_dir`: storing individual acquisitions as `.ome.tiff` files, panoramas as `.png` and acquisition metadata (default `analysis/ometiff`)
* `ilastik_dir`: storing multi-channel images in `.tiff` format for ilastik training. The channel order for each image is written out in `.csv` format (default `analysis/ilastik`). Following the CellProfiler pipelines, all files related to the ilastik segmentation approach will be stored here. 
* `crops_dir`: stores image crops for ilastik training after running the first CellProfiler pipeline (default `analysis/crops`)
* `cellprofiler_input_dir`: all files needed for CellProfiler input (default `analysis/cpinp`)
* `cellprofiler_output_dir`: all files written out by CellProfiler (default `analysis/cpout`)
* `histocat_dir`: folders containing single-channel images for histoCAT upload (default `analysis/histocat`)

Within the `cellprofiler_output_dir` three subfolders are created storing the final images:

* `final_images_dir`: stores the hot pixel filtered multi-channel images containing selected channels (default `analysis/cpout/images`)
* `final_masks_dir`: stores the final cell segmentation masks (default `analysis/cpout/masks`)
* `final_probabilities_dir`: stores the downscaled pixel probabilities after ilastik classification (default `analysis/cpout/probabilities`)

In [66]:
# working directory storing all outputs
work_dir = "/Users/bonocheong/Desktop/PhD_Pre-readings/Imaging_Based Mass_Cytometry/ImcSegmentationPipeline/analysis"
work_dir = Path(work_dir)
work_dir.mkdir(exist_ok=True)

# general output directories
acquisitions_dir = work_dir / "ometiff" #pathlib package allows you to perform direct modifications (/ "xxx") to defined path
ilastik_dir = work_dir / "ilastik"
crops_dir = work_dir / "crops"
cellprofiler_input_dir = work_dir / "cpinp"
cellprofiler_output_dir = work_dir / "cpout"
histocat_dir = work_dir / "histocat"

# Final output directories
final_images_dir = cellprofiler_output_dir / "images"
final_masks_dir = cellprofiler_output_dir / "masks"
final_probabilities_dir = cellprofiler_output_dir / "probabilities"

The specified folder will now be created.

In [67]:
acquisitions_dir.mkdir(exist_ok=True)
crops_dir.mkdir(exist_ok=True)
ilastik_dir.mkdir(exist_ok=True)
cellprofiler_input_dir.mkdir(exist_ok=True)
cellprofiler_output_dir.mkdir(exist_ok=True)
histocat_dir.mkdir(exist_ok=True)

final_images_dir.mkdir(exist_ok=True)
final_masks_dir.mkdir(exist_ok=True)
final_probabilities_dir.mkdir(exist_ok=True)

## Convert `.mcd` files to `.ome.tiff` files

In the first step, the `.zip` archives containing `.mcd` files are converted to folders, which contain `.ome.tiff` files, channel metadata files, panoramas and slide overviews. The `.ome.tiff` files can be read in by commercial and open-source software such as `ImageJ` using the BioFormats importer. The `.csv` files contain the order of the channels as well as the antibody names. The `_pano.png` contain the acquired panoramas; the `_slide.png` contains the slide overview. The `_schema.xml` contains metadata regarding the acquisition session.  
At this stage, only `.zip` files specified by `file_regex` will be processed.

In the following chunk, individual acquisition metadata are written out as `acquisition_metadata.csv` file in the `cellprofiler_output_dir` folder. 

In [68]:
temp_dirs: List[TemporaryDirectory] = [] #generates an empty variable called temp_dirs, specifying its type as a list of TemporaryDirectory objects.
'''
TemporaryDirectory can be useful in situations where a program needs to create and store data or files temporarily, 
but does not keep them after program ends.

TemporaryDirectory may be used to store intermediate results during a long computation or to create temp files for processing large datasets.
TemporaryDirectory ensures that these temporary directories are automatically deleted when they are no longer needed.
'''
try:
    for raw_dir in raw_dirs:
        zip_files = list(raw_dir.rglob(file_regex)) #zip_files is a list containing the path to each MCD zip archive
        if len(zip_files) > 0:
            temp_dir = TemporaryDirectory() #Create a temp directory for zip archives and add it to the temp_dirs list
            temp_dirs.append(temp_dir)
            for zip_file in sorted(zip_files): #Changes zip_file list elements into ascending order
                imcsegpipe.extract_zip_file(zip_file, temp_dir.name) #Extract files from zip archive from the path specified by zip_file and store in temp_dir
    acquisition_metadatas = []
    for raw_dir in raw_dirs + [Path(temp_dir.name) for temp_dir in temp_dirs]: #creates a new list that contains all the Path objects from raw_dirs and temp_dirs
        mcd_files = list(raw_dir.rglob("*.mcd")) #returns a list of all Path objects for MCD files in the raw and temp directories
        mcd_files=[(i) for i in mcd_files if not i.stem.startswith('.')] #returns a new list of Path objects with the same .mcd extension as in previous mcd_files variable, but remove hidden files
        if len(mcd_files) > 0:
            txt_files = list(raw_dir.rglob("*.txt")) #store all txt files in a listed Path object
            txt_files=[(i) for i in txt_files if not i.stem.startswith('.')] #similarly remove hidden txt files
            matched_txt_files = imcsegpipe.match_txt_files(mcd_files, txt_files) #check source code. Matches mcd files for EACH PATIENT to corresponding list of txt files based on stem name
            
            '''
            imcsegpipe.extract_mcd_file first creates a directory according to the stem name of each mcd file. 
            Use content manager (with statement) to open each mcd_file with MCDFile function from readimc package imported by the imcsegpipe package
            Each MCDFile object has a schema_xml attribute which will be written out and saved to the acquisition path (ometiff) directly under the stem name of the mcd_file but with _schema_xml extension (ie. Patient1_schema.xml)
            MCDFile object also has a slides attribute. The slides attribute has a subattribute describing the slide ids of the MCD. Usually only one slide id under each MCDFile (ie. Patient1_s0).
            The slide overview for the corresponding slide id is saved from the MCDFile object to an ImageIo object and saved in the name Patient1_s0_slide.png. Here I guess the slide id corresponds to each ROI?
            The slides attribute from MCDFile object also has a subattribute describing the panomaras. Also save each panomara image to Patient1_s0_p1_pano.png, Patient1_s0_p2_pano.png 
            The slide attribute from MCDFile also has a subattribute describing the acquisitions (ROIs). 
            With each acquisition (ROI) of the MCD file, an OME-XML metadata is being generated which includes information such as:
                -image dimensions (XYZ)
                -pixel physical size and units (micrometers)
                -pixel type (unsigned integers, floating points)
                -channel information (number of channels in the image, the name of metal isotopes, bit depth of each channel)
            MCD file is converted to an ome-TIFF and the OME-XML metadata is used to organize and annotate the ome-TIFF image. The resulting ome-TIFF image contains both image and ome-xml meta data.
                

            For each MCD file (define the TMA), generates a schema_xml file, a slide overview, and a series of panoramas images (sticked images).
            Furthermore, for each acquisition (ROI) within the MCD file, generates an OME-XML metadata, an ome-TIFF (via readimc package which imports the xtiff.to_tiff function),and a corresponding CSV file (contains channel name and label) for that acquisition.
            '''
            for mcd_file in mcd_files: 
                acquisition_metadata = imcsegpipe.extract_mcd_file(
                    mcd_file,
                    acquisitions_dir / mcd_file.stem,
                    txt_files=matched_txt_files[mcd_file],
                )
                acquisition_metadatas.append(acquisition_metadata) #Create a list of acquisition metadatas. Each acquisition metadata from OME-XML file is added to the list as a pandas dataframe.
    acquisition_metadata = pd.concat(acquisition_metadatas, copy=False) #Concats all acquisition metadata dataframes into a single metadata dataframe concated by rows
    acquisition_metadata.to_csv(cellprofiler_input_dir / "acquisition_metadata.csv") #The acquisition data comes from OME-XML metadata. 
finally:
    for temp_dir in temp_dirs:
        temp_dir.cleanup()
    del temp_dirs

Here, a copy of the panel file is transferred to the `cellprofiler_output_dir`. 

In [69]:
shutil.copy2(panel_file, cellprofiler_output_dir / "panel.csv")

PosixPath('/Users/bonocheong/Desktop/PhD_Pre-readings/Imaging_Based Mass_Cytometry/ImcSegmentationPipeline/analysis/cpout/panel.csv')

# Converting `ome-TIFF` files to formats that can be read by `Fiji, R, Python, HistoCat`

Convert `.ome.tiff` files to `histoCAT` compatible format

In the next step, we will convert the generated `.ome.tiff` files to a format that [histoCAT](https://bodenmillergroup.github.io/histoCAT/) can read.
For each acquistion (each `.ome.tiff` file), the `export_to_histocat` function call produces one folder that contains single channel tiff files. All channels contained in the `.ome.tiff` files are written out.

In [70]:
for acquisition_dir in acquisitions_dir.glob("[!.]*"): #read folders corresponding to each MCD in ometiff folder and exclude PosixPath that are hidden
    if acquisition_dir.is_dir():
        imcsegpipe.export_to_histocat(acquisition_dir, histocat_dir)

'''
Goes to ometiff folder and read paths with ometiff extension excluding hidden ometiff files
For each ometiff image, uses the imread function from tifffile package to return numpy array containing information about the ometiff image.
Verifies that the returned numpy array of each ometiff image is a 3D array (ie. 47 channels x 600 rows x 600 columns). Essentially 47 stacks each having a 2D dimension image of 600 x 600.
Checks if the returned numpy array has the same number of channels as specified by "acquisition_channel.csv"
Creates a folder in HistoCat directory for each acquisition of each MCD
For each acquisition, splits the stacked channels to generate an individual TIFF image accessible to ImageJ using tifffile imwrite function for each channel.
'''

'\nGoes to ometiff folder and read paths with ometiff extension excluding hidden ometiff files\nFor each ometiff image, uses the imread function from tifffile package to return numpy array containing information about the ometiff image.\nVerifies that the returned numpy array of each ometiff image is a 3D array (ie. 47 channels x 600 rows x 600 columns). Essentially 47 stacks each having a 2D dimension image of 600 x 600.\nChecks if the returned numpy array has the same number of channels as specified by "acquisition_channel.csv"\nCreates a folder in HistoCat directory for each acquisition of each MCD\nFor each acquisition, splits the stacked channels to generate an individual TIFF image accessible to ImageJ using tifffile imwrite function for each channel.\n'

## Generate image stacks for downstream analyses

Next, we will generate two stacks of multi-channel `.tiff` images:

**1. Full stack:** The full stack contains all channels specified by the "1" entries in the `panel_keep_col` column of the panel file. This stack will be later used to measure cell-specific expression features of the selected channels.

**2. Ilastik stack:** The ilastik stack contains all channels specified by the "1" entries in the `panel_ilastik_col` column of the panel file. This stack will be used to perform the ilastik training to generate cell, cytoplasm and background probability masks (see [Ilastik training](https://bodenmillergroup.github.io/ImcSegmentationPipeline/ilastik.html)).

**Of note:** Both image stacks are now by default hot pixel filtered (see below). To write out the raw image data without filtering set `hpf=None`. `Hot pixel is a common artifact detected in IMC image, likely caused by detector abnormalities. These are individual pixels with unusually higher signal intensities (high counts of ions) when compared to surrounding pixels, but does not correlate with biological significance.`

The `create_analysis_stacks` function takes several arguments:

* `acquisition_dir`: specifies the folder containing the `.ome.tiff` files.  
* `analysis_dir`: specifies the folder where the `.tiff` stacks should be stored.  
* `analysis_channels`: specifies the channel names used for the specific image stack.  
* `suffix`: the suffix to be added at the end of the file name.
* `hpf`: single number indicating the threshold for hot pixel filtering (see below). Setting `hpf=None` disables hot pixel filtering. 

**Hot pixel filtering:** Each pixel intensity is compared against the maximum intensity of the 3x3 neighboring pixels. If the difference is larger than `hpf`, the pixel intensity is clipped to the maximum intensity in the 3x3 neighborhood. 

In [71]:
panel: pd.DataFrame = pd.read_csv(panel_file)

'''
FOR EACH ACQUISITION
create_analysis_stacks uses the "analysis_channels (channels we want to preserve) from heaviest weight to lowest" as probes
First reads each ometiff and acquisition_channel_csv from each mcd folder
Finds the index of the desired analysis channels from the acquisition_channel_csv file
Subsets the stacks (dimensions) from each ometiff image to generate an analysis stacked iamge
For the analysis stacked image, replacing hot pixels exceeding threshold ion counts for each metal with the maximum value in it's 3x3 neighborhood
Stores the analysis stacked image as tiff for ImageJ with suffix (full or ilastik)
Generates a new analysis_channel_csv file for that acquisition containing the names of the channels used for analysis.

Within the analysis_dir, there will be an analysis_channel_csv file and an analysis stacked TIFF image for each acquisition from each MCD (slide). 
'''

for acquisition_dir in acquisitions_dir.glob("[!.]*"):
    if acquisition_dir.is_dir():
        # Write full stack
        imcsegpipe.create_analysis_stacks(
            acquisition_dir=acquisition_dir,
            analysis_dir=final_images_dir,
            analysis_channels=sort_channels_by_mass(
                panel.loc[panel[panel_keep_col] == 1, panel_channel_col].tolist()
            ),
            suffix="_full",
            hpf=50.0, #Pixels with ion counts for a metal above 50 will be replaced by the maximum value in it's 3x3 neighborhood
        )
        # Write ilastik stack
        imcsegpipe.create_analysis_stacks(
            acquisition_dir=acquisition_dir,
            analysis_dir=ilastik_dir,
            analysis_channels=sort_channels_by_mass(
                panel.loc[panel[panel_ilastik_col] == 1, panel_channel_col].tolist() #loc uses labels and booleans to subset data frome dataframe
            ),
            suffix="_ilastik",
            hpf=50.0,
        )

## Export additional metadata

Finally, we will copy a file that contains the correct order of channels for the exported full stacks to the `cellprofiler_input_dir`.

In [11]:
first_channel_order_file = next(final_images_dir.glob("[!.]*_full.csv"))
shutil.copy2(first_channel_order_file, cellprofiler_input_dir / "full_channelmeta.csv")

PosixPath('../analysis/cpinp/full_channelmeta.csv')

We will also generate channel metadata for the probability stack (see [Ilastik training](https://bodenmillergroup.github.io/ImcSegmentationPipeline/ilastik.html)).

In [12]:
probab_meta = ["CellCenter", "CellBorder", "Background"]
with open(cellprofiler_input_dir / "probab_channelmeta_manual.csv", "w") as f:
    f.write("\n".join(probab_meta))

This concludes the pre-processing of the raw image files. In [the next step](https://bodenmillergroup.github.io/ImcSegmentationPipeline/ilastik.html), we will prepare the images for ilastik pixel classification.

## Generate the histocat folder with masks (optional)

This function can be used to convert the `.ome.tiff` files together with the mask files, which are generated in the [segmentation step](https://bodenmillergroup.github.io/ImcSegmentationPipeline/segmentation.html) to a format that is recognized by the `histoCAT` software. To use the function you will need to remove `#` from the following code chunk.

In [12]:
#for acquisition_dir in acquisitions_dir.glob("[!.]*"):
#    if acquisition_dir.is_dir():
#        imcsegpipe.export_to_histocat(
#            acquisition_dir, histocat_dir, mask_dir=final_masks_dir
#        )

In [13]:
!conda list

# packages in environment at /Users/nils/opt/anaconda3/envs/imcsegpipe:
#
# Name                    Version                   Build  Channel
aiofiles                  22.1.0             pyhd8ed1ab_0    conda-forge
aiosqlite                 0.18.0             pyhd8ed1ab_0    conda-forge
anyio                     3.6.2              pyhd8ed1ab_0    conda-forge
appnope                   0.1.3              pyhd8ed1ab_0    conda-forge
argon2-cffi               21.3.0             pyhd8ed1ab_0    conda-forge
argon2-cffi-bindings      21.2.0           py39ha30fb19_3    conda-forge
asttokens                 2.2.1              pyhd8ed1ab_0    conda-forge
attrs                     22.2.0             pyh71513ae_0    conda-forge
babel                     2.12.1             pyhd8ed1ab_1    conda-forge
backcall                  0.2.0              pyh9f0ad1d_0    conda-forge
backports                 1.0                pyhd8ed1ab_3    conda-forge
backports.functools_lru_cache 1.6.4              pyhd8ed