## Binary gate for cell types

How should I establish the binary gate? What should I use for the threshold? I could just tinker with it. 

In [1]:
## Use Mesmer_pypi conda environment
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import tifffile
import pandas as pd
import re
from deco import synchronized, concurrent

## Functions

In [25]:
def make_adaptive_binary_gate(img_arr, median_kernel_size= 21, adaptive_subtraction= 0, adaptive_kernel_size= 21, morphology_kernel = (15,15)):
    """Build one binary gate per non-DAPI channel using adaptive thresholding.

    Every channel except index 0 is median-blurred, adaptively thresholded
    (Gaussian-weighted neighbourhood, THRESH_BINARY, max value 255) and then
    morphologically closed with an elliptical structuring element.

    Parameters
    ----------
    img_arr : array indexed as (channel, row, col)
        Channel 0 is skipped (treated as DAPI).
        NOTE(review): OpenCV's adaptiveThreshold expects 8-bit single-channel
        input -- confirm the dtype of the images passed in.
    median_kernel_size : int
        Aperture size handed to cv2.medianBlur.
    adaptive_subtraction : number
        Constant `C` handed to cv2.adaptiveThreshold.
    adaptive_kernel_size : int
        Neighbourhood (block) size handed to cv2.adaptiveThreshold.
    morphology_kernel : tuple of int
        Size of the elliptical structuring element used for the closing.

    Returns
    -------
    Array with one gate per processed channel stacked along axis 0, i.e.
    (channels - 1, row, col).
    """
    ## tifffile loads in ch x h x w
    channel_count = np.shape(img_arr)[0]

    gates = []
    ## Starting at 1 drops DAPI from the morphological masks b/c it is not used for this segmentation
    for channel_idx in range(1, channel_count):
        ## OpenCV documentation recommended blurring prior to adaptive thresholding
        smoothed = cv2.medianBlur(img_arr[channel_idx,...], median_kernel_size)

        ## I think Gaussian looks significantly better than mean for cortical images.
        binary = cv2.adaptiveThreshold(
            smoothed,
            255,
            cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
            cv2.THRESH_BINARY,
            adaptive_kernel_size,
            adaptive_subtraction,
        )

        ## Morphological closing with an elliptical element to clean up the thresholded gate
        structuring_element = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, morphology_kernel)
        closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, structuring_element)

        ## Leading channel axis so the per-channel gates can be concatenated
        gates.append(closed[np.newaxis, ...])

    return np.concatenate(gates)


def make_cell_type_mask(adaptive_binary_img, cell_type):
    """Combine the three per-marker binary gates into a single cell-type mask.

    A pixel is positive when its three gate values exactly equal one of the
    value combinations registered for `cell_type`. The comparison is done on
    whole image planes instead of pixel by pixel.

    Parameters
    ----------
    adaptive_binary_img : array indexed as (gate, row, col)
        Must hold exactly three gates (no DAPI), with values 0 / 255.
    cell_type : str
        One of "aDC2_Sirpa", "cDC2", "aDC1", "aDC2_XCR1", "macrophage",
        "monoDC2", "pDC".

    Returns
    -------
    numpy.ndarray of float64 with shape (row, col); 1.0 where the pixel matches
    the cell type's gate combination and 0.0 elsewhere.

    Raises
    ------
    ValueError
        If `cell_type` is not one of the hardcoded options, or if
        `adaptive_binary_img` is not a stack of exactly three 2D gates.
    """
    ## Channel gates assume that channel order matches directory names in /stor/scratch/Ehrlich/Users/John/histocytometry/raw_images
        ## See also /stor/work/Ehrlich/Users/John/projects/misc/histocytometry/scripts/scyan_iterative_pipeline.py
        ## No DAPI channel for gates
    ## Each cell type lists every exact (gate 0, gate 1, gate 2) combination that counts as positive.
    gate_patterns = {
        "aDC2_Sirpa" : ([255, 0, 255], [255, 255, 255]),
        "cDC2"       : ([0, 255, 255],),
        "aDC1"       : ([255, 0, 255], [255, 255, 255]),
        "aDC2_XCR1"  : ([0, 255, 255],),
        "macrophage" : ([255, 255, 255],),  ## CD63+, but lowish
        "monoDC2"    : ([255, 255, 255],),
        "pDC"        : ([255, 0, 255], [255, 255, 255]),
    }

    if cell_type not in gate_patterns:
        raise ValueError("Cell type: " + cell_type + " doesn't match hardcoded options")

    gates = np.asarray(adaptive_binary_img)
    if gates.ndim != 3 or gates.shape[0] != 3:
        raise ValueError("Expected a 3 x h x w stack of binary gates, got shape " + str(gates.shape))

    mask = np.zeros(gates.shape[1:], dtype=bool)
    for pattern in gate_patterns[cell_type]:
        ## Compare one gate plane at a time so temporaries stay at h x w
        matches = np.ones(gates.shape[1:], dtype=bool)
        for gate_plane, expected in zip(gates, pattern):
            matches &= gate_plane == expected
        mask |= matches

    ## Callers multiply the result by 255, so hand back the 0.0 / 1.0 float array
    return mask.astype(np.float64)

def np_table(arr):
    """Tabulate how often each distinct value occurs in `arr`.

    Convenience helper for quick summary stats on images.

    Returns a pandas DataFrame with two columns: "unique_values" (the distinct
    values, in the sorted order np.unique yields them) and "counts" (how many
    times each occurs).
    """
    values, tallies = np.unique(arr, return_counts=True)
    return pd.DataFrame({"unique_values" : values, "counts" : tallies})
    

@concurrent
def gate_thymus_histocytometry(img_dir, img_name, cell_type, median_kernel_size= 21, save_mask= False, downsample_length= 0):
    """Gate one image for a cell type and optionally save the resulting masks.

    Steps: read `img_name` from `img_dir`, optionally crop it, build the
    per-marker binary gates, combine them into a 0 / 255 uint8 cell-type mask,
    and print a value table of that mask.

    Parameters
    ----------
    img_dir : directory containing the image; outputs are written here too.
    img_name : file name of the image to read with tifffile.
    cell_type : cell type key understood by make_cell_type_mask.
    median_kernel_size : forwarded to make_adaptive_binary_gate.
    save_mask : when True, writes "individual_gates_on_image.ome.tif",
        "<cell_type>_mask.tif" and "<cell_type>_on_image.ome.tif" into `img_dir`.
    downsample_length : when > 0, keeps only the first `downsample_length`
        rows and columns of every channel (a top-left crop) before gating.

    Returns None.
    """
    ## I should think about doing kwargs for make_adaptive_binary_img parameters.

    ## Making individual marker binary gates and saving them for later reference.
    image = tifffile.imread(os.path.join(img_dir, img_name))
    if downsample_length > 0:
        image = image[:, :downsample_length, :downsample_length]
    marker_gates = make_adaptive_binary_gate(image, median_kernel_size= median_kernel_size)

    if save_mask:
        ## Original channels followed by one binary gate per marker
        tifffile.imwrite(
            os.path.join(img_dir, "individual_gates_on_image.ome.tif"),
            np.concatenate((image, marker_gates)),
        )
    print("Done with adaptive thresholding!")

    ## Making cell type mask using combined binary mask input
        ## Scaling 0 and 1 up to 0 and 255 for a more dynamic color range.
    cell_type_gate = (make_cell_type_mask(marker_gates, cell_type) * 255).astype("uint8")
    print("Done with cell type mask!")

    print(np_table(cell_type_gate))

    ## Blocking this out for now. I'm not sure this is how I'm going to do it.
    # ## Make cell type directory for easier use with MCQuant later.
    # out_path = os.path.join(img_dir, "cell_type_" + cell_type)
    # if not os.path.exists(out_path):
    #     os.mkdir(out_path)

    if save_mask:
        ## Standalone mask first, then the image with the mask appended as its last channel
        tifffile.imwrite(os.path.join(img_dir, cell_type + "_mask.tif"), cell_type_gate)
        tifffile.imwrite(
            os.path.join(img_dir, cell_type + "_on_image.ome.tif"),
            np.concatenate((image, cell_type_gate[np.newaxis, ...])),
        )
    ## NOTE(review): same text as the message printed after the mask is built
    print("Done with cell type mask!")

        ## I'll run this through MCquant to get the cell count and area
        ## I can use the area to filter out garbage "cells" and count easier than doing it by hand.
            ## There may be an easier way to do this with FIJI.


@synchronized
def parallelize_cell_type_gating(img_dirs, cell_type, save_mask, downsample_length= 0):
    ## Runs gate_thymus_histocytometry once per directory in img_dirs for a single cell type.
    ##   img_dirs          : iterable of image directories; each must contain "reordered_image.ome.tif"
    ##   cell_type         : cell type key forwarded to gate_thymus_histocytometry
    ##   save_mask         : forwarded as-is; controls whether the mask files are written
    ##   downsample_length : forwarded as-is; > 0 crops each image before gating
    ## The image file name and median_kernel_size (21) are fixed here rather than exposed as parameters.
    ## NOTE(review): the @synchronized / @concurrent pairing comes from the deco package; keep the call
    ## below as a plain statement inside the loop unless deco's documentation says otherwise.
    ## Do I want to do thresholding for this? I'm not sure how to implement that. 
    for img_dir in img_dirs:
        gate_thymus_histocytometry(
            img_dir            = img_dir, 
            img_name           = "reordered_image.ome.tif", 
            cell_type          = cell_type, 
            median_kernel_size = 21,  ## 31
            save_mask          = save_mask,
            downsample_length  = downsample_length
        )

@concurrent
def pixel_counts(img_dir, cell_type_mask_name, medullary_mask_name, cortical_mask_name, out_name) -> None:
    """Count cell-type-positive pixels inside the medulla and the cortex and save them as a CSV.

    Parameters
    ----------
    img_dir : directory holding the three input TIFFs; the CSV is written here as well.
    cell_type_mask_name : file name of the cell type mask. Either a 2D mask, or a
        (channel, row, col) stack whose LAST channel is the mask -- the layout
        gate_thymus_histocytometry writes to "<cell_type>_on_image.ome.tif".
    medullary_mask_name : file name of the medulla region mask.
    cortical_mask_name : file name of the cortex region mask.
    out_name : file name of the one-row CSV written into `img_dir`.

    The CSV columns are: img_dir, cell_type (the mask file name as passed in),
    medullary_cell_pixels, cortical_cell_pixels (positive-pixel counts per region),
    medulla_pixel_size, cortex_pixel_size (region areas in pixels) and
    cortex_medulla_ratio (NaN when the medulla region is empty).

    Raises
    ------
    ValueError
        If a region mask does not have the same shape as the cell type mask
        (for example when the cell type mask was produced from a cropped image).
    """
    ## Load in data
    cell_type_mask = tifffile.imread(os.path.join(img_dir, cell_type_mask_name))
    ## assumes nonzero pixels mark the tissue region -- TODO confirm against how the region masks are drawn
    medullary_mask = tifffile.imread(os.path.join(img_dir, medullary_mask_name)) > 0
    cortical_mask  = tifffile.imread(os.path.join(img_dir, cortical_mask_name))  > 0

    ## A channel stack carries the cell type mask as its final channel
    if cell_type_mask.ndim == 3:
        cell_type_mask = cell_type_mask[-1]

    for region_name, region_mask in (("medullary", medullary_mask), ("cortical", cortical_mask)):
        if region_mask.shape != cell_type_mask.shape:
            raise ValueError(
                region_name + " mask shape " + str(region_mask.shape)
                + " does not match cell type mask shape " + str(cell_type_mask.shape)
                + " in " + img_dir
            )

    ## Find tissue cells (boolean indexing keeps only the pixels inside each region)
    medullary_cells = cell_type_mask[medullary_mask]
    cortical_cells  = cell_type_mask[cortical_mask]

    ## How many pixels are cell type-specific
    medullary_cell_pixels = int(np.count_nonzero(medullary_cells > 0))
    cortical_cell_pixels  = int(np.count_nonzero(cortical_cells > 0))

    ## Size of the tissue region
    medulla_pixel_size = medullary_cells.size
    cortex_pixel_size  = cortical_cells.size

    ## An empty medulla mask would otherwise divide by zero
    if medulla_pixel_size > 0:
        cortex_medulla_ratio = cortex_pixel_size / medulla_pixel_size
    else:
        cortex_medulla_ratio = float("nan")

    ## Values are wrapped in lists so the all-scalar record becomes a single row
    cell_df = pd.DataFrame({
        "img_dir"               : [img_dir],
        "cell_type"             : [cell_type_mask_name],
        "medullary_cell_pixels" : [medullary_cell_pixels],
        "cortical_cell_pixels"  : [cortical_cell_pixels],
        "medulla_pixel_size"    : [medulla_pixel_size],
        "cortex_pixel_size"     : [cortex_pixel_size],
        "cortex_medulla_ratio"  : [cortex_medulla_ratio]
    })

    cell_df.to_csv(os.path.join(img_dir, out_name),index= False)

    return None

@synchronized
def parallelize_pixel_counts(img_dirs, cell_type) -> None:
    ## Runs pixel_counts once per directory in img_dirs for a single cell type.
    ## File names are fixed by convention: "medulla_mask.tif" and "cortex_mask.tif" are the region masks,
    ## and the result goes to "<cell_type>_pixel_counts.csv" inside each directory.
    ## NOTE(review): the cell type mask passed here is "<cell_type>_on_image.ome.tif", which
    ## gate_thymus_histocytometry writes as the image channels plus the mask as the last channel;
    ## the standalone 2D mask is saved as "<cell_type>_mask.tif". Confirm which file is intended.
    for img_dir in img_dirs: 
        pixel_counts(
            img_dir             = img_dir, 
            cell_type_mask_name = cell_type + "_on_image.ome.tif",
            medullary_mask_name = "medulla_mask.tif", 
            cortical_mask_name  = "cortex_mask.tif",
            out_name            = cell_type + "_pixel_counts.csv" 
        )
    return None

## Preparing data

In [27]:
## Collect every sub-directory of raw_dir whose name ends in _A, _B, _C or _D, in sorted order.
raw_dir  = "/stor/scratch/Ehrlich/Users/John/histocytometry/raw_images/images_2023-08-10"
img_dirs = sorted(
    os.path.join(raw_dir, entry)
    for entry in os.listdir(raw_dir)
    if re.search("_[A-D]$", entry)
)


def _dirs_for_panel(panel):
    """Return the entries of img_dirs whose path contains the staining panel name."""
    return [path for path in img_dirs if re.search(panel, path)]


## One list of image directories per staining panel (panel name = channel order).
DAPI_CD63_CD11c_Sirpa_dirs    = _dirs_for_panel("DAPI_CD63_CD11c_Sirpa")
DAPI_CD63_CD11c_XCR1_dirs     = _dirs_for_panel("DAPI_CD63_CD11c_XCR1")
DAPI_Sirpa_CD63_MerTK_dirs    = _dirs_for_panel("DAPI_Sirpa_CD63_MerTK")
DAPI_Sirpa_CD11c_CD14_dirs    = _dirs_for_panel("DAPI_Sirpa_CD11c_CD14")
DAPI_B220_CD11c_SiglecH_dirs  = _dirs_for_panel("DAPI_B220_CD11c_SiglecH")

## Cell type -> directories of the panel it is gated on. Two panels serve two cell types each.
img_dict = dict(
    aDC2_Sirpa = DAPI_CD63_CD11c_Sirpa_dirs,
    cDC2       = DAPI_CD63_CD11c_Sirpa_dirs,
    aDC1       = DAPI_CD63_CD11c_XCR1_dirs,
    aDC2_XCR1  = DAPI_CD63_CD11c_XCR1_dirs,
    macrophage = DAPI_Sirpa_CD63_MerTK_dirs,
    monoDC2    = DAPI_Sirpa_CD11c_CD14_dirs,
    pDC        = DAPI_B220_CD11c_SiglecH_dirs,
)

## Running images

In [28]:
## Gate aDC2 on the CD63 / CD11c / Sirpa panel at full size (downsample_length = 0 means no cropping) and save the masks.
parallelize_cell_type_gating(img_dirs= DAPI_CD63_CD11c_Sirpa_dirs, cell_type= "aDC2_Sirpa", save_mask= True, downsample_length= 0)

## This might be worth moving to a script and scripting everything. (If it works.) 

## I am running a full image dataset to try to see how that works for the parallelize_pixel_counts(). 

## Any image with two cell types gets run twice with the way I've currently written it. 

## I should look at the images and decide if I want to do a morphology manipulation to get the actual DCs. 
## I could think about blurring the initial images with something like a median blur. 


## What about instead of quantifying the number of actual cells, I just count the number of triple-positive pixels? 
    ## I almost prefer that methodology. It's so much simpler. 
    ## This would look like saving a mask for the cell type, segmenting that mask into medulla and cortex. 
    ## Counting the positive pixels, then plotting the positive pixels. 
    ## Is it a valid assumption that a vast majority of cell type pixels will be part of a real cell? 

Done with adaptive thresholding!
Done with adaptive thresholding!
Done with adaptive thresholding!
Done with adaptive thresholding!
Done with cell type mask!
   unique_values     counts
0              0  135938279
1            255    3413145
Done with cell type mask!


In [None]:
## Count aDC2 pixels per tissue region; needs the aDC2_Sirpa gating outputs and the medulla / cortex masks on disk.
parallelize_pixel_counts(img_dirs= DAPI_CD63_CD11c_Sirpa_dirs, cell_type= "aDC2_Sirpa")

In [None]:
## Gate cDC2 on the same panel, but only on the top-left 7,500 x 7,500 crop of each image.
## NOTE(review): masks saved from a cropped run are smaller than the full image -- check they still line up with the tissue masks.
parallelize_cell_type_gating(img_dirs= DAPI_CD63_CD11c_Sirpa_dirs, cell_type= "cDC2", save_mask= True, downsample_length= 7_500)

In [None]:
## Pixel counts for every cell type on its own panel.
## NOTE(review): this reads "<cell_type>_on_image.ome.tif" for each cell type, but the gating cells for
## aDC1, aDC2_XCR1, macrophage, monoDC2 and pDC appear further down -- run those first on a fresh kernel.
for cell_type, panel_dirs in img_dict.items():
    parallelize_pixel_counts(img_dirs= panel_dirs, cell_type= cell_type)



In [None]:
## Both cell types gated on the CD63 / CD11c / XCR1 panel, one after the other.
for cell_type in ("aDC1", "aDC2_XCR1"):
    parallelize_cell_type_gating(
        img_dirs  = DAPI_CD63_CD11c_XCR1_dirs,
        cell_type = cell_type,
        save_mask = True
    )

In [None]:
## Gate macrophages on the Sirpa / CD63 / MerTK panel and save the masks.
parallelize_cell_type_gating(img_dirs= DAPI_Sirpa_CD63_MerTK_dirs, cell_type= "macrophage", save_mask= True)

In [None]:
## Gate monoDC2 on the Sirpa / CD11c / CD14 panel and save the masks.
parallelize_cell_type_gating(img_dirs= DAPI_Sirpa_CD11c_CD14_dirs, cell_type= "monoDC2", save_mask= True)

In [None]:
## Gate pDC on the B220 / CD11c / SiglecH panel and save the masks.
parallelize_cell_type_gating(img_dirs= DAPI_B220_CD11c_SiglecH_dirs, cell_type= "pDC", save_mask= True)