## Adaptive thresholding of thymus histocytometry images

Ken Lau pointed out that a simple binary threshold is clearly imperfect. Adaptive thresholding can be done in FIJI, but why do that by hand when I can automate it in Python and OpenCV (after some struggle writing it)?


The logic, I think, is to define a binary mask for each marker and then overlay the masks to find cell types.

Use the `Mesmer_pypi` conda environment.

In [1]:
import numpy as np
import cv2
import os
import matplotlib.pyplot as plt
import tifffile
import pandas as pd
import re

### Functions

In [2]:
def make_adaptive_binary_gate(img_arr, median_kernel_size= 21, adaptive_subtraction= 0, adaptive_kernel_size= 21, morphology_kernel = (15,15)):
    """Build one adaptive-threshold binary gate per marker channel.

    The first channel is skipped (it is treated as DAPI). Every other channel
    is median-blurred, adaptively thresholded with a Gaussian-weighted
    neighbourhood, and then morphologically closed with an elliptical kernel.

    Parameters
    ----------
    img_arr : array-like
        Image stack ordered channel x height x width (the order tifffile
        loads in).
        NOTE(review): OpenCV's adaptiveThreshold expects 8-bit single-channel
        input -- confirm the raw images are uint8.
    median_kernel_size : int
        Aperture size passed to ``cv2.medianBlur`` (OpenCV expects an odd value).
    adaptive_subtraction : number
        Constant passed as ``C`` to ``cv2.adaptiveThreshold``.
    adaptive_kernel_size : int
        Block size passed to ``cv2.adaptiveThreshold`` (OpenCV expects an odd
        value).
    morphology_kernel : tuple of int
        Size of the elliptical structuring element used for the closing step.

    Returns
    -------
    numpy.ndarray
        Stack of shape (channels - 1) x height x width holding the closed
        binary gates, thresholded with a maximum value of 255.

    Raises
    ------
    ValueError
        If ``img_arr`` has no channel besides the first (DAPI) one.
    """
    ## tifffile loads in ch x h x w
    num_channels = np.shape(img_arr)[0]
    if num_channels < 2:
        raise ValueError("img_arr needs at least one marker channel after the DAPI channel; "
                         "got " + str(num_channels) + " channel(s)")

    ## The structuring element does not depend on the channel, so build it once.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, morphology_kernel)

    marker_gates = []
    ## Starting at 1 removes DAPI from the morphological masks.
    for channel_idx in range(1, num_channels):
        channel_img = img_arr[channel_idx, ...]

        ## OpenCV documentation recommended blurring prior to adaptive thresholding
        blurred = cv2.medianBlur(channel_img, median_kernel_size)

        threshold_img = cv2.adaptiveThreshold(blurred,
                                              255,
                                              cv2.ADAPTIVE_THRESH_GAUSSIAN_C, ## I think Gaussian looks significantly better than mean for cortical images.
                                              cv2.THRESH_BINARY,
                                              adaptive_kernel_size,
                                              adaptive_subtraction)

        marker_gates.append(cv2.morphologyEx(threshold_img, cv2.MORPH_CLOSE, kernel))

    ## Stack the 2-D gates along a new leading channel axis.
    return np.stack(marker_gates)

## Lauren wants the individual marker adaptive binary gate files. 
## I could structure this into an ome.tif if that makes sense. 

def make_cell_type_mask(adaptive_binary_img, cell_type):
    """Combine the per-marker binary gates into a mask for one cell type.

    A pixel is positive when its three gate values (one per marker channel)
    equal one of the 0/255 patterns registered for ``cell_type``.

    Parameters
    ----------
    adaptive_binary_img : array-like
        Gate stack of shape 3 x height x width with values 0 or 255.
    cell_type : str
        One of "aDC2_Sirpa", "cDC2", "aDC1", "aDC2_XCR1", "macrophage",
        "monoDC2" or "pDC".

    Returns
    -------
    numpy.ndarray
        float64 array of shape height x width: 1.0 where the pixel matches the
        cell type's gate, 0.0 elsewhere.

    Raises
    ------
    ValueError
        If ``cell_type`` is not one of the hardcoded options, or if the gate
        stack is not 3 x height x width.
    """
    ## Channel gates assume that channel order matches directory names in /stor/scratch/Ehrlich/Users/John/histocytometry/raw_images
        ## See also /stor/work/Ehrlich/Users/John/projects/misc/histocytometry/scripts/scyan_iterative_pipeline.py
        ## No DAPI channel for gates
    gate_patterns = {
        "aDC2_Sirpa": ((255, 0, 255), (255, 255, 255)),
        "cDC2":       ((0, 255, 255),),
        "aDC1":       ((255, 0, 255), (255, 255, 255)),
        "aDC2_XCR1":  ((0, 255, 255),),
        "macrophage": ((255, 0, 255),),
        "monoDC2":    ((255, 255, 255),),
        "pDC":        ((255, 0, 255), (255, 255, 255)),
    }
    if cell_type not in gate_patterns:
        raise ValueError("Cell type: " + cell_type + " doesn't match hardcoded options")

    gates = np.asarray(adaptive_binary_img)
    if gates.ndim != 3 or gates.shape[0] != 3:
        raise ValueError("Expected a 3 x height x width gate stack, got shape " + str(gates.shape))

    img_shape = gates.shape[1:3]
    positive = np.zeros(img_shape, dtype=bool)
    for pattern in gate_patterns[cell_type]:
        ## Compare one channel at a time so only height x width temporaries are
        ## needed, instead of looping over every pixel in Python.
        matches = np.ones(img_shape, dtype=bool)
        for channel_gate, expected in zip(gates, pattern):
            matches &= (channel_gate == expected)
        positive |= matches

    ## Callers scale the mask by 255, so keep the 0.0 / 1.0 float encoding.
    return positive.astype(np.float64)

def np_table(arr):
    """Tabulate the distinct values of ``arr`` and how often each occurs.

    Returns a DataFrame with a "unique_values" column (sorted distinct values,
    as produced by ``np.unique``) and a matching "counts" column.
    """
    values, tallies = np.unique(arr, return_counts=True)
    return pd.DataFrame({"unique_values": values, "counts": tallies})
    

def gate_thymus_histocytometry(img_dir, img_name, cell_type, median_kernel_size= 21, **adaptive_gate_kwargs):
    """Gate one image for one cell type and write the mask to disk.

    Reads ``img_name`` from ``img_dir``, builds the adaptive binary gates,
    combines them into the mask for ``cell_type``, prints a value/count table
    of the mask, and writes it to
    ``<img_dir>/cell_type_<cell_type>/<cell_type>_mask.tif``.

    Parameters
    ----------
    img_dir : str
        Directory holding the image; the output sub-directory is created here.
    img_name : str
        File name of the image inside ``img_dir``.
    cell_type : str
        Cell type passed on to ``make_cell_type_mask``.
    median_kernel_size : int
        Median blur aperture passed on to ``make_adaptive_binary_gate``.
    **adaptive_gate_kwargs
        Any further keyword arguments for ``make_adaptive_binary_gate``
        (e.g. ``adaptive_kernel_size``, ``morphology_kernel``).

    Returns
    -------
    None
    """
    img_arr = tifffile.imread(os.path.join(img_dir, img_name))
    adaptive_binary_img = make_adaptive_binary_gate(img_arr,
                                                    median_kernel_size= median_kernel_size,
                                                    **adaptive_gate_kwargs)
    print("Done with binary mask!")

    cell_type_mask = make_cell_type_mask(adaptive_binary_img, cell_type)
    ## Converting 0 and 1 to 0 and 255 so the mask is visible in an image viewer.
    cell_type_mask = (cell_type_mask * 255).astype("uint8")
    print("Done with cell type mask!")

    print(np_table(cell_type_mask))

    ## Make cell type directory for easier use with MCQuant later.
    ## exist_ok avoids the check-then-create race and is a no-op on reruns.
    out_path = os.path.join(img_dir, "cell_type_" + cell_type)
    os.makedirs(out_path, exist_ok=True)

    tifffile.imwrite(os.path.join(out_path, cell_type + "_mask.tif"), cell_type_mask)

    ## I'll run this through MCquant to get the cell count and area.
    ## I can use the area to filter out garbage "cells" and count more easily than by hand.
        ## There may be an easier way to do this with FIJI.


## Running thymus histocytometry images
### Preparing data

In [8]:
raw_dir  = "/stor/scratch/Ehrlich/Users/John/histocytometry/raw_images"

## os.listdir returns entries in arbitrary order; sort so every run processes
## the images in the same order, and keep only real directories.
img_dirs = [os.path.join(raw_dir, entry)
            for entry in sorted(os.listdir(raw_dir))
            if os.path.isdir(os.path.join(raw_dir, entry))]

## Group the image directories by staining panel. The panel name is matched
## against the directory name only, so parent folders cannot cause false hits.
DAPI_CD63_CD11c_Sirpa_dirs    = [img_dir for img_dir in img_dirs if "DAPI_CD63_CD11c_Sirpa"   in os.path.basename(img_dir)]
DAPI_CD63_CD11c_XCR1_dirs     = [img_dir for img_dir in img_dirs if "DAPI_CD63_CD11c_XCR1"    in os.path.basename(img_dir)]
DAPI_CD63_CD11c_MerTK_dirs    = [img_dir for img_dir in img_dirs if "DAPI_CD63_CD11c_MerTK"   in os.path.basename(img_dir)]
DAPI_CD63_CD11c_CD14_dirs     = [img_dir for img_dir in img_dirs if "DAPI_CD63_CD11c_CD14"    in os.path.basename(img_dir)]
DAPI_CD63_CD11c_SiglecH_dirs  = [img_dir for img_dir in img_dirs if "DAPI_CD63_CD11c_SiglecH" in os.path.basename(img_dir)]

## Testing kernel

In [12]:
## Scratch cell for tuning the kernel sizes on a single image.
## Disabled with `if False` so it does not run with the rest of the notebook.
if False:
    img_arr= tifffile.imread(os.path.join(raw_dir, "40x_Cortex_DAPI_CD63_CD11c_XCR1/sub_image.ome.tif"))
    median_kernel_size= 31
    adaptive_subtraction= 0
    adaptive_kernel_size= 21
    morphology_kernel = (15,15)

    num_channels = np.shape(img_arr)[0]

    ## The structuring element is the same for every channel, so build it once.
    kernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, morphology_kernel)

    threshold_list = []
    ## Starting at 1 removes DAPI from the morphological masks.
    for i in range(1, num_channels):
        channel_img = img_arr[i,...]

        ## OpenCV documentation recommended blurring prior to adaptive thresholding
        blurred = cv2.medianBlur(channel_img, median_kernel_size)
        ## Other smoothing options kept here for reference:
        ##     blurred = cv2.GaussianBlur(blurred, (11,11), 0)
        ##     threshold_img = cv2.bilateralFilter(threshold_img, 9, 10, 10)

        threshold_img = cv2.adaptiveThreshold(blurred,
                                                255,
                                                cv2.ADAPTIVE_THRESH_GAUSSIAN_C, ## I think Gaussian looks significantly better than mean for cortical images.
                                                cv2.THRESH_BINARY,
                                                adaptive_kernel_size,
                                                adaptive_subtraction)

        threshold_list.append(cv2.morphologyEx(threshold_img, cv2.MORPH_CLOSE, kernel))

    ## One figure, created once after the loop, with one panel per marker channel.
    ## squeeze=False keeps `ax` two-dimensional even when there is a single panel.
    fig, ax = plt.subplots(1, len(threshold_list), figsize=(15, 15), squeeze=False)
    for panel, gate in zip(ax[0], threshold_list):
        panel.imshow(gate)

    plt.show()

## Running data

In [9]:
## Gate the Sirpa panel: both aDC2 (Sirpa) and cDC2 masks come from the same images.
for indiv_dir in DAPI_CD63_CD11c_Sirpa_dirs:
    ## Only the "pan" images are gated. Match on the directory name rather than
    ## the whole path, so a parent folder containing "pan" cannot select everything.
    if "pan" in os.path.basename(indiv_dir):
        print(indiv_dir)
        for cell_type in ("aDC2_Sirpa", "cDC2"):
            gate_thymus_histocytometry(img_dir = indiv_dir, img_name = "sub_image.ome.tif", cell_type = cell_type, median_kernel_size= 31)

## I'm going to look at the cell type mask with FIJI. I can quantify the cells and filter based on area and circularity of the cell.

/stor/scratch/Ehrlich/Users/John/histocytometry/raw_images/20x_pan1_DAPI_CD63_CD11c_Sirpa
Done with binary mask!
[0 0 0]
Done with cell type mask!
   unique_values     counts
0              0  172041153
1            255     643841
Done with binary mask!
[0 0 0]
Done with cell type mask!
   unique_values     counts
0              0  172556232
1            255     128762


In [15]:
## Gate the XCR1 panel with the default median kernel size.
for indiv_dir in DAPI_CD63_CD11c_XCR1_dirs:
    ## Only the "pan" images are gated. Match on the directory name rather than
    ## the whole path, so a parent folder containing "pan" cannot select everything.
    if "pan" in os.path.basename(indiv_dir):
        print(indiv_dir)
        gate_thymus_histocytometry(img_dir = indiv_dir, img_name = "sub_image.ome.tif", cell_type = "aDC1")
        ## Open question: is this population also aDC2?
        gate_thymus_histocytometry(img_dir = indiv_dir, img_name = "sub_image.ome.tif", cell_type = "aDC2_XCR1")

/stor/scratch/Ehrlich/Users/John/histocytometry/raw_images/40x_Medulla_DAPI_CD63_CD11c_XCR1
Done with binary mask!
Done with cell type mask!
   unique_values   counts
0              0  3794053
1            255   400251
Done with binary mask!
Done with cell type mask!
   unique_values   counts
0              0  4140488
1            255    53816
/stor/scratch/Ehrlich/Users/John/histocytometry/raw_images/40x_Cortex_DAPI_CD63_CD11c_XCR1
Done with binary mask!
Done with cell type mask!
   unique_values   counts
0              0  3977512
1            255   216792
Done with binary mask!
Done with cell type mask!
   unique_values   counts
0              0  4161557
1            255    32747
/stor/scratch/Ehrlich/Users/John/histocytometry/raw_images/20x_pan1_DAPI_CD63_CD11c_XCR1
Done with binary mask!
Done with cell type mask!
   unique_values    counts
0              0  86479477
1            255   1986706
Done with binary mask!
Done with cell type mask!
   unique_values    counts
0             

In [None]:
## Gate the MerTK panel for macrophages.
for indiv_dir in DAPI_CD63_CD11c_MerTK_dirs:
    ## Only the "pan" images are gated. Match on the directory name rather than
    ## the whole path, so a parent folder containing "pan" cannot select everything.
    if "pan" in os.path.basename(indiv_dir):
        print(indiv_dir)
        gate_thymus_histocytometry(img_dir = indiv_dir, img_name = "sub_image.ome.tif", cell_type = "macrophage")

In [None]:
## Gate the CD14 panel for monoDC2.
for indiv_dir in DAPI_CD63_CD11c_CD14_dirs:
    ## Only the "pan" images are gated. Match on the directory name rather than
    ## the whole path, so a parent folder containing "pan" cannot select everything.
    if "pan" in os.path.basename(indiv_dir):
        print(indiv_dir)
        gate_thymus_histocytometry(img_dir = indiv_dir, img_name = "sub_image.ome.tif", cell_type = "monoDC2")

In [None]:
## Gate the SiglecH panel for pDC.
for indiv_dir in DAPI_CD63_CD11c_SiglecH_dirs:
    ## Only the "pan" images are gated. Match on the directory name rather than
    ## the whole path, so a parent folder containing "pan" cannot select everything.
    if "pan" in os.path.basename(indiv_dir):
        print(indiv_dir)
        gate_thymus_histocytometry(img_dir = indiv_dir, img_name = "sub_image.ome.tif", cell_type = "pDC")

In [None]:
## Loop through raw dirs
## Loop through cell type directories for each image. 
## Run MCQuant on each image and cell type to extract the data 
## Combine cell type csv files 
## Filter out cells that are below a certain size 
## Overlay the masks onto the original image. 