### EMTOOLS -- Preprocessing
This notebook preprocesses images for prediction using a DNN trained with Uni-EM. It opens a series of .tiff or .png files in `path_input`, splits each image into square chunks, and applies CLAHE (Contrast Limited Adaptive Histogram Equalization), which enhances the local contrast of images. Each chunk is then downsampled by the factor `ds` and saved as an RGB .png file to `path_results`.

**Author:** Philip Ruthig, Paul Flechsig Institute, Center of Neuropathology and Brain Research Leipzig

**Contact:** philip.ruthig@medizin.uni-leipzig.de // philip.ruthig@gmail.com

**Publication:**
Please contact me if you want to use this code for any publication.

In [None]:
import os
from os import listdir
from os.path import isfile, join

import cv2
import IPython
import matplotlib.pyplot as plt
import numpy as np
import skimage
import skimage.exposure
import skimage.io
import tifffile as tf
import tqdm
from skimage.transform import downscale_local_mean

In [None]:
def better_chunk_generator(img_shape, overlap, resize_chunks=False):
    '''
    Yield the coordinates of square chunks that tile a 2d image, choosing the
    chunk size automatically.

    The largest edge length out of 2048, 1024, 512 and 256 that fits into both
    image axes is used. Chunks are produced row by row; only chunks that lie
    completely inside the image are yielded, so a strip at the bottom/right
    edge is skipped whenever the image is not covered exactly.

    Parameters:
        img_shape: shape of the image; only the first two entries
            (height, width) are used.
        overlap: number of pixels that neighbouring chunks share along each axis.
        resize_chunks: currently unused; kept so existing calls keep working.

    Yields:
        Tuples (y_start, y_end, x_start, x_end).

    Raises:
        ValueError: if `overlap` is not smaller than the selected chunk size
            (the chunk grid could then never advance).
    '''
    sizes = (2048, 1024, 512, 256)  # possible chunk edge lengths, largest first

    height, width = img_shape[0], img_shape[1]
    chunk_edge = next((size for size in sizes if height >= size and width >= size), None)
    if chunk_edge is None:
        # Image is smaller than the smallest chunk: there is nothing to yield.
        return

    step = chunk_edge - overlap
    if step <= 0:
        raise ValueError(
            "overlap (%s) must be smaller than the chunk size (%s)" % (overlap, chunk_edge)
        )

    # The range bounds guarantee that every chunk ends inside the image.
    for y_start in range(0, height - chunk_edge + 1, step):
        for x_start in range(0, width - chunk_edge + 1, step):
            yield (y_start, y_start + chunk_edge, x_start, x_start + chunk_edge)


def chunk_generator(img_shape, chunk_size, overlap, resize_chunks=False):
    '''
    Yield the coordinates of equally sized chunks that tile a 2d image.

    Chunks are produced row by row. Only chunks that lie completely inside the
    image are yielded, so a strip at the bottom/right edge is skipped whenever
    the image is not covered exactly, and an image smaller than one chunk
    yields nothing.

    Parameters:
        img_shape: shape of the image; only the first two entries
            (height, width) are used.
        chunk_size: (height, width) of one chunk.
        overlap: number of pixels that neighbouring chunks share along each axis.
        resize_chunks: currently unused; kept so existing calls keep working.

    Yields:
        Tuples (y_start, y_end, x_start, x_end).

    Raises:
        ValueError: if a chunk dimension is not positive, or if `overlap` is
            not smaller than both chunk dimensions (the chunk grid could then
            never advance).
    '''
    chunk_height, chunk_width = chunk_size[0], chunk_size[1]
    if chunk_height <= 0 or chunk_width <= 0:
        raise ValueError("chunk_size must be positive, got %s" % (tuple(chunk_size),))

    y_step = chunk_height - overlap
    x_step = chunk_width - overlap
    if y_step <= 0 or x_step <= 0:
        raise ValueError(
            "overlap (%s) must be smaller than the chunk size %s" % (overlap, tuple(chunk_size))
        )

    # The range bounds guarantee that every chunk ends inside the image.
    for y_start in range(0, img_shape[0] - chunk_height + 1, y_step):
        for x_start in range(0, img_shape[1] - chunk_width + 1, x_step):
            yield (y_start, y_start + chunk_height, x_start, x_start + chunk_width)

def crop_background(img, background_threshold):
    '''
    Returns a rectangular, background-free region of your 2d image.
    Useful if your images contain grid shadows and you want to exclude them from your analysis.

    A pixel counts as foreground if its value is >= background_threshold.
    Every foreground pixel is tried as top-left corner: the rectangle is first
    grown downwards along the corner's column, then to the right for as long as
    the whole column segment stays foreground. The rectangle with the largest
    area found this way is returned (the first one in row-major order on ties).
    Because the search is greedy (height first, then width), the result is not
    guaranteed to be the largest possible rectangle.

    If no pixel reaches the threshold, an empty (0, 0) slice of img is returned.
    '''
    n_rows, n_cols = img.shape
    foreground = img >= background_threshold  # boolean mask, evaluated once

    best_area = 0
    best_box = (0, 0, 0, 0)  # (top, bottom, left, right), end-exclusive

    for top in range(n_rows):
        for left in range(n_cols):
            # Background pixels cannot be the corner of a region.
            if not foreground[top, left]:
                continue

            # Grow downwards along the corner's column.
            bottom = top
            while bottom < n_rows and foreground[bottom, left]:
                bottom += 1

            # Grow to the right while the full column segment is foreground.
            right = left
            while right < n_cols and foreground[top:bottom, right].all():
                right += 1

            candidate_area = (bottom - top) * (right - left)
            if candidate_area > best_area:
                best_area = candidate_area
                best_box = (top, bottom, left, right)

    top, bottom, left, right = best_box
    return img[top:bottom, left:right]

def remove_whitespaces(string):
    '''
    Returns `string` with every whitespace character (spaces, tabs, newlines, ...) removed.
    '''
    return "".join(char for char in string if not char.isspace())


In [None]:
# User inputs: adjust the values in this cell before running the processing cell below.
# NOTE: the paths are raw strings, so each value ends in two literal backslashes
# (Windows-style separators); file names are appended directly to these strings.
path_input = r"0_raw\\" # folder the input images (.tif/.tiff/.png) are read from
path_results = r"1_preprocessed\\" # folder the preprocessed .png files are written to
ds = 4 # each axis of the image is downsampled with this factor.
bg_crop = False # set to True if your data has black edges which you want to be cropped
background_threshold = 100 # intensity threshold for background - if your image has dark background that you would like to crop, define a threshold for it here. Pixels below this threshold are treated as background by crop_background.

In [None]:
def _save_clahe_chunks(img, image_stem, chunk_edge, ds, path_results):
    '''
    Apply CLAHE to every square chunk of `img`, downsample it and save it as RGB .png.

    Parameters:
        img: image to process (expected to be a 2d grayscale array).
        image_stem: input file name without extension; prefix of the output file names.
        chunk_edge: edge length, in pixels of `img`, of the square chunks.
        ds: downsampling factor applied to each axis of a chunk.
        path_results: folder the .png files are written to.

    Each output file is named `<image_stem>(y_start,y_end,x_start,x_end).png`.
    Only chunks that fit completely into the image are written.
    '''
    coord_list = list(chunk_generator(img.shape, (chunk_edge, chunk_edge), 0))
    if not coord_list:
        print('Image is smaller than one chunk, nothing was saved: ' + image_stem)

    for zyx in tqdm.tqdm(coord_list):
        chunk = img[zyx[0]:zyx[1], zyx[2]:zyx[3]]
        chunk_clahe = skimage.exposure.equalize_adapthist(chunk, clip_limit=0.01, kernel_size=127)
        chunk_downscaled = downscale_local_mean(chunk_clahe, ds)
        # Same grayscale data in the R, G and B channel.
        chunk_rgb = np.dstack((chunk_downscaled, chunk_downscaled, chunk_downscaled))
        out_name = image_stem + remove_whitespaces(str(zyx)) + ".png"
        # CLAHE output is in [0, 1]; scale to 8 bit before saving.
        skimage.io.imsave(join(path_results, out_name), (chunk_rgb*255).astype('uint8'))


path_images = [f for f in listdir(path_input) if isfile(join(path_input, f)) and f != ".gitkeep"]
for image_name in tqdm.tqdm(path_images):
    # splitext handles 4- and 5-character extensions alike (.tif / .tiff)
    image_stem, extension = os.path.splitext(image_name)
    extension = extension.lower()

    if extension in (".tif", ".tiff"):
        test_img = tf.imread(join(path_input, image_name)) # use this for tiff
    elif extension == ".png":
        test_img = cv2.imread(join(path_input, image_name), -1) # use this for png
    else:
        # Skip this file only; the remaining images are still processed.
        print('Input file format not supported. Use .png or .tif. Skipping ' + image_name)
        continue

    if test_img is None:
        # cv2.imread returns None instead of raising when a file cannot be decoded
        print('Could not read image. Skipping ' + image_name)
        continue

    # Cropping only makes sense if some pixels actually lie below the threshold.
    if bg_crop and background_threshold > test_img.min():
        print('cropping background..')
        test_img = crop_background(test_img, background_threshold)

    # Small images (either axis shorter than 2048*ds) are cut into 1024*ds chunks,
    # big images into 2048*ds chunks.
    if min(test_img.shape[0:2]) < 2048*ds:
        chunk_edge = 1024*ds
    else:
        chunk_edge = 2048*ds

    _save_clahe_chunks(test_img, image_stem, chunk_edge, ds, path_results)