### EMTOOLS -- Preprocessing
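This notebook preprocesses images for prediction using a DNN trained with Uni-EM. It opens a series of .tif/.tiff or .png files in `path_input`, downsamples each image by the factor `ds`, and splits it into overlapping square tiles of `batch_size` pixels. Each tile is enhanced with CLAHE (contrast-limited adaptive histogram equalization), which increases the local contrast of the image, and is then saved as an RGB .png file to `path_results`. The tile names, the tile coordinates, and the names and downsampled shapes of the source images are additionally saved as pickle files.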

**Author:** Philip Ruthig, Paul Flechsig Institute, Center of Neuropathology and Brain Research Leipzig

**Contact:** philip.ruthig@medizin.uni-leipzig.de // philip.ruthig@gmail.com

**Publication:**
Please contact me if you want to use this code for any publication.

In [1]:
import pickle
from os import listdir
from os.path import isfile, join, splitext

import cv2
import matplotlib.pyplot as plt
import numpy as np
import scipy.stats as stats
import skimage
import skimage.exposure
import skimage.io
import tifffile as tf
import tqdm
from skimage.transform import downscale_local_mean


In [2]:
def _tile_starts(length, batch_size, stride):
    """Return the start offsets of all tiles along one axis of size `length`."""
    if length < batch_size:
        # Not even one full tile fits along this axis.
        return []
    last_start = length - batch_size
    starts = list(range(0, last_start + 1, stride))
    if starts[-1] != last_start:
        # The regular grid stops short of the border: add one final tile that
        # is shifted back so it ends exactly on the last row/column.
        starts.append(last_start)
    return starts


def batch_generator(image_array, batch_size, overlap):
    """Cut an image into square, overlapping tiles.

    Tiles are taken along the first two axes of `image_array` on a regular
    grid with a step of `batch_size - overlap`. If the grid does not end
    exactly at the image border, one additional tile is placed flush with the
    border (overlapping its neighbour by more than `overlap`), so that no
    rows or columns are dropped.

    Parameters
    ----------
    image_array : numpy.ndarray
        Array with at least two dimensions; axis 0 is treated as rows (y) and
        axis 1 as columns (x).
    batch_size : int
        Edge length of each tile in pixels.
    overlap : int
        Number of pixels shared by neighbouring tiles. Must be smaller than
        `batch_size`.

    Returns
    -------
    img_list : list of numpy.ndarray
        The tiles (slices of `image_array`), ordered row by row.
    yx_list : list of tuple
        One `(y_start, y_end, x_start, x_end)` tuple per tile, in the same
        order as `img_list`.
        Both lists are empty if the image is smaller than `batch_size` along
        either of the first two axes.

    Raises
    ------
    ValueError
        If `batch_size` is not positive or `overlap` is not smaller than
        `batch_size`.
    """
    stride = batch_size - overlap
    if batch_size <= 0 or stride <= 0:
        raise ValueError(
            "batch_size must be positive and overlap must be smaller than batch_size "
            f"(got batch_size={batch_size}, overlap={overlap})"
        )

    row_starts = _tile_starts(image_array.shape[0], batch_size, stride)
    col_starts = _tile_starts(image_array.shape[1], batch_size, stride)

    img_list = []
    yx_list = []
    for i in row_starts:
        for j in col_starts:
            img_list.append(image_array[i:i + batch_size, j:j + batch_size])
            yx_list.append((i, i + batch_size, j, j + batch_size))

    return img_list, yx_list

def pad_image(image_array, batch_size, overlap):
    """Pad an image so that overlapping tiles cover it exactly.

    The first two axes are extended at their end (bottom / right) by mirroring
    the image content (`mode='reflect'`) until each axis has the smallest
    length `batch_size + k * (batch_size - overlap)` (k >= 0) that is not
    shorter than the original axis. Tiles of size `batch_size` taken with a
    step of `batch_size - overlap` then end exactly on the padded border.
    Any further axes (e.g. colour channels) are left unpadded.

    Parameters
    ----------
    image_array : numpy.ndarray
        Array with at least two dimensions.
    batch_size : int
        Edge length of the tiles the padded image will be cut into.
    overlap : int
        Overlap in pixels between neighbouring tiles. Must be smaller than
        `batch_size`.

    Returns
    -------
    numpy.ndarray
        The padded image; the original content sits in the top-left corner.

    Raises
    ------
    ValueError
        If `batch_size` is not positive or `overlap` is not smaller than
        `batch_size`.
    """
    stride = batch_size - overlap
    if batch_size <= 0 or stride <= 0:
        raise ValueError(
            "batch_size must be positive and overlap must be smaller than batch_size "
            f"(got batch_size={batch_size}, overlap={overlap})"
        )

    def _padding_for(length):
        # Number of pixels to append so the axis holds a whole number of strides
        # after the first tile.
        if length <= batch_size:
            return batch_size - length
        extra_steps = -(-(length - batch_size) // stride)  # ceiling division
        return batch_size + extra_steps * stride - length

    pad_rows = _padding_for(image_array.shape[0])
    pad_cols = _padding_for(image_array.shape[1])

    # np.pad expects the amount of padding per axis, not the target size.
    pad_width = [(0, pad_rows), (0, pad_cols)] + [(0, 0)] * (image_array.ndim - 2)
    padded_image = np.pad(image_array, pad_width, mode='reflect')

    return padded_image

def remove_whitespaces(string):
    """Return `string` with every whitespace character removed."""
    # split() without arguments cuts at any run of whitespace, so gluing the
    # pieces back together drops all of it.
    pieces = string.split()
    return "".join(pieces)


In [3]:
# user inputs
# Paths use forward slashes, which Python accepts on Windows, Linux and macOS;
# a backslash-terminated literal such as r"0_raw\\" only resolves on Windows.
path_input = "0_raw/"  # folder containing the raw .tif/.tiff/.png images
path_results = "1_preprocessed/"  # folder the preprocessed .png tiles are written to
ds = 4 # each axis of the image is downsampled by this factor.
batch_size = 2048 # height and width (in pixels) of each square tile cut from the downsampled image.
overlap = 200 # overlap (in pixels) of neighbouring tiles; must be smaller than batch_size.


In [4]:
# Metadata containers filled while the images are tiled:
# file name and coordinates of every saved tile, plus the name and the
# downsampled shape of the image the tiles were cut from.
img_name_list, img_coord_list = [], []
img_original_shape_list, img_original_name_list = [], []

In [5]:
# Start from empty metadata lists so that re-running this cell does not append
# the same tiles a second time.
img_name_list = []
img_coord_list = []
img_original_shape_list = []
img_original_name_list = []

TIFF_SUFFIXES = (".tif", ".tiff")
PNG_SUFFIXES = (".png",)


def _read_image(image_path):
    """Read one input image.

    TIFF files are returned as read by tifffile. PNG files are read with
    OpenCV; multi-channel PNGs are averaged over their first three channels
    to a single grayscale channel of the same dtype. Returns None if the file
    extension is neither TIFF nor PNG (matched case-insensitively).
    """
    lowered = image_path.lower()
    if lowered.endswith(TIFF_SUFFIXES):
        return tf.imread(image_path)
    if lowered.endswith(PNG_SUFFIXES):
        png = cv2.imread(image_path, cv2.IMREAD_UNCHANGED)
        if png is None:
            raise OSError(f"Could not read image file: {image_path}")
        if png.ndim == 2:
            # already single-channel
            return png
        # Average in floating point: adding uint8 channels directly wraps
        # around at 255 and corrupts bright pixels.
        gray = png[:, :, :3].astype(np.float64).mean(axis=2)
        return gray.astype(png.dtype)
    return None


# sorted() makes the processing (and metadata) order reproducible
path_images = sorted(
    f for f in listdir(path_input)
    if isfile(join(path_input, f)) and f != ".gitkeep"
)

for image_name in tqdm.tqdm(path_images, position=0, leave=False):
    try:
        test_img = _read_image(join(path_input, image_name))
        if test_img is None:
            # skip this file only; the remaining images are still processed
            print(f"Skipping {image_name}: input file format not supported. Use .png or .tif.")
            continue

        test_img_ds = downscale_local_mean(test_img, ds)
        test_img_ds_pad = pad_image(test_img_ds, batch_size, overlap)
        img_list, coords = batch_generator(test_img_ds_pad, batch_size, overlap)

        # splitext also handles five-character extensions such as ".tiff"
        image_stem = splitext(image_name)[0]
        tile_names = []
        tiles_with_coords = tqdm.tqdm(zip(img_list, coords), total=len(img_list), position=1, leave=False)
        for tile, coord in tiles_with_coords:
            tile_max = np.max(tile)
            if tile_max > 0:
                # CLAHE on the tile normalised to [0, 1]
                tile_clahe = skimage.exposure.equalize_adapthist(tile / tile_max, clip_limit=0.01)
            else:
                # blank tile: nothing to equalise, and dividing by 0 would give NaNs
                tile_clahe = np.zeros(tile.shape, dtype=np.float64)
            # replicate the grayscale result into R, G and B
            tile_rgb = np.stack((tile_clahe, tile_clahe, tile_clahe), axis=-1)
            tile_name = image_stem + remove_whitespaces(str(coord)) + ".png"
            skimage.io.imsave(join(path_results, tile_name), (tile_rgb * 255).astype('uint8'))
            tile_names.append(tile_name)

        # Commit the metadata only after every tile of this image was written,
        # so the lists stay consistent if an image fails halfway through.
        img_name_list.extend(tile_names)
        img_coord_list.extend(coords)
        img_original_name_list.extend([image_stem] * len(tile_names))
        img_original_shape_list.append(test_img_ds.shape)
    except Exception as exc:
        # Report the cause and move on to the next image; KeyboardInterrupt is
        # deliberately not caught so the run can still be stopped by the user.
        print(f"Failed for Image: {image_name} ({type(exc).__name__}: {exc})")


  0%|          | 0/1 [00:00<?, ?it/s]

                                             

In [6]:
# save metadata as pkl files: one file per list, named after the list
metadata_by_file = (
    ("img_name_list", img_name_list),
    ("img_coord_list", img_coord_list),
    ("img_original_shape_list", img_original_shape_list),
    ("img_original_name_list", img_original_name_list),
)

for file_name, metadata in metadata_by_file:
    with open(file_name, "wb") as fp:
        pickle.dump(metadata, fp)

In [7]:
# def crop_background(img, background_threshold):
#     '''
#     Returns the largest rectangular region within your 2d image that does not contain background.
#     Useful if your images contain grid shadows or stitching artifacts and you want to exclude them from your analysis.
#     '''
#     rows, cols = img.shape
#     max_area = 0
#     max_top = max_left = max_bottom = max_right = 0

#     # Loop through each element in the 2D array (image)
#     for i in range(rows):
#         for j in range(cols):
#             # Check if the element meets the background threshold condition
#             if img[i, j] >= background_threshold:
#                 top = bottom = i
#                 left = right = j

#                 # Expand the region vertically until the background threshold condition is not met
#                 while bottom < rows and img[bottom, j] >= background_threshold:
#                     bottom += 1

#                 # Expand the region horizontally until the background threshold condition is not met
#                 while right < cols and np.all(img[i:bottom, right] >= background_threshold):
#                     right += 1

#                 # Calculate the area of the current rectangular region
#                 area = (bottom - i) * (right - j)

#                 # Update the maximum area and the coordinates of the maximum rectangular region
#                 if area > max_area:
#                     max_area = area
#                     max_top, max_left, max_bottom, max_right = i, j, bottom, right

#     # Return the cropped region
#     return img[max_top:max_bottom, max_left:max_right]