### EMTOOLS -- Preprocessing
This notebook preprocesses images for prediction using a DNN trained with Uni-EM. It opens a series of image files in `path_input`, downsamples them, splits them into overlapping tiles of `batch_size` × `batch_size` pixels, and applies CLAHE (contrast-limited adaptive histogram equalization), which enhances the local contrast of images. It then saves the tiles as RGB .png files to `path_results`. Please note that you might need to adapt the downsampling factor `ds` depending on the resolution of your data.

**Author:** Philip Ruthig, Paul Flechsig Institute, Center of Neuropathology and Brain Research Leipzig

**Contact:** philip.ruthig@medizin.uni-leipzig.de // philip.ruthig@gmail.com

**Publication:**
Please contact me if you want to use this code for any publication.

In [None]:
import tifffile as tf
import cv2
import numpy as np
import tqdm
import matplotlib.pyplot as plt
import pickle
import skimage
import scipy.stats as stats
from os import listdir
from os.path import isfile, join
from skimage.transform import downscale_local_mean

In [None]:
def batch_generator(image_array, batch_size, overlap):
    """Cut a 2D image into square, overlapping tiles.

    Tiles are taken on a regular grid whose step is ``batch_size - overlap``
    along both axes, starting at the top-left corner. Only tiles that fit
    completely inside ``image_array`` are produced, so a remainder at the
    bottom/right edge that is narrower than one step is not covered.

    Parameters
    ----------
    image_array : array-like with ``.shape`` and 2D slicing
        Image to tile; the first two axes are treated as (y, x).
    batch_size : int
        Edge length of each square tile in pixels.
    overlap : int
        Number of pixels shared by neighbouring tiles. Must be smaller than
        ``batch_size``.

    Returns
    -------
    img_list : list
        The tiles, in row-major order (views into ``image_array`` for numpy input).
    yx_list : list of tuple
        One ``(y_start, y_stop, x_start, x_stop)`` tuple per tile.

    Raises
    ------
    ValueError
        If ``overlap >= batch_size``, i.e. the grid step would be zero or negative.
    """
    stride = batch_size - overlap
    if stride <= 0:
        # A non-positive step would either make range() fail with an unrelated
        # message or silently yield no (or nonsensical) tiles.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than batch_size ({batch_size})"
        )

    # Last valid start index on each axis is shape - batch_size, so every tile
    # produced below is guaranteed to lie fully inside the image.
    row_starts = range(0, image_array.shape[0] - batch_size + 1, stride)
    col_starts = range(0, image_array.shape[1] - batch_size + 1, stride)

    img_list = []
    yx_list = []
    for top in row_starts:
        for left in col_starts:
            bottom = top + batch_size
            right = left + batch_size
            img_list.append(image_array[top:bottom, left:right])
            yx_list.append((top, bottom, left, right))

    return img_list, yx_list

def remove_whitespaces(string):
    """Return ``string`` with every whitespace character removed."""
    # split() without arguments breaks on any run of whitespace and discards it,
    # so gluing the pieces back together leaves only the non-whitespace text.
    fragments = string.split()
    return "".join(fragments)


In [None]:
# user inputs
# Folder names are built with os.path.join so that the trailing separator matches
# the current platform; hard-coded backslashes only resolve correctly on Windows.
path_input = join("0_raw", "")  # folder the raw .tif/.png images are read from
path_results = join("1_preprocessed", "")  # folder the preprocessed tiles are written to
ds = 4 # each axis of the image is downsampled by this factor.
batch_size = 2048 # width and length of each resulting image.
overlap = 200 # overlap of each neighbouring image batch


In [None]:
# Metadata accumulators filled while the images are processed, one entry per
# saved tile (except the shape list, which gets one entry per input image):
#   img_name_list            file name of each saved tile
#   img_coord_list           tile coordinates as returned by batch_generator
#   img_original_shape_list  shape of each downsampled input image
#   img_original_name_list   name of the input image each tile came from
(
    img_name_list,
    img_coord_list,
    img_original_shape_list,
    img_original_name_list,
) = ([] for _ in range(4))

In [None]:
# Import the scikit-image submodules explicitly: older scikit-image releases do
# not load `skimage.exposure` / `skimage.io` on a bare `import skimage`.
import skimage.exposure
import skimage.io
from os.path import splitext


def _to_grayscale(img):
    """Return `img` as a single-channel image, keeping its dtype.

    Images with a third axis are reduced by averaging their first three
    channels; 2D images are returned unchanged.
    """
    if img.ndim != 3:
        return img
    # Average in float64: adding uint8 channels directly wraps around at 256
    # before the division and produces wrong grey values for bright pixels.
    return img[:, :, :3].mean(axis=2, dtype=np.float64).astype(img.dtype)


def _load_grayscale(file_path, suffix):
    """Read a .tif/.tiff/.png file and return it as grayscale, or None if unsupported."""
    if suffix in (".tif", ".tiff"):
        return _to_grayscale(tf.imread(file_path))
    if suffix == ".png":
        raw = cv2.imread(file_path, -1)  # -1: load unchanged (bit depth, channels)
        if raw is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f"Could not read image file: {file_path}")
        return _to_grayscale(raw)
    return None


path_images = [f for f in listdir(path_input) if isfile(join(path_input, f)) and f != ".gitkeep"]
for file_name in tqdm.tqdm(path_images):
    print(f"processing {file_name}")
    # splitext handles both 3- and 4-letter extensions, so ".tiff" files no
    # longer keep a stray "." at the end of their stem.
    stem, suffix = splitext(file_name)
    test_img = _load_grayscale(join(path_input, file_name), suffix.lower())
    if test_img is None:
        print('Input file format not supported. Use .png or .tif.')
        # Skip only this file; the remaining inputs are still processed.
        continue

    test_img_ds = downscale_local_mean(test_img, ds)
    img_original_shape_list.append(test_img_ds.shape)
    # Reflect-pad by one tile size on every side before tiling.
    test_img_ds_pad = np.pad(test_img_ds, batch_size, mode='reflect')
    img_list, coords = batch_generator(test_img_ds_pad, batch_size, overlap)

    for tile, tile_coords in tqdm.tqdm(zip(img_list, coords), total=len(img_list)):
        # Scale the tile to [0, 1] for CLAHE; an all-zero tile is passed through
        # unchanged to avoid a division by zero (which would yield NaNs).
        peak = np.max(tile)
        tile_norm = tile / peak if peak > 0 else tile
        test_img_clahe = skimage.exposure.equalize_adapthist(tile_norm, clip_limit=0.01, kernel_size=127)
        # Replicate the grey channel three times to obtain an (H, W, 3) RGB image.
        test_img_rgb_png = np.dstack((test_img_clahe, test_img_clahe, test_img_clahe))

        tile_name = stem + remove_whitespaces(str(tile_coords)) + ".png"
        skimage.io.imsave(join(path_results, tile_name), (test_img_rgb_png*255).astype('uint8'))
        img_name_list.append(tile_name)
        img_coord_list.append(tile_coords)
        img_original_name_list.append(stem)


In [None]:
# save metadata as pkl files
# Each entry maps the output file name (written to the current working
# directory, without extension) to the object pickled into it.
metadata_to_save = [
    ("img_name_list", img_name_list),
    ("img_coord_list", img_coord_list),
    ("img_original_shape_list", img_original_shape_list),
    ("img_original_name_list", img_original_name_list),
    ("batch_size", batch_size),
]

for target_file, payload in metadata_to_save:
    with open(target_file, "wb") as fp:
        pickle.dump(payload, fp)