In [None]:
import os
import numpy as np
import math
import cv2
import pathlib
from pathlib import Path
import imageio

## Training and test data preparation

In [None]:
def create_overlapping_patches(image, patch_size, overlap):
    """
    Cut an image into square patches laid out on a regular grid.

    Patches are taken row by row (outer loop over y, inner loop over x) with a
    stride of ``patch_size - overlap`` pixels. Only patches that fit completely
    inside the image are produced, so a strip at the right/bottom border stays
    uncovered whenever ``dimension - patch_size`` is not a multiple of the stride.

    Parameters
    ----------
    image : array-like
        Image with at least two dimensions; the first two are treated as
        (height, width).
    patch_size : int
        Edge length of each square patch in pixels.
    overlap : int
        Number of pixels shared by neighbouring patches; must be smaller than
        ``patch_size``.

    Returns
    -------
    list of numpy.ndarray
        The patches in row-major order. The list is empty when the image is
        smaller than ``patch_size`` along either axis.

    Raises
    ------
    ValueError
        If ``overlap >= patch_size`` (the stride would be zero or negative).
    """
    step = patch_size - overlap
    if step <= 0:
        # A negative stride would silently yield no patches at all, and a zero
        # stride makes range() fail with an unrelated-looking message.
        raise ValueError(
            f"overlap ({overlap}) must be smaller than patch_size ({patch_size})"
        )

    img_array = np.array(image)
    height, width = img_array.shape[:2]

    return [
        img_array[y:y + patch_size, x:x + patch_size]
        for y in range(0, height - patch_size + 1, step)
        for x in range(0, width - patch_size + 1, step)
    ]

In [None]:
# as described by Weigert et al
def perc_normalization(ip, p_low=0.1, p_high=99.9):
    """
    Percentile-normalise an image and rescale it to the 16-bit value range.

    The ``p_low`` and ``p_high`` percentiles of the input are mapped to 0 and 1,
    everything outside that interval is clipped, and the result is multiplied
    by ``2**16 - 1``.

    Parameters
    ----------
    ip : array-like
        Image to normalise.
    p_low : float, optional
        Lower percentile (0-100) mapped to 0. Default 0.1.
    p_high : float, optional
        Upper percentile (0-100) mapped to 1. Default 99.9.

    Returns
    -------
    numpy.ndarray
        Floating-point array with the shape of ``ip`` and values in
        [0, 65535]. The caller is responsible for casting to an integer dtype.
    """
    y = np.array(ip)

    lower = np.percentile(y, p_low)
    upper = np.percentile(y, p_high)
    max_value = (2**16) - 1

    if upper - lower == 0:
        # Flat (or almost entirely flat) image: dividing by zero would produce
        # NaN for every pixel equal to `lower`. Use the limit of the clipped
        # ratio instead: pixels above the percentile saturate, the rest are 0.
        return np.where(y > lower, 1.0, 0.0) * max_value

    y_norm = (y - lower) / (upper - lower)
    y_norm = np.clip(y_norm, 0, 1)

    return y_norm * max_value

In [None]:
def _save_uint16_png(path, array):
    """Write `array` to `path` as unsigned 16-bit data, warning if the write fails."""
    # cv2.imwrite reports failure through its return value instead of raising,
    # so an unchecked call can silently leave holes in the data set.
    if not cv2.imwrite(path, array.astype(np.uint16)):
        print(f"Warning: could not write {path}")


def process_images_in_folder(folder_path, samples, patch_size=256, test_overlap=128,
                             train_fraction=0.9, min_ref_max=249):
    """
    Split every ground-truth/noisy image stack in a folder into train and test tiles.

    The (non-hidden) files in `folder_path` are sorted by name and consumed in
    consecutive pairs: the first file of each pair is read as the ground-truth
    stack, the second as the noisy stack. The n-th pair is labelled with
    `samples[n]`, so `samples` must follow the sorted file order.

    For each frame of a stack both images are percentile-normalised and tiled:

    * The first ``floor(train_fraction * n_frames)`` frames are training data.
      They are tiled without overlap, and a tile pair is kept only if the
      maximum of the corresponding *raw* ground-truth tile exceeds
      `min_ref_max` (presumably to drop background-only tiles - confirm).
    * The remaining frames are test data. They are tiled with `test_overlap`,
      every tile is kept, and the full normalised frames are saved as well.

    Output folders, all created next to `folder_path`:

    * ``Hagen_GT`` / ``Hagen_noisy``: training tiles
    * ``Hagen_testmix/gt_tiles`` / ``Hagen_testmix/noisy_tiles``: test tiles
    * ``Hagen_testmix/gt_images/<sample>`` / ``Hagen_testmix/noisy_images/<sample>``:
      full normalised test frames
    * ``Hagen_actin_GT`` / ``Hagen_actin_noisy``: created but not written to

    Parameters
    ----------
    folder_path : str or pathlib.Path
        Folder containing the raw image stacks.
    samples : sequence of str
        One output name per file pair, in sorted file order.
    patch_size : int, optional
        Tile edge length in pixels. Default 256.
    test_overlap : int, optional
        Overlap between neighbouring test tiles, used to address borders.
        Default 128 for all image sizes (the original notes mention 32 for
        2048 px and 64 for 1024 px images as possible alternatives).
    train_fraction : float, optional
        Fraction of leading frames of each stack used for training. Default 0.9.
    min_ref_max : number, optional
        Training tiles are kept only if the raw ground-truth tile maximum is
        strictly greater than this value. Default 249.

    Raises
    ------
    ValueError
        If the folder holds an odd number of files or more file pairs than
        there are entries in `samples`.
    """
    folder_path = Path(folder_path)
    root = folder_path.parent.absolute()

    # Create output folders for tiles
    test_folder = root / 'Hagen_testmix'
    test_noisy = os.path.join(test_folder, 'noisy_tiles')
    test_gt = os.path.join(test_folder, 'gt_tiles')
    train_noisy = os.path.join(root, 'Hagen_noisy')
    train_gt = os.path.join(root, 'Hagen_GT')
    # Only needed for the actin-only variant described further down.
    train_actin_noisy = os.path.join(root, 'Hagen_actin_noisy')
    train_actin_gt = os.path.join(root, 'Hagen_actin_GT')
    for directory in (test_noisy, train_noisy, train_actin_noisy,
                      test_gt, train_gt, train_actin_gt):
        os.makedirs(directory, exist_ok=True)

    # Hidden entries (e.g. .DS_Store) and sub-folders would break the pairing.
    files = sorted(
        name for name in os.listdir(folder_path)
        if not name.startswith('.') and os.path.isfile(os.path.join(folder_path, name))
    )
    if len(files) % 2:
        raise ValueError(
            f"Expected gt/noisy file pairs in {folder_path}, found {len(files)} files"
        )
    n_pairs = len(files) // 2
    if n_pairs > len(samples):
        raise ValueError(
            f"Found {n_pairs} file pairs but only {len(samples)} sample names"
        )

    # Indexing by pair keeps file pairs and sample names aligned even when a
    # pair is skipped.
    for pair_idx in range(n_pairs):
        filename = samples[pair_idx]
        gt_name = files[2 * pair_idx]
        noisy_name = files[2 * pair_idx + 1]
        gt = imageio.v2.imread(os.path.join(folder_path, gt_name))
        noisy = imageio.v2.imread(os.path.join(folder_path, noisy_name))

        # NOTE(review): imageio is expected to raise on unreadable files rather
        # than return None; the guard is kept as a cheap safety net.
        if gt is None or noisy is None:
            print(f"Unable to read image/s: {gt_name}, {noisy_name}")
            continue

        print(f"Processing images: {gt_name} and {noisy_name}")
        gt_images = os.path.join(test_folder, 'gt_images', filename)
        os.makedirs(gt_images, exist_ok=True)
        noisy_images = os.path.join(test_folder, 'noisy_images', filename)
        os.makedirs(noisy_images, exist_ok=True)

        # Leading frames are training data, the rest is the official test set.
        n_train = math.floor(train_fraction * len(gt))

        for j in range(len(gt)):
            is_train = j < n_train
            # no overlap for training patches, overlap for test patches
            overlap = 0 if is_train else test_overlap

            scaled_gt = perc_normalization(gt[j])
            scaled_noisy = perc_normalization(noisy[j])

            tiles_gt = np.array(create_overlapping_patches(scaled_gt, patch_size, overlap))
            tiles_noisy = np.array(create_overlapping_patches(scaled_noisy, patch_size, overlap))
            # Raw (unnormalised) ground-truth tiles, used only for the intensity filter.
            ref_tiles = np.array(create_overlapping_patches(gt[j], patch_size, overlap))

            if is_train:
                gt_dir, noisy_dir = train_gt, train_noisy
            else:
                gt_dir, noisy_dir = test_gt, test_noisy
                # save scaled images to get full FOV references
                image_filename = f"{filename}_frame_{j}_.png"
                _save_uint16_png(os.path.join(noisy_images, image_filename), scaled_noisy)
                _save_uint16_png(os.path.join(gt_images, image_filename), scaled_gt)

            # Actin-only training data: additionally require
            # 'actin-60x-noise1' in tile_filename for training tiles and write
            # them to train_actin_gt / train_actin_noisy instead.
            for k, tile in enumerate(tiles_gt):
                if is_train and not (np.max(ref_tiles[k]) > min_ref_max):
                    continue
                tile_filename = f"{filename}_frame_{j}_tile_{k}.png"
                _save_uint16_png(os.path.join(gt_dir, tile_filename), tile)
                _save_uint16_png(os.path.join(noisy_dir, tile_filename), tiles_noisy[k])

In [None]:
# Base folder: the notebook's current working directory
path = Path.cwd()

In [None]:
# Process images of various samples and noise levels in the Hagen folder.
# The names are assigned to the gt/noisy file pairs in sorted file order,
# so this list has to follow that same order.
samples = [
    'actin-20x-noise1',
    'actin-60x-noise1',
    'actin-60x-noise2',
    'actin-confocal',
    'membrane',
    'mito-20x-noise1',
    'mito-60x-noise1',
    'mito-60x-noise2',
    'mito-confocal',
    'nucleus',
]
process_images_in_folder(path / 'Hagen_rawdata', samples)