# Cropping and Generation of Hematoxylin Images for Model Development

In [150]:
import os
import cv2
import numpy as np
import SimpleITK as sitk
import matplotlib.pyplot as plt
from skimage.color import rgb2hed, hed2rgb
from PIL import Image

# 1. Crop images to a fixed pixel size

In [152]:
def cropImages(input_path, target_size):
    """Center-crop an image and resize it to a ``target_size`` square.

    Along each axis the image is cropped to a centered window of
    ``target_size`` pixels when the axis is longer than that; a shorter (or
    equal) axis is kept whole. The cropped window is then resized to exactly
    ``(target_size, target_size)``.

    Args:
        input_path: Path of the image file to read with ``cv2.imread``.
        target_size: Side length, in pixels, of the returned square image.

    Returns:
        The cropped and resized image array as produced by ``cv2.resize``.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``input_path``.
    """
    source = cv2.imread(input_path)
    if source is None:
        raise ValueError(f"Failed to load image from {input_path}. Please check the file path or format.")

    height, width = source.shape[:2]

    # Start of the centered window on each axis; clamping at 0 keeps an axis
    # that is not longer than target_size in full.
    x_start = max((width - target_size) // 2, 0)
    y_start = max((height - target_size) // 2, 0)

    # The window never extends past the image: its length is the smaller of
    # the axis length and target_size.
    x_stop = x_start + min(width, target_size)
    y_stop = y_start + min(height, target_size)

    window = source[y_start:y_stop, x_start:x_stop]

    # Force the exact target size so every output has the same shape and can
    # be stacked later.
    return cv2.resize(window, (target_size, target_size), interpolation=cv2.INTER_AREA)

# 2. Convert images to multichannel H&E

In [134]:
def convert2HnE(input_path):
    """Return an RGB rendering of only the hematoxylin stain of an image.

    The image is read with OpenCV, scaled to floats by dividing by 255,
    converted from BGR to RGB, and separated into HED stain space with
    ``rgb2hed``. Channels 1 and 2 of the HED image are replaced by zeros and
    the result is mapped back to RGB with ``hed2rgb``.

    Args:
        input_path: Path of the image file to read with ``cv2.imread``.

    Returns:
        A floating-point array of shape ``(height, width, 3)`` in RGB channel
        order, as returned by ``hed2rgb``. It is not an 8-bit BGR image:
        scale it to 0-255, cast to ``uint8`` and reorder to BGR before
        handing it to ``cv2.imwrite``.

    Raises:
        ValueError: If ``cv2.imread`` cannot load ``input_path``.
    """
    image = cv2.imread(input_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than on the .astype below.
        raise ValueError(f"Failed to load image from {input_path}. Please check the file path or format.")

    image = image.astype(np.float32) / 255.0

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; skimage expects RGB
    image_hed = rgb2hed(image_rgb)  # separate stains: channel 0 is hematoxylin

    # Keep only the hematoxylin channel; the other two stain channels are zeroed.
    null = np.zeros_like(image_hed[:, :, 0])
    hematoxylin = hed2rgb(np.stack((image_hed[:, :, 0], null, null), axis=-1))

    return hematoxylin
    
#https://scikit-image.org/docs/0.25.x/auto_examples/color_exposure/plot_ihc_color_separation.html
#https://github.com/EricaLiang09ukk/IBD_Severity_Prediction_project/blob/main/ibd%20process%20image%20to%20multichannel%20again2.ipynb

# MAIN

In [148]:
# Input folder with the source images; all output folders are created inside it.
input_dir = r"C:\Users\laava\OneDrive\Documents\Queen's University\Master's Degree\CISC 881\Group Project\P20483_patient_23\P20483_patient_23"

# Output locations for the cropped images, hematoxylin images, and stacked volume.
cropped_image_dir, output_hema_dir, output_volume_dir = (
    os.path.join(input_dir, subfolder)
    for subfolder in ("cropped_images", "output_hema", "output_volumes")
)

# Create the output folders up front; exist_ok makes re-running harmless.
for _output_dir in (cropped_image_dir, output_hema_dir, output_volume_dir):
    os.makedirs(_output_dir, exist_ok=True)

# Main function to get cropped hematoxylin images only:

In [154]:
def main():
    """Crop the input JPEGs, stack the crops into a volume, and save hematoxylin images.

    Steps:
      1. Center-crop every ``.jpg`` in ``input_dir`` with ``cropImages`` and
         save it in ``cropped_image_dir``; inputs whose crop already exists
         are skipped.
      2. Stack all crops into one float32 array and write it as a vector
         NRRD volume in ``output_volume_dir``.
      3. Run ``convert2HnE`` on each crop and save the result as an 8-bit
         JPEG in ``output_hema_dir``.

    A failure on an individual file is printed and that file is skipped. If
    no cropped image can be loaded, a message is printed and the function
    returns without writing a volume.
    """
    # CROP IMAGES
    target_size = 1000  # pixel size

    for filename in sorted(os.listdir(input_dir)):
        if not filename.lower().endswith(".jpg"):
            continue
        image_input_path = os.path.join(input_dir, filename)
        cropped_image_path = os.path.join(cropped_image_dir, f"{os.path.splitext(filename)[0]}_cropped.jpg")
        if os.path.exists(cropped_image_path):
            continue
        try:
            cropped_image = cropImages(image_input_path, target_size)
            # cv2.imwrite returns False on failure instead of raising
            if not cv2.imwrite(cropped_image_path, cropped_image):
                print(f"Error processing {filename}: could not write {cropped_image_path}")
        except Exception as e:
            print(f"Error processing {filename}: {e}")

    # STACK IMAGES
    images = []
    for filename in sorted(os.listdir(cropped_image_dir)):
        if not filename.lower().endswith("_cropped.jpg"):
            continue
        cropped_image_path = os.path.join(cropped_image_dir, filename)
        # read image in cv2, use color to preserve H&E
        cropped_image = cv2.imread(cropped_image_path, cv2.IMREAD_COLOR)
        if cropped_image is None:
            # cv2.imread returns None for unreadable files instead of raising
            print(f"Error processing {filename}: failed to load image")
            continue
        images.append(np.asarray(cropped_image, dtype=np.uint8))

    if not images:
        # np.stack raises on an empty list; there is nothing to stack or convert.
        print(f"No cropped images found in {cropped_image_dir}; nothing to do.")
        return

    stacked_array = np.stack(images, axis=0).astype(np.float32)
    print(f"Volume shape: {stacked_array.shape}")
    # NOTE(review): the channels are in OpenCV's BGR order as loaded by
    # cv2.imread -- confirm that the consumer of this volume expects that.
    histo_volume = sitk.GetImageFromArray(stacked_array, isVector=True)  # enable for VectorVolumeNode
    output_volume_path = os.path.join(output_volume_dir, f"{os.path.basename(input_dir)}_volume.nrrd")
    sitk.WriteImage(histo_volume, output_volume_path)
    print(f"Saved volume as: {output_volume_path}")

    # CONVERT THE IMAGES
    for filename in sorted(os.listdir(cropped_image_dir)):
        if not filename.lower().endswith("_cropped.jpg"):
            continue
        cropped_image_path = os.path.join(cropped_image_dir, filename)
        hematoxylin_image_path = os.path.join(output_hema_dir, f"{os.path.splitext(filename)[0]}_hematoxylin.jpg")
        try:
            h_channel = convert2HnE(cropped_image_path)

            # convert2HnE returns a float RGB image scaled to [0, 1], while
            # cv2.imwrite expects 8-bit BGR data for JPEG. Writing the float
            # array directly yields a near-black image with red and blue
            # swapped, so rescale to 0-255 and reorder the channels first.
            h_uint8 = (np.clip(h_channel, 0.0, 1.0) * 255.0).round().astype(np.uint8)
            h_bgr = cv2.cvtColor(h_uint8, cv2.COLOR_RGB2BGR)

            if cv2.imwrite(hematoxylin_image_path, h_bgr):
                print(f"Saved channel as: {hematoxylin_image_path}")
            else:
                print(f"Error processing {filename}: could not write {hematoxylin_image_path}")
        except Exception as e:
            print(f"Error processing {filename}: {e}")
    
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

Volume shape: (38, 1000, 1000, 3)
Saved volume as: C:\Users\laava\OneDrive\Documents\Queen's University\Master's Degree\CISC 881\Group Project\P20483_patient_23\P20483_patient_23\output_volumes\P20483_patient_23_volume.nrrd
Saved channel as: C:\Users\laava\OneDrive\Documents\Queen's University\Master's Degree\CISC 881\Group Project\P20483_patient_23\P20483_patient_23\output_hema\23_A_DD_A_cropped_hematoxylin.jpg
Saved channel as: C:\Users\laava\OneDrive\Documents\Queen's University\Master's Degree\CISC 881\Group Project\P20483_patient_23\P20483_patient_23\output_hema\23_A_DD_B_cropped_hematoxylin.jpg
Saved channel as: C:\Users\laava\OneDrive\Documents\Queen's University\Master's Degree\CISC 881\Group Project\P20483_patient_23\P20483_patient_23\output_hema\23_A_DD_C_cropped_hematoxylin.jpg
Saved channel as: C:\Users\laava\OneDrive\Documents\Queen's University\Master's Degree\CISC 881\Group Project\P20483_patient_23\P20483_patient_23\output_hema\23_A_DD_D_cropped_hematoxylin.jpg
Saved ch