# Resizing and Labelling of Hematoxylin Images with IBD Subtype

In [33]:
import os
import cv2
import numpy as np
from PIL import Image
import pandas as pd

In [35]:
# RESIZE TO 224x224 FOR VGG16
def resizeImages(image_path: str, label, output_dir: str, image_size: int = 224):
    """Resize one image to a square and save it as ``<stem>_resized.jpg``.

    The image is read with OpenCV, resized to ``image_size`` x ``image_size``
    with ``cv2.INTER_AREA`` regardless of the input's aspect ratio, and
    written into ``output_dir``.

    Args:
        image_path: Path of the image to read.
        label: Class label, stored unchanged in the metadata record.
        output_dir: Directory that receives the resized JPEG; created if it
            does not exist yet.
        image_size: Edge length in pixels of the square output.

    Returns:
        A tuple ``(resized_image, image_records)``: the array returned by
        ``cv2.resize`` and a one-element list holding a dict with the keys
        ``slide_id``, ``label`` and ``filename``.

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
        ValueError: If OpenCV cannot decode the file as an image.
        OSError: If the resized image cannot be written.
    """
    # cv2.imread signals failure by returning None rather than raising, so
    # check explicitly and fail with a message that names the file.
    if not os.path.isfile(image_path):
        raise FileNotFoundError(f"Image not found: {image_path}")

    image = cv2.imread(image_path)
    if image is None:
        raise ValueError(f"OpenCV could not decode image: {image_path}")

    resized_image = cv2.resize(
        image, (image_size, image_size), interpolation=cv2.INTER_AREA
    )

    slide_id = os.path.splitext(os.path.basename(image_path))[0]
    image_filename = f"{slide_id}_resized.jpg"

    # Kept in its own variable so the input path is not overwritten.
    os.makedirs(output_dir, exist_ok=True)
    resized_path = os.path.join(output_dir, image_filename)

    # cv2.imwrite returns False on failure; without this check a metadata
    # record would be produced for a file that was never written.
    if not cv2.imwrite(resized_path, resized_image):
        raise OSError(f"Could not write resized image: {resized_path}")

    image_records = [{
        # Only a trailing "_cropped" is removed; any other ending of the
        # file stem is kept as part of the slide ID.
        "slide_id": slide_id.removesuffix("_cropped"),
        "label": label,
        "filename": image_filename,
    }]

    return resized_image, image_records

In [37]:
# Input folder of hematoxylin images, destination root for the resized
# copies, and the patient sheet that supplies each patient's diagnosis.
input_path = r"\P20483_patient_23\P20483_patient_23\output_hema"
output_path = r"\P20483_patient_23\P20483_patient_23\resized_whole_hema_patient23"
patient_data_df = pd.read_excel(r"\Pediatric_IBD_StructuredData_BothRaters.xlsx", sheet_name="Patients")

# Only images whose filename starts with one of these patient IDs are processed.
valid_ids = ["11"]

# Patient IDs as zero-padded strings, computed once instead of once per file.
normalized_patient_ids = patient_data_df['patient_id'].astype(str).str.zfill(2)

all_records = []
# Sorted so the CSV row order does not depend on the directory listing order.
for filename in sorted(os.listdir(input_path)):
    if not filename.endswith("_cropped_hematoxylin.jpg"):
        continue

    slide_filename = os.path.splitext(filename)[0]
    slide_path = os.path.join(input_path, filename)

    # The patient ID is the part of the filename before the first underscore.
    patient_id = slide_filename.split("_")[0]
    if patient_id not in valid_ids:
        continue

    # match patient ID to row
    match_row = patient_data_df[normalized_patient_ids == patient_id]
    if match_row.empty:
        raise LookupError(
            f"No row with patient_id {patient_id!r} in the patient sheet "
            f"(needed for {filename})"
        )
    row = match_row.iloc[0]

    diagnosis = row['diagnosis']
    if not isinstance(diagnosis, str):
        # An empty Excel cell is read as NaN; fail clearly rather than
        # assigning a label to a patient without a diagnosis.
        raise ValueError(
            f"Patient {patient_id!r} has no text diagnosis: {diagnosis!r}"
        )
    diagnosis = diagnosis.strip().lower()

    # '0' marks ulcerative colitis; every other diagnosis is labelled '1'.
    label = '0' if diagnosis == 'ulcerative colitis' else '1'

    # Resized images are grouped into one sub-folder per label.
    slide_output_dir = os.path.join(output_path, label)
    os.makedirs(slide_output_dir, exist_ok=True)

    resized_image, image_records = resizeImages(slide_path, label=label, output_dir=slide_output_dir)

    all_records.extend(image_records)

# convert list of dicts to DF
records_df = pd.DataFrame(all_records)
records_df.to_csv(r"\image_metadata_IBDtype23.csv", index=False)
print("CSV saved as image_metadata.csv")

CSV saved as image_metadata.csv
