In [None]:
from openslide import OpenSlide
from PIL import Image
import numpy as np
import os

def extract_tumorous_patches(
    svs_path: str,
    mask_path: str,
    output_dir: str,
    patch_size_svs: int,
    tumor_value: int = 150,
) -> None:
    """Save every slide patch whose matching mask region is entirely tumorous.

    The segmentation mask is converted to 8-bit greyscale and tiled with a
    grid whose cell size is ``patch_size_svs`` scaled down to mask
    resolution. For each cell in which every pixel equals ``tumor_value``,
    the corresponding ``patch_size_svs`` x ``patch_size_svs`` region is read
    from level 0 of the slide and written to
    ``output_dir/patch_<x>_<y>.png``, where ``<x>``/``<y>`` are the level-0
    coordinates of the patch's top-left corner. The greyscale mask itself is
    also written to ``output_dir/greyscale.png``.

    Args:
        svs_path: Path of the whole-slide image opened with OpenSlide.
        mask_path: Path of the segmentation mask image.
        output_dir: Directory receiving the patches; created if missing.
        patch_size_svs: Patch edge length in level-0 slide pixels.
        tumor_value: Greyscale value that marks a tumorous mask pixel.

    Raises:
        ValueError: If ``patch_size_svs`` is not positive, or if it is so
            small relative to the slide/mask scale that it covers less than
            one mask pixel.
    """
    # Fail fast, before touching the filesystem or opening the slide.
    if patch_size_svs <= 0:
        raise ValueError(
            f"patch_size_svs must be a positive integer, got {patch_size_svs!r}"
        )

    slide = OpenSlide(svs_path)
    try:
        # The directory must exist before anything is saved into it
        # (the greyscale mask below is the first file written).
        os.makedirs(output_dir, exist_ok=True)

        # convert() returns an independent in-memory image, so the file
        # handle behind the original mask can be released right away.
        with Image.open(mask_path) as raw_mask:
            mask = raw_mask.convert("L")
        mask.save(os.path.join(output_dir, "greyscale.png"))

        # Scaling factors between mask pixels and level-0 slide pixels.
        svs_width, svs_height = slide.dimensions
        scale_factor_width = svs_width / mask.width
        scale_factor_height = svs_height / mask.height

        # Patch size expressed in mask pixels (truncated towards zero).
        patch_w_mask = int(patch_size_svs / scale_factor_width)
        patch_h_mask = int(patch_size_svs / scale_factor_height)
        if patch_w_mask < 1 or patch_h_mask < 1:
            # A zero step would otherwise surface as an opaque range() error.
            raise ValueError(
                f"patch_size_svs={patch_size_svs} covers less than one mask "
                f"pixel (scale factors: {scale_factor_width:.3f} x "
                f"{scale_factor_height:.3f}); use a larger patch size"
            )

        # For a single-band image getextrema() returns (min, max); a patch is
        # uniformly tumorous exactly when both equal the tumor label.
        uniform_tumor = (tumor_value, tumor_value)

        for y in range(0, mask.height, patch_h_mask):
            for x in range(0, mask.width, patch_w_mask):
                # NOTE(review): cells overhanging the right/bottom edge are
                # cropped past the mask bounds; Pillow presumably fills that
                # area with 0, so they are only accepted if tumor_value == 0.
                patch_mask = mask.crop((x, y, x + patch_w_mask, y + patch_h_mask))
                if patch_mask.getextrema() != uniform_tumor:
                    continue

                # Map the cell's top-left corner back to level-0 coordinates.
                x_svs = int(x * scale_factor_width)
                y_svs = int(y * scale_factor_height)

                patch_svs = slide.read_region(
                    (x_svs, y_svs), 0, (patch_size_svs, patch_size_svs)
                )
                patch_filename = f"patch_{x_svs}_{y_svs}.png"
                patch_svs.save(os.path.join(output_dir, patch_filename))
    finally:
        # Always release the slide, even when mask loading or saving fails.
        slide.close()

if __name__ == "__main__":
    # Script entry point: extract fully-tumorous patches for one TCGA renal
    # slide using its region-annotation mask. All paths are hard-coded.
    patch_size = 512  # Patch edge length passed as patch_size_svs; adjust as needed
    output_directory = "/images/PublicDatasets/NSCLC/TCGA_Renal/RCC_Annotated_Patches/extracted_patches_annotated"
    mask_file = "/images/PublicDatasets/NSCLC/TCGA_Renal/RCC_Annotated_Patches/complete_region_annotation/0a709187-bb10-4146-80f4-f46894d12a4d.png"
    svs_file = "/images/PublicDatasets/NSCLC/TCGA_Renal/TCGA_SVS_Collect_COMBINED/TCGA-UW-A7GI-01Z-00-DX1.4A1709FC-F8F3-4509-A011-10CBF1B53DA9.svs"

    extract_tumorous_patches(
        svs_path=svs_file,
        mask_path=mask_file,
        output_dir=output_directory,
        patch_size_svs=patch_size,
    )
