# TissueRegionExtractor #

This file contains a function that extracts only the regions of interest (areas containing tissue) from large pathomorphology whole-slide images.
The extracted regions are saved as TIFF files and are used later to train the model.



In [14]:
import os
import openslide
import numpy as np
import cv2

In [19]:
def _find_tissue_boxes(overview_rgb, scale, min_area):
    """Return tissue bounding boxes found on a downsampled RGB overview.

    Args:
        overview_rgb: RGB image array of the slide at the downsampled level.
        scale: Factor used to map overview pixel coordinates to level-0
            coordinates.
        min_area: Minimum bounding-box area (width * height, in overview
            pixels); smaller boxes are discarded.

    Returns:
        A list of ``(x, y, w, h)`` tuples in level-0 pixel coordinates, in
        the order the contours were reported by OpenCV.
    """
    gray = cv2.cvtColor(overview_rgb, cv2.COLOR_RGB2GRAY)
    blur = cv2.GaussianBlur(gray, (5, 5), 0)
    _, binary = cv2.threshold(blur, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # The foreground (255) is expected to be the minority class; if Otsu
    # labelled the majority of pixels as 255, flip the mask.
    if np.sum(binary == 255) > np.sum(binary == 0):
        binary = cv2.bitwise_not(binary)

    # Morphological opening removes small specks before contour detection.
    kernel = np.ones((3, 3), np.uint8)
    binary = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=2)

    contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    boxes = []
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        if w * h < min_area:
            continue
        boxes.append((int(x * scale), int(y * scale), int(w * scale), int(h * scale)))
    return boxes


def process_slide(slide_path, output_dir, downsample_factor=32, min_area=1000):
    """Extract tissue regions from a slide and save each one as a TIFF file.

    The slide is read at a downsampled level, thresholded with Otsu's method
    to find candidate regions, and the bounding box of every sufficiently
    large region is then read at level 0 and written to
    ``<output_dir>/tissue_region_<idx>.tiff``.

    Args:
        slide_path: Path of the slide file opened with OpenSlide.
        output_dir: Directory for the TIFF files; created if missing.
        downsample_factor: Desired downsample used to pick the overview level.
        min_area: Minimum bounding-box area, in pixels of the overview level,
            for a region to be kept. Defaults to 1000.

    Returns:
        None. Prints one ``Saved: ...`` line per written file.
    """
    os.makedirs(output_dir, exist_ok=True)

    # The context manager closes the slide even if reading or saving fails.
    with openslide.OpenSlide(slide_path) as slide:
        level = slide.get_best_level_for_downsample(downsample_factor)
        dims = slide.level_dimensions[level]

        # NOTE(review): converting RGBA to RGB drops the alpha channel, so
        # fully transparent slide areas presumably end up black - confirm this
        # is acceptable for the thresholding and for the saved crops.
        overview = np.array(slide.read_region((0, 0), level, dims).convert("RGB"))

        scale = slide.level_downsamples[level]
        tissue_regions = _find_tissue_boxes(overview, scale, min_area)

        for idx, (x, y, w, h) in enumerate(tissue_regions):
            # read_region takes level-0 coordinates and returns an RGBA image.
            tissue = slide.read_region((x, y), 0, (w, h)).convert("RGB")
            output_filename = os.path.join(output_dir, f"tissue_region_{idx}.tiff")
            tissue.save(output_filename, format="TIFF")
            print(f"Saved: {output_filename}")

In [20]:
# Extract the tissue regions of slide 1M02 into its own output folder,
# using a 32x downsampled overview for region detection.
slide_file = '../data/patomorfologia/1M02.mrxs'
output_folder = '../preprocessedData/tissue_regions/1M02'
process_slide(slide_file, output_folder, downsample_factor=32)

Saved: ../preprocessedData/tissue_regions/1M02\tissue_region_0.tiff
Saved: ../preprocessedData/tissue_regions/1M02\tissue_region_1.tiff
Saved: ../preprocessedData/tissue_regions/1M02\tissue_region_2.tiff
Saved: ../preprocessedData/tissue_regions/1M02\tissue_region_3.tiff
Saved: ../preprocessedData/tissue_regions/1M02\tissue_region_4.tiff


In [21]:
# Extract the tissue regions of slide 1M01 into its own output folder,
# using a 32x downsampled overview for region detection.
slide_file = '../data/patomorfologia/1M01.mrxs'
output_folder = '../preprocessedData/tissue_regions/1M01'
process_slide(slide_file, output_folder, downsample_factor=32)

Saved: ../preprocessedData/tissue_regions/1M01\tissue_region_0.tiff
Saved: ../preprocessedData/tissue_regions/1M01\tissue_region_1.tiff
Saved: ../preprocessedData/tissue_regions/1M01\tissue_region_2.tiff
