# Full Dataset Tumor Region Cropping

This notebook processes every dataset split (train, val, test): for each image it crops the tumor region (the bounding box of the largest contour in the matching mask), resizes the crop to 224×224, and saves it under `cropped/<split>/<class>/`.

In [1]:
import os
import cv2
import numpy as np
from glob import glob
from tqdm import tqdm

# Dataset locations. The defaults are the original local paths; set the
# TUMOR_SPLIT_DIR / TUMOR_CROPPED_DIR environment variables to run the notebook
# on another machine without editing this cell.
base_path = os.environ.get(
    "TUMOR_SPLIT_DIR",
    r"D:\IIT\Subjects\(4606)Machine Vision\CW\Develo\DataSet\U-Net\processed\split",
)
output_base = os.environ.get(
    "TUMOR_CROPPED_DIR",
    r"D:\IIT\Subjects\(4606)Machine Vision\CW\Develo\DataSet\U-Net\cropped",
)

# Dataset splits and tumor classes to process.
splits = ['train', 'val', 'test']
classes = ['glioma', 'meningioma', 'pituitary']

# A wrong input path would otherwise go unnoticed: the glob in the processing
# cell simply finds no files and the run finishes with zero crops.
if not os.path.isdir(base_path):
    print(f"⚠️ Input folder not found: {base_path}")

os.makedirs(output_base, exist_ok=True)


In [2]:
def crop_tumor_region(image, mask):
    """Crop `image` to the bounding box of the largest contour found in `mask`.

    Args:
        image: Image array to crop (e.g. the result of ``cv2.imread``), or None.
        mask: Single-channel 8-bit mask whose non-zero pixels mark the tumor,
            or None. Assumed to have the same height/width as `image`; the
            bounding box is computed in mask coordinates and applied to
            `image` unchanged.

    Returns:
        The cropped sub-array of `image` (a view, not a copy), or None when
        either input is None or the mask contains no non-zero pixels.
    """
    # cv2.imread signals a missing/unreadable file by returning None; bail out
    # here instead of letting cv2.findContours raise an opaque error.
    if image is None or mask is None:
        return None

    # An all-zero mask has no contours, so skip the OpenCV call entirely.
    if not np.any(mask):
        return None

    # findContours returns (contours, hierarchy) on OpenCV 4.x but
    # (image, contours, hierarchy) on 3.x; index -2 is the contours on both.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]
    if len(contours) == 0:
        return None

    # Keep only the largest connected region; smaller blobs are ignored.
    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)
    return image[y:y + h, x:x + w]


In [3]:
# Walk every split/class folder, crop each image to its tumor region, resize the
# crop and save it under output_base/<split>/<class>/ with the original filename.
target_size = (224, 224)  # (width, height) passed to cv2.resize for every crop
total_count = 0
skipped = []  # (image path, reason) for every image that produced no output

for split in splits:
    for cls in classes:
        img_dir = os.path.join(base_path, split, "images", cls)
        msk_dir = os.path.join(base_path, split, "masks", cls)
        save_dir = os.path.join(output_base, split, cls)
        os.makedirs(save_dir, exist_ok=True)

        img_files = sorted(glob(os.path.join(img_dir, "*.png")))
        for file_path in tqdm(img_files, desc=f"{split.upper()} - {cls}"):
            filename = os.path.basename(file_path)
            img = cv2.imread(file_path)
            # The mask is looked up by the same filename in the masks folder.
            mask = cv2.imread(os.path.join(msk_dir, filename), cv2.IMREAD_GRAYSCALE)

            # cv2.imread returns None (no exception) for missing/corrupt files;
            # record the problem and keep going rather than aborting the run.
            if img is None:
                skipped.append((file_path, "image could not be read"))
                continue
            if mask is None:
                skipped.append((file_path, "mask missing or unreadable"))
                continue

            # The bounding box is computed in mask coordinates, so the mask
            # must match the image size or the crop lands in the wrong place.
            if mask.shape[:2] != img.shape[:2]:
                mask = cv2.resize(
                    mask,
                    (img.shape[1], img.shape[0]),
                    interpolation=cv2.INTER_NEAREST,
                )

            cropped = crop_tumor_region(img, mask)
            if cropped is None or cropped.size == 0:
                skipped.append((file_path, "no tumor region found in mask"))
                continue

            resized = cv2.resize(cropped, target_size)
            # cv2.imwrite reports failure through its return value only.
            if cv2.imwrite(os.path.join(save_dir, filename), resized):
                total_count += 1
            else:
                skipped.append((file_path, "failed to write output file"))

print(f"✅ Done. Total cropped images saved: {total_count}")
if skipped:
    print(f"⚠️ Skipped {len(skipped)} image(s):")
    for skipped_path, reason in skipped:
        print(f"  {skipped_path}: {reason}")


TRAIN - glioma: 100%|██████████| 998/998 [00:03<00:00, 293.23it/s]
TRAIN - meningioma: 100%|██████████| 998/998 [00:04<00:00, 222.30it/s]
TRAIN - pituitary: 100%|██████████| 998/998 [00:04<00:00, 221.81it/s]
VAL - glioma: 100%|██████████| 285/285 [00:01<00:00, 218.68it/s]
VAL - meningioma: 100%|██████████| 285/285 [00:02<00:00, 110.10it/s]
VAL - pituitary: 100%|██████████| 285/285 [00:02<00:00, 120.90it/s]
TEST - glioma: 100%|██████████| 143/143 [00:00<00:00, 282.57it/s]
TEST - meningioma: 100%|██████████| 143/143 [00:01<00:00, 79.67it/s]
TEST - pituitary: 100%|██████████| 143/143 [00:01<00:00, 98.70it/s] 

✅ Done. Total cropped images saved: 4278



