# Segmentation and Feature Extraction

### 1. Create Cell Masks (From .dat Annotation)
Purpose: For each cropped image, generate a binary mask of its nucleus or cytoplasm by filling the polygon stored in the matching `.dat` annotation file.

In [11]:
import cv2
import numpy as np
from pathlib import Path
from tqdm import tqdm

BASE = Path("../")  # navigating up from Image Preprocessing/
PREPROCESSED = BASE / "Dataset/Preprocessed Dataset"
RAW = BASE / "Dataset/Raw Dataset/SipakMed Dataset"
MASKS = PREPROCESSED / "Masks"
MASKS.mkdir(exist_ok=True)

CELL_CLASSES = [
    "im_Dyskeratotic", "im_Koilocytotic", "im_Metaplastic",
    "im_Parabasal", "im_Superficial-Intermediate"
]

# (height, width) of every mask written by this cell.
MASK_SHAPE = (224, 224)


def _read_polygon(dat_file):
    """Return the vertices listed in *dat_file* as a list of float lists.

    Every line that contains a comma is split on commas and each field is
    parsed as a float; lines without a comma are ignored. The file is opened
    in a ``with`` block so the handle is closed as soon as it has been read.

    Raises:
        ValueError: if a comma-separated field is not a valid float.
    """
    with open(dat_file) as fh:
        return [list(map(float, line.strip().split(','))) for line in fh if ',' in line]


# For every cropped PNG of every cell class, locate the matching .dat polygon
# in the raw dataset, rasterise it, and save it as "<crop stem>_mask.png".
for cell_type in CELL_CLASSES:
    crop_dir = PREPROCESSED / cell_type
    raw_dir = RAW / cell_type / cell_type
    mask_dir = MASKS / cell_type
    mask_dir.mkdir(exist_ok=True, parents=True)
    # list() materialises the glob so tqdm can display a total.
    for img_path in tqdm(list(crop_dir.glob("*.png"))):
        img_id_label = img_path.stem
        # Text before the first underscore identifies the source image.
        img_id = img_id_label.split("_")[0]
        # Any stem containing "cyt" maps to the cytoplasm annotation;
        # everything else falls back to the nucleus annotation.
        label = "cyt" if "cyt" in img_id_label else "nuc"
        dat_file = raw_dir / f"{img_id}_{label}.dat"
        if not dat_file.exists():
            continue  # no annotation for this crop: skip it silently
        coords = _read_polygon(dat_file)
        if len(coords) < 3:
            continue  # fewer than three vertices cannot form a polygon
        # Coordinates are truncated to int32 before being drawn.
        poly = np.array(coords, np.int32)
        # NOTE(review): the vertices are drawn unmodified onto a MASK_SHAPE
        # canvas, which assumes they are already expressed in the crop's
        # pixel frame -- confirm against how the crops were produced.
        mask = np.zeros(MASK_SHAPE, np.uint8)
        cv2.fillPoly(mask, [poly], 255)
        cv2.imwrite(str(mask_dir / f"{img_path.stem}_mask.png"), mask)


  0%|          | 0/1626 [00:00<?, ?it/s]

100%|██████████| 1626/1626 [00:00<00:00, 17947.16it/s]
  0%|          | 0/1650 [00:00<?, ?it/s]
100%|██████████| 1650/1650 [00:00<00:00, 13285.40it/s]
  0%|          | 0/1586 [00:00<?, ?it/s]
100%|██████████| 1586/1586 [00:00<00:00, 20838.55it/s]
100%|██████████| 1586/1586 [00:00<00:00, 20838.55it/s]
100%|██████████| 1574/1574 [00:00<00:00, 23604.62it/s]
100%|██████████| 1574/1574 [00:00<00:00, 23604.62it/s]
100%|██████████| 1662/1662 [00:00<00:00, 21704.94it/s]
100%|██████████| 1662/1662 [00:00<00:00, 21704.94it/s]
