# Generate masks

This notebook generates segmentation masks from Solafune's annotation JSON file.  
The masks are saved as `.npy` files for efficient loading during model training.

In [2]:
import json
from pathlib import Path

import cv2
import numpy as np
import tifffile
from tqdm import tqdm

In [3]:
# Root data directory, relative to the kernel's working directory: the
# annotation JSON is read from here and the mask folder is created inside it.
data_dir = Path("./data")

In [4]:
train_file_names = [f"train_{i}.tif" for i in range(176)]  # train_0.tif ~ train_175.tif
class_names = ["grassland_shrubland", "logging", "mining", "plantation"]

# JSON is UTF-8 by specification; pin the encoding so that loading does not
# depend on the platform's default locale encoding.
with open(data_dir / "train_annotations.json", "r", encoding="utf-8") as f:
    raw_annotations = json.load(f)

# Group the image records by file name once, so that each training file below
# is a dictionary lookup instead of a scan over every image record.
images_by_file_name: dict[str, list[dict]] = {}
for tmp_img in raw_annotations["images"]:
    images_by_file_name.setdefault(tmp_img["file_name"], []).append(tmp_img)

annotations: dict[str, dict[str, list[list[float]]]] = {}  # file_name -> class_name -> polygons
for fn in tqdm(train_file_names):
    # Every class gets an entry, even when the file has no polygon of that class,
    # so later lookups by class name never miss.
    ann: dict[str, list[list[float]]] = {class_name: [] for class_name in class_names}

    # Files without any record in the JSON simply keep their empty lists.
    for tmp_img in images_by_file_name.get(fn, []):
        for tmp_ann in tmp_img["annotations"]:
            # A class that is not listed in class_names raises KeyError here.
            ann[tmp_ann["class"]].append(tmp_ann["segmentation"])

    annotations[fn] = ann

100%|██████████| 176/176 [00:00<00:00, 88006.38it/s]


In [5]:
mask_save_dir = data_dir / "train_masks"
mask_save_dir.mkdir(parents=True, exist_ok=True)

# Spatial size of every generated mask.
# NOTE(review): assumes all training images are 1024x1024 — confirm against the TIFFs.
mask_height, mask_width = 1024, 1024

for fn in tqdm(train_file_names):
    # One uint8 channel per class, in class_names order; 0 = background, 255 = inside a polygon.
    mask = np.zeros((len(class_names), mask_height, mask_width), dtype=np.uint8)
    anns = annotations[fn]
    for class_idx, class_name in enumerate(class_names):
        polygons = anns[class_name]
        # reshape(-1, 2) gives cv2.fillPoly the (N, 2) int32 point array it requires.
        # astype(np.int32) truncates fractional coordinates toward zero.
        contours = [np.array(poly).astype(np.int32).reshape(-1, 2) for poly in polygons]
        # mask[class_idx] is a view, so fillPoly draws directly into this class's channel.
        cv2.fillPoly(mask[class_idx], contours, 255)

    # Swap only the final extension: train_<i>.tif -> train_<i>.npy
    np.save(mask_save_dir / Path(fn).with_suffix(".npy"), mask)

100%|██████████| 176/176 [00:00<00:00, 492.48it/s]
