In [None]:
import os
import shutil
from pathlib import Path
from tqdm import tqdm

# === CONFIGURATION ===
ROOT_DIR = Path("../data/data_primary/processed/split")
LABEL_SOURCE_DIR = Path("../data/data_primary/labels")  # JSON labels folder
SPLITS = ["train", "val", "test"]

# For each split, copy the JSON annotation of every post-disaster image from
# LABEL_SOURCE_DIR into <split>/labels. Annotations are matched to images
# purely by file stem.
for split in SPLITS:
    print(f"\nProcessing split: {split}")

    images_folder = ROOT_DIR / split / "images"
    labels_folder = ROOT_DIR / split / "labels"

    # The destination must exist before anything is copied into it.
    labels_folder.mkdir(parents=True, exist_ok=True)

    # Only post-disaster images are considered.
    post_images = list(images_folder.glob("*_post_disaster.png"))

    for post_image in tqdm(post_images, desc=f"Copying labels for {split}"):
        # Same stem as the image, with a .json extension,
        # e.g. hurricane-florence_00000000_post_disaster.json
        json_name = post_image.stem + ".json"
        json_source = LABEL_SOURCE_DIR / json_name

        if not json_source.exists():
            print(f"Label not found for: {post_image.name}")
            continue

        shutil.copy(json_source, labels_folder / json_name)

print("\nDone.")


🔄 Processing split: train


Copying labels for train: 100%|██████████| 214/214 [00:00<00:00, 2481.22it/s]



🔄 Processing split: val


Copying labels for val: 100%|██████████| 47/47 [00:00<00:00, 3378.62it/s]



🔄 Processing split: test


Copying labels for test: 100%|██████████| 47/47 [00:00<00:00, 3041.41it/s]


✅ Done.





In [12]:
import os
import json
from pathlib import Path
from PIL import Image
from shapely.geometry import shape, Polygon, MultiPolygon
from tqdm import tqdm

# === CONFIGURATION ===
SPLITS = ["train", "val", "test"]
SOURCE_ROOT = Path("../data/data_primary/processed/split")
DEST_ROOT = Path("../data/data_primary_yolo")
# Damage subtype -> YOLO class id: every damaged subtype shares class 0,
# 'no-damage' is class 1. Features with any other subtype are ignored.
CLASS_MAPPING = {
    'no-damage': 1,
    'minor-damage': 0,
    'major-damage': 0,
    'destroyed': 0
}
IMAGE_EXTENSION = ".png"


def _as_polygons(geom):
    """Return the Polygon members of ``geom``; any other geometry type yields []."""
    if isinstance(geom, Polygon):
        return [geom]
    if isinstance(geom, MultiPolygon):
        return list(geom.geoms)
    return []


# NOTE(review): this cell writes labels to DEST_ROOT/labels/<split>, while the
# image-copy cell further down reads them from DEST_ROOT/<split>/labels --
# confirm which layout is intended.
# === Create Output Folder Structure ===
for split in SPLITS:
    (DEST_ROOT / "labels" / split).mkdir(parents=True, exist_ok=True)

# === Main Processing Loop ===
# Expects each JSON file to hold a list under "features", where every feature
# has "properties" (with a "subtype") and a GeoJSON-like "geometry" mapping
# expressed in pixel coordinates of the matching image.
for split in SPLITS:
    label_dir = SOURCE_ROOT / split / "labels"
    image_dir = SOURCE_ROOT / split / "images"
    output_dir = DEST_ROOT / "labels" / split

    if not label_dir.exists():
        print(f"Skipping {split}: {label_dir} does not exist")
        continue

    label_files = list(label_dir.glob("*.json"))
    unexpected_schema = 0  # files whose "features" entry is not a list
    written = 0            # .txt files produced for this split

    for label_path in tqdm(label_files, desc=f"Converting {split}"):
        base_stem = label_path.stem
        image_path = image_dir / f"{base_stem}{IMAGE_EXTENSION}"

        if not image_path.exists():
            continue

        with open(label_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        features = data.get("features", []) if isinstance(data, dict) else None
        if not isinstance(features, list):
            # Counted and reported below so that a schema mismatch does not
            # look like a successful (but empty) conversion.
            unexpected_schema += 1
            continue

        # Image size is needed to normalise pixel coordinates.
        with Image.open(image_path) as img:
            width, height = img.size

        label_lines = []

        for feature in features:
            if not isinstance(feature, dict):
                continue

            # Cheap subtype filter first; "properties" may be absent or null.
            props = feature.get("properties") or {}
            subtype = props.get("subtype")
            if subtype not in CLASS_MAPPING:
                continue

            # A feature without geometry cannot be converted; skip it rather
            # than letting shape() raise and abort the whole run.
            geometry = feature.get("geometry")
            if not geometry:
                continue

            class_id = CLASS_MAPPING[subtype]

            for poly in _as_polygons(shape(geometry)):
                coords = list(poly.exterior.coords)
                if len(coords) < 3:
                    continue

                norm_coords = [
                    f"{x / width:.6f} {y / height:.6f}"
                    for x, y in coords
                ]
                label_lines.append(f"{class_id} " + " ".join(norm_coords))

        # Files that yield no polygons produce no .txt file.
        if label_lines:
            output_path = output_dir / f"{base_stem}.txt"
            with open(output_path, "w") as f:
                f.write("\n".join(label_lines))
            written += 1

    if unexpected_schema:
        print(
            f"{split}: wrote {written} label file(s); skipped {unexpected_schema} "
            f"JSON file(s) whose 'features' entry is not a list"
        )

print("\n✅ YOLO label conversion complete.")


Converting train: 100%|██████████| 214/214 [00:00<00:00, 2731.93it/s]
Converting val: 100%|██████████| 47/47 [00:00<00:00, 3283.95it/s]
Converting test: 100%|██████████| 47/47 [00:00<00:00, 3131.22it/s]


✅ YOLO label conversion complete.





In [14]:
import os
import json
from pathlib import Path
from PIL import Image
from shapely import wkt
from shapely.errors import ShapelyError
from tqdm import tqdm

# === CONFIGURATION ===
SPLITS = ["train", "val", "test"]
SOURCE_ROOT = Path("../data/data_primary/processed/split")
DEST_ROOT = Path("../data/data_primary_yolo")
# Damage subtype -> YOLO class id: every damaged subtype shares class 0,
# "no-damage" is class 1. Any other subtype is ignored.
CLASS_MAPPING = {
    "no-damage": 1,
    "minor-damage": 0,
    "major-damage": 0,
    "destroyed": 0,
}
IMAGE_EXTENSION = ".png"


def _valid_polygon_parts(geom):
    """Return the valid, non-empty Polygon members of a parsed WKT geometry.

    A Polygon yields itself, a MultiPolygon yields its members, and every
    other geometry type yields an empty list.
    """
    if geom.geom_type == "Polygon":
        parts = [geom]
    elif geom.geom_type == "MultiPolygon":
        parts = list(geom.geoms)
    else:
        parts = []
    return [part for part in parts if part.is_valid and not part.is_empty]


def _clamp01(value):
    """Clamp ``value`` into the closed interval [0, 1]."""
    return min(max(value, 0.0), 1.0)


# === Create Folder Structure ===
# Labels go to DEST_ROOT/<split>/labels, which is where the image-copy cell
# later in this notebook looks for the generated .txt files.
for split in SPLITS:
    (DEST_ROOT / split / "labels").mkdir(parents=True, exist_ok=True)

# === Main Conversion ===
# Reads data["features"]["xy"]: a list of items, each with a "wkt" polygon in
# pixel coordinates and an optional properties.subtype damage label.
for split in SPLITS:
    print(f"\n🔄 Converting split: {split}")
    image_dir = SOURCE_ROOT / split / "images"
    label_dir = SOURCE_ROOT / split / "labels"
    output_dir = DEST_ROOT / split / "labels"

    label_files = list(label_dir.glob("*.json"))
    unparsed = 0  # annotations whose WKT was missing or could not be parsed

    for label_path in tqdm(label_files, desc=f"{split}"):
        base_name = label_path.stem  # e.g. hurricane-florence_00000064_post_disaster
        image_path = image_dir / f"{base_name}{IMAGE_EXTENSION}"
        if not image_path.exists():
            continue

        # Get image dimensions (used to normalise pixel coordinates)
        with Image.open(image_path) as img:
            width, height = img.size

        # Load and parse JSON
        with open(label_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        features = data.get("features", {}).get("xy", [])
        label_lines = []

        for item in features:
            # A missing subtype defaults to "no-damage"; a non-string subtype
            # (e.g. null) is skipped instead of crashing on .strip().
            props = item.get("properties") or {}
            raw_subtype = props.get("subtype", "no-damage")
            if not isinstance(raw_subtype, str):
                continue
            class_id = CLASS_MAPPING.get(raw_subtype.strip().lower())
            if class_id is None:
                continue

            try:
                geom = wkt.loads(item["wkt"])
            except (KeyError, TypeError, ShapelyError):
                # Missing, non-string or malformed WKT: skip this annotation,
                # but count it so the loss is visible in the summary below.
                unparsed += 1
                continue

            for polygon in _valid_polygon_parts(geom):
                coords = list(polygon.exterior.coords)
                if len(coords) < 3:
                    continue

                # Normalise to image size and clamp: vertices that fall
                # slightly outside the image would otherwise yield values
                # outside the [0, 1] range expected for YOLO labels.
                norm_coords = [
                    f"{_clamp01(x / width):.6f} {_clamp01(y / height):.6f}"
                    for x, y in coords
                ]
                label_lines.append(f"{class_id} " + " ".join(norm_coords))

        # Write .txt file (images without any usable polygon get no file)
        if label_lines:
            out_path = output_dir / f"{base_name}.txt"
            with open(out_path, "w") as f:
                f.write("\n".join(label_lines))

    if unparsed:
        print(f"{split}: skipped {unparsed} annotation(s) with missing or unparsable WKT")

print("\n✅ YOLO polygon label conversion complete.")



🔄 Converting split: train


train: 100%|██████████| 214/214 [00:00<00:00, 450.48it/s]



🔄 Converting split: val


val: 100%|██████████| 47/47 [00:00<00:00, 534.16it/s]



🔄 Converting split: test


test: 100%|██████████| 47/47 [00:00<00:00, 490.81it/s]


✅ YOLO polygon label conversion complete.





In [43]:
import os
from pathlib import Path
import shutil
from tqdm import tqdm

SPLITS = ["train", "val", "test"]
SOURCE_IMAGE_ROOT = Path("../data/data_primary/processed/split")
DEST_ROOT = Path("../data/data_primary_yolo")

# For each split, copy into DEST_ROOT/<split>/images every source image that
# has a matching .txt label in DEST_ROOT/<split>/labels (matched by stem).
for split in SPLITS:
    print(f"\n Processing split: {split}")

    split_out = DEST_ROOT / split
    labels_folder = split_out / "labels"
    images_out = split_out / "images"
    images_in = SOURCE_IMAGE_ROOT / split / "images"

    # The image folder has to exist before files are copied into it.
    images_out.mkdir(parents=True, exist_ok=True)

    txt_labels = list(labels_folder.glob("*.txt"))

    for txt_label in tqdm(txt_labels, desc=split):
        png_name = f"{txt_label.stem}.png"
        source_png = images_in / png_name

        if not source_png.exists():
            print(f"Missing image for label: {png_name}")
            continue

        shutil.copy(source_png, images_out / png_name)



 Processing split: train


train: 100%|██████████| 201/201 [00:00<00:00, 776.48it/s]



 Processing split: val


val: 100%|██████████| 42/42 [00:00<00:00, 637.82it/s]



 Processing split: test


test: 100%|██████████| 43/43 [00:00<00:00, 329.64it/s]
