In [1]:
import os
import sys

import h5py
import numpy as np

sys.path.append("../src")
from Load_data.prepare_data import *

In [2]:
def build_or_load_binary_any_dataset(
    cache_path,
    train_image_dir,
    test_image_dir,
    train_xml,
    test_xml,
    img_size=(64, 64)
):
    """Return the BINARY_ANY train/test arrays, reusing an HDF5 cache if valid.

    When ``cache_path`` exists and was written for the same ``img_size`` (or
    carries no ``img_size`` attribute at all, which is the case for caches
    produced before that attribute was recorded), the four datasets are read
    back from it. Otherwise the arrays are rebuilt with the project helpers
    and the cache file is (re)written.

    Args:
        cache_path: HDF5 file used as the cache. Its parent directory is
            created when missing; a bare filename without a directory part
            is accepted.
        train_image_dir: Forwarded to ``load_and_resize_images``.
        test_image_dir: Forwarded to ``load_and_resize_images``.
        train_xml: Forwarded to ``parse_cvat_xml_all_labels``.
        test_xml: Forwarded to ``parse_cvat_xml_all_labels``.
        img_size: Forwarded unchanged to ``load_and_resize_images``. Must be
            a sequence of integers so it can be stored in the cache metadata.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``.
    """
    dataset_keys = ("X_train", "Y_train", "X_test", "Y_test")
    requested_size = [int(v) for v in img_size]

    # os.path.dirname() yields "" for a bare filename and os.makedirs("")
    # raises, so only create the directory when there is one to create.
    cache_dir = os.path.dirname(cache_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    if os.path.exists(cache_path):
        with h5py.File(cache_path, "r") as f:
            cached_size = f.attrs.get("img_size")
            # A missing attribute means a legacy cache: trust it as before.
            if cached_size is None or [int(v) for v in cached_size] == requested_size:
                print(f"⚡ Loading cache: {cache_path}")
                return tuple(f[key][:] for key in dataset_keys)
        print(
            f"Cache {cache_path} was built with a different img_size "
            f"(requested {tuple(requested_size)}); rebuilding"
        )

    print("🛠️ Building BINARY_ANY dataset")

    # ---- TRAIN ----
    X_train, fn_train = load_and_resize_images(train_image_dir, img_size)
    raw_train = parse_cvat_xml_all_labels(train_xml)
    labels_train = label_Y_binary(raw_train)
    Y_train = build_label_array(fn_train, labels_train)

    # ---- TEST ----
    X_test, fn_test = load_and_resize_images(test_image_dir, img_size)
    raw_test = parse_cvat_xml_all_labels(test_xml)
    labels_test = label_Y_binary(raw_test)
    Y_test = build_label_array(fn_test, labels_test)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # run never leaves a truncated file at cache_path for the next run to load.
    tmp_path = f"{cache_path}.tmp"
    try:
        with h5py.File(tmp_path, "w") as f:
            for key, array in zip(dataset_keys, (X_train, Y_train, X_test, Y_test)):
                f.create_dataset(key, data=array, compression="gzip")
            f.attrs["img_size"] = requested_size
        os.replace(tmp_path, cache_path)
    finally:
        # Only still present if the write or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    print(f"✅ Cached to {cache_path}")
    return X_train, Y_train, X_test, Y_test


In [None]:
# Build (or reload from the HDF5 cache) the 64x64 BINARY_ANY split.
X_train, Y_train, X_test, Y_test = build_or_load_binary_any_dataset(
    img_size=(64, 64),
    # train split
    train_image_dir="../EIDSeg_Dataset/data/train/images/default",
    train_xml="../EIDSeg_Dataset/data/train/train.xml",
    # test split
    test_image_dir="../EIDSeg_Dataset/data/test/images/default",
    test_xml="../EIDSeg_Dataset/data/test/test.xml",
    # where the built arrays are cached
    cache_path="../EIDSeg_Dataset/cache/eidseg_64x64_binary_any.h5",
)


🛠️ Building BINARY_ANY dataset


In [None]:
def build_or_load_destroyed_coverage_dataset(
    cache_path,
    train_image_dir,
    test_image_dir,
    train_xml,
    test_xml,
    img_size=(64, 64),
    min_coverage=0.3
):
    """Return the DESTROYED_COVERAGE train/test arrays, reusing an HDF5 cache if valid.

    When ``cache_path`` exists and was written for the same ``img_size`` and
    ``min_coverage`` (a cache that lacks either attribute, as files produced
    before the attributes were recorded do, is accepted for that attribute),
    the four datasets are read back from it. Otherwise the arrays are rebuilt
    with the project helpers and the cache file is (re)written.

    Args:
        cache_path: HDF5 file used as the cache. Its parent directory is
            created when missing; a bare filename without a directory part
            is accepted.
        train_image_dir: Forwarded to ``load_and_resize_images``.
        test_image_dir: Forwarded to ``load_and_resize_images``.
        train_xml: Forwarded to ``parse_destroyed_with_size_check``.
        test_xml: Forwarded to ``parse_destroyed_with_size_check``.
        img_size: Forwarded unchanged to ``load_and_resize_images``. Must be
            a sequence of integers so it can be stored in the cache metadata.
        min_coverage: Forwarded as ``min_coverage=`` to
            ``parse_destroyed_with_size_check``. Must be convertible to
            ``float`` so it can be stored in the cache metadata.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``.
    """
    dataset_keys = ("X_train", "Y_train", "X_test", "Y_test")
    requested_size = [int(v) for v in img_size]
    requested_coverage = float(min_coverage)

    # os.path.dirname() yields "" for a bare filename and os.makedirs("")
    # raises, so only create the directory when there is one to create.
    cache_dir = os.path.dirname(cache_path)
    if cache_dir:
        os.makedirs(cache_dir, exist_ok=True)

    if os.path.exists(cache_path):
        with h5py.File(cache_path, "r") as f:
            cached_size = f.attrs.get("img_size")
            cached_coverage = f.attrs.get("min_coverage")
            # A missing attribute means a legacy cache: trust it as before.
            size_ok = (
                cached_size is None
                or [int(v) for v in cached_size] == requested_size
            )
            coverage_ok = (
                cached_coverage is None
                or float(cached_coverage) == requested_coverage
            )
            if size_ok and coverage_ok:
                print(f"⚡ Loading cache: {cache_path}")
                return tuple(f[key][:] for key in dataset_keys)
        print(
            f"Cache {cache_path} was built with different parameters "
            f"(requested img_size={tuple(requested_size)}, "
            f"min_coverage={requested_coverage}); rebuilding"
        )

    print("🛠️ Building DESTROYED_COVERAGE dataset")

    # ---- TRAIN ----
    X_train, fn_train = load_and_resize_images(train_image_dir, img_size)
    labels_train = parse_destroyed_with_size_check(
        train_xml, min_coverage=min_coverage
    )
    Y_train = build_label_array(fn_train, labels_train)

    # ---- TEST ----
    X_test, fn_test = load_and_resize_images(test_image_dir, img_size)
    labels_test = parse_destroyed_with_size_check(
        test_xml, min_coverage=min_coverage
    )
    Y_test = build_label_array(fn_test, labels_test)

    # Write to a sibling temp file and rename it into place, so an interrupted
    # run never leaves a truncated file at cache_path for the next run to load.
    tmp_path = f"{cache_path}.tmp"
    try:
        with h5py.File(tmp_path, "w") as f:
            for key, array in zip(dataset_keys, (X_train, Y_train, X_test, Y_test)):
                f.create_dataset(key, data=array, compression="gzip")
            f.attrs["img_size"] = requested_size
            f.attrs["min_coverage"] = requested_coverage
        os.replace(tmp_path, cache_path)
    finally:
        # Only still present if the write or the rename failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)

    print(f"✅ Cached to {cache_path}")
    return X_train, Y_train, X_test, Y_test


In [None]:
# Build (or reload from the HDF5 cache) the 64x64 DESTROYED_COVERAGE split
# with a 0.3 minimum-coverage threshold.
# NOTE: this rebinds X_train / Y_train / X_test / Y_test, the same names the
# BINARY_ANY cell earlier in this notebook assigns, so whichever of the two
# cells ran last determines what those names hold.
X_train, Y_train, X_test, Y_test = build_or_load_destroyed_coverage_dataset(
    cache_path="../EIDSeg_Dataset/cache/eidseg_64x64_destroyed_cov30.h5",
    train_image_dir="../EIDSeg_Dataset/data/train/images/default",
    test_image_dir="../EIDSeg_Dataset/data/test/images/default",
    train_xml="../EIDSeg_Dataset/data/train/train.xml",
    test_xml="../EIDSeg_Dataset/data/test/test.xml",
    img_size=(64, 64),
    min_coverage=0.3,
)
