In [1]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("meteahishali/aerial-imagery-for-standing-dead-tree-segmentation")

print("Path to dataset files:", path)

  from .autonotebook import tqdm as notebook_tqdm


Downloading from https://www.kaggle.com/api/v1/datasets/download/meteahishali/aerial-imagery-for-standing-dead-tree-segmentation?dataset_version_number=1...


100%|██████████| 258M/258M [03:38<00:00, 1.24MB/s] 

Extracting files...





Path to dataset files: /Users/evanchan19/.cache/kagglehub/datasets/meteahishali/aerial-imagery-for-standing-dead-tree-segmentation/versions/1


In [1]:
import os
os.getcwd()

'/Users/evanchan19/Desktop/COMP9517/project'

In [4]:
import os
import shutil
import random
from PIL import Image
import numpy as np
import pandas as pd

# 相对路径（你当前在 COMP9517/project 下）
dataset_dir = "data/USA_segmentation"
rgb_dir = os.path.join(dataset_dir, "RGB_images")
nrg_dir = os.path.join(dataset_dir, "NRG_images")
mask_dir = os.path.join(dataset_dir, "masks")

# 创建 train/val 子文件夹
for split in ["train", "val"]:
    for subfolder in ["RGB_images", "NRG_images", "masks"]:
        os.makedirs(os.path.join(dataset_dir, split, subfolder), exist_ok=True)

# 构建 {core_name: full_filename} 映射，例如 core_name = ar037_2019_n_06_04_0.png
def build_core_map(directory, prefix):
    return {
        f[len(prefix):]: f
        for f in os.listdir(directory)
        if f.startswith(prefix) and f.lower().endswith((".png", ".jpg", ".jpeg"))
    }

# 三类文件的映射
rgb_map = build_core_map(rgb_dir, "RGB_")
nrg_map = build_core_map(nrg_dir, "NRG_")
mask_map = build_core_map(mask_dir, "mask_")

# 取三者共有的 core name
matched_core_names = sorted(list(set(rgb_map) & set(nrg_map) & set(mask_map)))
random.seed(42)
random.shuffle(matched_core_names)

# 80/20 拆分
split_index = int(len(matched_core_names) * 0.8)
train_cores = matched_core_names[:split_index]
val_cores = matched_core_names[split_index:]

# 拷贝文件到对应位置
def copy_from_map(core_list, split):
    for core in core_list:
        shutil.copy(os.path.join(rgb_dir, rgb_map[core]), os.path.join(dataset_dir, split, "RGB_images", rgb_map[core]))
        shutil.copy(os.path.join(nrg_dir, nrg_map[core]), os.path.join(dataset_dir, split, "NRG_images", nrg_map[core]))
        shutil.copy(os.path.join(mask_dir, mask_map[core]), os.path.join(dataset_dir, split, "masks", mask_map[core]))

copy_from_map(train_cores, "train")
copy_from_map(val_cores, "val")

# 检查 mask 是否为灰度图并且是二值图（0 或 255）
invalid_masks = []

def validate_masks(mask_folder):
    for fname in os.listdir(mask_folder):
        try:
            img = Image.open(os.path.join(mask_folder, fname)).convert("L")
            arr = np.array(img)
            if arr.ndim != 2 or not np.isin(arr, [0, 255]).all():
                invalid_masks.append(os.path.join(mask_folder, fname))
        except Exception:
            invalid_masks.append(os.path.join(mask_folder, fname))

validate_masks(os.path.join(dataset_dir, "train", "masks"))
validate_masks(os.path.join(dataset_dir, "val", "masks"))

# 输出无效文件（如果有）
if invalid_masks:
    print("\n❌ Invalid mask files (non-binary or wrong format):")
    for f in invalid_masks:
        print(" -", f)
else:
    print("\n✅ All masks valid: grayscale + binary (0/255)")



✅ All masks valid: grayscale + binary (0/255)


In [1]:
import os
import cv2
import numpy as np

def analyze_image_sizes(image_dir):
    heights, widths = [], []
    image_files = sorted([
        f for f in os.listdir(image_dir)
        if f.lower().endswith(('.png', '.jpg', '.jpeg'))
    ])

    for fname in image_files:
        path = os.path.join(image_dir, fname)
        img = cv2.imread(path)
        if img is None:
            print(f"⚠️ Failed to load: {fname}")
            continue
        h, w = img.shape[:2]
        heights.append(h)
        widths.append(w)

    heights = np.array(heights)
    widths = np.array(widths)

    print(f"✅ Scanned {len(heights)} images in: {image_dir}")
    print(f"- Unique image sizes: {set(zip(heights, widths))}")
    print(f"- Average size: {np.mean(widths):.1f} x {np.mean(heights):.1f}")
    print(f"- Min size: {np.min(widths)} x {np.min(heights)}")
    print(f"- Max size: {np.max(widths)} x {np.max(heights)}")

    return widths, heights

# 替换为你要分析的目录（RGB 或 NRG）
image_dir = "/Users/evanchan19/Desktop/COMP9517/project/data/USA_segmentation/train/RGB_images"
analyze_image_sizes(image_dir)


✅ Scanned 355 images in: /Users/evanchan19/Desktop/COMP9517/project/data/USA_segmentation/train/RGB_images
- Unique image sizes: {(393, 410), (376, 398), (422, 421), (384, 412), (354, 380), (326, 349), (318, 336), (365, 394), (372, 379), (386, 378), (348, 363), (385, 366), (462, 487), (414, 413), (344, 364), (380, 378), (390, 410), (547, 547), (522, 563), (445, 448), (352, 363), (357, 348), (454, 417), (362, 358), (356, 363), (378, 384), (357, 379), (358, 379), (366, 395), (329, 352), (568, 614), (442, 470), (339, 366), (350, 345), (541, 528), (461, 449), (321, 342), (349, 371), (424, 414), (321, 354), (307, 338), (342, 357), (343, 361), (362, 360), (419, 417), (350, 362), (379, 421), (416, 425), (324, 354), (370, 363), (458, 457), (340, 333), (401, 425), (365, 358), (415, 443), (447, 475), (439, 430), (366, 405), (343, 358), (364, 374), (352, 342), (333, 343), (338, 345), (360, 398), (315, 340), (361, 399), (466, 463), (433, 450), (353, 389), (387, 370), (390, 408), (334, 366), (356, 

(array([366, 396, 352, 366, 354, 399, 389, 362, 413, 346, 354, 379, 465,
        547, 443, 346, 448, 392, 361, 342, 352, 369, 349, 415, 358, 433,
        370, 360, 396, 564, 364, 347, 411, 343, 379, 398, 392, 403, 468,
        374, 392, 364, 426, 344, 388, 391, 342, 361, 449, 414, 448, 413,
        329, 404, 408, 338, 450, 367, 375, 351, 379, 363, 493, 440, 360,
        418, 392, 373, 359, 363, 354, 356, 355, 353, 390, 368, 371, 470,
        402, 420, 397, 372, 387, 370, 393, 428, 390, 411, 343, 384, 425,
        410, 368, 354, 344, 371, 352, 379, 374, 347, 421, 381, 366, 346,
        362, 466, 380, 363, 410, 362, 408, 379, 371, 432, 343, 398, 317,
        333, 330, 329, 400, 345, 374, 347, 343, 367, 403, 395, 385, 360,
        358, 373, 378, 405, 427, 341, 421, 378, 345, 407, 392, 334, 352,
        343, 395, 362, 349, 354, 371, 443, 380, 345, 365, 376, 342, 427,
        430, 394, 363, 404, 482, 388, 338, 351, 475, 330, 351, 366, 390,
        323, 337, 379, 437, 426, 396, 367, 412, 344