In [1]:
import os
import numpy as np
from glob import glob
from datetime import datetime

In [2]:
# Paths.
# input_folder is the directory that is searched for the per-date "*_mask.npy" files.
# output_folder is a sub-directory of input_folder that receives the aggregated
# masks; it is created up front (no error if it already exists).
# NOTE(review): the "5pct" suffix presumably mirrors the 5 % of invalid pixels
# tolerated by the aggregation threshold - keep both in sync if either changes.
input_folder = "data/8.477_47.336_8.605_47.417"
output_folder = os.path.join(input_folder, "aggregated-5pct")
os.makedirs(output_folder, exist_ok=True)

# to load and sort files
def extract_date(filename):
    """Parse the date that prefixes a mask file name.

    Only the base name of ``filename`` is inspected; everything before its
    first underscore must be a ``YYYY-MM-DD`` date.

    Args:
        filename: Path (or bare name) of a file such as ``2017-03-28_mask.npy``.

    Returns:
        datetime: The parsed date (time set to midnight).

    Raises:
        ValueError: If the prefix is not a ``YYYY-MM-DD`` date.
    """
    date_part, _, _ = os.path.basename(filename).partition("_")
    return datetime.strptime(date_part, "%Y-%m-%d")

# All "*_mask.npy" files directly inside input_folder, in chronological order
# of the date encoded in their file name.
file_paths = glob(os.path.join(input_folder, "*_mask.npy"))
file_paths.sort(key=extract_date)

# assignment priorities: RGB class colours, tried in this order when filling
priority_values = [
    np.array(rgb)
    for rgb in (
        (255, 0, 0),  # red: building
        (0, 0, 255),  # blue: water
        (0, 255, 0),  # green: green space
    )
]

# helper
def is_valid(pixel):
    """Tell whether a single RGB pixel carries usable information.

    A pixel is invalid when all of its channels are 0 (empty) or all of them
    are 255 (cloud); every other value counts as valid.

    Args:
        pixel: Array of channel values for one pixel.

    Returns:
        bool: ``True`` if the pixel is neither empty nor cloud.
    """
    is_empty = np.all(pixel == [0, 0, 0])
    is_cloud = np.all(pixel == [255, 255, 255])
    return not is_empty and not is_cloud

def match_priority(value):
    """Tell whether ``value`` equals one of the colours in ``priority_values``.

    Args:
        value: Array of channel values for one pixel.

    Returns:
        bool: ``True`` if every channel matches some priority colour,
        ``False`` otherwise.
    """
    return any(np.all(value == colour) for colour in priority_values)

def fill_pixels(base, candidate):
    """Fill the invalid pixels of ``base`` with class colours from ``candidate``.

    A pixel of ``base`` is invalid when its last axis is all 0 or all 255.
    Such a pixel is replaced only if the pixel at the same position in
    ``candidate`` equals one of the colours in ``priority_values``; the
    colours are tried in list order. ``base`` itself is not modified.

    Args:
        base: Image array whose last axis holds the colour channels.
        candidate: Image array that supplies replacement pixels; it is
            compared against the priority colours along its last axis.

    Returns:
        A copy of ``base`` with the fillable invalid pixels replaced.
    """
    is_black = np.all(base == [0, 0, 0], axis=-1)
    is_white = np.all(base == [255, 255, 255], axis=-1)
    still_invalid = is_black | is_white

    result = base.copy()
    for colour in priority_values:
        # Pixels that are still unfilled and show this colour in the candidate.
        take = still_invalid & np.all(candidate == colour, axis=-1)
        result[take] = colour
        still_invalid[take] = False  # these positions are filled now
    return result

In [4]:
# aggregation loop
# Walk through the date-sorted mask files. Each pass loads one file as the
# base, merges the following files into it with fill_pixels() until the share
# of valid pixels reaches `fill_until` (or the files run out), saves the
# result under the date of the last file merged, and restarts after that file.
i = 0
n = len(file_paths)

fill_until = 0.95  # fill image until 95 % are valid pixels

while i < n:
    base = np.load(file_paths[i])
    base_date = extract_date(file_paths[i])
    last_used = base_date

    # NOTE(review): the base is never checked on its own, so at least one later
    # file is always merged when one exists - confirm this is intended.
    j = i + 1
    while j < n:
        candidate = np.load(file_paths[j])
        base = fill_pixels(base, candidate)
        last_used = extract_date(file_paths[j])

        # Vectorised validity check: a pixel is invalid when all channels are 0
        # or all are 255. This gives the same mask as testing each pixel
        # individually, without a Python-level call per pixel.
        pixels = base.reshape(-1, 3)
        invalid_pixels = (
            np.all(pixels == [0, 0, 0], axis=-1)
            | np.all(pixels == [255, 255, 255], axis=-1)
        )
        valid_pixels = ~invalid_pixels
        if valid_pixels.sum() / len(valid_pixels) >= fill_until:
            break
        j += 1

    # Save result
    # round() rather than int(): int() truncates, so thresholds whose product
    # lands just below an integer in floating point (e.g. 0.57 * 100) would be
    # labelled one percent too low.
    # NOTE(review): a final aggregate that never reached the threshold is
    # saved as well - confirm that is wanted.
    out_name = f"{last_used.strftime('%Y-%m-%d')}_aggregated_{round(fill_until * 100)}_mask.npy"
    out_path = os.path.join(output_folder, out_name)
    np.save(out_path, base)
    print(f"Saved aggregated file: {out_path}")

    # Advance: the next base is the file after the last one merged
    i = j + 1

Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-03-28_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-04-10_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-05-10_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-05-27_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-06-26_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-07-06_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-08-15_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-08-23_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-09-24_aggregated_mask.npy
Saved aggregated file: data/8.477_47.336_8.605_47.417/aggregated-5pct/2017-10-17_aggregated