<a href="https://colab.research.google.com/github/MatP-DS/MasterThesis/blob/main/npz_pipeline_y_labels_full.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install rasterio

import numpy as np
import rasterio
# Safely mount Google Drive only if not mounted
import os
from google.colab import drive

if not os.path.ismount('/content/drive'):
    drive.mount('/content/drive')

In [None]:
from rasterio.warp import reproject, Resampling

# -------------------------
# Step 1: Input / output locations
# -------------------------
label_raster_path = "/content/drive/MyDrive/MasterThesis/01_raw_data/03_groundtruth_hansen/gfc_loss_clean_2023.tif"
ref_image_path = "/content/drive/MyDrive/MasterThesis/01_raw_data/01_sentinel2/full_feature_stack/S2_full_2020_02.tif"
output_path = "/content/drive/MyDrive/MasterThesis/02_preprocessed_data/y_labels_2023_fromstack.npz"

# -------------------------
# Step 2: Read the reference stack; it defines the target grid and
#         which pixels count as valid
# -------------------------
with rasterio.open(ref_image_path) as ref:
    ref_meta = ref.meta
    ref_img = ref.read().astype(np.float32)  # all bands: (bands, rows, cols)

target_shape = (ref_meta["height"], ref_meta["width"])
target_transform = ref_meta["transform"]

# A pixel is valid when no band is NaN and the per-pixel sum across
# bands is non-zero.
valid_mask = ~np.isnan(ref_img).any(axis=0) & (ref_img.sum(axis=0) != 0)

# -------------------------
# Step 3: Read band 1 of the label raster and warp it onto the
#         reference grid
# -------------------------
with rasterio.open(label_raster_path) as src:
    loss_raw = src.read(1).astype(np.uint8)
    loss_src_transform = src.transform
    loss_src_crs = src.crs

# Destination starts as zeros, so any cell the warp does not write
# keeps the value 0.
loss_resampled = np.zeros(target_shape, dtype=np.uint8)

# Nearest-neighbour resampling copies source values as-is (no
# interpolation between label codes).
reproject(
    source=loss_raw,
    destination=loss_resampled,
    src_transform=loss_src_transform,
    src_crs=loss_src_crs,
    dst_transform=target_transform,
    dst_crs=ref_meta["crs"],
    resampling=Resampling.nearest,
)

# -------------------------
# Step 4: Keep labels at valid pixels only, binarised
# -------------------------
# Boolean indexing flattens the result to 1-D; value 1 stays 1 and every
# other value becomes 0.
y_labels = (loss_resampled[valid_mask] == 1).astype(np.uint8)

# -------------------------
# Step 5: Write the label vector to a compressed .npz under key "y"
# -------------------------
np.savez_compressed(output_path, y=y_labels)
print(f"✅ Saved: {y_labels.shape} labels to {output_path}")

✅ Saved: (466708,) labels to /content/drive/MyDrive/MasterThesis/02_preprocessed_data/y_labels_2023_fromstack.npz
