In [None]:
import requests
import pandas as pd
import os
import phidown
print(phidown.__version__)
from phidown.search import CopernicusDataSearcher
from pathlib import Path
from omnicloudmask import predict_from_load_func, load_s2
import rasterio
from rasterio.mask import mask
from shapely import wkt
import xarray as xr
import numpy as np

# Catalog search / Phidown

In [None]:
# WKT polygon of the area of interest.
# NOTE: `aoi` is not passed to the query below (aoi_wkt=None); scenes are
# selected through the tileId attribute instead.
aoi = "POLYGON((-153.5 70.5,-153 70.5,-153 71,-153.5 71,-153.5 70.5))"

# All search parameters in one place so they are easy to tweak.
search_filter = dict(
    collection_name='SENTINEL-2',
    product_type='S2MSI2A',
    orbit_direction=None,
    cloud_cover_threshold=40,
    aoi_wkt=None,
    start_date='2018-07-01T00:00:00',
    end_date='2018-08-30T04:00:00',
    attributes={
        'tileId': '05WMU',
    },
    top=1000,
)

searcher = CopernicusDataSearcher()
searcher.query_by_filter(**search_filter)

# Run the configured query and keep the result for inspection.
results = searcher.execute_query()
print(f"Number of results: {len(results)}")

# Show up to 15 of the returned entries.
searcher.display_results(top_n=15)

# Download cloudy scene / Phidown

In [None]:
from phidown.search import CopernicusDataSearcher
# Download a single product by name using the `searcher` created in the search cell.
# Set the config file name to .s5cfg for automated authentication.
searcher.download_product(
    'S2B_MSIL2A_20220721T222539_N0510_R015_T05WMU_20240712T010752.SAFE',
    output_dir='/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/raw_2018',
)

# SAFE to Tiff with clipping

In [None]:
import os
import glob
import numpy as np
import rasterio
from rasterio.mask import mask
from rasterio.warp import reproject, Resampling
from shapely import wkt
from shapely.ops import transform
import pyproj

# CONFIG --------------------
# Downloaded .SAFE product to convert.
SAFE_PATH = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/raw_2018/S2B_MSIL2A_20220721T222539_N0510_R015_T05WMU_20240712T010752.SAFE"
# Clip polygon as WKT; it is reprojected from EPSG:4326 to the raster CRS before clipping.
AOI_WKT = "POLYGON((-154 70,-153 70,-153 71,-154 71,-154 70))"
# Destination of the clipped multi-band stack.
OUTPUT_TIFF = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/masking_tests/Geotiffs_masking/cloudtest_clipped_new.tif"

# Band file suffix -> resolution folder (R10m / R20m) the file lives in.
# Insertion order defines the band order of the output stack.
BANDS = dict.fromkeys(("B02_10m", "B03_10m", "B04_10m", "B08_10m"), "10m")
BANDS.update(dict.fromkeys(("B11_20m", "B12_20m"), "20m"))

# HELPERS -------------------

def find_img_data_path(safe_path, res):
    """Return the ``IMG_DATA/R<res>`` directory of the granule inside a .SAFE product.

    Parameters
    ----------
    safe_path : str
        Path to the ``.SAFE`` product directory.
    res : str
        Resolution folder suffix, e.g. ``"10m"`` or ``"20m"``.

    Returns
    -------
    str
        ``<safe_path>/GRANULE/<granule>/IMG_DATA/R<res>``. The returned path
        itself is not checked for existence.

    Raises
    ------
    FileNotFoundError
        If ``GRANULE`` is missing or contains no granule sub-directory.
    """
    granule_dir = os.path.join(safe_path, "GRANULE")
    # os.listdir() returns entries in arbitrary order and may include stray
    # files or hidden folders (e.g. .ipynb_checkpoints); keep only visible
    # directories and sort them so the choice is deterministic.
    granule_names = sorted(
        name
        for name in os.listdir(granule_dir)
        if not name.startswith(".") and os.path.isdir(os.path.join(granule_dir, name))
    )
    if not granule_names:
        raise FileNotFoundError(f"No granule directory found in {granule_dir}")
    return os.path.join(granule_dir, granule_names[0], "IMG_DATA", f"R{res}")

def reproject_geom(geom_wkt, raster_crs):
    """Parse a WKT geometry given in EPSG:4326 and return it reprojected to ``raster_crs``.

    Coordinates are handled in x/y order (``always_xy=True``).
    """
    source_geom = wkt.loads(geom_wkt)
    to_raster_crs = pyproj.Transformer.from_crs("EPSG:4326", raster_crs, always_xy=True)
    return transform(to_raster_crs.transform, source_geom)

def clip_and_resample(path, aoi_geom, ref_profile):
    """Clip a raster to the AOI and, if its pixel size differs, resample it to the reference grid.

    Parameters
    ----------
    path : str
        Raster file to open with rasterio.
    aoi_geom : geometry
        Clip polygon, already expressed in the raster's CRS.
    ref_profile : mapping
        Reference grid; its "transform", "height", "width" and "crs" entries are used.

    Returns
    -------
    tuple
        ``(array, profile)``. When the raster's pixel size equals the reference
        pixel size, the first clipped band and a profile describing the clip are
        returned. Otherwise a 2-D array on the reference grid (bilinear
        resampling) and ``ref_profile`` itself are returned.
    """
    with rasterio.open(path) as src:
        clipped, clipped_transform = mask(src, [aoi_geom], crop=True)

        # Same pixel size as the reference -> no resampling needed
        same_resolution = src.res[0] == ref_profile["transform"][0]
        if same_resolution:
            clipped_profile = src.profile.copy()
            clipped_profile.update(
                height=clipped.shape[1],
                width=clipped.shape[2],
                transform=clipped_transform,
            )
            return clipped[0], clipped_profile

        # Different pixel size: warp the clipped data onto the reference grid
        target_shape = (ref_profile["height"], ref_profile["width"])
        resampled = np.empty(target_shape, dtype=clipped.dtype)
        reproject(
            source=clipped,
            destination=resampled,
            src_transform=clipped_transform,
            src_crs=src.crs,
            dst_transform=ref_profile["transform"],
            dst_crs=ref_profile["crs"],
            resampling=Resampling.bilinear,
        )
        return resampled, ref_profile


# MAIN -------------------

def _find_band_file(safe_path, band_key, res):
    """Return the JP2 file whose name ends in ``_<band_key>.jp2`` inside the R<res> folder."""
    pattern = os.path.join(find_img_data_path(safe_path, res), f"*_{band_key}.jp2")
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No file matching {pattern}")
    return matches[0]


def process_bands(safe_path, bands, aoi_wkt, output_tif):
    """Clip the requested bands of a .SAFE product to the AOI and write them as one GeoTIFF.

    Parameters
    ----------
    safe_path : str
        Path to the ``.SAFE`` product directory.
    bands : dict
        Maps band file suffix (e.g. ``"B02_10m"``) to resolution folder
        (``"10m"`` / ``"20m"``). Iteration order is the output band order.
        At least one 10m band is required; it defines the output grid.
    aoi_wkt : str
        AOI polygon as WKT in EPSG:4326.
    output_tif : str
        Destination path of the stacked GeoTIFF.

    Raises
    ------
    ValueError
        If ``bands`` contains no 10m band.
    FileNotFoundError
        If a band file cannot be found in the product.
    """
    # The output grid is taken from the first 10m band, independent of where it
    # sits in `bands` (previously a leading 20m band left the reference unset).
    reference_key = next((key for key, res in bands.items() if res == "10m"), None)
    if reference_key is None:
        raise ValueError("bands must contain at least one 10m band to define the output grid")
    reference_path = _find_band_file(safe_path, reference_key, "10m")

    with rasterio.open(reference_path) as ref:
        # Reproject the AOI into the product's CRS, then clip once to get the grid
        aoi_geom = reproject_geom(aoi_wkt, ref.crs)
        clipped, clipped_transform = mask(ref, [aoi_geom], crop=True)
        ref_profile = ref.profile.copy()

    ref_profile.update({
        # The profile copied from the JP2 source carries driver="JP2OpenJPEG";
        # force GTiff so the .tif output really is a GeoTIFF.
        "driver": "GTiff",
        "height": clipped.shape[1],
        "width": clipped.shape[2],
        "transform": clipped_transform,
        "count": len(bands),
    })
    # Drop JP2 block/tiling settings that do not belong in the GeoTIFF profile
    for key in ("blockxsize", "blockysize", "tiled", "interleave"):
        ref_profile.pop(key, None)

    arrays = []
    for band_key, res in bands.items():
        band_path = _find_band_file(safe_path, band_key, res)
        band_array, _ = clip_and_resample(band_path, aoi_geom, ref_profile)
        arrays.append(band_array)

    # Stack into multiband
    stack = np.stack(arrays)

    with rasterio.open(output_tif, "w", **ref_profile) as dst:
        dst.write(stack)

    print(f"‚úÖ Saved: {output_tif}")

# RUN --------------------
# Clip and stack the configured bands of SAFE_PATH into OUTPUT_TIFF.
process_bands(
    safe_path=SAFE_PATH,
    bands=BANDS,
    aoi_wkt=AOI_WKT,
    output_tif=OUTPUT_TIFF,
)


# SAFE to Tiff with clipping & SCL masking integrated

In [None]:
import os
import glob
import numpy as np
import rasterio
from rasterio.mask import mask
from rasterio.warp import reproject, Resampling
from shapely import wkt
from shapely.ops import transform
import pyproj

# ------------------------------------------------------
# CONFIG
# ------------------------------------------------------
# Downloaded .SAFE product to convert.
SAFE_PATH = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/raw_2018/S2B_MSIL2A_20220721T222539_N0510_R015_T05WMU_20240712T010752.SAFE"
# Clip polygon as WKT; it is reprojected from EPSG:4326 to the raster CRS before clipping.
AOI_WKT = "POLYGON((-154 70,-153 70,-153 71,-154 71,-154 70))"
# Destination of the SCL-masked stack (the accidental doubled ".tif.tif" extension was removed).
OUTPUT_TIFF = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/masking_tests/Geotiffs_masking/cloudtest_clipped_scl_new.tif"

# Band file suffix -> resolution folder (R10m / R20m) the file lives in.
# Insertion order defines the band order of the output stack.
BANDS = {
    "B02_10m": "10m",
    "B03_10m": "10m",
    "B04_10m": "10m",
    "B08_10m": "10m",
    "B11_20m": "20m",
    "B12_20m": "20m"
}

# SCL CLASSES TO MASK (invalid pixels)
SCL_INVALID = [3, 7, 8, 9, 10, 11]
# 3 = cloud shadow, 7 = unclassified, 8 = cloud (medium probability),
# 9 = cloud (high probability), 10 = thin cirrus, 11 = snow/ice
# Class 0 is not listed here; pixels that are zero in every band are set to
# NaN separately when the stack is written.


# ------------------------------------------------------
# HELPERS
# ------------------------------------------------------

def find_img_data_path(safe_path, res):
    """Return the ``IMG_DATA/R<res>`` directory of the granule inside a .SAFE product.

    Parameters
    ----------
    safe_path : str
        Path to the ``.SAFE`` product directory.
    res : str
        Resolution folder suffix, e.g. ``"10m"`` or ``"20m"``.

    Returns
    -------
    str
        ``<safe_path>/GRANULE/<granule>/IMG_DATA/R<res>``. The returned path
        itself is not checked for existence.

    Raises
    ------
    FileNotFoundError
        If ``GRANULE`` is missing or contains no granule sub-directory.
    """
    granule_dir = os.path.join(safe_path, "GRANULE")
    # os.listdir() returns entries in arbitrary order and may include stray
    # files or hidden folders (e.g. .ipynb_checkpoints); keep only visible
    # directories and sort them so the choice is deterministic.
    granule_names = sorted(
        name
        for name in os.listdir(granule_dir)
        if not name.startswith(".") and os.path.isdir(os.path.join(granule_dir, name))
    )
    if not granule_names:
        raise FileNotFoundError(f"No granule directory found in {granule_dir}")
    return os.path.join(granule_dir, granule_names[0], "IMG_DATA", f"R{res}")


def reproject_geom(geom_wkt, raster_crs):
    """Parse a WKT geometry given in EPSG:4326 and return it reprojected to ``raster_crs``.

    Coordinates are handled in x/y order (``always_xy=True``).
    """
    source_geom = wkt.loads(geom_wkt)
    to_raster_crs = pyproj.Transformer.from_crs("EPSG:4326", raster_crs, always_xy=True)
    return transform(to_raster_crs.transform, source_geom)


def clip_and_resample(path, aoi_geom, ref_profile, resampling=Resampling.bilinear):
    """Read a raster, clip it to the AOI and resample it onto the reference grid.

    Parameters
    ----------
    path : str
        Raster file to open with rasterio.
    aoi_geom : geometry
        Clip polygon, already expressed in the raster's CRS.
    ref_profile : mapping
        Reference grid; its "transform", "height", "width" and "crs" entries are used.
    resampling : rasterio.warp.Resampling, optional
        Method used when the raster has to be resampled. Defaults to bilinear
        (suitable for reflectance); pass ``Resampling.nearest`` for categorical
        rasters such as the SCL layer so that no new class values are invented.

    Returns
    -------
    tuple
        ``(array, profile)``. When the raster's pixel size equals the reference
        pixel size, the first clipped band and a profile describing the clip are
        returned. Otherwise a 2-D array on the reference grid and
        ``ref_profile`` itself are returned.
    """
    with rasterio.open(path) as src:
        clipped, clipped_transform = mask(src, [aoi_geom], crop=True)
        clipped_profile = src.profile.copy()
        clipped_profile.update({
            "height": clipped.shape[1],
            "width": clipped.shape[2],
            "transform": clipped_transform
        })

        # No resampling needed (pixel size already matches the reference grid)
        if src.res[0] == ref_profile["transform"][0]:
            return clipped[0], clipped_profile

        # Resample onto the reference grid (e.g. 20m -> 10m)
        dst = np.empty((ref_profile["height"], ref_profile["width"]), dtype=clipped.dtype)

        reproject(
            source=clipped,
            destination=dst,
            src_transform=clipped_transform,
            src_crs=src.crs,
            dst_transform=ref_profile["transform"],
            dst_crs=ref_profile["crs"],
            resampling=resampling
        )

        return dst, ref_profile


# ------------------------------------------------------
# MAIN
# ------------------------------------------------------

def _find_band_file(safe_path, band_key, res):
    """Return the JP2 file whose name ends in ``_<band_key>.jp2`` inside the R<res> folder."""
    pattern = os.path.join(find_img_data_path(safe_path, res), f"*_{band_key}.jp2")
    matches = sorted(glob.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"No file matching {pattern}")
    return matches[0]


def process_bands(safe_path, bands, aoi_wkt, output_tif):
    """Clip, SCL-mask and stack the requested bands of a .SAFE product into one GeoTIFF.

    Pixels whose SCL class is listed in the module-level ``SCL_INVALID`` are set
    to NaN in every band, as are pixels that are zero in all bands. The output
    is written as float32 GeoTIFF with NaN as nodata.

    Parameters
    ----------
    safe_path : str
        Path to the ``.SAFE`` product directory.
    bands : dict
        Maps band file suffix (e.g. ``"B02_10m"``) to resolution folder
        (``"10m"`` / ``"20m"``). Iteration order is the output band order.
        At least one 10m band is required; it defines the output grid.
    aoi_wkt : str
        AOI polygon as WKT in EPSG:4326.
    output_tif : str
        Destination path of the stacked GeoTIFF.

    Raises
    ------
    ValueError
        If ``bands`` contains no 10m band.
    FileNotFoundError
        If a band or the SCL file cannot be found in the product.
    """
    # ------------------------------------------------------
    # 1.-3. REFERENCE GRID (first 10m band) + AOI in product CRS
    # ------------------------------------------------------
    first_band_key = next((k for k, v in bands.items() if v == "10m"), None)
    if first_band_key is None:
        raise ValueError("bands must contain at least one 10m band to define the output grid")
    first_band_path = _find_band_file(safe_path, first_band_key, "10m")

    with rasterio.open(first_band_path) as ref:
        aoi_geom = reproject_geom(aoi_wkt, ref.crs)
        clipped, clipped_transform = mask(ref, [aoi_geom], crop=True)
        ref_profile = ref.profile.copy()

    ref_profile.update({
        # The profile copied from the JP2 source carries driver="JP2OpenJPEG"
        # and an integer dtype; the masked stack contains NaN, so write a
        # float32 GeoTIFF with NaN as nodata instead.
        "driver": "GTiff",
        "dtype": "float32",
        "nodata": np.nan,
        "height": clipped.shape[1],
        "width": clipped.shape[2],
        "transform": clipped_transform,
        "count": len(bands)
    })
    # Drop JP2 block/tiling settings that do not belong in the GeoTIFF profile
    for key in ("blockxsize", "blockysize", "tiled", "interleave"):
        ref_profile.pop(key, None)

    # ------------------------------------------------------
    # 4. READ + CLIP + RESAMPLE SCL MASK
    # ------------------------------------------------------
    scl_path = _find_band_file(safe_path, "SCL_20m", "20m")
    # SCL holds class codes: nearest-neighbour keeps them intact, whereas
    # bilinear interpolation would blend neighbouring codes into other classes.
    scl_array, _ = clip_and_resample(
        scl_path, aoi_geom, ref_profile, resampling=Resampling.nearest
    )

    valid_mask = ~np.isin(scl_array, SCL_INVALID)   # True = valid, False = cloud/shadow/etc.

    # ------------------------------------------------------
    # 5. PROCESS BANDS WITH ON-THE-FLY MASKING
    # ------------------------------------------------------
    arrays = []
    for band_key, res in bands.items():
        band_path = _find_band_file(safe_path, band_key, res)
        band_array, _ = clip_and_resample(band_path, aoi_geom, ref_profile)

        # Apply SCL mask (float32 so that NaN can be stored)
        masked_array = np.where(valid_mask, band_array.astype("float32"), np.float32(np.nan))
        arrays.append(masked_array)

    # ------------------------------------------------------
    # 6. Write stacked output
    # ------------------------------------------------------
    stack = np.stack(arrays)

    # Pixels that are zero in every band (e.g. no-data strips along tile edges
    # or the fill outside the AOI polygon) are treated as nodata as well.
    all_zero_mask = np.all(stack == 0, axis=0)
    stack[:, all_zero_mask] = np.nan

    with rasterio.open(output_tif, "w", **ref_profile) as dst:
        dst.write(stack)

    print(f"‚úÖ Saved masked, clipped, stacked TIFF ‚Üí {output_tif}")


# ------------------------------------------------------
# RUN
# ------------------------------------------------------
# Clip, SCL-mask and stack the configured bands of SAFE_PATH into OUTPUT_TIFF.
process_bands(
    safe_path=SAFE_PATH,
    bands=BANDS,
    aoi_wkt=AOI_WKT,
    output_tif=OUTPUT_TIFF,
)


# Apply current masking methods

In [None]:
import xarray as xr
import numpy as np
import rasterio
from rasterio.enums import Resampling
import rioxarray

# ------------------------------
# CONFIG
# ------------------------------
# Input: the clipped + stacked (not yet masked) GeoTIFF written by the plain clipping cell.
INPUT_TIFF = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/masking_tests/Geotiffs_masking/cloudtest_clipped_new.tif"
# NOTE: this reassigns OUTPUT_TIFF, which the clipping cells also define.
OUTPUT_TIFF = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/masking_tests/Geotiffs_masking/masked_brightness_new.tif"

APPLY_BRIGHTNESS_MASK = True
APPLY_OUTLIER_MASK = False

BRIGHTNESS_THRESHOLD = 2100   # reflectance > 0.21
# NOTE(review): Sentinel-2 L2A products with processing baseline >= 04.00 (the
# scene used here is N0510) are reported to carry a +1000 DN offset; if so,
# 2100 DN does not correspond to 0.21 reflectance — confirm the intended value.

# ------------------------------
# LOAD STACKED TIF
# ------------------------------
ds = xr.open_dataset(INPUT_TIFF, engine="rasterio")
scene = ds["band_data"] if "band_data" in ds else ds[list(ds.data_vars)[0]]
# scene dims: ("band", "y", "x")

print(f"üîç Loaded: {scene.shape} (bands, y, x)")

# ------------------------------
# OPTIONAL: BRIGHTNESS MASK
# ------------------------------
if APPLY_BRIGHTNESS_MASK:
    # Band order of the stack: 1 = B02 (blue), 2 = B03 (green), 3 = B04 (red)
    red   = scene.sel(band=3)
    green = scene.sel(band=2)
    blue  = scene.sel(band=1)

    # True = keep. A pixel is dropped only if it is bright in all three visible bands.
    bright_mask = ~((red > BRIGHTNESS_THRESHOLD) &
                    (green > BRIGHTNESS_THRESHOLD) &
                    (blue > BRIGHTNESS_THRESHOLD))

    # Apply to all bands (the 2-D mask broadcasts along "band")
    scene = scene.where(bright_mask)
    print("‚úÖ Applied brightness mask")

# ------------------------------
# OPTIONAL: OUTLIER MASK
# ------------------------------
if APPLY_OUTLIER_MASK:
    # Per-band statistics over the spatial dimensions
    scene_mean = scene.mean(dim=("y", "x"), skipna=True)
    scene_std  = scene.std(dim=("y", "x"), skipna=True)

    # xarray broadcasts the per-band bounds against ("band", "y", "x") by
    # dimension name, so no manual expansion to full image size is needed.
    lower = scene_mean - 3 * scene_std
    upper = scene_mean + 3 * scene_std

    # True = keep: value lies within mean +/- 3 standard deviations of its band
    outlier_mask = (scene >= lower) & (scene <= upper)
    scene = scene.where(outlier_mask)
    print("‚úÖ Applied outlier mask")

# ------------------------------
# SAVE RESULT
# ------------------------------
scene.rio.set_spatial_dims(x_dim="x", y_dim="y", inplace=True)
scene.rio.write_crs(ds.rio.crs, inplace=True)

scene.rio.to_raster(OUTPUT_TIFF)
print(f"‚úÖ Saved masked result to: {OUTPUT_TIFF}")


# Pytorch testing

In [None]:
import torch
import torchvision
import torchaudio

# Report the installed library versions
print(torch.__version__)
print(torchvision.__version__)
print(torchaudio.__version__)

# Report GPU availability. get_device_name(0) raises when no CUDA device is
# present, so only query the name if CUDA is actually available.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available")


# Create Omnicloudmask (Predict)

In [None]:
from pathlib import Path
from omnicloudmask import predict_from_load_func, load_s2

# Scenes (.SAFE folders) to run the cloud-mask prediction on
scene_paths = [
    Path("/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/raw_2018/S2B_MSIL2A_20220721T222539_N0510_R015_T05WMU_20240712T010752.SAFE"),
]

# Predict a mask for every scene, loading the data with OmniCloudMask's Sentinel-2 loader
pred_paths = predict_from_load_func(
    scene_paths=scene_paths,
    load_func=load_s2,
    batch_size=4,
)

# Runtime: about 30 s for one scene on the DGX with batch_size=4 and all other
# options left at their defaults. Possibly tunable further, see
# https://github.com/DPIRD-DMA/OmniCloudMask

# Apply (& Clip) Omnicloudmask to Tiff

In [None]:
import rasterio
import numpy as np
from shapely.geometry import box
from rasterio.mask import mask   

# Paths: clipped band stack, OmniCloudMask prediction for the full scene, and output
image_path = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/masking_tests/Geotiffs_masking/cloudtest_clipped_new.tif"
mask_path = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/raw_2018/S2B_MSIL2A_20220721T222539_N0510_R015_T05WMU_20240712T010752_OCM_v1_6_0.tif"
output_path = "/isipd/projects/p_alex/ALEX-Development/2025-05_Praktikum-Sina/Sentinel/masking_tests/Geotiffs_masking/OCM_mask_new.tif"

# --- Load the clipped image together with its profile and extent ---
with rasterio.open(image_path) as img_src:
    img_profile = img_src.profile
    img_bounds = img_src.bounds
    img_data = img_src.read()

# --- Rectangle covering exactly the image extent ---
aoi_geom = box(img_bounds.left, img_bounds.bottom, img_bounds.right, img_bounds.top)

# --- Cut the cloud mask down to that rectangle ---
with rasterio.open(mask_path) as mask_src:
    clipped_mask = mask(mask_src, [aoi_geom], crop=True)[0]

# Sanity check: mask and image must share the same height/width
print("Image:", img_data.shape)
print("Mask:", clipped_mask.shape)

assert clipped_mask.shape[1:] == img_data.shape[1:], "‚ùå Mask/image shape mismatch"

# --- Output profile: float32 GeoTIFF with NaN as nodata ---
img_profile.update({"driver": "GTiff", "dtype": "float32", "nodata": np.nan})

# Remove JP2-only metadata that breaks GTiff writing
for key in ("blockxsize", "blockysize", "interleave"):
    img_profile.pop(key, None)

# --- Apply mask: every non-zero mask value becomes NaN in all bands ---
is_flagged = clipped_mask.astype(bool)
masked_data = np.where(is_flagged, np.nan, img_data)

# --- Save result ---
with rasterio.open(output_path, "w", **img_profile) as dst:
    dst.write(masked_data.astype(np.float32))

print(f"‚úÖ Saved masked image to: {output_path}")

# 5 sec run time

# Set 0 Pixels to NaN

In [None]:
import rasterio
import numpy as np

# --- File paths ---
input_path = "path/to/your_input.tif"
output_path = "path/to/your_output_nan.tif"

# --- Parameters ---
LOW_REFLECTANCE_THRESHOLD = 10     # reflectance units (Sentinel-2 is 0–10000 scale)
MAX_ALLOWED_VALID_BANDS = 2        # mask pixel if it has <= 2 valid bands

# --- Load image ---
with rasterio.open(input_path) as src:
    data = src.read()  # shape: (bands, height, width)
    profile = src.profile

# --- Create mask: True where pixel has mostly low values ---
low_vals = data <= LOW_REFLECTANCE_THRESHOLD           # shape: (bands, H, W)
num_low = np.sum(low_vals, axis=0)                     # shape: (H, W)

# Mask pixels where most bands are low (i.e., few valid bands).
# Deliberately NOT named `mask`: other cells import rasterio.mask.mask under
# that name and their functions look it up as a global, so overwriting it here
# would break them on the next call.
low_value_mask = num_low >= (data.shape[0] - MAX_ALLOWED_VALID_BANDS)

# --- Apply mask across all bands ---
data_masked = np.where(low_value_mask[None, :, :], np.nan, data)

# --- Update profile for float32 + NaN ---
profile.update(
    dtype='float32',
    nodata=np.nan,
    driver='GTiff'
)

# Remove incompatible JP2 keys if they exist
for key in ["blockxsize", "blockysize", "interleave"]:
    profile.pop(key, None)

# --- Save result ---
with rasterio.open(output_path, "w", **profile) as dst:
    dst.write(data_masked.astype("float32"))

print(f"‚úÖ Saved masked image to: {output_path}")
