## Creating annual aggregates: AOI-specific cloud cover (CC) filtering

In [None]:
# =========================================
# CONFIG
# =========================================
YEARS            = list(range(2017, 2018))      # years to process (range end is exclusive)
MONTH_START_END  = ("07-01", "08-31")           # seasonal window as ("MM-DD", "MM-DD")
GRID             = "MGRS-05WMU"               # e.g. "MGRS-05WMN"
MAX_CLOUD_COVER  = 70                          # Broad STAC filter (scene-wide, percent)
AOI_CLOUD_THRESH = 0.4                         # AOI cloud filter (fraction of valid AOI pixels, i.e. 40%)
BBOX_LL          = (-153.5, 70.5, -153, 71)     # (minx,miny,maxx,maxy)

REDUCER          = "median"                 # "median" or "quantile"
Q                = 0.25
QUANT_METHOD     = "nearest"

OUT_DIR          = "CDSE_annual_median_small"

# =========================================
# ENV SETUP
# =========================================
import os
import warnings

# Credentials are deliberately NOT stored in this file. Export
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before starting the kernel;
# values that are already set are left untouched.
_missing_credentials = [
    name for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if not os.environ.get(name)
]
if _missing_credentials:
    warnings.warn(
        "Missing S3 credentials in the environment: "
        + ", ".join(_missing_credentials)
        + ". Reads from the CDSE S3 endpoint will fail until they are set.",
        RuntimeWarning,
    )

# Non-secret endpoint settings are pinned here, as before.
os.environ["AWS_REGION"]            = "us-east-1"
os.environ["AWS_S3_ENDPOINT"]       = "eodata.dataspace.copernicus.eu"
os.environ["AWS_VIRTUAL_HOSTING"]   = "FALSE"

# =========================================
# IMPORTS
# =========================================
from pathlib import Path
from pystac_client import Client
from pystac import Item
import math
import numpy as np
import xarray as xr
import rioxarray
from pyproj import Transformer
import rasterio as rio
from rasterio.enums import Resampling

# =========================================
# HELPERS
# =========================================
def search_s2_stac(start_date: str, end_date: str, grid: str, max_cloud_cover: int = 100):
    """Search the CDSE STAC catalogue for Sentinel-2 L2A items of one grid tile.

    Args:
        start_date: First day of the search window, inserted into the
            ``datetime`` interval as given.
        end_date: Last day of the search window.
        grid: Value matched exactly against the ``grid:code`` property.
        max_cloud_cover: Upper bound (inclusive) for ``eo:cloud_cover``.

    Returns:
        A list with every item the search yields.
    """
    catalog = Client.open("https://stac.dataspace.copernicus.eu/v1/")
    property_filter = {
        "eo:cloud_cover": {"lte": max_cloud_cover},
        "grid:code": {"eq": grid},
    }
    interval = f"{start_date}/{end_date}"
    result = catalog.search(
        collections=["sentinel-2-l2a"],
        datetime=interval,
        query=property_filter,
    )
    found = list(result.items())
    print(f"  üîé Found {len(found)} items")
    return found

def prefer_s3_assets(items):
    """Return clones of ``items`` whose asset hrefs point at S3 where possible.

    For every asset the ``alternate`` / ``alternates`` entry of its
    ``extra_fields`` is inspected. A mapping is looked up under ``"s3"`` or
    ``"S3"``; a list is scanned for the first href starting with ``s3://``.
    Assets without such an alternate keep their href. The input items are
    not modified because each one is cloned first.
    """

    def _s3_alternate(asset):
        # Resolve the S3 href advertised by one asset, or None.
        fields = getattr(asset, "extra_fields", None) or {}
        alternates = fields.get("alternate") or fields.get("alternates")
        if isinstance(alternates, dict):
            entry = alternates.get("s3") or alternates.get("S3") or {}
            return entry.get("href")
        if isinstance(alternates, list):
            for candidate in alternates:
                href = candidate.get("href")
                if href and href.startswith("s3://"):
                    return href
        return None

    rewritten = []
    for original in items:
        clone = original.clone()
        for asset in clone.assets.values():
            target = _s3_alternate(asset)
            if target:
                asset.href = target
        rewritten.append(clone)
    return rewritten

def detect_epsg_and_bounds(items, bbox_ll_override=None):
    """Determine the working EPSG code and the AOI bounds in that CRS.

    Args:
        items: Non-empty sequence of STAC items (``bbox`` and ``properties``
            are read).
        bbox_ll_override: Optional lon/lat bbox ``(minx, miny, maxx, maxy)``.
            When ``None`` the union of the item bboxes is used.

    Returns:
        ``(epsg, bbox_ll, bounds_proj)`` where ``bounds_proj`` is
        ``(minx, miny, maxx, maxy)`` in ``EPSG:<epsg>``.

    Raises:
        ValueError: If ``items`` is empty.
    """
    if not items:
        raise ValueError("No items passed to detect_epsg_and_bounds")

    if bbox_ll_override is None:
        boxes = [it.bbox for it in items]
        bbox_ll = (
            min(b[0] for b in boxes),
            min(b[1] for b in boxes),
            max(b[2] for b in boxes),
            max(b[3] for b in boxes),
        )
    else:
        bbox_ll = bbox_ll_override

    # Prefer the CRS advertised by the items. Older projection-extension
    # versions use "proj:epsg"; newer ones use "proj:code" ("EPSG:32605").
    epsg = None
    for it in items:
        props = it.properties
        if props.get("proj:epsg") is not None:
            epsg = int(props["proj:epsg"])
            break
        code = props.get("proj:code")
        if isinstance(code, str) and code.upper().startswith("EPSG:"):
            number = code.split(":", 1)[1]
            if number.isdigit():
                epsg = int(number)
                break
    if epsg is None:
        # Fall back to the UTM zone of the bbox centre.
        lon, lat = ((bbox_ll[0] + bbox_ll[2]) / 2.0, (bbox_ll[1] + bbox_ll[3]) / 2.0)
        zone = int(math.floor((lon + 180) / 6) + 1)
        epsg = 32600 + zone if lat >= 0 else 32700 + zone

    tx = Transformer.from_crs("EPSG:4326", f"EPSG:{epsg}", always_xy=True)

    # A lon/lat rectangle is not a rectangle in the projected CRS, so two
    # opposite corners do not give its envelope. Sample points along all
    # four edges and take the extremes instead.
    minx, miny, maxx, maxy = bbox_ll
    steps = 20
    lons, lats = [], []
    for i in range(steps + 1):
        frac = i / steps
        lon = minx + frac * (maxx - minx)
        lat = miny + frac * (maxy - miny)
        lons.extend((lon, lon, minx, maxx))
        lats.extend((miny, maxy, lat, lat))
    xs, ys = tx.transform(lons, lats)
    bounds_proj = (min(xs), min(ys), max(xs), max(ys))
    return epsg, bbox_ll, bounds_proj

def rasterio_env():
    """Create the rasterio environment used for reads from the S3 endpoint.

    The endpoint, region and virtual-hosting flag are taken from the process
    environment (a missing variable raises ``KeyError``); the two GDAL
    options are fixed.
    """
    env = os.environ
    gdal_options = {
        "AWS_S3_ENDPOINT": env["AWS_S3_ENDPOINT"],
        "AWS_REGION": env["AWS_REGION"],
        "AWS_VIRTUAL_HOSTING": env["AWS_VIRTUAL_HOSTING"],
        "GDAL_DISABLE_READDIR_ON_OPEN": "EMPTY_DIR",
        "CPL_VSIL_CURL_ALLOWED_EXTENSIONS": "tif,gtiff,jp2,xml",
    }
    return rio.Env(**gdal_options)

# =========================================
# AOI cloud filtering from SCL
# =========================================
# SCL class codes counted as "cloudy" when scoring a scene over the AOI.
# NOTE(review): presumably shadow (3), unclassified (7), cloud (8, 9),
# cirrus (10) and snow/ice (11) per the Sentinel-2 L2A legend -- confirm.
SCL_CLOUD = [3, 7, 8, 9, 10, 11]

def get_filtered_items_for_year(year, grid, bbox_ll, max_scene_cloud=80, aoi_cloud_thresh=0.5, min_coverage=0.1):
    """Return the year's scenes that are clear and complete enough over the AOI.

    The STAC search is filtered scene-wide first; every hit is then scored
    on its SCL band clipped to the AOI.

    Args:
        year: Year combined with ``MONTH_START_END`` to build the date range.
        grid: Grid code passed to the STAC search.
        bbox_ll: AOI as lon/lat ``(minx, miny, maxx, maxy)``.
        max_scene_cloud: Scene-wide cloud-cover limit for the STAC search.
        aoi_cloud_thresh: Maximum share of cloudy pixels among the valid AOI
            pixels (fraction).
        min_coverage: Minimum share of the expected AOI pixels that must be
            valid (fraction).

    Returns:
        List of items (with S3 hrefs preferred) that pass both thresholds;
        empty if nothing was found or the AOI could not be projected.
    """
    start_date = f"{year}-{MONTH_START_END[0]}"
    end_date   = f"{year}-{MONTH_START_END[1]}"

    print(f"\nüîé [{year}] STAC search in {grid} from {start_date} to {end_date}")
    items = search_s2_stac(start_date, end_date, grid, max_cloud_cover=max_scene_cloud)
    items_s3 = prefer_s3_assets(items)
    if not items_s3:
        return []

    def get_bounds_for_item(item):
        # Projected AOI bounds for one item; (None, None) if that fails.
        try:
            epsg, _, bounds_proj = detect_epsg_and_bounds([item], bbox_ll_override=bbox_ll)
        except Exception as exc:
            # Report instead of hiding the reason the item is dropped.
            print(f"  AOI projection failed for {item.id}: {exc}")
            return None, None
        return epsg, bounds_proj

    # -------------------------------------------------------
    # 1) Compute expected AOI size (in pixels) once per year
    # -------------------------------------------------------
    # The first item supplies the CRS used to project the AOI.
    _, aoi_bounds = get_bounds_for_item(items_s3[0])
    if aoi_bounds is None:
        return []

    # AOI dimensions in CRS units, converted to a pixel count on a 20 m grid
    aoi_width  = aoi_bounds[2] - aoi_bounds[0]
    aoi_height = aoi_bounds[3] - aoi_bounds[1]
    expected_pixels = (aoi_width / 20) * (aoi_height / 20)
    if expected_pixels <= 0:
        # A degenerate AOI would make the coverage ratio a division by zero.
        print(f"  AOI has no area in projected coordinates: {aoi_bounds}")
        return []

    print(f"üìê Expected AOI pixels (20m grid): {int(expected_pixels):,}")

    filtered_items = []

    # -------------------------------------------------------
    # 2) Process each scene individually
    # -------------------------------------------------------
    with rasterio_env():
        for it in items_s3:

            if "SCL_20m" not in it.assets:
                continue

            try:
                _, bounds_proj = get_bounds_for_item(it)
                if bounds_proj is None:
                    continue

                href = it.assets["SCL_20m"].href
                scl  = rioxarray.open_rasterio(href, masked=True, chunks={}).squeeze("band", drop=True)
                # Load the clipped window once; the checks below would
                # otherwise each trigger their own remote read.
                scl  = scl.rio.clip_box(*bounds_proj).load()

                # Scene has no data at all over the AOI -> skip
                if scl.isnull().all():
                    print(f"‚õî {it.id} ‚Üí No AOI coverage, skipped")
                    continue

                # Valid pixels are neither NaN nor 0 (no data)
                valid_mask = (scl != 0) & scl.notnull()
                valid_pixels = valid_mask.sum().item()

                if valid_pixels == 0:
                    print(f"‚õî {it.id} ‚Üí AOI valid=0px, skipped")
                    continue

                # Coverage ratio relative to expected AOI pixels
                coverage_ratio = valid_pixels / expected_pixels

                # Cloud ratio relative to valid AOI pixels
                bad_pixels = (scl.isin(SCL_CLOUD) & valid_mask).sum().item()
                cloud_ratio = bad_pixels / valid_pixels

                print(
                    f"üßÆ {it.id} ‚Üí clouds={cloud_ratio:.0%}, "
                    f"coverage={coverage_ratio:.0%} "
                )

                # Keep only if cloudiness is low and coverage sufficient
                if cloud_ratio <= aoi_cloud_thresh and coverage_ratio >= min_coverage:
                    print(f"   ‚úÖ kept")
                    filtered_items.append(it)
                else:
                    print(f"   ‚õî skipped")

            except Exception as e:
                # Best effort: one unreadable scene must not abort the year.
                print(f"‚ö†Ô∏è {it.id} failed: {e}")

    return filtered_items




# =========================================
# PROCESS YEAR FUNCTION
# =========================================
# Asset keys to read, the labels written to the output, and the SCL class
# codes masked out of every scene before compositing.
BAND_ORDER = ["B02_10m", "B03_10m", "B04_10m", "B08_10m", "B11_20m", "B12_20m"]
BAND_LABELS = ["Blue", "Green", "Red", "NIR", "SWIR1", "SWIR2"]
SCL_BAD = [0,1,3,7,8,9,10,11]

def process_year(year: int, grid: str, max_cloud: int, bbox_ll):
    """Build and save one annual composite from AOI-filtered scenes.

    Args:
        year: Year to process.
        grid: Grid code used for the STAC search.
        max_cloud: Scene-wide cloud-cover limit (percent) for the search.
        bbox_ll: AOI as lon/lat ``(minx, miny, maxx, maxy)``.

    Returns:
        Path of the written GeoTIFF, or ``None`` if no scene could be used.
    """
    print(f"\n==== Year {year} | grid={grid} | clouds‚â§{max_cloud}% ====")

    # Honour the arguments and the configured AOI threshold; previously the
    # global GRID and hard-coded 80 / 0.5 were used regardless of the call.
    items = get_filtered_items_for_year(
        year,
        grid,
        bbox_ll,
        max_scene_cloud=max_cloud,
        aoi_cloud_thresh=AOI_CLOUD_THRESH,
        min_coverage=0.1,
    )
    if not items:
        print("  ‚ö†Ô∏è No AOI-clear items for this year; skipping.")
        return None

    items_s3 = prefer_s3_assets(items)
    epsg_out, bbox_ll_used, bounds_out = detect_epsg_and_bounds(items, bbox_ll_override=bbox_ll)
    print(f"  EPSG={epsg_out} | bbox_ll={bbox_ll_used} | bounds_proj={tuple(round(v,2) for v in bounds_out)}")

    bands_10m = [b for b in BAND_ORDER if b.endswith("10m")]
    bands_20m = [b for b in BAND_ORDER if b.endswith("20m")]

    stacks = []
    ok, bad = 0, 0

    with rasterio_env():
        for it_s3 in items_s3:
            try:
                # All bands are required: the labels are assigned by
                # position and the 20 m bands need a 10 m reference grid.
                missing = [b for b in BAND_ORDER if b not in it_s3.assets]
                if missing:
                    raise ValueError(f"missing assets: {missing}")

                ref = None
                pieces = []

                # --- Read 10m bands ---
                for bname in bands_10m:
                    da = rioxarray.open_rasterio(it_s3.assets[bname].href, masked=True, chunks={"x":1024,"y":1024}).squeeze("band", drop=True)
                    da = da.rio.clip_box(*bounds_out)
                    if ref is None:
                        ref = da
                    pieces.append(da.expand_dims("band"))

                # --- Read 20m bands and resample onto the 10m grid ---
                for bname in bands_20m:
                    da20 = rioxarray.open_rasterio(it_s3.assets[bname].href, masked=True, chunks={"x":1024,"y":1024}).squeeze("band", drop=True)
                    da20 = da20.rio.clip_box(*bounds_out)
                    da20u = da20.rio.reproject_match(ref, resampling=Resampling.bilinear)
                    pieces.append(da20u.expand_dims("band"))

                scene = xr.concat(pieces, dim="band")
                scene = scene.assign_coords(band=BAND_LABELS)

                # --- SCL mask (applied only when the asset exists) ---
                scl_asset = it_s3.assets.get("SCL_20m")
                if scl_asset:
                    scl = rioxarray.open_rasterio(scl_asset.href, masked=True, chunks={}).squeeze("band", drop=True).astype("uint16")
                    scl = scl.rio.clip_box(*bounds_out)
                    # Nearest neighbour keeps the class codes intact.
                    scl = scl.rio.reproject_match(ref, resampling=Resampling.nearest)
                    mask = ~scl.isin(SCL_BAD)
                    scene = scene.where(mask)

                # Every scene of the year shares one nominal timestamp.
                stacks.append(scene.expand_dims(time=[np.datetime64(f"{year}-07-15")]))
                ok += 1
            except Exception as e:
                print(f"  ‚ö†Ô∏è {it_s3.id} failed: {e}")
                bad += 1

    if not stacks:
        print("  ‚ö†Ô∏è No readable scenes; skipping year.")
        return None

    print(f"  ‚úÖ Scenes OK: {ok} | failed: {bad}")
    arr = xr.concat(stacks, dim="time")
    arr = arr.where(arr > 0)  # non-positive values are treated as missing

    if REDUCER.lower() == "median":
        red = arr.median(dim="time", skipna=True)
        red_tag = "median"
    else:
        # Mixed quantiles: 25th percentile for visible/NIR, 75th for SWIR.
        # This branch used to leave `red` undefined and crashed on save.
        rgb_nir = ["Blue", "Green", "Red", "NIR"]
        swir = ["SWIR1", "SWIR2"]
        q25_rgbnir = arr.sel(band=rgb_nir).quantile(0.25, dim="time", method=QUANT_METHOD, skipna=True)
        q75_swir = arr.sel(band=swir).quantile(0.75, dim="time", method=QUANT_METHOD, skipna=True)
        red = xr.concat([q25_rgbnir, q75_swir], dim="band").sel(band=BAND_LABELS)
        red_tag = "qmix25_75"

    out_dir = Path(OUT_DIR); out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / f"s2_{red_tag}_cc{MAX_CLOUD_COVER}_{year}_north_aoinan2.tif"

    # Missing pixels become 0, which is also declared as nodata.
    red_u16 = (
        red.fillna(0)
        .clip(0, 10000)
        .astype("uint16")
        .rio.write_nodata(0)
        .rio.set_spatial_dims("x", "y", inplace=False)
        .rio.write_crs(f"EPSG:{epsg_out}", inplace=False)
    )

    print(f" üìÇ Saving ‚Üí {out_path}")
    red_u16.transpose("band", "y", "x").rio.to_raster(
        out_path,
        driver="GTiff",
        compress="deflate",
        tiled=True,
        predictor=2,
        BIGTIFF="IF_SAFER",
        blockxsize=512,
        blockysize=512,
        tags={f"band_{i+1}": name for i, name in enumerate(BAND_LABELS)},
    )
    return out_path

# =========================================
# RUN ALL YEARS
# =========================================
# Process every configured year and collect the paths that were written.
all_outputs = []
for yr in YEARS:
    written = process_year(yr, GRID, MAX_CLOUD_COVER, bbox_ll=BBOX_LL)
    if written is None:
        # Nothing usable for this year; process_year already reported why.
        continue
    all_outputs.append(str(written))

print("\nDone. Written files:")
for path_str in all_outputs:
    print(" ‚Ä¢", path_str)

## Creating Annual aggregates (simpler scene version)

In [None]:
# =========================================
# CONFIG
# =========================================
YEARS            = list(range(2017, 2025))     # years to process (range end is exclusive)
MONTH_START_END  = ("07-01", "08-31")          # seasonal window as ("MM-DD", "MM-DD")
GRID             = "MGRS-05WMU"                     # e.g. "MGRS-05WMN"
MAX_CLOUD_COVER  = 40                           # scene-wide STAC cloud-cover limit (percent)
# lon/lat bbox (minx,miny,maxx,maxy); set to None for the full tile.
# Alternative AOI used before: (-154.25, 65.00, -154.00, 65.25)
BBOX_LL          = (-153.5, 70.5, -153, 71)

# reducer: "median" or "quantile"
REDUCER          = "median"                  # "median" or "quantile"
Q                = 0.25                        # used only if REDUCER == "quantile"
# quantile interpolation method: 'nearest', 'lower', 'higher', 'midpoint', or 'linear'
QUANT_METHOD = "nearest"

# output
OUT_DIR          = "CDSE_annual_median_small"

# S3 endpoint setup
import os
import warnings

# Credentials are deliberately NOT stored in this file. Export
# AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY before starting the kernel;
# values that are already set are left untouched (the old placeholder
# assignment overwrote a correctly exported secret).
_missing_credentials = [
    name for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if not os.environ.get(name)
]
if _missing_credentials:
    warnings.warn(
        "Missing S3 credentials in the environment: "
        + ", ".join(_missing_credentials)
        + ". Reads from the CDSE S3 endpoint will fail until they are set.",
        RuntimeWarning,
    )

# Non-secret endpoint settings are pinned here, as before.
os.environ["AWS_REGION"]            = "us-east-1"
os.environ["AWS_S3_ENDPOINT"]       = "eodata.dataspace.copernicus.eu"
os.environ["AWS_VIRTUAL_HOSTING"]   = "FALSE"

# =========================================
# IMPORTS
# =========================================
from pathlib import Path
from pystac_client import Client
from pystac import Item
import math
import numpy as np
import xarray as xr
import rioxarray
from pyproj import Transformer
import rasterio as rio
from rasterio.enums import Resampling

# =========================================
# HELPERS
# =========================================
def search_s2_stac(start_date: str, end_date: str, grid: str, max_cloud_cover: int = 100) -> list[Item]:
    """Return all Sentinel-2 L2A items of one grid tile within a date range.

    The search runs against the CDSE STAC API and is restricted to items
    whose ``eo:cloud_cover`` is at most ``max_cloud_cover`` and whose
    ``grid:code`` equals ``grid``. The number of hits is printed.
    """
    stac = Client.open("https://stac.dataspace.copernicus.eu/v1/")
    criteria = dict(
        collections=["sentinel-2-l2a"],
        datetime=f"{start_date}/{end_date}",
        query={"eo:cloud_cover": {"lte": max_cloud_cover}, "grid:code": {"eq": grid}},
    )
    hits = list(stac.search(**criteria).items())
    print(f"  üîé Found {len(hits)} items")
    return hits

def prefer_s3_assets(items):
    """Clone each item and switch its asset hrefs to an S3 alternate if one exists.

    The alternate is read from the asset's ``extra_fields`` under
    ``alternate`` or ``alternates``: a mapping is queried for ``"s3"`` /
    ``"S3"``, a list is searched for the first ``s3://`` href. The originals
    are left untouched.
    """
    result = []
    for source_item in items:
        item = source_item.clone()
        for asset in item.assets.values():
            meta = getattr(asset, "extra_fields", None) or {}
            alternates = meta.get("alternate") or meta.get("alternates")

            replacement = None
            if isinstance(alternates, dict):
                s3_entry = alternates.get("s3") or alternates.get("S3") or {}
                replacement = s3_entry.get("href")
            elif isinstance(alternates, list):
                # First entry whose href is an s3:// URL, if any.
                replacement = next(
                    (
                        href
                        for href in (entry.get("href") for entry in alternates)
                        if href and href.startswith("s3://")
                    ),
                    None,
                )

            if replacement:
                asset.href = replacement
        result.append(item)
    return result

def detect_epsg_and_bounds(items, bbox_ll_override=None):
    """Determine the working EPSG code and the AOI bounds in that CRS.

    Args:
        items: Non-empty sequence of STAC items (``bbox`` and ``properties``
            are read).
        bbox_ll_override: Optional lon/lat bbox ``(minx, miny, maxx, maxy)``.
            When ``None`` the union of the item bboxes is used.

    Returns:
        ``(epsg, bbox_ll, bounds_proj)`` where ``bounds_proj`` is
        ``(minx, miny, maxx, maxy)`` in ``EPSG:<epsg>``.

    Raises:
        ValueError: If ``items`` is empty.
    """
    if not items:
        raise ValueError("No items passed to detect_epsg_and_bounds")

    # bbox in lon/lat
    if bbox_ll_override is None:
        boxes = [it.bbox for it in items]
        bbox_ll = (
            min(b[0] for b in boxes),
            min(b[1] for b in boxes),
            max(b[2] for b in boxes),
            max(b[3] for b in boxes),
        )
    else:
        bbox_ll = bbox_ll_override

    # EPSG from the items, else a UTM guess. Older projection-extension
    # versions use "proj:epsg"; newer ones use "proj:code" ("EPSG:32605").
    epsg = None
    for it in items:
        props = it.properties
        if props.get("proj:epsg") is not None:
            epsg = int(props["proj:epsg"])
            break
        code = props.get("proj:code")
        if isinstance(code, str) and code.upper().startswith("EPSG:"):
            number = code.split(":", 1)[1]
            if number.isdigit():
                epsg = int(number)
                break
    if epsg is None:
        lon, lat = ((bbox_ll[0] + bbox_ll[2]) / 2.0, (bbox_ll[1] + bbox_ll[3]) / 2.0)
        zone = int(math.floor((lon + 180) / 6) + 1)
        epsg = 32600 + zone if lat >= 0 else 32700 + zone

    # Reproject the bbox. A lon/lat rectangle is not a rectangle in the
    # projected CRS, so two opposite corners do not give its envelope.
    # Sample points along all four edges and take the extremes instead.
    tx = Transformer.from_crs("EPSG:4326", f"EPSG:{epsg}", always_xy=True)
    minx, miny, maxx, maxy = bbox_ll
    steps = 20
    lons, lats = [], []
    for i in range(steps + 1):
        frac = i / steps
        lon = minx + frac * (maxx - minx)
        lat = miny + frac * (maxy - miny)
        lons.extend((lon, lon, minx, maxx))
        lats.extend((miny, maxy, lat, lat))
    xs, ys = tx.transform(lons, lats)
    bounds_proj = (min(xs), min(ys), max(xs), max(ys))
    return epsg, bbox_ll, bounds_proj

def rasterio_env():
    """Create the rasterio environment used for reads from the S3 endpoint.

    Endpoint, region and virtual-hosting flag come from the process
    environment and fall back to the CDSE defaults below when unset.
    """
    # Make GDAL/rasterio behave better on S3 (reduce directory scans)
    fallbacks = (
        ("AWS_S3_ENDPOINT", "eodata.dataspace.copernicus.eu"),
        ("AWS_REGION", "us-east-1"),
        ("AWS_VIRTUAL_HOSTING", "FALSE"),
    )
    options = {name: os.environ.get(name, default) for name, default in fallbacks}
    options["GDAL_DISABLE_READDIR_ON_OPEN"] = "EMPTY_DIR"
    options["CPL_VSIL_CURL_ALLOWED_EXTENSIONS"] = "tif,gtiff,jp2,xml"
    return rio.Env(**options)
# ================================================================================================================================================
#PROCESS
#=================================================================================================================================================
# bands
# Asset keys to read, the labels written to the output, and the SCL class
# codes masked out of every scene before compositing.
BAND_ORDER = ["B02_10m","B03_10m","B04_10m","B08_10m","B11_20m","B12_20m"]
BAND_LABELS = ["Blue","Green","Red","NIR","SWIR1","SWIR2"]
SCL_BAD = [0,1,3,7,8,9,10,11]  # S2 SCL classes to mask

def process_year(year: int, grid: str, max_cloud: int, bbox_ll):
    """Build and save one annual composite from all scenes found for a year.

    Each scene is masked with its SCL band, a brightness mask and a
    per-band outlier mask before the scenes are reduced over time.

    Args:
        year: Year to process.
        grid: Grid code used for the STAC search.
        max_cloud: Scene-wide cloud-cover limit (percent) for the search.
        bbox_ll: AOI as lon/lat ``(minx, miny, maxx, maxy)``, or ``None`` to
            use the union of the item bboxes.

    Returns:
        Path of the written GeoTIFF, or ``None`` if no scene could be used.
    """
    print(f"\n==== Year {year} | grid={grid} | clouds‚â§{max_cloud}% ====")
    start_date = f"{year}-{MONTH_START_END[0]}"
    end_date   = f"{year}-{MONTH_START_END[1]}"

    # 1) STAC search
    items = search_s2_stac(start_date, end_date, grid, max_cloud_cover=max_cloud)
    if not items:
        print("  ‚ö†Ô∏è No items for this year; skipping.")
        return None

    # 2) Prefer S3 asset hrefs
    items_s3 = prefer_s3_assets(items)

    # 3) Bounds & EPSG (use bbox)
    epsg_out, bbox_ll_used, bounds_out = detect_epsg_and_bounds(items, bbox_ll_override=bbox_ll)
    print(f"  EPSG={epsg_out} | bbox_ll={bbox_ll_used} | bounds_proj={tuple(round(v,2) for v in bounds_out)}")

    # Split bands by native resolution
    bands_10m = [b for b in BAND_ORDER if b.endswith("10m")]
    bands_20m = [b for b in BAND_ORDER if b.endswith("20m")]

    # Pixels whose Red, Green AND Blue values all exceed this are dropped.
    # The value is in the same units as the band data (2100, not 1200).
    bright_threshold = 2100
    # Width of the per-scene outlier window in standard deviations.
    outlier_sigma = 3

    stacks = []
    ok, bad = 0, 0

    with rasterio_env():
        for it_s3 in items_s3:
            try:
                # --- read 10m bands ---
                ref = None
                pieces = []
                for bname in bands_10m:
                    if bname not in it_s3.assets:
                        continue
                    href = it_s3.assets[bname].href  # <-- S3 href
                    da = rioxarray.open_rasterio(href, masked=True, chunks={"x":1024,"y":1024}).squeeze("band", drop=True)
                    da = da.rio.clip_box(*bounds_out)  # bounds are in EPSG of the tile
                    if ref is None:
                        ref = da
                    pieces.append(da.expand_dims("band"))

                # --- read 20m bands and resample onto the reference grid ---
                for bname in bands_20m:
                    if bname not in it_s3.assets:
                        continue
                    href = it_s3.assets[bname].href
                    da20 = rioxarray.open_rasterio(href, masked=True, chunks={"x":1024,"y":1024}).squeeze("band", drop=True)
                    da20 = da20.rio.clip_box(*bounds_out)
                    if ref is None:
                        ref = da20
                    da20u = da20.rio.reproject_match(ref, resampling=Resampling.bilinear)
                    pieces.append(da20u.expand_dims("band"))

                if not pieces:
                    continue

                # --- combine into single scene ---
                scene = xr.concat(pieces, dim="band")
                if scene.sizes["band"] != len(BAND_LABELS):
                    # Labels are assigned by position, so an incomplete
                    # scene cannot be labelled reliably.
                    print(f"  ‚ö†Ô∏è Szene hat unerwartete Bandanzahl ({scene.sizes['band']}); wird √ºbersprungen.")
                    continue

                # --- SCL mask ---
                # The key must be "SCL_20m"; the old test for "_20m" never
                # matched, so `mask` stayed undefined and every scene failed.
                if "SCL_20m" in it_s3.assets:
                    scl_href = it_s3.assets["SCL_20m"].href
                    scl = rioxarray.open_rasterio(scl_href, masked=True, chunks={"x":1024,"y":1024}).squeeze("band", drop=True).astype("uint16")
                    scl = scl.rio.clip_box(*bounds_out)
                    # Nearest neighbour keeps the class codes intact.
                    scl = scl.rio.reproject_match(ref, resampling=Resampling.nearest)
                    mask = ~scl.isin(SCL_BAD)
                    scene = scene.where(mask)

                scene = scene.assign_coords(band=BAND_LABELS)

                # --- Brightness mask (exclude very bright RGB pixels) ---
                # True where the pixel is NOT bright in all three bands.
                bright_mask = ~((scene.sel(band="Red")   > bright_threshold) &
                                (scene.sel(band="Green") > bright_threshold) &
                                (scene.sel(band="Blue")  > bright_threshold))
                scene = scene.where(bright_mask)

                # --- per-scene, per-band outlier mask (mean +/- 3 sigma) ---
                # The statistics keep only the band dimension; xarray
                # broadcasts them against the scene by dimension name.
                scene_mean = scene.mean(dim=("y", "x"), skipna=True)
                scene_std  = scene.std(dim=("y", "x"), skipna=True)
                lower = scene_mean - outlier_sigma * scene_std
                upper = scene_mean + outlier_sigma * scene_std
                scene_mask = (scene >= lower) & (scene <= upper)
                scene = scene.where(scene_mask)

                print(f"    ‚Ü™ Outlier-masked pixels in scene:") #  {(~scene_mask).sum().compute().item():,}")

                # Every scene of the year shares one nominal timestamp.
                stacks.append(scene.expand_dims(time=[np.datetime64(f"{year}-07-15")]))
                ok += 1
            except Exception as e:
                print(f"  ‚ö†Ô∏è {it_s3.id} failed: {e}")
                bad += 1

    if not stacks:
        print("  ‚ö†Ô∏è No readable scenes; skipping year.")
        return None

    print(f"  ‚úÖ Scenes OK: {ok} | failed: {bad}")
    arr = xr.concat(stacks, dim="time")
    arr = arr.where(arr > 0)  # non-positive values are treated as missing

    # ===== REDUCTION (median or quantile) =====
    # Idea to try: mean / no mask and only the 25 % quantile.
    if REDUCER.lower() == "median":
        red = arr.median(dim="time", skipna=True)
        red_tag = "median"
    else:
        # Make sure bands are labeled (should already be from above)
        if "band" not in arr.coords or set(arr.band.values) != set(BAND_LABELS):
            arr = arr.assign_coords(band=BAND_LABELS)

        # Mixed quantiles: 25th percentile for visible/NIR, 75th for SWIR
        rgb_nir = ["Blue", "Green", "Red", "NIR"]
        swir    = ["SWIR1", "SWIR2"]
        q25_rgbnir = arr.sel(band=rgb_nir).quantile(0.25, dim="time", method=QUANT_METHOD, skipna=True)
        q75_swir   = arr.sel(band=swir).quantile(0.75, dim="time", method=QUANT_METHOD, skipna=True)

        # Stitch back together and restore canonical band order
        red = xr.concat([q25_rgbnir, q75_swir], dim="band").sel(band=BAND_LABELS)

        red_tag = "qmix25_75"

    # ===== SAVE RASTER =====
    out_dir = Path(OUT_DIR); out_dir.mkdir(parents=True, exist_ok=True)
    out_path = out_dir / f"s2_{red_tag}_cc{MAX_CLOUD_COVER}_{year}_north_outbright2100.tif"

    # Missing pixels become 0, which is also declared as nodata.
    red_u16 = (
        red.fillna(0)
        .clip(0, 10000)
        .astype("uint16")
        .rio.write_nodata(0)
        .rio.set_spatial_dims("x","y", inplace=False)
        .rio.write_crs(f"EPSG:{epsg_out}", inplace=False)
        )

    red_u16 = red_u16.assign_coords(band=BAND_LABELS)

    print(f" üìÇ Saving ‚Üí {out_path}")
    red_u16.transpose("band","y","x").rio.to_raster(
        out_path,
        driver="GTiff",
        compress="deflate",
        tiled=True,
        predictor=2,
        BIGTIFF="IF_SAFER",
        blockxsize=512,
        blockysize=512,
        tags={f"band_{i+1}": name for i, name in enumerate(BAND_LABELS)},
        )
    return out_path

# =========================================
# RUN ALL YEARS
# =========================================
# Run every configured year; years without a result (None) are left out.
all_outputs = [
    str(p)
    for yr in YEARS
    if (p := process_year(yr, GRID, MAX_CLOUD_COVER, bbox_ll=BBOX_LL)) is not None
]

print("\nDone. Written files:")
for written in all_outputs:
    print(" ‚Ä¢", written)

# cc 30: 8, 4, 8, 16, 7, 12, 13, 5 = 50 min with outlier mask
# cc 40: 11, 10, 9, 18, 13, 14, 19

## Calculating Tasseled Cap (TC) images

In [None]:
# === tasseled_cap_mosaic_generation.py ===
from pathlib import Path
import numpy as np
import xarray as xr
import rioxarray
from dask.diagnostics import ProgressBar
import warnings

# Silence rioxarray's coordinate-precision warnings for this cell.
warnings.filterwarnings("ignore", category=UserWarning, message=".*coordinate precision.*")

median_dir = Path("CDSE_annual_median_small")   # input: annual composites
tc_dir = Path("CDSE_tc_small")                  # output: tasseled cap rasters
tc_dir.mkdir(exist_ok=True)
years = list(range(2018, 2025))

# Tasseled cap coefficients per input band (brightness, greenness, wetness).
# NOTE(review): these values look like the Landsat TM reflectance-factor
# coefficients (Crist 1985) rather than a Sentinel-2-specific set -- confirm
# that this is intended.
coeffs = {
    "tcb": {"Blue": 0.3037, "Green": 0.2793, "Red": 0.4743, "NIR": 0.5585, "SWIR1": 0.5082, "SWIR2": 0.1863},
    "tcg": {"Blue": -0.2848, "Green": -0.2435, "Red": -0.5436, "NIR": 0.7243, "SWIR1": 0.0840, "SWIR2": -0.1800},
    "tcw": {"Blue": 0.1509, "Green": 0.1973, "Red": 0.3279, "NIR": 0.3406, "SWIR1": -0.7112, "SWIR2": -0.4572},
}

for year in years:
    # NOTE(review): the aggregation cell in this notebook currently writes
    # "..._cc40_..._outbright2100.tif"; verify these names match the files
    # that actually exist on disk.
    in_file = median_dir / f"s2_median_cc30_{year}_north_outbright1800.tif"
    out_file = tc_dir / f"tc_median_c30_{year}_north_outbright1800_std3.tif"

    if not in_file.exists():
        print(f"‚ùå Missing median mosaic for {year}")
        continue
    if out_file.exists():
        print(f"‚è≠Ô∏è Already exists, skipping {out_file}")
        continue

    print(f"‚úÖ Loading: {in_file}")
    # masked=True makes rioxarray turn the file's nodata value into NaN
    raw = rioxarray.open_rasterio(in_file, chunks={"x": 1024, "y": 1024}, masked=True)

    # Label the bands and rescale the stored integers by 1/10000
    labelled = raw.assign_coords(band=["Blue", "Green", "Red", "NIR", "SWIR1", "SWIR2"])
    da = labelled.astype("float32") / 10000.0

    # Treat remaining zeros as missing (older composites used fillna(0))
    da = da.where(da != 0)

    # One 2-D array per band, in the order of the labels
    blue, green, red, nir, swir1, swir2 = da.sel(band=["Blue", "Green", "Red", "NIR", "SWIR1", "SWIR2"])
    layers = {"Blue": blue, "Green": green, "Red": red, "NIR": nir, "SWIR1": swir1, "SWIR2": swir2}

    def tc(c):
        # Weighted sum of the six bands, accumulated left to right.
        total = c["Blue"] * layers["Blue"]
        for name in ("Green", "Red", "NIR", "SWIR1", "SWIR2"):
            total = total + c[name] * layers[name]
        return total

    # Compute the three components and stack them along a new band axis
    tcb = tc(coeffs["tcb"])
    tcg = tc(coeffs["tcg"])
    tcw = tc(coeffs["tcw"])
    tc_stack = (
        xr.concat([tcb, tcg, tcw], dim="band")
        .assign_coords(band=["TCB", "TCG", "TCW"])
        .rio.write_crs(da.rio.crs)
    )

    # NaN stays the nodata marker in the float32 output
    tc_stack = tc_stack.astype("float32").rio.write_nodata(np.nan)

    print(f"üíæ Saving tasseled cap mosaic: {out_file}")
    with ProgressBar():
        computed = tc_stack.compute(scheduler="threads")
        computed.transpose("band", "y", "x").rio.to_raster(
            out_file,
            driver="GTiff",
            tiled=True,
            compress="deflate",
            BIGTIFF="IF_SAFER",
            predictor=3,
            blockxsize=1024,
            blockysize=1024,
        )

print("‚úÖ All tasseled cap mosaics saved.")


## Trend Calculation (fixed vis)

In [None]:
# =========================================
# TREND CALCULATION FOR TC STACKS
# =========================================

from pathlib import Path
import numpy as np
import xarray as xr
import rioxarray
from dask.diagnostics import ProgressBar
import dask
import logging

# -----------------------------------------
# CONFIG
# -----------------------------------------
tc_dir     = Path("CDSE_tc_small")          # input mosaics
trend_dir  = Path("CDSE_tc_trend_results")  # output directory
trend_dir.mkdir(exist_ok=True)

years = list(range(2017, 2025))
bands_tc = ["TCB", "TCG", "TCW"]

# -----------------------------------------
# 1. LOAD ALL TASSELED CAP MOSAICS
# -----------------------------------------
arrays = []

for year in years:
    fp = tc_dir / f"tc_median_c30_{year}_north_outbright1800_std3.tif"
    if not fp.exists():
        # Missing years are skipped; the regression uses whatever remains.
        print(f"‚ùå Missing {fp}")
        continue

    print(f"‚úÖ Loading {fp}")
    da = rioxarray.open_rasterio(fp, chunks={"x": 1024, "y": 1024})

    # Assign TC band names
    da = da.assign_coords(band=bands_tc)

    # Add a time coordinate (one nominal date per year)
    da = da.expand_dims(time=[np.datetime64(f"{year}-07-15")])

    arrays.append(da)

if not arrays:
    raise RuntimeError("No tasseled cap mosaics found!")

# Concatenate stack; keep the whole time axis in one chunk for the fit
stack = xr.concat(arrays, dim="time").transpose("time", "band", "y", "x")
stack = stack.chunk({"time": -1, "x": 1024, "y": 1024})
stack.name = "tc"

print(f"üß© Stack shape: {stack.shape} (time, band, y, x)")

# -----------------------------------------
# 2. FIX THE TIME AXIS FOR REGRESSION
# -----------------------------------------
# Convert datetime64 -> integer years so the slope is expressed per year
years_numeric = stack["time"].dt.year

# Replace time dim with 'year'
stack = stack.assign_coords(year=("time", years_numeric.data))
stack = stack.swap_dims({"time": "year"})

print(f"üìÖ Using year values for regression: {list(years_numeric.values)}")

# -----------------------------------------
# 3. TREND REGRESSION (PER YEAR)
# -----------------------------------------
results = []

for band in bands_tc:
    print(f"üìà Computing trend for {band}...")

    sub = stack.sel(band=band)

    # Fit a first-degree polynomial across the 'year' axis
    fit = sub.to_dataset(name="tc").polyfit(dim="year", deg=1)

    # Extract slope (degree 1 coefficient)
    slope = fit["tc_polyfit_coefficients"].sel(degree=1)

    # Scale by 10 to match the GEE visualization intensity; with a yearly
    # regressor this is the change per ten years.
    slope = slope * 10

    slope = slope.expand_dims(band=[f"{band}_slope"])
    results.append(slope)

# Combine all slope bands
trend = xr.concat(results, dim="band")
trend.rio.write_crs(stack.rio.crs, inplace=True)

# -----------------------------------------
# 4. COMPUTE THE ARRAY
# -----------------------------------------
out_path = trend_dir / "tc_trend_small_fixed_outbrightness.tif"
print(f"üíæ Saving trend raster: {out_path}")

# Threaded Dask scheduler
dask.config.set(scheduler="threads")
logging.getLogger("tornado.application").setLevel(logging.ERROR)
logging.getLogger("tornado.general").setLevel(logging.ERROR)

with ProgressBar(dt=30.0):
    trend = trend.compute()

# 8-bit visualization of slopes clipped to [-0.3, 0.3].
# Casting NaN to uint8 is undefined, so 0 is reserved as nodata and valid
# slopes are mapped onto 1..255 (a slope of 0 maps to 128).
vis_limit = 0.3
has_data = trend.notnull()
scaled = (trend.clip(-vis_limit, vis_limit) + vis_limit) / (2 * vis_limit) * 254 + 1
trend_vis = scaled.round().where(has_data, 0).astype("uint8").rio.write_nodata(0)
trend_vis.transpose("band", "y", "x").rio.to_raster("trend_visual.tif")

# -----------------------------------------
# 5. SAVE TO GEOTIFF
# -----------------------------------------
trend_vis.transpose("band", "y", "x").rio.to_raster(
    out_path,
    driver="GTiff",
    tiled=True,
    compress="deflate",
    BIGTIFF="IF_SAFER",
    predictor=2,
    blockxsize=1024,
    blockysize=1024,
)

print("‚úÖ Trend image saved successfully.")

