In [None]:
import os
import re

import pandas as pd
import rioxarray
import xarray as xr

# Root folder holding the SAFE granule directories for the day being processed
base_dir = "/home/jovyan/grid4earth_S2L1B/Sentinel-2/MSI/MSI_L1B_GR/2025/07/24"

# Band suffixes to load: B01 ... B12 in numeric order, followed by B8A
band_ids = [f"B{number:02d}" for number in range(1, 13)] + ["B8A"]

# SAFE folders: sub-directories of base_dir whose name ends in ".11",
# in alphabetical order
safe_dirs = sorted(
    name
    for name in os.listdir(base_dir)
    if name.endswith(".11") and os.path.isdir(os.path.join(base_dir, name))
)

# Container for the per-folder datasets; each carries a length-1 "time" dimension
ds_list = []

for safe_name in safe_dirs:
    safe_path = os.path.join(base_dir, safe_name)
    img_data_dir = os.path.join(safe_path, "IMG_DATA")

    print(f"\n📦 Processing: {safe_name}")

    # Extract sensing time from the folder name, between "_S" and "_D"
    match = re.search(r"_S(\d{8}T\d{6})_D", safe_name)
    if not match:
        print(f"❌ No timestamp found in {safe_name}")
        continue
    timestamp = pd.to_datetime(match.group(1), format="%Y%m%dT%H%M%S")

    # A folder without IMG_DATA is reported and skipped like every other
    # unusable folder, instead of letting os.listdir abort the whole loop.
    if not os.path.isdir(img_data_dir):
        print(f"❌ No IMG_DATA directory in {safe_name}")
        continue

    # List the directory once per folder (not once per band); sorting makes
    # the file chosen for a band deterministic if several names match.
    jp2_files = sorted(f for f in os.listdir(img_data_dir) if f.endswith(".jp2"))

    data_vars = {}
    for band in band_ids:
        band_file = next((f for f in jp2_files if f.endswith(f"{band}.jp2")), None)
        if band_file is None:
            print(f"⚠️  Missing {band} in {safe_name}")
            continue

        path = os.path.join(img_data_dir, band_file)
        # No CRS or reprojection handled here
        data_vars[band] = rioxarray.open_rasterio(path, masked=True).squeeze()

    if not data_vars:
        print("🚫 No bands loaded, skipping.")
        continue

    # NOTE(review): the bands are combined exactly as opened. If they do not
    # share the same pixel grid, xr.Dataset will align them on their x/y
    # coordinates -- confirm the resulting shapes are what is intended.
    ds = xr.Dataset(data_vars)
    # Add the sensing time as a length-1 "time" dimension
    ds = ds.expand_dims(time=[timestamp])
    ds_list.append(ds)

# Stack the per-folder datasets along "time"; nothing to do if none were loaded
if not ds_list:
    print("\n🚫 No datasets to combine.")
else:
    ds_all = xr.concat(ds_list, dim="time")
    print("\n✅ Combined dataset with time dimension:", ds_all, sep="\n")
    # To persist the result, uncomment:
    # ds_all.to_zarr("Sentinel2_L1B_timeseries.zarr", mode="w")

In [None]:
# Show the combined dataset as the cell output.
# ds_all only exists if at least one dataset was loaded in the cell that builds it.
ds_all

In [None]:
# Quick look at band B02 for the time step at index 3 (the fourth one)
ds_all["B02"].isel(time=3).plot()

In [None]:
# Write the combined dataset to a local Zarr store.
# mode="w" replaces an existing store, so this cell can be re-run; the default
# mode refuses to write when the store already exists.
ds_all.to_zarr("s2l1b_tmp.zarr", mode="w")

In [None]:
# Shell escape: print the total on-disk size of the s2l1b_tmp.zarr store
!du -hs s2l1b_tmp.zarr