In [1]:
# 00_sentinel-1_patches_v2.ipynb

import os
import time
import uuid
import json
import glob
import numpy as np
import pandas as pd
import geopandas as gpd
import datetime as dt
from dotenv import load_dotenv
from sentinelhub import (
    SHConfig, SentinelHubCatalog, SentinelHubRequest, DataCollection, 
    MimeType, SentinelHubDownloadClient, BBox, CRS, bbox_to_dimensions, filter_times
)
from shapely.geometry import shape
import tifffile

# Read settings from a local .env file into the process environment
load_dotenv()

# Sentinel Hub client configuration: service endpoints first, then the
# OAuth credentials taken from the environment.
config = SHConfig()
config.sh_base_url = "https://sh.dataspace.copernicus.eu"
config.sh_token_url = "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
config.sh_client_id = os.environ.get("SENTINELHUB_CLIENT_ID3")
config.sh_client_secret = os.environ.get("SENTINELHUB_CLIENT_SECRET3")

# Sentinel-1 IW collection bound to the service URL configured above
data_collection = DataCollection.SENTINEL1_IW.define_from(name="s1iw-cdse", service_url=config.sh_base_url)

# Input GeoJSON, output folders and log location
patches_path = "../data/patches/highlighted_patches.geojson"
raw_dir = "../data/raw/sentinel1"
proc_dir = "../data/processed/sentinel1"
log_path = "../data/logs/patch_download_log.csv"

# Make sure every output folder exists before anything is written
os.makedirs(raw_dir, exist_ok=True)
os.makedirs(proc_dir, exist_ok=True)
os.makedirs(os.path.dirname(log_path), exist_ok=True)

# Read the patch polygons
patches_gdf = gpd.read_file(patches_path)
print(f"Loaded {len(patches_gdf)} patches")

# Order patches by the 5-digit number embedded in their patch_id
patches_gdf = (
    patches_gdf
    .assign(patch_index=patches_gdf["patch_id"].str.extract(r'patch_(\d{5})')[0].astype(int))
    .sort_values("patch_index")
    .reset_index(drop=True)
)

# Optional resume: set start_from_patch to a patch_id to drop every patch
# that sorts before it. An id that is not present leaves the table unchanged.
start_from_patch = None
if start_from_patch and (patches_gdf["patch_id"] == start_from_patch).any():
    # After reset_index the label of the first match equals its position
    start_index = (patches_gdf["patch_id"] == start_from_patch).idxmax()
    patches_gdf = patches_gdf.iloc[start_index:].reset_index(drop=True)

# Request parameters
resolution = 1024  # used as both width and height of the request `size`
time_interval = ("2010-01-01", "2030-12-31")  # catalog search window
bands = ["VV"]  # single source of truth for the requested bands
bands_units = None  # no explicit units are sent; recorded as-is in the metadata
bands_num = len(bands)  # derived so it can never disagree with `bands`
sampleType = "FLOAT32"
catalog = SentinelHubCatalog(config=config)

# Evalscript fragments derived from `bands`, so that changing the band list
# updates the requested inputs, the output band count and the returned
# samples together. With bands == ["VV"] the generated script is identical
# to the previous hand-written one.
_evalscript_bands = json.dumps(bands)
_evalscript_samples = ", ".join(f"sample.{band}" for band in bands)

# Evalscript returning the requested Sentinel-1 band(s) unchanged
evalscript = f"""
//VERSION=3
function setup() {{
  return {{
    input: [{{
      bands: {_evalscript_bands},
    }}],
    output: {{
      bands: {bands_num},
      sampleType: "{sampleType}"
    }}
  }};
}}
function evaluatePixel(sample) {{
  return [{_evalscript_samples}];
}}
"""

# Function to normalize SAR image
def normalize_sentinel1(image, p2=2, p98=98):
    """Percentile-stretch every band of an image to roughly the [0, 1] range.

    Each band is clipped to its ``p2``/``p98`` percentiles and rescaled
    linearly. A small epsilon in the denominator avoids division by zero for
    constant bands, so the maximum can fall marginally below 1.

    Percentiles are computed ignoring NaN pixels. A single NaN therefore no
    longer turns the whole band and its limits into NaN; the NaN pixels
    themselves remain NaN in the output.

    Args:
        image: Array-like of shape (height, width, bands), or (height, width)
            for a single band.
        p2: Lower percentile, in the range 0-100.
        p98: Upper percentile, in the range 0-100.

    Returns:
        Tuple ``(norm_image, low_list, high_list)``: the float32 normalized
        array with the same shape as the input, and the per-band lower and
        upper clip values as Python floats.
    """
    image = np.asarray(image, dtype=np.float32)

    # Accept a bare 2-D band by treating it as a one-band stack
    single_band = image.ndim == 2
    if single_band:
        image = image[:, :, np.newaxis]

    norm_image = np.zeros_like(image)
    low_list, high_list = [], []
    for i in range(image.shape[2]):
        band = image[:, :, i]
        # nanpercentile matches percentile on NaN-free data but skips NaNs
        low, high = np.nanpercentile(band, (p2, p98))
        clipped = np.clip(band, low, high)
        norm_image[:, :, i] = (clipped - low) / (high - low + 1e-6)
        low_list.append(float(low))
        high_list.append(float(high))

    if single_band:
        norm_image = norm_image[:, :, 0]
    return norm_image, low_list, high_list

# Progress tracking
n_patches = len(patches_gdf)
completed = 0
start_time = time.time()
max_attempts = 5  # download attempts per timestamp before giving up

# Main loop: for every patch, find its acquisitions in the catalog, then
# download, normalize and save each one that is not already on disk.
for idx, row in patches_gdf.iterrows():
    patch_id = row["patch_id"]
    geom = row["geometry"]

    if geom.geom_type != "Polygon":
        print(f"Skipping non-polygon geometry in patch {patch_id}")
        continue

    # NOTE(review): the bounds are interpreted as Web Mercator (CRS.POP_WEB)
    # regardless of the CRS of the GeoJSON - confirm the file is in EPSG:3857.
    patch_bbox = BBox(bbox=geom.bounds, crs=CRS.POP_WEB)
    print(f"\n[{idx+1}/{n_patches}] Processing {patch_id} with bbox {list(patch_bbox)}")

    try:
        search_iterator = catalog.search(
            data_collection,
            bbox=patch_bbox,
            time=time_interval,
            fields={"include": ["id", "properties.datetime"], "exclude": []}
        )
        timestamps = search_iterator.get_timestamps()
        # Collapse timestamps that lie within 6 hours of each other
        unique_times = filter_times(timestamps, dt.timedelta(hours=6))

        if not unique_times:
            print(f"No acquisitions found for {patch_id}")
            continue

        client = SentinelHubDownloadClient(config=config)

        # Per-patch counters. date_count counts every handled timestamp
        # (saved, skipped or failed) so the [i/N] display and the ETA stay
        # consistent even when some downloads fail.
        total_dates = len(unique_times)
        date_count = 0
        failed_count = 0
        patch_start = time.time()

        for timestamp in unique_times:
            # File names embed the timestamp with ':' and '-' stripped
            iso_time = timestamp.isoformat().replace(":", "").replace("-", "")
            fname_base = f"{patch_id}_{iso_time}_res{resolution}"
            raw_path = os.path.join(raw_dir, f"{fname_base}.tiff")
            proc_path = os.path.join(proc_dir, f"{fname_base}.tiff")
            json_path = os.path.join(proc_dir, f"{fname_base}.json")

            # Resume support: only skip when all three outputs exist, so a
            # run interrupted mid-write is redone.
            if os.path.exists(raw_path) and os.path.exists(proc_path) and os.path.exists(json_path):
                print(f"[{date_count+1}/{total_dates}] ⏩ Skipped {timestamp} for {patch_id}")
                date_count += 1
                continue

            request = SentinelHubRequest(
                evalscript=evalscript,
                input_data=[
                    SentinelHubRequest.input_data(
                        data_collection=data_collection,
                        time_interval=(timestamp - dt.timedelta(hours=6), timestamp + dt.timedelta(hours=6))
                    )
                ],
                responses=[SentinelHubRequest.output_response("default", MimeType.TIFF)],
                bbox=patch_bbox,
                size=(resolution, resolution),
                config=config
            )

            # Download with exponential backoff between attempts
            image = None
            for attempt in range(max_attempts):
                try:
                    print(f"Downloading {patch_id} at {timestamp} (attempt {attempt+1})")
                    image = client.download(request.download_list[0])
                    break
                except Exception as e:
                    print(f"Attempt {attempt+1} failed: {e}")
                    # No point in waiting after the final attempt
                    if attempt < max_attempts - 1:
                        time.sleep(2 ** attempt)

            if image is None:
                # Record the failure instead of dropping the timestamp silently
                date_count += 1
                failed_count += 1
                print(f"[{date_count}/{total_dates}] ❌ Giving up on {timestamp} for {patch_id} after {max_attempts} attempts")
                continue

            # A single-band response may come back 2-D; give it a band axis
            if image.ndim == 2:
                image = image[:, :, np.newaxis]

            # Save raw image with the band axis moved to the front
            image_raw = np.transpose(image, (2, 0, 1))
            tifffile.imwrite(raw_path, image_raw)
            print(f"Saved raw image: {raw_path}")

            # Normalize and save processed image in the same band-first layout
            norm_image, p2_list, p98_list = normalize_sentinel1(image)
            image_norm = np.transpose(norm_image, (2, 0, 1))
            tifffile.imwrite(proc_path, image_norm)
            print(f"Saved normalized image: {proc_path}")

            # Sidecar metadata; the short uuid is generated only for files
            # that are actually written.
            uid = str(uuid.uuid4())[:8]
            metadata = {
                "source": "SENTINEL1_IW",
                "patch_id": patch_id,
                "uuid": uid,
                "timestamp": timestamp.isoformat(),
                "resolution": resolution,
                "bands": bands,
                "units": bands_units,
                "sampleTypeRaw": sampleType,
                "sampleType": "float32",
                "normalization": {
                    "percentiles": [2, 98],
                    "p2": p2_list,
                    "p98": p98_list
                },
                "shape": image.shape,
                "file_raw": os.path.basename(raw_path),
                "file_processed": os.path.basename(proc_path)
            }

            with open(json_path, "w") as f:
                json.dump(metadata, f, indent=4)
                print(f"Saved metadata: {json_path}")

            date_count += 1
            patch_elapsed = time.time() - patch_start
            avg_per_date = patch_elapsed / date_count
            eta_dates_left = (total_dates - date_count) * avg_per_date

            print(f"[{date_count}/{total_dates}] ✅ Done {patch_id} @ {timestamp} | avg: {avg_per_date:.2f}s/date | ETA left: {eta_dates_left/60:.1f} min")

    except Exception as e:
        # Top-level boundary for one patch: report and move on to the next
        print(f"Error processing {patch_id}: {e}")
        continue

    # Progress reporting (reached only when the patch finished without error)
    completed += 1
    elapsed = time.time() - start_time
    avg_time = elapsed / max(completed, 1)
    remaining = (n_patches - (idx + 1)) * avg_time
    print(f"⏱ Completed {completed}/{n_patches} | ETA: {remaining/60:.1f} min")

    # Recompute here: the in-loop value is stale if the last timestamps were skipped
    patch_elapsed = time.time() - patch_start
    print(f"🎉 Finished {patch_id}: {total_dates} timestamps processed in {patch_elapsed/60:.2f} min\n")
    if failed_count:
        print(f"⚠️ {failed_count} of {total_dates} timestamps could not be downloaded for {patch_id}")

print("✅ Sentinel-1 patch processing complete")

Loaded 16 patches

[1/16] Processing patch_00032_5925311e with bbox [12807334.10496043, -969164.7850511064, 12817740.489491152, -958689.8491249421]
[1/896] ⏩ Skipped 2015-02-28 21:52:46+00:00 for patch_00032_5925311e
[2/896] ⏩ Skipped 2015-03-05 22:00:41+00:00 for patch_00032_5925311e
[3/896] ⏩ Skipped 2015-03-24 21:52:46+00:00 for patch_00032_5925311e
[4/896] ⏩ Skipped 2015-03-29 22:00:41+00:00 for patch_00032_5925311e
[5/896] ⏩ Skipped 2015-04-17 21:52:47+00:00 for patch_00032_5925311e
[6/896] ⏩ Skipped 2015-04-22 22:00:42+00:00 for patch_00032_5925311e
[7/896] ⏩ Skipped 2015-05-11 21:52:48+00:00 for patch_00032_5925311e
[8/896] ⏩ Skipped 2015-05-16 22:00:44+00:00 for patch_00032_5925311e
[9/896] ⏩ Skipped 2015-06-04 21:52:50+00:00 for patch_00032_5925311e
[10/896] ⏩ Skipped 2015-07-22 21:52:52+00:00 for patch_00032_5925311e
[11/896] ⏩ Skipped 2015-07-27 22:00:47+00:00 for patch_00032_5925311e
[12/896] ⏩ Skipped 2015-08-15 21:52:53+00:00 for patch_00032_5925311e
[13/896] ⏩ Skipped 20



Saved raw image: ../data/raw/sentinel1/patch_00044_4359be37_20200526T215234+0000_res1024.tiff
Saved normalized image: ../data/processed/sentinel1/patch_00044_4359be37_20200526T215234+0000_res1024.tiff
Saved metadata: ../data/processed/sentinel1/patch_00044_4359be37_20200526T215234+0000_res1024.json
[439/906] ✅ Done patch_00044_4359be37 @ 2020-05-26 21:52:34+00:00 | avg: 0.04s/date | ETA left: 0.3 min
Downloading patch_00044_4359be37 at 2020-05-29 10:41:26+00:00 (attempt 1)
Saved raw image: ../data/raw/sentinel1/patch_00044_4359be37_20200529T104126+0000_res1024.tiff
Saved normalized image: ../data/processed/sentinel1/patch_00044_4359be37_20200529T104126+0000_res1024.tiff
Saved metadata: ../data/processed/sentinel1/patch_00044_4359be37_20200529T104126+0000_res1024.json
[440/906] ✅ Done patch_00044_4359be37 @ 2020-05-29 10:41:26+00:00 | avg: 0.07s/date | ETA left: 0.5 min
Downloading patch_00044_4359be37 at 2020-06-01 21:53:11+00:00 (attempt 1)
Saved raw image: ../data/raw/sentinel1/patch