# Collect sentinel-1 rtc data from Planetary Computer

In [1]:
import geopandas as gpd
import planetary_computer
import pystac_client
import rioxarray
import numpy as np
import dask.array as da
from pathlib import Path
import warnings
import logging
from rasterio.errors import RasterioIOError
from functools import lru_cache
import os
import time 
from datetime import datetime
from tqdm import tqdm
import warnings

In [2]:
warnings.filterwarnings('ignore')

In [3]:
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("sentinel1_processing.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

In [4]:
# Paths and parameters
shapefile_path = "C:/Users/Jado/Documents/DFGF/Virunga_massif/Virunga_32735_massif.shp"
output_dir = Path("G:/Semester4/Innolab/eoAPI/data/Sentinel-1-RTC")
start_date = "2024-01-01"
end_date = "2024-12-31"
polarizations = ["vv", "vh"]
chunk_size = 2048 
max_retries = 5
initial_delay = 2

In [5]:
# Ensure output directories exist
output_dir.mkdir(parents=True, exist_ok=True)

In [6]:
def load_aoi():
    logger.info(f"Loading shapefile from {shapefile_path}")
    try:
        gdf = gpd.read_file(shapefile_path)
        if gdf.crs is None:
            raise ValueError("Shapefile has no CRS information")
        
        # Convert to WGS84 (EPSG:4326) for STAC API
        gdf = gdf.to_crs(epsg=4326)
        logger.info(f"Loaded shapefile with {len(gdf)} features, CRS: {gdf.crs}")
        return gdf
    except Exception as e:
        logger.error(f"Failed to load shapefile: {e}")
        raise

#### Connect to the planetary computer stac catalog

In [7]:
def connect_to_catalog(max_retries=5, initial_delay=2):
    retries = 0
    while retries < max_retries:
        try:
            catalog = pystac_client.Client.open(
                "https://planetarycomputer.microsoft.com/api/stac/v1",
                modifier=planetary_computer.sign_inplace,
            )
            logger.info("Successfully connected to STAC catalog")
            return catalog
        except Exception as e:
            retries += 1
            wait_time = initial_delay * (2 ** (retries - 1))  # Exponential backoff
            logger.warning(f"Connection attempt {retries} failed: {e}. Retrying in {wait_time}s...")
            time.sleep(wait_time)
    
    raise ConnectionError("Failed to connect to STAC catalog after maximum retries")

### Search for sentinel-1 rtc items within the defined area of interest and time range

In [8]:
def search_sentinel1(catalog, aoi_geojson, time_range, max_retries=5, initial_delay=2):
    retries = 0
    while retries < max_retries:
        try:
            search = catalog.search(
                collections=["sentinel-1-rtc"],
                intersects=aoi_geojson,
                datetime=time_range
            )
            items = list(search.get_all_items())
            logger.info(f"🔍 Found {len(items)} Sentinel-1 items for {time_range}")
            
            # Sort by datetime
            sorted_items = sorted(items, key=lambda x: x.properties["datetime"])
            
            # Log item details
            for item in sorted_items:
                date = item.properties["datetime"].split("T")[0]
                orbit = item.properties.get("sat:orbit_state", "unknown")
                logger.debug(f"Item: {item.id}, Date: {date}, Orbit: {orbit}")
                
            return sorted_items
        except Exception as e:
            retries += 1
            wait_time = initial_delay * (2 ** (retries - 1))
            logger.warning(f"Search attempt {retries} failed: {e}. Retrying in {wait_time}s...")
            time.sleep(wait_time)
    
    raise ConnectionError("Failed to search items after maximum retries")

### Get a signed url for accessing the datasets 

In [9]:
@lru_cache(maxsize=None)
def get_signed_url(href):
    try:
        return planetary_computer.sign(href)
    except Exception as e:
        logger.error(f"Failed to get signed URL for {href}: {e}")
        raise

### Clipping to the aoi 

In [10]:
def process_and_clip_band(signed_url, output_path, gdf, crs_target="EPSG:4326", max_retries=3):
    retries = 0
    while retries < max_retries:
        try:
            with rioxarray.open_rasterio(signed_url, masked=True, chunks={"x": chunk_size, "y": chunk_size}) as ds:
                # Check if we need to reproject
                if ds.rio.crs != crs_target:
                    ds_repr = ds.rio.reproject(crs_target)
                else:
                    ds_repr = ds
                    
                # Clip to geometry
                clipped = ds_repr.rio.clip(gdf.geometry, gdf.crs, all_touched=True, drop=True)
                
                # Force computation with Dask
                clipped = clipped.compute()
                
                # Save to file
                clipped.rio.to_raster(output_path)
                logger.info(f"✅ Processed and saved: {output_path}")
                return True
        except RasterioIOError as e:
            retries += 1
            wait_time = 2 * (2 ** (retries - 1))
            logger.warning(f"IO error processing band (attempt {retries}/{max_retries}): {e}. Retrying in {wait_time}s...")
            time.sleep(wait_time)
        except Exception as e:
            logger.error(f"❌ Error processing band: {e}")
            return False
    
    logger.error(f"Failed to process band after {max_retries} attempts")
    return False

### Main execution 

In [11]:
start_time = datetime.now()
logger.info(f"Starting Sentinel-1 RTC data collection at {start_time}")

2025-03-23 22:17:49,401 - INFO - Starting Sentinel-1 RTC data collection at 2025-03-23 22:17:49.401128


In [12]:
try:
    # Load AOI
    gdf = load_aoi()
    aoi_geojson = gdf.unary_union.__geo_interface__
    
    # Connect to catalog
    catalog = connect_to_catalog(max_retries=max_retries, initial_delay=initial_delay)
    
    # Search for items
    time_range = f"{start_date}/{end_date}"
    items = search_sentinel1(catalog, aoi_geojson, time_range, max_retries=max_retries, initial_delay=initial_delay)
    
    if not items:
        logger.warning("No Sentinel-1 items found for the specified parameters")
    else:
        # Process each item with progress bar
        successful_items = 0
        failed_items = 0
        skipped_items = 0
        
        for item in tqdm(items, desc="Processing items"):
            item_dir = output_dir / item.id
            item_dir.mkdir(parents=True, exist_ok=True)
            
            orbit_direction = item.properties.get("sat:orbit_state", "unknown")
            date = item.properties["datetime"].split("T")[0]
            
            logger.info(f"Processing item {item.id} from {date} (orbit: {orbit_direction})")
            
            item_success = True
            item_skipped = True
            
            # Process each polarization
            for polarization in polarizations:
                asset = item.assets.get(polarization)
                if not asset:
                    logger.warning(f"No {polarization} asset found for item {item.id}")
                    continue
                
                output_path = item_dir / f"sentinel1_{polarization}_{date}.tif"
                
                # Skip if already exists
                if output_path.exists():
                    logger.info(f"📁 Skipping (already exists): {output_path}")
                    continue
                
                item_skipped = False  # At least one band is being processed
                
                # Get signed URL and process
                try:
                    signed_url = get_signed_url(asset.href)
                    success = process_and_clip_band(signed_url, output_path, gdf, max_retries=max_retries)
                    
                    if not success:
                        logger.warning(f"Failed to process {polarization} for item {item.id}")
                        item_success = False
                except Exception as e:
                    logger.error(f"Error getting signed URL or processing {polarization} for item {item.id}: {e}")
                    item_success = False
            
            if item_skipped:
                skipped_items += 1
            elif item_success:
                successful_items += 1
            else:
                failed_items += 1
        
        logger.info(f"Processing summary: {successful_items} successful, {failed_items} failed, {skipped_items} skipped items")
    
    end_time = datetime.now()
    duration = end_time - start_time
    logger.info(f"Processing completed in {duration}")
    
except Exception as e:
    logger.error(f"An error occurred during processing: {e}", exc_info=True)

2025-03-23 22:17:49,918 - INFO - Loading shapefile from C:/Users/Jado/Documents/DFGF/Virunga_massif/Virunga_32735_massif.shp
2025-03-23 22:17:50,005 - INFO - Loaded shapefile with 1 features, CRS: EPSG:4326
2025-03-23 22:17:50,164 - INFO - Successfully connected to STAC catalog
--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\U0001f50d' in position 33: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
   