# Collect sentinel-1 rtc data from Planetary Computer

In [2]:
import geopandas as gpd
import planetary_computer
import pystac_client
import rioxarray
import numpy as np
import dask.array as da
from pathlib import Path
import warnings
import logging
from rasterio.errors import RasterioIOError
from functools import lru_cache
import os
from datetime import datetime
from tqdm import tqdm
import warnings


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.0.0 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages

AttributeError: _ARRAY_API not found

In [3]:
warnings.filterwarnings('ignore')

In [4]:
# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        logging.FileHandler("sentinel1_processing.log"),
        logging.StreamHandler()
    ]
)
logger = logging.getLogger(__name__)

In [5]:
# Paths and parameters
shapefile_path = "C:/Users/Jado/Documents/DFGF/Virunga_massif/Virunga_32735_massif.shp"
output_dir = Path("G:/Semester4/Innolab/eoAPI/data/sentinel-1-rtc")
start_date = "2024-06-01"
end_date = "2024-08-31"
polarizations = ["vv", "vh"]
chunk_size = 2048 

In [6]:
# Ensure output directories exist
output_dir.mkdir(parents=True, exist_ok=True)

In [7]:
def load_aoi():
    logger.info(f"Loading shapefile from {shapefile_path}")
    gdf = gpd.read_file(shapefile_path)
    if gdf.crs is None:
        raise ValueError("Shapefile has no CRS information")
    
    # Convert to WGS84 (EPSG:4326) for STAC API
    gdf = gdf.to_crs(epsg=4326)
    logger.info(f"Loaded shapefile with {len(gdf)} features, CRS: {gdf.crs}")
    return gdf

#### Connect to the planetary computer stac catalog

In [9]:
def connect_to_catalog():
    try:
        catalog = pystac_client.Client.open(
            "https://planetarycomputer.microsoft.com/api/stac/v1",
            modifier=planetary_computer.sign_inplace,
        )
        logger.info("✅ Connected to STAC catalog")
        return catalog
    except Exception as e:
        logger.error(f"Failed to connect to STAC catalog: {e}")
        raise

### Search for sentinel-1 rtc items within the defined area of interest and time range

In [11]:
def search_sentinel1(catalog, aoi_geojson, time_range):
    try:
        search = catalog.search(
            collections=["sentinel-1-rtc"],
            intersects=aoi_geojson,
            datetime=time_range
        )
        items = list(search.get_all_items())
        logger.info(f"🔍 Found {len(items)} Sentinel-1 items for {time_range}")
        
        # Sort by datetime
        sorted_items = sorted(items, key=lambda x: x.properties["datetime"])
        
        # Log item details
        for item in sorted_items:
            date = item.properties["datetime"].split("T")[0]
            orbit = item.properties.get("sat:orbit_state", "unknown")
            logger.debug(f"Item: {item.id}, Date: {date}, Orbit: {orbit}")
            
        return sorted_items
    except Exception as e:
        logger.error(f"Error searching for Sentinel-1 data: {e}")
        raise

### Get a signed url for accessing the datasets 

In [13]:
@lru_cache(maxsize=None)
def get_signed_url(href):
    return planetary_computer.sign(href)

### Clipping to the aoi 

In [24]:
def process_and_clip_band(signed_url, output_path, gdf, crs_target="EPSG:4326"):
    try:
        with rioxarray.open_rasterio(signed_url, masked=True, chunks={"x": chunk_size, "y": chunk_size}) as ds:
            # Check if we need to reproject
            if ds.rio.crs != crs_target:
                ds_repr = ds.rio.reproject(crs_target)
            else:
                ds_repr = ds
                
            # Clip to geometry
            clipped = ds_repr.rio.clip(gdf.geometry, gdf.crs, all_touched=True, drop=True)
            
            # Force computation with Dask
            clipped = clipped.compute()
            
            # Save to file
            clipped.rio.to_raster(output_path)
            logger.info(f"✅ Processed and saved: {output_path}")
            return True
    except Exception as e:
        logger.error(f"❌ Error processing band: {e}")
        return False

In [26]:
def main():
    start_time = datetime.now()
    logger.info(f"Starting Sentinel-1 RTC data collection at {start_time}")
    
    try:
        # Load AOI
        gdf = load_aoi()
        aoi_geojson = gdf.unary_union.__geo_interface__
        
        # Connect to catalog
        catalog = connect_to_catalog()
        
        # Search for items
        time_range = f"{start_date}/{end_date}"
        items = search_sentinel1(catalog, aoi_geojson, time_range)
        
        if not items:
            logger.warning("No Sentinel-1 items found for the specified parameters")
            return
        
        # Process each item with progress bar
        for item in tqdm(items, desc="Processing items"):
            item_dir = output_dir / item.id
            item_dir.mkdir(parents=True, exist_ok=True)
            
            orbit_direction = item.properties.get("sat:orbit_state", "unknown")
            date = item.properties["datetime"].split("T")[0]
            
            logger.info(f"Processing item {item.id} from {date} (orbit: {orbit_direction})")
            
            # Process each polarization
            for polarization in polarizations:
                asset = item.assets.get(polarization)
                if not asset:
                    logger.warning(f"No {polarization} asset found for item {item.id}")
                    continue
                
                output_path = item_dir / f"sentinel1_{polarization}_{date}.tif"
                
                # Skip if already exists
                if output_path.exists():
                    logger.info(f"📁 Skipping (already exists): {output_path}")
                    continue
                
                # Get signed URL and process
                signed_url = get_signed_url(asset.href)
                success = process_and_clip_band(signed_url, output_path, gdf)
                
                if not success:
                    logger.warning(f"Failed to process {polarization} for item {item.id}")
        
        end_time = datetime.now()
        duration = end_time - start_time
        logger.info(f"Processing completed in {duration}")
        
    except Exception as e:
        logger.error(f"An error occurred during processing: {e}", exc_info=True)

In [28]:
if __name__ == "__main__":
    main()

2025-03-12 19:48:49,920 - INFO - Starting Sentinel-1 RTC data collection at 2025-03-12 19:48:49.920356
2025-03-12 19:48:49,924 - INFO - Loading shapefile from C:/Users/Jado/Documents/DFGF/Virunga_massif/Virunga_32735_massif.shp
2025-03-12 19:48:50,684 - INFO - Loaded shapefile with 1 features, CRS: EPSG:4326
--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 33: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py