# Collecting Sentinel-2-2lA for the Virunga Volcanoes Massif

#### Import libraries 

In [1]:
import os
import planetary_computer
import pystac_client
import json
from datetime import datetime
from pathlib import Path
import rioxarray
import geopandas as gpd
from dask.diagnostics import ProgressBar
import warnings
import time
import logging
import xarray as xr
import numpy as np
import requests
from requests.exceptions import RequestException
from tqdm import tqdm

In [2]:
warnings.filterwarnings("ignore")

In [3]:
# Set up logging
logging.basicConfig(level=logging.INFO, 
                    format='%(asctime)s - %(levelname)s - %(message)s',
                    handlers=[
                        logging.FileHandler("sentinel_download.log"),
                        logging.StreamHandler()
                    ])
logger = logging.getLogger(__name__)

### Define parameters 

In [4]:
bbox = [29.339450987927847, -1.568347900000008, 
        29.706553166843705, -1.3332541178785493]

start_date = "2024-01-01"
end_date = "2024-12-31"
output_dir = Path("G:/Semester4/Innolab/eoAPI/data/Sentinel-2-L2A")

In [7]:
# Create output directory
output_dir.mkdir(parents=True, exist_ok=True)

In [8]:
shapefile_path = "C:/Users/Jado/Documents/DFGF/Virunga_massif/Virunga_32735_massif.shp"
gdf = gpd.read_file(shapefile_path)
gdf = gdf.to_crs(epsg=4326)

In [9]:
# Define bands with their properties
BANDS = {
    'B01': {'alias': 'coastal', 'resolution': '60m'},
    'B02': {'alias': 'blue', 'resolution': '10m'},
    'B03': {'alias': 'green', 'resolution': '10m'},
    'B04': {'alias': 'red', 'resolution': '10m'},
    'B05': {'alias': 'rededge1', 'resolution': '20m'},
    'B06': {'alias': 'rededge2', 'resolution': '20m'},
    'B07': {'alias': 'rededge3', 'resolution': '20m'},
    'B08': {'alias': 'nir', 'resolution': '10m'},
    'B8A': {'alias': 'narrow_nir', 'resolution': '20m'},
    'B09': {'alias': 'water_vapor', 'resolution': '60m'},
    'B11': {'alias': 'swir1', 'resolution': '20m'},
    'B12': {'alias': 'swir2', 'resolution': '20m'},
    'SCL': {'alias': 'scene_classification', 'resolution': '20m'}
}

### Function to handle the API connection with retries 

In [10]:
def get_catalog_with_retry(max_retries=5, initial_delay=2):
    retries = 0
    while retries < max_retries:
        try:
            catalog = pystac_client.Client.open(
                "https://planetarycomputer.microsoft.com/api/stac/v1",
                modifier=planetary_computer.sign_inplace,
            )
            return catalog
        except Exception as e:
            retries += 1
            wait_time = initial_delay * (2 ** (retries - 1))  # Exponential backoff
            logger.warning(f"Connection attempt {retries} failed: {e}. Retrying in {wait_time}s...")
            time.sleep(wait_time)
    
    raise ConnectionError("Failed to connect to STAC catalog after maximum retries")

### Function to search for items with retry logic 

In [11]:
def search_items_with_retry(catalog, max_retries=5, initial_delay=2):
    retries = 0
    while retries < max_retries:
        try:
            search = catalog.search(
                collections=["sentinel-2-l2a"],
                bbox=bbox,
                datetime=f"{start_date}/{end_date}",
                query={"eo:cloud_cover": {"lt": 30}},
            )
            items = list(search.get_all_items())
            return items
        except Exception as e:
            retries += 1
            wait_time = initial_delay * (2 ** (retries - 1))
            logger.warning(f"Search attempt {retries} failed: {e}. Retrying in {wait_time}s...")
            time.sleep(wait_time)
    
    raise ConnectionError("Failed to search items after maximum retries")

In [12]:
# Function to check if a band has already been successfully processed
def is_band_processed(item_dir, band):
    band_file = item_dir / f"{band}.tif"
    if band_file.exists() and band_file.stat().st_size > 0:
        try:
            # Verify the file is valid by attempting to open it
            with rioxarray.open_rasterio(str(band_file)) as ds:
                if ds.shape[1] > 0 and ds.shape[2] > 0:  # Check dimensions
                    return True
        except Exception:
            # If there's an error opening the file, it's corrupted
            return False
    return False

### Function to process and clip a band with retries 

In [13]:
def process_and_clip_band(signed_url, output_path, gdf, band, max_retries=3, crs_target="EPSG:4326"):
    retries = 0
    while retries < max_retries:
        try:
            # First check if we can access the URL
            head_resp = requests.head(signed_url)
            if head_resp.status_code != 200:
                logger.warning(f"URL check failed for {band}: HTTP code {head_resp.status_code}")
                # Get a fresh signed URL
                new_url = planetary_computer.sign(signed_url.split('?')[0])
                signed_url = new_url
                retries += 1
                continue
            
            # Use smaller chunks for more manageable memory usage
            chunk_size = 1024 if band in ['B01', 'B09', 'SCL'] else 2048  # Smaller chunks for lower resolution bands
            with rioxarray.open_rasterio(signed_url, chunks={'x': chunk_size, 'y': chunk_size}) as ds:
                # Reproject to target CRS
                ds_repr = ds.rio.reproject(crs_target)
                
                # Clip with shapefile
                clipped = ds_repr.rio.clip(gdf.geometry, gdf.crs, all_touched=True, drop=True)
                
                # Save to file
                clipped.rio.to_raster(output_path)
                # Verify file was created successfully
                if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
                    return True
                else:
                    logger.warning(f"Output file {output_path} is empty or not created")
                    retries += 1
        except Exception as e:
            retries += 1
            wait_time = 2 * retries
            logger.warning(f"Processing attempt {retries} for {band} failed: {e}. Retrying in {wait_time}s...")
            time.sleep(wait_time)
    
    logger.error(f"Failed to process {band} after maximum retries")
    return False

### Function to get band file paths that were successfully processed 

In [14]:
def get_processed_band_files(item_dir):
    band_files = {}
    for band in BANDS.keys():
        band_file = item_dir / f"{band}.tif"
        if band_file.exists() and band_file.stat().st_size > 0:
            try:
                # Verify the file is valid
                with rioxarray.open_rasterio(str(band_file)) as ds:
                    if ds.shape[1] > 0 and ds.shape[2] > 0:
                        band_files[band] = str(band_file.name)
            except Exception:
                logger.warning(f"Skipping corrupted band file: {band_file}")
    return band_files

### Main execution 

In [None]:
try:
    # Connect to catalog with retry logic
    logger.info("Connecting to STAC catalog...")
    catalog = get_catalog_with_retry()
    logger.info("✅ Connected to STAC catalog")
    
    # Search for Sentinel-2 data with retry logic
    logger.info(f"Searching for Sentinel-2 data between {start_date} and {end_date}...")
    items = search_items_with_retry(catalog)
    logger.info(f"🔍 Found {len(items)} items")
    
    # Save the list of items for potential later use
    with open(output_dir / "item_list.json", "w") as f:
        # Convert items to a serializable format
        item_list = [{"id": item.id, "datetime": item.datetime.isoformat(), "cloud_cover": item.properties.get("eo:cloud_cover")} for item in items]
        json.dump(item_list, f, indent=2)
    
    # Process each item
    for idx, item in enumerate(items, 1):
        logger.info(f"\n📦 Processing item {idx}/{len(items)}: {item.id}")
        
        # Create directory for this scene
        item_dir = output_dir / item.id
        item_dir.mkdir(parents=True, exist_ok=True)
        
        # Process each band
        successfully_processed = 0
        already_processed = 0
        failed_bands = []
        
        for band, band_info in BANDS.items():
            # Check if band already processed successfully
            if is_band_processed(item_dir, band):
                logger.info(f"  ✓ Band {band} ({band_info['alias']}) already processed, skipping")
                already_processed += 1
                continue
                
            logger.info(f"  Processing {band} ({band_info['alias']})...")
            
            asset = item.assets.get(band)
            if not asset:
                logger.warning(f"⚠️ Missing band {band}")
                failed_bands.append(band)
                continue
            
            signed_url = planetary_computer.sign(asset.href)
            output_file = item_dir / f"{band}.tif"
            
            with ProgressBar():
                if process_and_clip_band(signed_url, str(output_file), gdf, band):
                    logger.info(f"  ✅ Saved clipped {band}.tif")
                    successfully_processed += 1
                else:
                    logger.error(f"  ❌ Failed to process {band}")
                    failed_bands.append(band)
        
        # Log summary for this item
        logger.info(f"  Summary for {item.id}:")
        logger.info(f"    - Already processed: {already_processed} bands")
        logger.info(f"    - Successfully processed: {successfully_processed} bands")
        logger.info(f"    - Failed: {len(failed_bands)} bands")
        if failed_bands:
            logger.info(f"    - Failed bands: {', '.join(failed_bands)}")
            
        # If there are many failures, pause before the next item to avoid rate limiting
        if len(failed_bands) > len(BANDS) / 2:
            pause_time = 30
            logger.info(f"  Many failures detected, pausing for {pause_time} seconds before next item...")
            time.sleep(pause_time)
    
    logger.info("\n🎉 Processing complete!")
    
    # Final report
    all_processed_scenes = list(output_dir.glob("S2*"))
    logger.info(f"Total processed scenes: {len(all_processed_scenes)}")
    
except Exception as e:
    logger.error(f"Critical error in main execution: {e}", exc_info=True)

2025-03-22 18:49:49,350 - INFO - Connecting to STAC catalog...
--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 33: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\ker

[########################################] | 100% Completed | 2.65 sms


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 35: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

[########################################] | 100% Completed | 85.17 s


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 35: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

[########################################] | 100% Completed | 57.64 s


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 35: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

[########################################] | 100% Completed | 56.16 s


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 35: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

[########################################] | 100% Completed | 13.37 s


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 35: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

[########################################] | 100% Completed | 14.78 s


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 35: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

[########################################] | 100% Completed | 15.07 s


--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Jado\anaconda3\Lib\logging\__init__.py", line 1113, in emit
    stream.write(msg + self.terminator)
  File "C:\Users\Jado\anaconda3\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u2705' in position 35: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "C:\Users\Jado\anaconda3\Lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File 

[###############################         ] | 77% Completed | 50.33 ss