In [9]:
# Parameters
region_name = None  # Will be injected by papermill
output_dir = None   # Will be injected by papermill 
parameters = {"region_name": region_name, "output_dir": output_dir}

In [7]:
{
    "metadata": {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3"
        },
        "language_info": {
            "codemirror_mode": {
                "name": "ipython",
                "version": 3
            },
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.12.6"
        }
    }
}

{'metadata': {'kernelspec': {'display_name': 'Python 3',
   'language': 'python',
   'name': 'python3'},
  'language_info': {'codemirror_mode': {'name': 'ipython', 'version': 3},
   'file_extension': '.py',
   'mimetype': 'text/x-python',
   'name': 'python',
   'nbconvert_exporter': 'python',
   'pygments_lexer': 'ipython3',
   'version': '3.12.6'}}}

# Sentinel-2 Data Download

This notebook handles the download of Sentinel-2 satellite imagery for bathymetry analysis. It uses the Copernicus Data Space API to search and download appropriate scenes based on the configured area of interest.

## Prerequisites
- Copernicus Data Space OAuth2 credentials in `sentinel_api_config.json`
- Required Python packages: sentinelsat, geopandas, folium
- Configured location_config.json file

In [None]:
# Import required libraries
import os
import sys
import json
import logging
from pathlib import Path
from datetime import datetime, timedelta
import folium
import geopandas as gpd
from shapely.geometry import box
from sentinelsat import SentinelAPI, geojson_to_wkt

# Set up logging
log_dir = Path(os.getcwd()).parent / 'logs'
log_dir.mkdir(parents=True, exist_ok=True)
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s [%(levelname)s] %(message)s',
    handlers=[
        logging.FileHandler(log_dir / 'pipeline_log.txt'),
        logging.StreamHandler(sys.stdout)
    ]
)
logger = logging.getLogger(__name__)

def verify_api_credentials(config_path: Path) -> dict:
    """
    Verify Copernicus API credentials from config file
    Returns the credentials if valid, otherwise raises ValueError
    """
    try:
        with open(config_path) as f:
            creds = json.load(f)
            
        # Check for OAuth2 credentials (preferred)
        if creds.get('client_id') and creds.get('client_secret'):
            logger.info("[OK] Copernicus API credentials verified and loaded from sentinel_api_config.json")
            return creds
        
        # Check for username/password credentials (fallback)
        if creds.get('username') and creds.get('password'):
            logger.info("[OK] Copernicus API credentials verified and loaded from sentinel_api_config.json")
            return creds
            
        raise ValueError("Missing both OAuth2 (client_id/client_secret) and username/password credentials")
        
    except FileNotFoundError:
        logger.error(f"[ERROR] Sentinel API config not found at: {config_path}")
        raise
    except json.JSONDecodeError:
        logger.error(f"[ERROR] Invalid JSON in sentinel_api_config.json")
        raise
    except Exception as e:
        logger.error(f"[ERROR] Error loading credentials: {str(e)}")
        raise

# Project setup
project_root = Path(os.getcwd()).parent
config_paths = [
    project_root.parent / 'sentinel2_pipeline' / 'config' / 'sentinel_api_config.json',
    project_root / 'config' / 'sentinel_api_config.json'
]

# Try to find and verify credentials
api_config = None
for config_path in config_paths:
    if config_path.exists():
        try:
            api_config = verify_api_credentials(config_path)
            print(f"[OK] Loaded Copernicus credentials from {config_path}")
            break
        except Exception as e:
            print(f"[ERROR] Failed to load credentials from {config_path}: {str(e)}")
            continue

if not api_config:
    error_message = """
[ERROR] Missing or invalid Copernicus API credentials

To fix this:
1. Create a config/sentinel_api_config.json file with your Copernicus credentials:
{
    "client_id": "your_client_id",      // Your Copernicus Data Space client ID
    "client_secret": "your_secret",     // Your Copernicus Data Space client secret
    "token_url": "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token"
}

OR (legacy):
{
    "username": "your_username",  // Your Copernicus Data Space username
    "password": "your_password"   // Your Copernicus Data Space password 
}

2. Make sure you have:
   - Registered at https://dataspace.copernicus.eu/
   - Verified your email address
   - Accepted the Terms and Conditions

Need help? Visit: https://dataspace.copernicus.eu/user-guides/getting-started/step-by-step-guide
"""
    print(error_message)
    raise RuntimeError("No valid API credentials found")

# Initialize paths
config_dir = project_root / 'config'
data_dir = project_root / 'data' / 'sentinel'
location_config_path = config_dir / 'location_config.json'

print(f"\nProject Configuration:")
print(f"Project root: {project_root}")
print(f"Config directory: {config_dir}")
print(f"Data directory: {data_dir}")
print(f"Location config: {location_config_path}")

# Initialize API with credentials
try:
    if api_config.get('client_id') and api_config.get('client_secret'):
        # OAuth2 authentication
        import requests
        
        # Get access token
        token_response = requests.post(
            api_config['token_url'],
            data={
                'grant_type': 'client_credentials',
                'client_id': api_config['client_id'],
                'client_secret': api_config['client_secret']
            }
        )
        token_response.raise_for_status()
        access_token = token_response.json()['access_token']
        
        # Initialize API with token
        api = SentinelAPI(
            None, None,
            'https://catalogue.dataspace.copernicus.eu/api/hub',
            show_progressbars=True,
            api_token=access_token
        )
        print("\n[OK] Successfully connected to Copernicus Data Space API using OAuth2")
    else:
        # Username/password authentication
        api = SentinelAPI(
            api_config['username'],
            api_config['password'],
            'https://apihub.copernicus.eu/apihub/', 
            show_progressbars=True
        )
        print("\n[OK] Successfully connected to Copernicus Data Space API")
        
except Exception as e:
    print(f"[ERROR] Failed to connect to API: {str(e)}")
    raise

2025-11-10 22:08:00,554 [ERROR] ‚ùå Error loading credentials: Missing username in config
‚ùå Failed to load credentials from d:\Project\sentinel2_pipeline\config\sentinel_api_config.json: Missing username in config
2025-11-10 22:08:00,557 [INFO] ‚úÖ Copernicus API credentials verified and loaded from sentinel_api_config.json
‚úÖ Loaded Copernicus credentials from d:\Project\sdb_project\config\sentinel_api_config.json

Project Configuration:
Project root: d:\Project\sdb_project
Config directory: d:\Project\sdb_project\config
Data directory: d:\Project\sdb_project\data\sentinel
Location config: d:\Project\sdb_project\config\location_config.json

‚úÖ Successfully connected to Copernicus Data Space API
‚ùå Failed to load credentials from d:\Project\sentinel2_pipeline\config\sentinel_api_config.json: Missing username in config
2025-11-10 22:08:00,557 [INFO] ‚úÖ Copernicus API credentials verified and loaded from sentinel_api_config.json
‚úÖ Loaded Copernicus credentials from d:\Project\sdb

--- Logging error ---
Traceback (most recent call last):
  File "C:\Users\Bhuvan\AppData\Local\Temp\ipykernel_3772\1052044263.py", line 37, in verify_api_credentials
    raise ValueError("Missing username in config")
ValueError: Missing username in config

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Python312\Lib\logging\__init__.py", line 1163, in emit
    stream.write(msg + self.terminator)
  File "C:\Python312\Lib\encodings\cp1252.py", line 19, in encode
    return codecs.charmap_encode(input,self.errors,encoding_table)[0]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
UnicodeEncodeError: 'charmap' codec can't encode character '\u274c' in position 32: character maps to <undefined>
Call stack:
  File "<frozen runpy>", line 198, in _run_module_as_main
  File "<frozen runpy>", line 88, in _run_code
  File "d:\Project\.venv\Lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.lau

# Sentinel-2 Data Download

This notebook handles the download of Sentinel-2 satellite imagery for bathymetry analysis. It uses the Copernicus Data Space API to search and download appropriate scenes based on the configured area of interest.

## Prerequisites
- Copernicus Data Space account credentials
- Required Python packages: sentinelsat, geopandas, folium
- Configured location_config.json file

## 1. Set up API Authentication

We'll first check for existing credentials in the configuration directory. If not found, we'll prompt for new credentials.

In [None]:
def get_auth_credentials():
    """Get or create Copernicus Data Space credentials"""
    if not auth_path.exists():
        print("\n[WARN] Copernicus authentication required!")
        print("Please enter your Copernicus Data Space credentials:")
        username = input("Username: ")
        password = input("Password: ")
        
        auth_data = {
            'username': username,
            'password': password
        }
        
        auth_path.parent.mkdir(parents=True, exist_ok=True)
        with open(auth_path, 'w') as f:
            json.dump(auth_data, f)
        
        print("[OK] Credentials saved to config/copernicus_auth.json")
        return auth_data
    else:
        with open(auth_path) as f:
            return json.load(f)

# Get credentials and initialize API
auth = get_auth_credentials()
api = SentinelAPI(
    auth['username'],
    auth['password'],
    'https://catalogue.dataspace.copernicus.eu/api/hub/'
)
print("[OK] Successfully connected to Copernicus Data Space API")

NameError: name 'auth_path' is not defined

## 2. Define Area of Interest

Load the area of interest from the location configuration file and create an interactive map visualization.

In [11]:
# Load location configuration
with open(location_config_path) as f:
    config = json.load(f)

# Create bounding box
bbox = box(
    config['aoi']['min_lon'],
    config['aoi']['min_lat'],
    config['aoi']['max_lon'],
    config['aoi']['max_lat']
)
bbox_geojson = gpd.GeoSeries([bbox]).__geo_interface__

# Create interactive map
center_lat = (config['aoi']['min_lat'] + config['aoi']['max_lat']) / 2
center_lon = (config['aoi']['min_lon'] + config['aoi']['max_lon']) / 2

m = folium.Map(location=[center_lat, center_lon], zoom_start=10)
folium.GeoJson(
    bbox_geojson,
    style_function=lambda x: {
        'color': 'red',
        'weight': 2,
        'fillOpacity': 0.1
    }
).add_to(m)

# Save and display map
map_path = project_root / 'temp' / 'aoi_map.html'
map_path.parent.mkdir(parents=True, exist_ok=True)
m.save(str(map_path))

# Print AOI details
print(f"Region: {config['region_name']}")
print(f"Area of Interest:")
print(f"  Latitude:  {config['aoi']['min_lat']}¬∞ to {config['aoi']['max_lat']}¬∞")
print(f"  Longitude: {config['aoi']['min_lon']}¬∞ to {config['aoi']['max_lon']}¬∞")

# Display map
from IPython.display import IFrame
IFrame(src=str(map_path), width='100%', height=500)

Region: Lakshadweep
Area of Interest:
  Latitude:  10.75¬∞ to 10.95¬∞
  Longitude: 72.35¬∞ to 72.65¬∞


## 3. Query Available Sentinel-2 Images

Search for Sentinel-2 Level-2A products over our area of interest with low cloud coverage.

In [None]:
import pandas as pd
import requests

# Set search parameters
footprint = geojson_to_wkt(bbox_geojson)
end_date = datetime.utcnow()
start_date = end_date - timedelta(days=project_root.joinpath('config').exists() and 180 or 180)  # Last 6 months

print("[SEARCH] Searching for Sentinel-2 products...")
print(f"Date range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")

products_df = pd.DataFrame()

# First, try sentinelsat (legacy) - may fail
try:
    products = api.query(
        footprint,
        date=(start_date, end_date),
        platformname='Sentinel-2',
        processinglevel='Level-2A',
        cloudcoverpercentage=tuple(api_config.get('download_options', {}).get('cloudcoverpercentage', (0,20)))
    )
    products_df = api.to_dataframe(products)
    if not products_df.empty and 'cloudcoverpercentage' in products_df:
        products_df = products_df.sort_values('cloudcoverpercentage')
    logger.info('Primary sentinelsat query succeeded')

except Exception as sent_err:
    logger.warning(f"Primary sentinelsat query failed: {sent_err}")

    # Attempt Dataspace OAuth2 token exchange and REST search
    try:
        cfg = project_root / 'config' / 'sentinel_api_config.json'
        with open(cfg) as f:
            ds_cfg = json.load(f)

        client_id = ds_cfg.get('client_id')
        client_secret = ds_cfg.get('client_secret')
        token_url = ds_cfg.get('token_url') or ds_cfg.get('token_endpoint')
        ds_search_url = ds_cfg.get('dataspace_search_url') or 'https://dataspace.copernicus.eu/resto/api/collections/Sentinel2/search.json'

        if not client_id or not client_secret or not token_url:
            raise RuntimeError('Missing Dataspace OAuth2 credentials in config')

        # Get token
        token_resp = requests.post(
            token_url,
            data={
                'grant_type': 'client_credentials',
                'client_id': client_id,
                'client_secret': client_secret
            },
            timeout=30
        )
        token_resp.raise_for_status()
        token_json = token_resp.json()
        access_token = token_json.get('access_token') or token_json.get('accessToken')
        if not access_token:
            raise RuntimeError('No access_token returned from token endpoint')

        headers = {'Authorization': f'Bearer {access_token}'}

        # Build REST search params (Resto)
        minlon = config['aoi']['min_lon']
        minlat = config['aoi']['min_lat']
        maxlon = config['aoi']['max_lon']
        maxlat = config['aoi']['max_lat']

        params = {
            'startDate': start_date.strftime('%Y-%m-%d'),
            'endDate': end_date.strftime('%Y-%m-%d'),
            'box': f"{minlon},{minlat},{maxlon},{maxlat}",
            'maxRecords': ds_cfg.get('search_options', {}).get('max_products', 100),
            'cloudCover': ','.join(map(str, ds_cfg.get('download_options', {}).get('cloudcoverpercentage', [0,20])))
        }

        resp = requests.get(ds_search_url, params=params, headers=headers, timeout=60)
        resp.raise_for_status()
        data = resp.json()

        # Resto returns 'features' (GeoJSON) or 'entries' - parse defensively
        entries = data.get('features') or data.get('entries') or []
        rows = []
        for e in entries:
            # support both GeoJSON Feature and Resto entry formats
            if 'properties' in e and isinstance(e['properties'], dict):
                props = e['properties']
            else:
                props = e if isinstance(e, dict) else {}

            title = props.get('title') or props.get('id') or props.get('identifier')
            begin = props.get('beginposition') or props.get('beginPosition') or props.get('date') or props.get('begin')
            cloud = props.get('cloudCover') or props.get('cloudcoverpercentage') or props.get('cloudcoverage')
            size = props.get('size') or props.get('granuleSize')
            uuid = props.get('id') or props.get('uuid') or props.get('identifier')
            footprint_prop = props.get('footprint') or props.get('geometry') or props.get('footprint_geojson')

            rows.append({
                'title': title,
                'beginposition': begin,
                'cloudcoverpercentage': cloud,
                'size': size,
                'uuid': uuid,
                'footprint': footprint_prop
            })

        products_df = pd.DataFrame(rows)
        if not products_df.empty and 'cloudcoverpercentage' in products_df:
            # attempt to coerce cloud to numeric then sort
            products_df['cloudcoverpercentage'] = pd.to_numeric(products_df['cloudcoverpercentage'], errors='coerce')
            products_df = products_df.sort_values('cloudcoverpercentage')

        logger.info(f'Dataspace REST returned {len(products_df)} entries')

    except Exception as ds_err:
        logger.error(f"Dataspace REST API failed: {ds_err}")
        # Final fallback: populate demo placeholder to let downstream steps run
        products_df = pd.DataFrame()

# Present results
if len(products_df) > 0:
    print(f"\n[OK] Found {len(products_df)} suitable products")
    display(products_df[[col for col in products_df.columns if col in ['title', 'beginposition', 'cloudcoverpercentage', 'size']]].head())
else:
    print("[ERROR] No suitable products found or search failed. Using demo mode is available.")

  end_date = datetime.utcnow()


üîç Searching for Sentinel-2 products...
Date range: 2025-05-14 to 2025-11-10
2025-11-11 00:13:43,615 [ERROR] Dataspace REST API failed: 404 Client Error: Not Found for url: https://dataspace.copernicus.eu/resto/api/collections/Sentinel2/search.json?startDate=2025-05-14&endDate=2025-11-10&box=72.35%2C10.75%2C72.65%2C10.95&maxRecords=5&cloudCover=0%2C20
‚ùå No suitable products found or search failed. Using demo mode is available.
2025-11-11 00:13:43,615 [ERROR] Dataspace REST API failed: 404 Client Error: Not Found for url: https://dataspace.copernicus.eu/resto/api/collections/Sentinel2/search.json?startDate=2025-05-14&endDate=2025-11-10&box=72.35%2C10.75%2C72.65%2C10.95&maxRecords=5&cloudCover=0%2C20
‚ùå No suitable products found or search failed. Using demo mode is available.


In [None]:
# Save search results to logs/sentinel_search_results.json
from pathlib import Path
import json
import pandas as pd

out_path = project_root / 'logs' / 'sentinel_search_results.json'
out_path.parent.mkdir(parents=True, exist_ok=True)

try:
    if 'products_df' in globals() and hasattr(products_df, 'to_dict'):
        if products_df.empty:
            results = []
        else:
            cols = [c for c in ['title','beginposition','cloudcoverpercentage','size','uuid'] if c in products_df.columns]
            df = products_df[cols].copy()
            # Ensure JSON-serializable
            if 'beginposition' in df.columns:
                df['beginposition'] = df['beginposition'].astype(str)
            if 'cloudcoverpercentage' in df.columns:
                df['cloudcoverpercentage'] = pd.to_numeric(df['cloudcoverpercentage'], errors='coerce')
            results = df.to_dict(orient='records')
    else:
        results = []
except Exception as _e:
    results = []

out = {'count': len(results), 'results': results}
with open(out_path, 'w', encoding='utf-8') as fh:
    json.dump(out, fh, default=str, indent=2)

print(f"Saved search results to: {out_path}")
print(f"Matching products: {out['count']}")
if out['count'] > 0:
    for r in out['results'][:10]:
        print(f" - {r.get('title')} | date: {r.get('beginposition')} | cloud: {r.get('cloudcoverpercentage')}")

Saved search results to: d:\Project\sdb_project\logs\sentinel_search_results.json
Matching products: 0


## 4. Download Selected Product

Download the best available Sentinel-2 product (lowest cloud coverage) and organize the data.

In [None]:
if len(products_df) > 0:
    # Select best product
    best_product = products_df.iloc[0]
    
    # Create download directory
    region_slug = config['region_name'].lower().replace(' ', '_')
    download_dir = data_dir / region_slug / 'raw'
    download_dir.mkdir(parents=True, exist_ok=True)
    
    print(f"[DOWNLOAD] Downloading: {best_product['title']}")
    print(f"Cloud coverage: {best_product['cloudcoverpercentage']:.1f}%")
    print(f"Date: {best_product['beginposition'].strftime('%Y-%m-%d')}")
    print(f"Size: {best_product['size']}")
    print(f"Download directory: {download_dir}")
    
    # Download product
    api.download(best_product['uuid'], directory_path=str(download_dir))
    
    print("\n[OK] Download completed successfully")
    
    # Save metadata
    metadata = {
        'product_id': best_product['title'],
        'download_date': datetime.now().isoformat(),
        'cloud_coverage': float(best_product['cloudcoverpercentage']),
        'acquisition_date': best_product['beginposition'].isoformat(),
        'footprint': best_product['footprint'],
        'processing_level': best_product['processinglevel']
    }
    
    metadata_file = download_dir / 'metadata.json'
    with open(metadata_file, 'w') as f:
        json.dump(metadata, f, indent=2)
        
    print(f"[OK] Metadata saved to {metadata_file}")
else:
    print("[ERROR] No products to download")

## 5. Verify Downloaded Data

Check the downloaded data and display the directory structure.

In [None]:
def print_directory_tree(path, prefix=''):
    """Print a directory tree structure"""
    if not path.exists():
        print("Directory not found!")
        return
        
    contents = sorted(path.glob('*'))
    pointers = ['‚îú‚îÄ‚îÄ ' if i < len(contents) - 1 else '‚îî‚îÄ‚îÄ ' for i in range(len(contents))]
    
    for pointer, path in zip(pointers, contents):
        print(prefix + pointer + path.name)
        if path.is_dir():
            extension = '‚îÇ   ' if pointer == '‚îú‚îÄ‚îÄ ' else '    '
            print_directory_tree(path, prefix + extension)

# Print the data directory structure
region_slug = config['region_name'].lower().replace(' ', '_')
data_path = data_dir / region_slug
print(f"Data directory structure for {config['region_name']}:")
print_directory_tree(data_path)

# Display metadata if available
metadata_file = data_path / 'raw' / 'metadata.json'
if metadata_file.exists():
    with open(metadata_file) as f:
        metadata = json.load(f)
    print("\nProduct Metadata:")
    for key, value in metadata.items():
        print(f"{key}: {value}")