# Satellite Data Analysis

Analyze and process real satellite imagery:
- Access Sentinel-1/2 data
- Process SAR imagery
- Compare satellite sources
- Quality control analysis

In [None]:
import os
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt
import requests

# Confirm the import cell ran and remind the reader that downloading
# Sentinel data needs an account.
for message in (
    "Imports successful!",
    "\n⚠️ Note: Sentinel credentials required for data download",
    "   Register at: https://scihub.copernicus.eu/dhus",
):
    print(message)

## 1. Sentinel Data Configuration

In [None]:
# Sentinel credentials are read from the process environment; the
# placeholder defaults mark them as "not configured".
# NOTE(review): nothing visible in this notebook loads a .env file —
# confirm the variables are exported before the kernel starts.
SENTINEL_USERNAME = os.environ.get('SENTINEL_USERNAME', 'your_username')
SENTINEL_PASSWORD = os.environ.get('SENTINEL_PASSWORD', 'your_password')

# Example Arctic search area (Greenland), given as its four edges.
ARCTIC_BBOX = dict(min_lon=-50, min_lat=70, max_lon=-40, max_lat=75)

# Only the username is inspected to decide whether credentials were supplied.
username_configured = SENTINEL_USERNAME != 'your_username'

print("Configuration:")
print(f"  Bounding Box: {ARCTIC_BBOX}")
print(f"  Username set: {username_configured}")

if not username_configured:
    print("\n⚠️ Please set SENTINEL_USERNAME and SENTINEL_PASSWORD in .env file")
    print("   Or update the variables above")

## 2. Connect to the Copernicus Open Access Hub

In [None]:
def connect_to_sentinel():
    """Create a SentinelAPI client for the Copernicus hub.

    Reads the module-level SENTINEL_USERNAME and SENTINEL_PASSWORD.

    Returns:
        The SentinelAPI instance, or None if anything in the setup raised
        (troubleshooting hints are printed in that case).

    NOTE(review): success here only means the constructor returned; it
    presumably does not validate the credentials against the server —
    confirm against the sentinelsat documentation.
    NOTE(review): the Open Access Hub at scihub.copernicus.eu was reportedly
    retired in 2023 in favour of the Copernicus Data Space Ecosystem —
    confirm this endpoint still responds.
    """
    hub_url = 'https://scihub.copernicus.eu/dhus'
    troubleshooting = (
        "\nPlease check:",
        "  1. Credentials are correct",
        "  2. Account is activated",
        "  3. Internet connection",
    )

    try:
        client = SentinelAPI(SENTINEL_USERNAME, SENTINEL_PASSWORD, hub_url)
        print("✅ Connected to Sentinel Hub")
        return client
    except Exception as err:
        # Notebook boundary: report the problem instead of aborting the cell.
        print(f"❌ Connection failed: {err}")
        for hint in troubleshooting:
            print(hint)
        return None

# Attempt a connection only once the placeholder username has been replaced.
# `api` is bound on both branches so later cells can test it against None.
if SENTINEL_USERNAME == 'your_username':
    api = None
    print("⚠️ Skipping connection (credentials not set)")
else:
    api = connect_to_sentinel()

## 3. Search for Sentinel-1 SAR Data

In [None]:
def search_sentinel1_data(api, bbox, start_date, end_date, limit=10):
    """Search for Sentinel-1 GRD products inside a bounding box.

    Args:
        api: Object exposing ``query(area=..., date=..., platformname=...,
            producttype=...)``, or None when no connection exists.
        bbox: Mapping with 'min_lon', 'min_lat', 'max_lon' and 'max_lat'.
        start_date: Start of the date range; must provide ``.date()``.
        end_date: End of the date range; must provide ``.date()``.
        limit: Maximum number of products to print. It only caps the
            printed summary — every match is still returned. ``None``
            prints all of them; zero or a negative value prints none.

    Returns:
        Whatever ``api.query`` returned (used here as a mapping of product
        id to metadata), or None when ``api`` is None or the search raised.
    """
    if api is None:
        print("⚠️ API not connected. Set credentials first.")
        return None

    # Build the WKT footprint as a closed ring: the first corner is repeated
    # at the end.
    west, south = bbox['min_lon'], bbox['min_lat']
    east, north = bbox['max_lon'], bbox['max_lat']
    ring = [(west, south), (east, south), (east, north), (west, north), (west, south)]
    footprint = "POLYGON((" + ", ".join(f"{lon} {lat}" for lon, lat in ring) + "))"

    print("Searching Sentinel-1 data...")
    print(f"  Date range: {start_date.date()} to {end_date.date()}")
    print(f"  Area: {bbox}")

    try:
        products = api.query(
            area=footprint,
            date=(start_date, end_date),
            platformname='Sentinel-1',
            producttype='GRD',
        )

        print(f"\n✅ Found {len(products)} Sentinel-1 products")

        # Summarise the first few matches without copying the whole result
        # into a list; the product ids themselves are not displayed.
        for index, product in enumerate(products.values(), start=1):
            if limit is not None and index > limit:
                break
            print(f"\n  Product {index}:")
            print(f"    Title: {product['title']}")
            print(f"    Date: {product['beginposition']}")
            print(f"    Size: {product['size']}")

        return products

    except Exception as e:
        # Notebook boundary: any API or network failure is reported and
        # turned into None rather than aborting the cell.
        print(f"❌ Search failed: {e}")
        return None

# Search the last seven days of data. `products` is bound on both branches
# so later cells can test it against None instead of hitting a NameError.
if api is not None:
    end_date = datetime.now()
    start_date = end_date - timedelta(days=7)

    products = search_sentinel1_data(api, ARCTIC_BBOX, start_date, end_date)
else:
    products = None
    print("⚠️ Skipping search (API not connected)")

## 4. Data Source Comparison

In [None]:
# Reference table of the data sources discussed in this notebook. Every
# entry carries the same seven fields so it can be printed uniformly.
data_sources = {
    'Sentinel-1': {
        'type': 'SAR',
        'resolution': '10-20m',
        'revisit': '6 days',
        'cost': 'FREE',
        'url': 'https://scihub.copernicus.eu/dhus',
        'pros': ['All-weather', 'Day/night', 'Free', 'Arctic coverage'],
        'cons': ['Lower resolution', 'Complex processing'],
    },
    'Sentinel-2': {
        'type': 'Optical',
        'resolution': '10m',
        'revisit': '5 days',
        'cost': 'FREE',
        'url': 'https://scihub.copernicus.eu/dhus',
        'pros': ['High resolution', 'True color', 'Free'],
        'cons': ['Cloud dependent', 'Daylight only'],
    },
    'RADARSAT': {
        'type': 'SAR',
        'resolution': '3-100m',
        'revisit': 'Daily',
        'cost': 'PAID',
        'url': 'https://www.eodms-sgdot.nrcan-rncan.gc.ca/',
        'pros': ['Very high resolution', 'Daily coverage', 'Arctic focus'],
        'cons': ['Expensive', 'Requires subscription'],
    },
    'NSIDC': {
        'type': 'Ice concentration',
        'resolution': '25km',
        'revisit': 'Daily',
        'cost': 'FREE',
        'url': 'https://nsidc.org/data/g02135',
        'pros': ['Ready-to-use', 'Daily updates', 'Free', 'Reliable'],
        'cons': ['Lower resolution', 'Derived product'],
    },
}

# (printed label, dictionary key) pairs, in display order.
scalar_fields = (
    ('Type', 'type'),
    ('Resolution', 'resolution'),
    ('Revisit', 'revisit'),
    ('Cost', 'cost'),
    ('URL', 'url'),
)
list_fields = (('Pros', 'pros'), ('Cons', 'cons'))

# Print the comparison between two 80-column rules.
comparison_rule = "=" * 80
print(comparison_rule)
print("SATELLITE DATA SOURCE COMPARISON")
print(comparison_rule)

for source_name in data_sources:
    details = data_sources[source_name]
    print(f"\n{source_name}:")
    for label, key in scalar_fields:
        print(f"  {label}: {details[key]}")
    for label, key in list_fields:
        # List-valued fields are flattened to a comma-separated line.
        print(f"  {label}: {', '.join(details[key])}")

print("\n" + comparison_rule)

## 5. Check NSIDC Ice Concentration Data Availability

In [None]:
def download_nsidc_sample(date=None):
    """Check whether an NSIDC daily sea-ice concentration file is available.

    Despite the name, nothing is downloaded: a HEAD request probes the file
    and, when it exists, a ``wget`` command for fetching it is printed.

    Args:
        date: Day to look up, as a ``datetime.datetime`` or ``datetime.date``.
            Defaults to two days before the current local time.

    Returns:
        The file URL when the server answers 200; otherwise None (file not
        available, request failed, or unparsable Content-Length header).
    """
    base_url = "https://noaadata.apps.nsidc.org/NOAA/G02202_V4/north/daily"

    if date is None:
        date = datetime.now() - timedelta(days=2)
    # Accept both datetime and plain date objects; only the day matters.
    day = date.date() if isinstance(date, datetime) else date

    date_str = day.strftime("%Y%m%d")
    filename = f"seaice_conc_daily_nh_{date_str}_f17_v04r00.nc"
    url = f"{base_url}/{day.year}/{filename}"

    print("Attempting to download NSIDC data...")
    print(f"  Date: {day}")
    print(f"  URL: {url}")

    try:
        # requests.head() does not follow redirects unless asked to; without
        # this a redirected file would be reported as unavailable.
        response = requests.head(url, timeout=10, allow_redirects=True)
        if response.status_code == 200:
            size_mb = int(response.headers.get('content-length', 0)) / (1024 * 1024)
            print("\n✅ File available!")
            print(f"   Size: {size_mb:.2f} MB")
            print("\nTo download:")
            print(f"  wget {url}")
            return url

        print(f"\n⚠️ File not available (status: {response.status_code})")
        print("   Try a different date or check NSIDC website")
        return None
    except (requests.RequestException, ValueError) as e:
        # Network/HTTP failures, or a non-numeric Content-Length header.
        print(f"\n❌ Error: {e}")
        return None

# Run the check defined above; its return value (the URL or None) is not
# stored in a variable.
download_nsidc_sample()

## 6. Data Quality Checklist

In [None]:
# Checklist of manual steps, grouped by workflow stage. Each item starts
# with an empty checkbox glyph so the printed list can be ticked off.
quality_checklist = {
    'Data Selection': [
        '☐ Choose appropriate satellite source for use case',
        '☐ Verify temporal coverage',
        '☐ Check spatial resolution requirements',
        '☐ Confirm data availability for Arctic region',
    ],
    'Download': [
        '☐ Set up API credentials',
        '☐ Verify download links',
        '☐ Check file integrity (checksums)',
        '☐ Ensure sufficient storage space',
    ],
    'Preprocessing': [
        '☐ Apply atmospheric correction (optical)',
        '☐ Perform speckle filtering (SAR)',
        '☐ Geo-reference imagery',
        '☐ Normalize pixel values',
        '☐ Remove cloud-covered pixels',
    ],
    'Quality Control': [
        '☐ Check for artifacts',
        '☐ Verify geographic alignment',
        '☐ Inspect for missing data',
        '☐ Validate against ground truth',
        '☐ Compare with other sources',
    ],
    'Training Data': [
        '☐ Label data accurately',
        '☐ Balance class distribution',
        '☐ Create train/val/test splits',
        '☐ Apply data augmentation',
        '☐ Document data provenance',
    ],
}

# Print the checklist between 60-column rules, one stage at a time.
checklist_rule = "=" * 60
print(checklist_rule)
print("SATELLITE DATA QUALITY CHECKLIST")
print(checklist_rule)

for stage in quality_checklist:
    print(f"\n{stage}:")
    for entry in quality_checklist[stage]:
        print("  " + entry)

print("\n" + checklist_rule)
print("Use this checklist when working with real satellite data!")
print(checklist_rule)

## 7. Useful Resources

In [None]:
# Link directory: category -> {display name -> URL}.
resources = {
    'Data Sources': {
        'Sentinel Hub': 'https://scihub.copernicus.eu/dhus',
        'NSIDC Sea Ice': 'https://nsidc.org/data/g02135',
        'NOAA Ice Charts': 'https://usicecenter.gov/',
        'NASA Earthdata': 'https://earthdata.nasa.gov/',
        'ESA CCI': 'https://data.ceda.ac.uk/neodc/esacci/sea_ice',
    },
    'Tools': {
        'sentinelsat': 'https://github.com/sentinelsat/sentinelsat',
        'rasterio': 'https://rasterio.readthedocs.io/',
        'SNAP (ESA)': 'https://step.esa.int/main/download/snap-download/',
        'QGIS': 'https://qgis.org/',
    },
    'Tutorials': {
        'Sentinel-1 Processing': 'https://step.esa.int/main/doc/tutorials/',
        'SAR Basics': 'https://www.earthdata.nasa.gov/learn/backgrounders/what-is-sar',
        'Ice Charts Guide': 'https://nsidc.org/cryosphere/seaice/characteristics/',
    },
}

# Print every link under its category, with the URL indented on its own line.
resources_rule = "=" * 60
print(resources_rule)
print("USEFUL RESOURCES FOR SATELLITE DATA")
print(resources_rule)

for section in resources:
    print(f"\n{section}:")
    for title, link in resources[section].items():
        print(f"  {title}:\n    {link}")

print("\n" + resources_rule)

## Summary

Satellite data analysis notebook complete!

**What You Learned**:
- How to access Sentinel-1/2 data
- Comparison of different satellite sources
- Data quality checklist
- Useful resources and tools

**Next Steps**:
1. Register for a Copernicus Open Access Hub account
2. Download real satellite imagery
3. Process and label data
4. Retrain models with real data
5. Compare synthetic vs real data performance

**Pro Tip**: Start with NSIDC data (free, no registration) for quick testing!