In [3]:
import datetime as dt
import sys
import os
import eranest


In [5]:

# - eranest.download_surface_data() for surface variables
# - eranest.download_atmospheric_data() for pressure level data
# - eranest.process_netcdf_dataset() for spatial filtering
# - eranest.filter_by_geometry() for geometric operations
# - eranest.validate_coordinates() for coordinate validation
#

In [6]:
# Example 1: Download surface data with bounding box
print("🌡️ Example 1: Surface data download with bounding box")

try:
    # Step 1: Download surface data
    print("\n📥 Downloading surface data...")
    surface_file = eranest.download_surface_data(
        request_id="demo_surface",
        variables=["2m_temperature", "total_precipitation", "surface_pressure", "2m_dewpoint_temperature"],
        start_date=dt.datetime(2024, 1, 1),
        end_date=dt.datetime(2024, 1, 2),
        north=30, south=20, east=80, west=70,  # India region
        frequency=eranest.DataFrequency.DAILY,
        resolution=0.25
    )
    print(f"✅ Surface data downloaded to: {surface_file}")
    
    # Step 2: Process the data
    print("\n🔄 Processing downloaded data...")
    result = eranest.process_netcdf_dataset(
        surface_file,
        variables=["2m_temperature", "total_precipitation", "surface_pressure", "2m_dewpoint_temperature"]
    )
    
    print(f"✅ Processing completed!")
    print(f"📊 Data shape: {result.processed_data.shape}")
    print(f"⏱️  Processing time: {result.processing_time:.2f} seconds")
    print(f"💾 Memory usage: {result.memory_usage_mb:.1f} MB")
    print(f"📈 Records processed: {result.records_processed}")
    print(f"📍 Columns: {list(result.processed_data.columns)}")
    
    df = result.processed_data
    
except Exception as e:
    print(f"❌ Error: {e}")
    print("💡 Check CDS API credentials and internet connection")
    df = None

2025-06-19 19:23:59,636 - eranest.download.era5 - INFO - Starting download for request: demo_surface
2025-06-19 19:23:59,637 - eranest.download.era5 - INFO - Download attempt 1/4


🌡️ Example 1: Surface data download with bounding box

📥 Downloading surface data...
✓ CDS API configuration is already set up and valid.


2025-06-19 19:24:00,581 INFO [2025-06-16T00:00:00] CC-BY licence to replace Licence to use Copernicus Products on 02 July 2025. More information available [here](https://forum.ecmwf.int/t/cc-by-licence-to-replace-licence-to-use-copernicus-products-on-02-july-2025/13464)
2025-06-19 19:24:00,583 INFO [2025-06-10T00:00:00] To improve our C3S service, we need to hear from you! Please complete this very short [survey](https://confluence.ecmwf.int/x/E7uBEQ/). Thank you.
2025-06-19 19:24:00,584 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2025-06-19 19:24:01,439 INFO Request ID is 5e41e594-78a5-478e-804a-c7229d4a0c3a
2025-06-19 19:24:01,675 INFO status has been updated to accepted
2025-06-19 19:24:10,746 INFO status has been updated to running
2025-06-19 19:24:23,819 INFO status has been updated to successful


39858fd458618226d17ea85bc26ea222.zip:   0%|          | 0.00/465k [00:00<?, ?B/s]

2025-06-19 19:24:26,484 - eranest.download.era5 - INFO - Download completed successfully: ./demo_surface_surface.nc
2025-06-19 19:24:26,486 - eranest.processing.data - INFO - Processing 1 NetCDF files...
2025-06-19 19:24:26,520 - eranest.processing.data - ERROR - Error processing ./demo_surface_surface.nc: Failed to load NetCDF dataset demo_surface_surface.nc: did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html
2025-06-19 19:24:26,526 - eranest.processing.data - INFO - Processing completed in 0.04s
2025-06-19 19:24:26,527 - eranest.processing.data - INFO - Memory usage: 0.4 MB
2025-06-19 19:24:26,527 - eranest.processing.data - INFO - Records: 0 → 0 (filtered)


✅ Surface data downloaded to: ./demo_surface_surface.nc

🔄 Processing downloaded data...
✅ Processing completed!
📊 Data shape: (0, 0)
⏱️  Processing time: 0.04 seconds
💾 Memory usage: 0.4 MB
📈 Records processed: 0
📍 Columns: []


In [7]:
# Example 2: Download and filter with GeoJSON geometry
print("🗺️ Example 2: Surface data with GeoJSON spatial filtering")

try:
    # Create a simple GeoJSON geometry for demonstration
    demo_geojson = {
        "type": "FeatureCollection",
        "features": [{
            "type": "Feature",
            "properties": {"name": "Demo Region"},
            "geometry": {
                "type": "Polygon",
                "coordinates": [[[75, 22], [78, 22], [78, 25], [75, 25], [75, 22]]]
            }
        }]
    }
    
    # Save demo GeoJSON
    import json
    demo_geojson_path = "demo_region.geojson"
    with open(demo_geojson_path, 'w') as f:
        json.dump(demo_geojson, f, indent=2)
    print(f"📁 Created demo GeoJSON: {demo_geojson_path}")
    
    # Validate the geometry
    is_valid = eranest.validate_geojson(demo_geojson)
    print(f"✅ GeoJSON validation: {'Valid' if is_valid else 'Invalid'}")
    
    # Extract bounding box
    bbox = eranest.extract_bounding_box(demo_geojson)
    print(f"📐 Bounding box: {bbox}")
    
    # Download surface data
    print("\n📥 Downloading surface data...")
    surface_file = eranest.download_surface_data(
        request_id="demo_geojson",
        variables=["2m_temperature", "total_precipitation"],
        start_date=dt.datetime(2024, 1, 1),
        end_date=dt.datetime(2024, 1, 2),
        north=bbox[3], south=bbox[1], east=bbox[2], west=bbox[0],  # Use bounding box
        frequency=eranest.DataFrequency.HOURLY,
        resolution=0.25
    )
    print(f"✅ Surface data downloaded to: {surface_file}")
    
    # Process with spatial filtering
    print("\n🔄 Processing with GeoJSON spatial filtering...")
    result = eranest.process_netcdf_dataset(
        surface_file,
        geometry=demo_geojson_path,
        variables=["2m_temperature", "total_precipitation"]
    )
    
    print(f"✅ Spatial filtering completed!")
    print(f"📊 Filtered data shape: {result.processed_data.shape}")
    print(f"⏱️  Processing time: {result.processing_time:.2f} seconds")
    print(f"💾 Memory usage: {result.memory_usage_mb:.1f} MB")
    print(f"📈 Records: {result.records_processed} → {result.records_filtered}")
    print(f"🎯 Spatial filtering efficiency: {result.records_filtered/result.records_processed*100:.1f}%")
    
    df_filtered = result.processed_data
    
except Exception as e:
    print(f"❌ Error: {e}")
    df_filtered = None

2025-06-19 19:24:26,664 - eranest.download.era5 - INFO - Starting download for request: demo_geojson
2025-06-19 19:24:26,665 - eranest.download.era5 - INFO - Download attempt 1/4


🗺️ Example 2: Surface data with GeoJSON spatial filtering
📁 Created demo GeoJSON: demo_region.geojson
✅ GeoJSON validation: Valid
📐 Bounding box: (75.0, 22.0, 78.0, 25.0)

📥 Downloading surface data...
✓ CDS API configuration is already set up and valid.


2025-06-19 19:24:27,238 INFO [2025-06-16T00:00:00] CC-BY licence to replace Licence to use Copernicus Products on 02 July 2025. More information available [here](https://forum.ecmwf.int/t/cc-by-licence-to-replace-licence-to-use-copernicus-products-on-02-july-2025/13464)
2025-06-19 19:24:27,239 INFO [2025-06-10T00:00:00] To improve our C3S service, we need to hear from you! Please complete this very short [survey](https://confluence.ecmwf.int/x/E7uBEQ/). Thank you.
2025-06-19 19:24:27,239 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2025-06-19 19:24:28,157 INFO Request ID is debd81f1-64d9-4ccb-a6b1-77886841149d
2025-06-19 19:24:28,536 INFO status has been updated to accepted
2025-06-19 19:24:37,689 INFO status has been updated to running
2025-06-19 19:24:43,044 INFO status has been updated to successful


4ce0d05416d6943b4179f204b9b66bf4.zip:   0%|          | 0.00/70.2k [00:00<?, ?B/s]

2025-06-19 19:24:45,171 - eranest.download.era5 - INFO - Download completed successfully: ./demo_geojson_surface.nc
2025-06-19 19:24:45,173 - eranest.processing.data - INFO - Processing 1 NetCDF files...
2025-06-19 19:24:45,175 - eranest.processing.data - ERROR - Error processing ./demo_geojson_surface.nc: Failed to load NetCDF dataset demo_geojson_surface.nc: did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html
2025-06-19 19:24:45,176 - eranest.processing.data - INFO - Processing completed in 0.00s
2025-06-19 19:24:45,177 - eranest.processing.data - INFO - Memory usage: 0.0 MB
2025-06-19 19:24:45,177 - eranest.processing.data - INFO - Records: 0 → 0 (filtered)


✅ Surface data downloaded to: ./demo_geojson_surface.nc

🔄 Processing with GeoJSON spatial filtering...
✅ Spatial filtering completed!
📊 Filtered data shape: (0, 0)
⏱️  Processing time: 0.00 seconds
💾 Memory usage: 0.0 MB
📈 Records: 0 → 0
❌ Error: division by zero


In [8]:
# Example 3: Atmospheric data download
print("🌪️ Example 3: Atmospheric pressure-level data")

try:
    # Validate coordinates first
    is_valid = eranest.validate_coordinates(25.0, 75.0)  # Center point
    print(f"🗺️  Coordinates validation: {'✅ Valid' if is_valid else '❌ Invalid'}")
    
    # Download atmospheric data
    print("\n📥 Downloading atmospheric data...")
    atmospheric_file = eranest.download_atmospheric_data(
        request_id="demo_atmospheric",
        variables=["temperature", "u_component_of_wind", "v_component_of_wind"],
        start_date=dt.datetime(2024, 1, 1),
        end_date=dt.datetime(2024, 1, 2),
        pressure_levels=["850", "500", "250"],  # Key pressure levels
        north=30, south=20, east=80, west=70,
        frequency=eranest.DataFrequency.DAILY,
        resolution=0.5
    )
    print(f"✅ Atmospheric data downloaded to: {atmospheric_file}")
    
    # Process the atmospheric data
    print("\n🔄 Processing atmospheric data...")
    result = eranest.process_netcdf_dataset(
        atmospheric_file,
        variables=["temperature", "u_component_of_wind", "v_component_of_wind"]
    )
    
    print(f"✅ Atmospheric processing completed!")
    print(f"📊 Data shape: {result.processed_data.shape}")
    print(f"⏱️  Processing time: {result.processing_time:.2f} seconds")
    print(f"💾 Memory usage: {result.memory_usage_mb:.1f} MB")
    print(f"📈 Records processed: {result.records_processed}")
    
    # Show pressure level distribution
    if 'level' in result.processed_data.columns:
        levels = sorted(result.processed_data['level'].unique())
        print(f"🌀 Pressure levels: {levels} hPa")
    
    df_atmospheric = result.processed_data
    
except Exception as e:
    print(f"❌ Error: {e}")
    df_atmospheric = None

2025-06-19 19:24:45,190 - eranest.download.era5 - INFO - Starting download for request: demo_atmospheric
2025-06-19 19:24:45,191 - eranest.download.era5 - INFO - Download attempt 1/4


🌪️ Example 3: Atmospheric pressure-level data
🗺️  Coordinates validation: ✅ Valid

📥 Downloading atmospheric data...
✓ CDS API configuration is already set up and valid.


2025-06-19 19:24:45,765 INFO [2025-06-16T00:00:00] CC-BY licence to replace Licence to use Copernicus Products on 02 July 2025. More information available [here](https://forum.ecmwf.int/t/cc-by-licence-to-replace-licence-to-use-copernicus-products-on-02-july-2025/13464)
2025-06-19 19:24:45,766 INFO [2025-06-10T00:00:00] To improve our C3S service, we need to hear from you! Please complete this very short [survey](https://confluence.ecmwf.int/x/E7uBEQ/). Thank you.
2025-06-19 19:24:45,767 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
2025-06-19 19:24:46,479 INFO Request ID is 63d32226-150f-44c6-bee0-c582c44292de
2025-06-19 19:24:46,687 INFO status has been updated to accepted
2025-06-19 19:24:55,852 INFO status has been updated to running
2025-06-19 19:25:37,854 INFO status has been updated to successful


a6d98482c74ba3366179ec79939e12d4.nc:   0%|          | 0.00/438k [00:00<?, ?B/s]

2025-06-19 19:25:40,254 - eranest.download.era5 - INFO - Download completed successfully: ./demo_atmospheric_atmospheric.nc
2025-06-19 19:25:40,256 - eranest.processing.data - INFO - Processing 1 NetCDF files...


✅ Atmospheric data downloaded to: ./demo_atmospheric_atmospheric.nc

🔄 Processing atmospheric data...


2025-06-19 19:25:42,248 - eranest.processing.data - INFO - Processing completed in 1.99s
2025-06-19 19:25:42,248 - eranest.processing.data - INFO - Memory usage: 8.1 MB
2025-06-19 19:25:42,248 - eranest.processing.data - INFO - Records: 0 → 0 (filtered)


✅ Atmospheric processing completed!
📊 Data shape: (0, 0)
⏱️  Processing time: 1.99 seconds
💾 Memory usage: 8.1 MB
📈 Records processed: 0


In [9]:
# Example 4: Performance monitoring and data aggregation
print("📈 Example 4: Advanced processing with performance monitoring")

try:
    # Download data with performance monitoring
    print("\n📥 Downloading data with parallel processing...")
    surface_file = eranest.download_surface_data(
        request_id="demo_performance",
        variables=["2m_temperature", "total_precipitation"],
        start_date=dt.datetime(2024, 1, 1),
        end_date=dt.datetime(2024, 1, 3),  # 3 days
        north=25, south=20, east=80, west=75,
        frequency=eranest.DataFrequency.HOURLY,
        resolution=0.25,
        parallel_downloads=True,
        max_workers=4
    )
    print(f"✅ Data downloaded: {surface_file}")
    
    # Process with chunked processing for memory efficiency
    print("\n🔄 Processing with memory optimization...")
    from eranest.processing import DataProcessor
    
    processor = DataProcessor(
        chunk_size=1000,
        max_memory_mb=512,
        parallel_processing=True,
        use_dask=False  # Optional: set to True if Dask is available
    )
    
    result = processor.process_netcdf_files(
        [surface_file],
        variables=["2m_temperature", "total_precipitation"]
    )
    
    print(f"✅ Advanced processing completed!")
    print(f"📊 Data shape: {result.processed_data.shape}")
    print(f"⏱️  Processing time: {result.processing_time:.2f} seconds")
    print(f"💾 Memory usage: {result.memory_usage_mb:.1f} MB")
    print(f"📈 Records: {result.records_processed} → {result.records_filtered}")
    
    # Temporal aggregation
    print("\n📅 Performing temporal aggregation...")
    daily_avg = eranest.aggregate_temporal_data(
        result.processed_data,
        frequency=eranest.DataFrequency.DAILY,
        method="mean"
    )
    
    print(f"✅ Temporal aggregation completed!")
    print(f"📊 Daily averages shape: {daily_avg.shape}")
    print(f"📅 Date range: {daily_avg['time'].min()} to {daily_avg['time'].max()}")
    
    df_performance = result.processed_data
    df_daily = daily_avg
    
except Exception as e:
    print(f"❌ Error: {e}")
    df_performance = None
    df_daily = None

2025-06-19 19:25:42,255 - eranest.download.era5 - INFO - Starting download for request: demo_performance
2025-06-19 19:25:42,255 - eranest.download.era5 - INFO - Download attempt 1/4


📈 Example 4: Advanced processing with performance monitoring

📥 Downloading data with parallel processing...
✓ CDS API configuration is already set up and valid.


2025-06-19 19:25:42,819 INFO [2025-06-16T00:00:00] CC-BY licence to replace Licence to use Copernicus Products on 02 July 2025. More information available [here](https://forum.ecmwf.int/t/cc-by-licence-to-replace-licence-to-use-copernicus-products-on-02-july-2025/13464)
2025-06-19 19:25:42,820 INFO [2025-06-10T00:00:00] To improve our C3S service, we need to hear from you! Please complete this very short [survey](https://confluence.ecmwf.int/x/E7uBEQ/). Thank you.
2025-06-19 19:25:42,821 INFO [2024-09-26T00:00:00] Watch our [Forum](https://forum.ecmwf.int/) for Announcements, news and other discussed topics.
Recovering from HTTP error [500 Internal Server Error], attempt 1 of 500
Retrying in 120 seconds
2025-06-19 19:27:44,205 INFO Request ID is 52a18c98-523d-4a1e-903d-b9bbfea55c8a
2025-06-19 19:27:44,443 INFO status has been updated to accepted
2025-06-19 19:27:58,825 INFO status has been updated to running
2025-06-19 19:28:06,625 INFO status has been updated to successful


dc230b2cf5d0e8c1e6356363baa39781.zip:   0%|          | 0.00/123k [00:00<?, ?B/s]

2025-06-19 19:28:08,733 - eranest.download.era5 - INFO - Download completed successfully: ./demo_performance_surface.nc
2025-06-19 19:28:08,734 - eranest.processing.data - INFO - Processing 1 NetCDF files...
2025-06-19 19:28:08,737 - eranest.processing.data - ERROR - Error processing ./demo_performance_surface.nc: Failed to load NetCDF dataset demo_performance_surface.nc: did not find a match in any of xarray's currently installed IO backends ['netcdf4', 'scipy']. Consider explicitly selecting one of the installed engines via the ``engine`` parameter, or installing additional IO dependencies, see:
https://docs.xarray.dev/en/stable/getting-started-guide/installing.html
https://docs.xarray.dev/en/stable/user-guide/io.html
2025-06-19 19:28:08,739 - eranest.processing.data - INFO - Processing completed in 0.00s
2025-06-19 19:28:08,739 - eranest.processing.data - INFO - Memory usage: 0.0 MB
2025-06-19 19:28:08,740 - eranest.processing.data - INFO - Records: 0 → 0 (filtered)


✅ Data downloaded: ./demo_performance_surface.nc

🔄 Processing with memory optimization...
✅ Advanced processing completed!
📊 Data shape: (0, 0)
⏱️  Processing time: 0.00 seconds
💾 Memory usage: 0.0 MB
📈 Records: 0 → 0

📅 Performing temporal aggregation...
❌ Error: Time column 'time' not found in DataFrame


In [10]:
# Example 5: Constants and configurations showcase
print("🌟 Example 5: Available constants and advanced features")

try:
    print("\n📊 Available constants and configurations:")
    print(f"🌀 Aurora pressure levels: {eranest.AURORA_PRESSURE_LEVELS}")
    print(f"🌡️  Default surface variables: {eranest.DEFAULT_SURFACE_VARIABLES}")
    print(f"🌪️  Default atmospheric variables: {eranest.DEFAULT_ATMOSPHERIC_VARIABLES}")
    print(f"🗺️  Default static variables: {eranest.DEFAULT_STATIC_VARIABLES}")
    print(f"📅 Available data frequencies: {[freq.value for freq in eranest.DataFrequency]}")
    
    print(f"\n🔧 Available utility functions:")
    print(f"   • validate_coordinates() - Coordinate validation")
    print(f"   • validate_geojson() - GeoJSON validation")
    print(f"   • extract_bounding_box() - Bounding box extraction")
    print(f"   • load_json_file() / save_json_file() - JSON file operations")
    print(f"   • validate_date_range() - Date range validation")
    
    print(f"\n🚀 Performance features:")
    print(f"   • Parallel downloads with configurable workers")
    print(f"   • Memory-optimized chunked processing")
    print(f"   • Spatial indexing for geometric operations")
    print(f"   • Real-time performance monitoring")
    print(f"   • Automatic memory management")
    print(f"   • Progress tracking for long operations")
    
    print(f"\n🤖 Aurora integration:")
    print(f"   • create_aurora_batch() - ERA5 to Aurora conversion")
    print(f"   • AuroraConverter class for advanced conversions")
    print(f"   • Automatic variable name mapping")
    print(f"   • Tensor optimization for PyTorch")
    
    # Demonstrate coordinate validation
    print(f"\n📍 Coordinate validation examples:")
    test_coords = [
        (25.0, 75.0),   # Valid
        (0.0, 0.0),     # Valid (equator/prime meridian)
        (90.0, 180.0),  # Valid (pole)
        (95.0, 75.0),   # Invalid latitude
        (25.0, 190.0)   # Invalid longitude
    ]
    
    for lat, lon in test_coords:
        try:
            is_valid = eranest.validate_coordinates(lat, lon, strict=False)
            status = "✅ Valid" if is_valid else "❌ Invalid"
            print(f"   ({lat:6.1f}, {lon:7.1f}): {status}")
        except Exception as e:
            print(f"   ({lat:6.1f}, {lon:7.1f}): ❌ Error - {e}")
    
    print(f"\n✅ eranest modern API showcase completed!")
    
except Exception as e:
    print(f"❌ Error: {e}")

print("\n📚 Next steps:")
print("   • Try eranest_aurora.ipynb for Microsoft Aurora integration")
print("   • Check the documentation for advanced usage patterns")
print("   • Use help(eranest.function_name) for detailed function docs")
print("   • Explore the modular API for custom workflows")

🌟 Example 5: Available constants and advanced features

📊 Available constants and configurations:
🌀 Aurora pressure levels: ['50', '100', '150', '200', '250', '300', '400', '500', '600', '700', '850', '925', '1000']
🌡️  Default surface variables: ['2m_temperature', '10m_u_component_of_wind', '10m_v_component_of_wind', 'mean_sea_level_pressure']
🌪️  Default atmospheric variables: ['temperature', 'u_component_of_wind', 'v_component_of_wind', 'specific_humidity', 'geopotential']
🗺️  Default static variables: ['geopotential', 'land_sea_mask', 'soil_type']
📅 Available data frequencies: ['hourly', 'daily', 'weekly', 'monthly', 'yearly']

🔧 Available utility functions:
   • validate_coordinates() - Coordinate validation
   • validate_geojson() - GeoJSON validation
   • extract_bounding_box() - Bounding box extraction
   • load_json_file() / save_json_file() - JSON file operations
   • validate_date_range() - Date range validation

🚀 Performance features:
   • Parallel downloads with configura