In [None]:
import xarray as xr
import pandas as pd
import numpy as np

# --- Process daily data to create weekly aggregations ---
print("Loading daily ERA5 data for weekly aggregation...")

# Load the daily file and flatten it into a long table. The context manager
# closes the NetCDF file handle as soon as the values have been copied into
# the DataFrame, instead of leaving it open for the life of the kernel.
with xr.open_dataset('era5_daily_data_2024_2025.nc') as ds_daily:
    daily_df = ds_daily.to_dataframe().reset_index()
daily_df['date'] = pd.to_datetime(daily_df['valid_time'])

# Label every row with the start of its calendar week. pandas 'W' periods end
# on Sunday, so start_time is the Monday at 00:00. The vectorised .dt accessor
# replaces a per-row Python lambda, which is slow on a gridded dataset.
daily_df['week'] = daily_df['date'].dt.to_period('W').dt.start_time

# How each variable is collapsed within a (grid point, week) group.
weekly_aggregations = {
    't2m': 'mean',           # Average temperature
    # NOTE(review): the daily file is requested at a single 12:00 timestep, so
    # this sums the noon values of the week rather than a full accumulation.
    # Confirm this is the intended "total precipitation".
    'tp': 'sum',
    'swvl1': 'mean',         # Average soil moisture
    'u10': 'mean',           # Average wind components
    'v10': 'mean',
    'ssrd': 'mean'           # Average solar radiation
}

# Group by lat, lon, week and aggregate; the week start becomes the row's
# 'date' so the weekly table uses the same column name as the monthly one.
weekly_df = (
    daily_df
    .groupby(['latitude', 'longitude', 'week'])
    .agg(weekly_aggregations)
    .reset_index()
    .rename(columns={'week': 'date'})
)

print(f"Created weekly dataset with {len(weekly_df)} records")
print(f"Date range: {weekly_df['date'].min()} to {weekly_df['date'].max()}")
print(f"Variables: {[col for col in weekly_df.columns if col not in ['latitude', 'longitude', 'date']]}")

# Save weekly data
weekly_df.to_parquet('era5_weekly_data_2024_2025.parquet')
print("Saved weekly data to era5_weekly_data_2024_2025.parquet")

In [None]:
# --- Also process monthly data for historical analysis ---
print("Loading monthly ERA5 data...")

# Load the monthly file and flatten it into a long table. The context manager
# releases the NetCDF file handle once the values are in the DataFrame, rather
# than leaving the dataset open for the rest of the session.
with xr.open_dataset('era5_monthly_data_09_25.nc') as ds_monthly:
    monthly_df = ds_monthly.to_dataframe().reset_index()

# Expose the timestamp under 'date', the column name used by the weekly table.
monthly_df['date'] = pd.to_datetime(monthly_df['valid_time'])

print(f"Monthly dataset: {len(monthly_df)} records")
print(f"Date range: {monthly_df['date'].min()} to {monthly_df['date'].max()}")

# Save monthly data
monthly_df.to_parquet('era5_monthly_data_2009_2025.parquet')
print("Saved monthly data to era5_monthly_data_2009_2025.parquet")

In [None]:
# --- Create combined dataset with both weekly and monthly data ---
print("Creating combined weekly/monthly dataset...")

# Tag each table with its temporal resolution and a human-readable period
# label. .assign returns a new frame, so weekly_df / monthly_df stay untouched
# and this cell can be re-run safely.
#
# The weekly 'date' is the Monday that starts each week, so the label uses the
# ISO year and ISO week (%G / %V), which are Monday-based. The previous '%U'
# is a Sunday-based week number and labelled e.g. the week starting
# 2024-01-01 as "2024-W00".
weekly_df_tagged = weekly_df.assign(
    frequency='weekly',
    period=weekly_df['date'].dt.strftime('%G-W%V'),  # ISO Year-Week format
)

monthly_df_tagged = monthly_df.assign(
    frequency='monthly',
    period=monthly_df['date'].dt.strftime('%Y-%m'),  # Year-Month format
)

# Ensure same column structure: any extra columns carried by either table are
# dropped here so the two frames can be stacked.
common_cols = ['latitude', 'longitude', 'date', 't2m', 'tp', 'swvl1', 'u10', 'v10', 'ssrd', 'frequency', 'period']

weekly_subset = weekly_df_tagged[common_cols]
monthly_subset = monthly_df_tagged[common_cols]

# Stack the two tables and order them by grid point, then time.
# ignore_index=True renumbers the rows after sorting, so the saved parquet
# does not carry the shuffled pre-sort index along as an extra index column.
combined_df = pd.concat([weekly_subset, monthly_subset], ignore_index=True)
combined_df = combined_df.sort_values(['latitude', 'longitude', 'date'], ignore_index=True)

print(f"Combined dataset: {len(combined_df)} records")
print(f"Weekly records: {len(combined_df[combined_df['frequency'] == 'weekly'])}")
print(f"Monthly records: {len(combined_df[combined_df['frequency'] == 'monthly'])}")

# Save combined data
combined_df.to_parquet('era5_combined_weekly_monthly.parquet')
print("Saved combined dataset to era5_combined_weekly_monthly.parquet")

# Display sample
print("\nSample of combined data:")
print(combined_df.head(10))

# ERA5 Climate Data - Weekly Update

This notebook downloads ERA5 climate data and processes it into weekly and monthly datasets.

## Data Sources
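1. **Monthly Data (2009-2025)**: Historical monthly averages for long-term trends (the download request currently lists the years 2009–2024)
2. **Daily Data (2024-2025)**: Recent daily data (one 12:00 value per day, January–June of each year) aggregated to weekly for detailed analysis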

## Output Files
- `era5_weekly_data_2024_2025.parquet`: Weekly aggregated climate data
- `era5_monthly_data_2009_2025.parquet`: Monthly climate data for historical context
- `era5_combined_weekly_monthly.parquet`: Combined dataset with frequency indicators

## Variables
- **t2m**: 2-meter temperature (Kelvin)
- **tp**: Total precipitation (meters)
- **swvl1**: Volumetric soil water layer 1 (m³/m³)
- **u10**: 10-meter U wind component (m/s)
- **v10**: 10-meter V wind component (m/s)
- **ssrd**: Surface solar radiation downwards (J/m²)

## Next Steps
1. Spatial aggregation by ADM3 administrative regions
2. Time series analysis with weekly resolution
3. Climate anomaly detection and trends

In [1]:
import cdsapi

# --- Download ERA5 weekly climate data ---
# cdsapi.Client() reads the user's CDS configuration; the recorded output of
# this cell shows it raising when ~/.cdsapirc is missing or incomplete.
c = cdsapi.Client()

# Settings shared by both requests below.
era5_variables = [
    '2m_temperature', 'total_precipitation', 'volumetric_soil_water_layer_1',
    '10m_u_component_of_wind', '10m_v_component_of_wind',
    'surface_solar_radiation_downwards'
]
india_area = [38, 68, 6, 97]  # India bounding box

# Download monthly data first (ERA5 doesn't have direct weekly aggregation)
# NOTE(review): range(2009, 2025) stops at 2024, while the target file name
# says "09_25" — confirm whether 2025 should be requested as well.
monthly_request = {
    'product_type': 'monthly_averaged_reanalysis',
    'variable': list(era5_variables),
    'year': list(map(str, range(2009, 2025))),  # Updated to include 2024
    'month': [f'{month:02d}' for month in range(1, 13)],
    'time': '00:00',
    'area': list(india_area),
    'format': 'netcdf'
}
c.retrieve(
    'reanalysis-era5-single-levels-monthly-means',
    monthly_request,
    'era5_monthly_data_09_25.nc'
)

# For weekly data, we'll also download daily data and aggregate to weekly
daily_request = {
    'product_type': 'reanalysis',
    'variable': list(era5_variables),
    'year': ['2024', '2025'],  # Recent years for weekly analysis
    'month': [f'{month:02d}' for month in range(1, 7)],  # First half of year
    'day': [f'{day:02d}' for day in range(1, 32)],
    'time': '12:00',  # Noon for daily representative value
    'area': list(india_area),
    'format': 'netcdf'
}
c.retrieve(
    'reanalysis-era5-single-levels',
    daily_request,
    'era5_daily_data_2024_2025.nc'
)

Exception: Missing/incomplete configuration file: /home/denis/.cdsapirc