# Monthly Data Workflow v2: Downloading and Verifying Multi-Variable Data

**Goal:** Download one full year (1995) of monthly-averaged data for our key climate variables, then run a sanity check to confirm that the file is valid and readable. This workflow is based on the earlier successful test download.

In [None]:
# Cell 1: Download Multi-Variable Data for 1995
#
# Requests the twelve 1995 monthly-mean fields for five variables from the CDS
# dataset 'reanalysis-era5-land-monthly-means' and stores them in one GRIB file.
# The request is skipped when the target file already exists.
import cdsapi
import os

# --- Configuration ---
output_dir = '../data/climate_monthly/'
os.makedirs(output_dir, exist_ok=True)
output_file = os.path.join(output_dir, 'era5_land_monthly_1995_multi-variable.grib')
# The transfer is written to a temporary name and only renamed once it has
# finished, so an interrupted download is not mistaken for a complete file by
# the "already exists" check below.
partial_file = output_file + '.part'

# --- API Request ---
request_dictionary = {
    'product_type': 'monthly_averaged_reanalysis',
    'variable': [
        '2m_temperature', 'total_precipitation', 'volumetric_soil_water_layer_1',
        'surface_net_solar_radiation', 'potential_evaporation',
    ],
    'year': '1995',
    # '01' .. '12'
    'month': [f'{m:02d}' for m in range(1, 13)],
    'time': '00:00',
    'format': 'grib',
    # The crucial key that ensures a clean, unarchived file
    'download_format': 'unarchived',
}

# --- Execute Download ---
try:
    if not os.path.exists(output_file):
        c = cdsapi.Client()
        print("Submitting API request for 1995 multi-variable monthly data...")
        c.retrieve(
            'reanalysis-era5-land-monthly-means',
            request_dictionary,
            partial_file
        )
        # Publish the file under its final name only after retrieve() returned.
        os.replace(partial_file, output_file)
        print(f"\nDownload complete! File saved to: {output_file}")
        print(f"File size: {os.path.getsize(output_file) / 1e6:.2f} MB")
    else:
        print(f"File already exists, skipping download: {output_file}")
except Exception as e:
    # Best-effort notebook cell: report the problem instead of raising, then
    # discard any half-written download so the next run starts cleanly.
    print(f"\nAn error occurred: {e}")
    if os.path.exists(partial_file):
        os.remove(partial_file)

## Sanity Check

Now we will attempt to open the GRIB file we just downloaded to confirm it is valid and contains all the variables we requested.

In [None]:
# Cell 2: Sanity Check the Downloaded File
#
# Opens the downloaded GRIB with xarray and prints its structure and the names
# of the data variables it exposes.
import xarray as xr

# --- File Path ---
file_path = '../data/climate_monthly/era5_land_monthly_1995_multi-variable.grib'

# --- Attempt to Open the File ---
# Opening and reporting both live inside the try block, so a failure at any
# step ends in the FAILED message below.
try:
    # GRIB files are read through the cfgrib backend.
    ds_climate = xr.open_dataset(file_path, engine='cfgrib')

    print(
        "--- SUCCESS! ---",
        "The multi-variable GRIB file was loaded correctly.",
        sep="\n",
    )

    print("\nDataset structure:")
    print(ds_climate)

    print("\nVariables found in file:")
    for var in ds_climate.data_vars:
        print(f"- {var}")

except Exception as e:
    print(
        "--- FAILED ---",
        f"An error occurred while trying to open the file: {e}",
        sep="\n",
    )

## Analysis: Linking Yield to Climate (1995)

Now that we have successfully downloaded the multi-variable climate data for 1995, we will process it and link it to the 1995 maize yield data.

In [None]:
# Cell 3 (Corrected): Load Data for Analysis using correct GRIB short names
#
# Loads the 1995 maize yield file, then reads the monthly climate GRIB one
# variable at a time (filtered by GRIB shortName) and merges the pieces into a
# single Dataset named ds_climate_monthly.

import xarray as xr
import numpy as np

# --- 1. Load 1995 Maize Yield Data ---
YIELD_PATH = '../data/maize/yield_1995.nc4'
ds_yield = xr.open_dataset(YIELD_PATH)
print("--- Yield Data for 1995 ---")
print(ds_yield)

# --- 2. Load 1995 Monthly Climate Data (Variable by Variable) ---
CLIMATE_PATH = '../data/climate_monthly/era5_land_monthly_1995_multi-variable.grib'

# Define the correct GRIB short names for the variables we want.
# Keys are GRIB shortNames; values are the CDS request names used in Cell 1.
variables_to_load = {
    '2t': '2m_temperature',
    'tp': 'total_precipitation',
    'swvl1': 'volumetric_soil_water_layer_1',
    'ssr': 'surface_net_solar_radiation',
    'pev': 'potential_evaporation'
}

data_arrays = []
print("\n--- Loading Monthly Climate Data for 1995 (Variable by Variable) ---")

for short_name, long_name in variables_to_load.items():
    try:
        # Open the SAME file multiple times, but filter for only ONE variable each time
        ds_single_var = xr.open_dataset(
            CLIMATE_PATH,
            engine='cfgrib',
            backend_kwargs={'filter_by_keys': {'shortName': short_name}}
        )
    except Exception as e:
        # Report the actual failure reason instead of silently discarding it.
        print(
            f"Could not load variable with shortName '{short_name}' ({long_name}). "
            f"Error: {e}. Skipping."
        )
        continue

    # A filter that matches no GRIB messages can yield an empty Dataset rather
    # than an exception, so an explicit check is needed to detect a variable
    # that is truly missing from the file.
    if len(ds_single_var.data_vars) == 0:
        ds_single_var.close()
        print(f"Could not load variable with shortName '{short_name}'. It may not be in the file. Skipping.")
        continue

    print(f"Successfully loaded '{short_name}'")
    data_arrays.append(ds_single_var)

# --- 3. Merge the individual variables into a single Dataset ---
if data_arrays:
    # compat='no_conflicts' combines variables whose non-null values agree.
    # It does NOT drop coordinates: if a coordinate such as 'step' really
    # differs between the per-variable datasets, xr.merge raises MergeError.
    ds_climate_monthly = xr.merge(data_arrays, compat='no_conflicts')

    print("\n--- Merged Monthly Climate Data for 1995 ---")
    print(ds_climate_monthly)
else:
    print("No climate variables were successfully loaded.")

In [None]:
# Cell to verify all variables using the correct method
#
# For every requested variable, opens the GRIB filtered by its shortName and
# reports whether any data variable was actually found.
import xarray as xr

# --- Configuration ---
file_path = r'../data/climate_monthly/era5_land_monthly_1995_multi-variable.grib'

# The correct GRIB short names for the variables we requested.
variables_to_check = {
    '2t': '2m_temperature',
    'tp': 'total_precipitation',
    'swvl1': 'volumetric_soil_water_layer_1',
    'ssr': 'surface_net_solar_radiation',
    'pev': 'potential_evaporation'
}

print(f"--- Verifying variables in file: {file_path} ---")

# --- Loop and Check Each Variable ---
for short_name, long_name in variables_to_check.items():
    print(f"\nChecking for: {long_name} (shortName: {short_name})")
    try:
        # The context manager closes the file again after each check, so the
        # loop does not accumulate open handles on the same GRIB file.
        with xr.open_dataset(
            file_path,
            engine="cfgrib",
            backend_kwargs={"indexpath": "", "filter_by_keys": {"shortName": short_name}},
        ) as ds_var:
            found_vars = list(ds_var.data_vars)
            # print(ds_var) # Optional: uncomment to see the structure
    except Exception as e:
        print(f"  -> FAILED: Variable '{long_name}' not found or could not be loaded. Error: {e}")
        continue

    # A filter that matches nothing can produce an empty Dataset instead of an
    # exception, so opening without error is not enough to claim success.
    if found_vars:
        print(f"  -> SUCCESS: Found and loaded '{long_name}'.")
    else:
        print(
            f"  -> FAILED: Variable '{long_name}' not found or could not be loaded. "
            f"Error: no GRIB messages matched shortName '{short_name}'."
        )