# 1. ERA5 Climate Data Download


This notebook automates the download of ERA5 reanalysis climate data from the Copernicus Climate Data Store (CDS) for the Azores Free Technological Zone digital twin project. The notebook supports downloading data for multiple spatial grids to create training and test datasets with different geographic coverage patterns.


**API Documentation**: [CDS ERA5 Timeseries](https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=download)

In [2]:
import cdsapi
import zipfile
import os
import xarray as xr

def download_era5_combined(latitude, longitude, start_date, end_date, client, out_dir="dataset_test"):
    """
    Download ERA5 single-levels timeseries data (surface + wave variables),
    extract, merge, and save as a single NetCDF file.

    https://cds.climate.copernicus.eu/datasets/reanalysis-era5-single-levels-timeseries?tab=download

    Two requests are issued (one per variable set). Each one is downloaded as
    a ZIP archive, the first ``.nc`` member is extracted into ``out_dir``, and
    the two resulting files are merged into one dataset. The intermediate
    per-set files are deleted once the merged file has been written.

    Args:
        latitude: Latitude of the point, in degrees (numeric).
        longitude: Longitude of the point, in degrees (numeric); negative
            values are tagged "W" in the file name, others "E".
        start_date: First date of the range, inserted verbatim into the
            request as ``"<start_date>/<end_date>"``.
        end_date: Last date of the range.
        client: Object exposing ``retrieve(dataset, request)`` whose result
            has a ``download(path)`` method (e.g. ``cdsapi.Client()``).
        out_dir: Directory receiving the output; created if missing.

    Returns:
        Path of the merged NetCDF file.

    Raises:
        FileNotFoundError: If a downloaded archive contains no ``.nc`` file.
    """
    # --------------------- Setup ---------------------
    os.makedirs(out_dir, exist_ok=True)
    dataset = "reanalysis-era5-single-levels-timeseries"

    coord_tag = f"{latitude:.2f}N_{abs(longitude):.2f}{'W' if longitude < 0 else 'E'}"
    date_tag = f"{start_date}_to_{end_date}"

    # --------------------- Helper function ---------------------
    def download_and_extract(variables, label):
        """Fetch one variable set and return the path of its extracted .nc file."""
        # Keep the archive inside out_dir under a point-specific name so that
        # it neither litters the working directory nor clashes with another
        # download running for a different point.
        tmp_zip = os.path.join(out_dir, f"{label}_{coord_tag}_{date_tag}_temp.zip")
        new_path = os.path.join(out_dir, f"ERA5_{label}_{coord_tag}_{date_tag}.nc")

        request = {
            "variable": variables,
            # Plain Python floats, whatever NumPy scalar type the caller used.
            "location": {"longitude": float(longitude), "latitude": float(latitude)},
            "date": [f"{start_date}/{end_date}"],
            "data_format": "netcdf",
        }

        try:
            client.retrieve(dataset, request).download(tmp_zip)

            with zipfile.ZipFile(tmp_zip, "r") as z:
                nc_files = [f for f in z.namelist() if f.endswith(".nc")]
                if not nc_files:
                    raise FileNotFoundError(f"No .nc file found in {label} ZIP archive.")
                extracted = z.extract(nc_files[0], path=out_dir)
        finally:
            # Remove the archive on success AND on failure.
            if os.path.exists(tmp_zip):
                os.remove(tmp_zip)

        # os.replace overwrites a file left by a previous run; os.rename
        # raises on Windows when the destination already exists.
        os.replace(extracted, new_path)

        print(f"{label.capitalize()} file saved as: {new_path}")
        return new_path

    # --------------------- Variable sets ---------------------
    surface_vars = [
        "2m_dewpoint_temperature",
        "mean_sea_level_pressure",
        "skin_temperature",
        "surface_pressure",
        "surface_solar_radiation_downwards",
        "sea_surface_temperature",
        "surface_thermal_radiation_downwards",
        "2m_temperature",
        "total_precipitation",
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
    ]

    wave_vars = [
        "mean_wave_direction",
        "mean_wave_period",
        "significant_height_of_combined_wind_waves_and_swell",
    ]

    # --------------------- Download both datasets ---------------------
    surf_path = download_and_extract(surface_vars, "surface")
    wave_path = download_and_extract(wave_vars, "wave")

    # --------------------- Merge and cleanup ---------------------
    merged_path = os.path.join(out_dir, f"ERA5_{coord_tag}_{date_tag}.nc")

    # Context managers guarantee both files are closed even if the merge or
    # the write fails; they must be closed before the files can be deleted.
    with xr.open_dataset(surf_path) as ds_surf, xr.open_dataset(wave_path) as ds_wave:
        merged = xr.merge([ds_surf, ds_wave], compat="override", join="outer")
        merged.to_netcdf(merged_path)

    os.remove(surf_path)
    os.remove(wave_path)

    print(f"Merged dataset saved as: {merged_path}")
    return merged_path


# 2. Spatial Grid Configuration

Define the spatial coordinates and temporal range for data download. The configuration supports multiple grid patterns to create diverse training and test datasets.

In [3]:
import numpy as np

# Create a single CDS API client, shared by every request below.
client = cdsapi.Client()


# --------------------- SET THE GRID ---------------------
# Each split maps to (latitude groups, longitude groups); group i of the
# latitudes pairs with group i of the longitudes. The np.arange stop values
# sit slightly past the last wanted point so that point is included.
# Keeping both splits in one mapping avoids the second assignment silently
# overwriting the first.
GRID_SETS = {
    "train": (
        [
            np.arange(39.50, 40.51, 0.25),
            np.arange(39.75, 40.26, 0.25),
            np.arange(37.75, 38.51, 0.25),
            np.arange(35.25, 36.51, 0.25),
        ],
        [
            np.arange(-32.50, -31.74, 0.25),
            np.arange(-27.0, -25.49, 0.25),
            np.arange(-31.5, -29.99, 0.25),
            np.arange(-25.25, -23.99, 0.25),
        ],
    ),
    "test": (
        [np.arange(36.0, 37.01, 0.25)],
        [np.arange(-28.5, -27.49, 0.25)],
    ),
}

# Select which split and which group of that split to download.
# NOTE(review): download_era5_combined is called without out_dir, so files go
# to its default directory whichever split is chosen — confirm this is wanted
# before switching to "train".
split = "test"
n_group = 0

latitudes_list, longitudes_list = GRID_SETS[split]
latitudes = latitudes_list[n_group]
longitudes = longitudes_list[n_group]

print(latitudes)
print(longitudes)

# One download per grid point. A failure at one point is reported and the
# loop moves on, so a single bad request does not abort the whole grid.
for lat in latitudes:
    for lon in longitudes:
        try:
            download_era5_combined(latitude=lat,
                                   longitude=lon,
                                   start_date="2025-01-01",
                                   end_date="2025-10-03",
                                   client=client)
        except Exception as e:
            print(f"Failed for {lat:.2f}N, {abs(lon):.2f}{'W' if lon < 0 else 'E'}: {e}")

# Beep when done. winsound exists only on Windows; elsewhere fall back to the
# terminal bell instead of raising ImportError after all downloads finished.
duration = 10000  # milliseconds
freq = 440  # Hz
try:
    import winsound
except ImportError:
    winsound = None

if winsound is not None:
    winsound.Beep(freq, duration)
else:
    print("\a", end="")

[36.   36.25 36.5  36.75 37.  ]
[-28.5  -28.25 -28.   -27.75 -27.5 ]



Notification of changes via this catalogue entry banner and/or in the [Forum](https://forum.ecmwf.int/) will be provided on best efforts.
2025-10-16 14:17:23,593 INFO Request ID is 66d670a9-e151-4ed8-8e24-cc7a0fbc2cc7
2025-10-16 14:17:23,593 INFO Request ID is 66d670a9-e151-4ed8-8e24-cc7a0fbc2cc7
2025-10-16 14:17:23,706 INFO status has been updated to accepted
2025-10-16 14:17:23,706 INFO status has been updated to accepted


KeyboardInterrupt: 