In [4]:
import cdsapi

# Download hourly ERA5 single-level reanalysis fields (10 m wind components,
# 2 m temperature, surface pressure) for the first half of 2018 over a small
# bounding box, and store the result as one NetCDF file.
dataset = "reanalysis-era5-single-levels"

request = {
    "product_type": ["reanalysis"],
    "variable": [
        "10m_u_component_of_wind",
        "10m_v_component_of_wind",
        "2m_temperature",
        "surface_pressure",
    ],
    # Only 2018 is requested here; "2019" and "2020" are left out of this request.
    "year": ["2018"],
    # Months "01".."06" only; "07".."12" are left out of this request.
    "month": [f"{month:02d}" for month in range(1, 7)],
    # Day numbers "01".."31", zero-padded.
    "day": [f"{day:02d}" for day in range(1, 32)],
    # Every full hour of the day, "00:00".."23:00".
    "time": [f"{hour:02d}:00" for hour in range(24)],
    # NOTE(review): the original author wondered whether this key should be
    # "format" rather than "data_format" -- confirm against the CDS API docs.
    "data_format": "netcdf",
    "download_format": "unarchived",
    # Bounding box, ordered [North, West, South, East].
    "area": [35.05, -118.30, 35.00, -118.20],
}

client = cdsapi.Client()
target = "era5_tehachapi.nc"
# Left as the cell's final expression so the call's return value is displayed.
client.retrieve(dataset, request, target)


2025-11-14 05:15:41,223 INFO Request ID is 440e7a06-b8a4-464b-873f-b143d339109e
2025-11-14 05:15:41,361 INFO status has been updated to accepted
2025-11-14 05:16:31,720 INFO status has been updated to running
2025-11-14 05:30:05,453 INFO status has been updated to successful


b750421790870da679b9115091336ab0.nc:   0%|          | 0.00/393k [00:00<?, ?B/s]

'era5_tehachapi.nc'

In [7]:
# Sanity check: open the downloaded .nc file and print its text summary
# (dimensions, coordinates, data variables, attributes).
import xarray as xr

ds = xr.open_dataset(
    "era5_tehachapi.nc",
    engine="netcdf4",  # read the file through the netCDF4 backend
)
print(ds)


<xarray.Dataset> Size: 174kB
Dimensions:     (valid_time: 4344, latitude: 1, longitude: 1)
Coordinates:
    number      int64 8B ...
  * valid_time  (valid_time) datetime64[ns] 35kB 2018-01-01 ... 2018-06-30T23...
  * latitude    (latitude) float64 8B 35.0
  * longitude   (longitude) float64 8B -118.3
    expver      (valid_time) <U4 70kB ...
Data variables:
    u10         (valid_time, latitude, longitude) float32 17kB ...
    v10         (valid_time, latitude, longitude) float32 17kB ...
    t2m         (valid_time, latitude, longitude) float32 17kB ...
    sp          (valid_time, latitude, longitude) float32 17kB ...
Attributes:
    GRIB_centre:             ecmf
    GRIB_centreDescription:  European Centre for Medium-Range Weather Forecasts
    GRIB_subCentre:          0
    Conventions:             CF-1.7
    institution:             European Centre for Medium-Range Weather Forecasts
    history:                 2025-11-14T10:28 GRIB to CDM+CF via cfgrib-0.9.1...


In [9]:
# Flatten the xarray Dataset into a pandas DataFrame. reset_index() turns the
# index levels produced by to_dataframe() into ordinary columns.
df = (
    ds
    .to_dataframe()
    .reset_index()
)
# Show the first few rows as the cell output.
df.head()

Unnamed: 0,valid_time,latitude,longitude,u10,v10,t2m,sp,number,expver
0,2018-01-01 00:00:00,35.0,-118.3,-0.808231,-0.069685,291.890625,89925.25,0,1
1,2018-01-01 01:00:00,35.0,-118.3,-1.340298,-0.725997,290.692627,89972.25,0,1
2,2018-01-01 02:00:00,35.0,-118.3,-0.462882,-1.156458,288.544922,90036.1875,0,1
3,2018-01-01 03:00:00,35.0,-118.3,0.300817,-1.009697,285.121826,90091.3125,0,1
4,2018-01-01 04:00:00,35.0,-118.3,0.736362,-0.846944,283.71582,90125.125,0,1


In [16]:
# Print a truncated plain-text view of the frame, capped at 10 rows.
# Other quick checks kept for reference (disabled):
#   len(df)
#   print(df['latitude'].unique())
print(
    df.to_string(max_rows=10)
)

              valid_time  latitude  longitude       u10       v10         t2m          sp  number expver
0    2018-01-01 00:00:00      35.0     -118.3 -0.808231 -0.069685  291.890625  89925.2500       0   0001
1    2018-01-01 01:00:00      35.0     -118.3 -1.340298 -0.725997  290.692627  89972.2500       0   0001
2    2018-01-01 02:00:00      35.0     -118.3 -0.462882 -1.156458  288.544922  90036.1875       0   0001
3    2018-01-01 03:00:00      35.0     -118.3  0.300817 -1.009697  285.121826  90091.3125       0   0001
4    2018-01-01 04:00:00      35.0     -118.3  0.736362 -0.846944  283.715820  90125.1250       0   0001
...                  ...       ...        ...       ...       ...         ...         ...     ...    ...
4339 2018-06-30 19:00:00      35.0     -118.3 -2.346984  0.714672  303.041016  89691.8750       0   0001
4340 2018-06-30 20:00:00      35.0     -118.3 -3.172818  0.781558  304.735596  89659.8125       0   0001
4341 2018-06-30 21:00:00      35.0     -118.3 -3.157588

In [None]:
# The CDS API only lets us download roughly six months of data at a time
# (I tried requesting more and it was rejected),
# so let's write this script to load our data in chunks.

years = ["2018", "2019", "2020"]
month_groups = [
    ["01", "02", "03", "04", "05", "06"],
    ["07", "08", "09", "10", "11", "12"]
]

for year in years:
    for month_group in month_groups:
        l