In [1]:
import dask.array as da
import xarray as xr
import numpy as np
import glob
from dask.distributed import Client, LocalCluster

# Long variable name -> short name of the data variable stored in the files.
# Dict order drives the processing order of the statistics loop.
VAR_LIST = {
    # single-level fields
    "10m_u_component_of_wind": "u10",
    "10m_v_component_of_wind": "v10",
    "2m_temperature": "t2m",
    "mean_sea_level_pressure": "msl",
    # pressure-level fields
    "geopotential": "z",
    "specific_humidity": "q",
    "temperature": "t",
    "u_component_of_wind": "u",
    "v_component_of_wind": "v",
    "vertical_velocity": "w",
    # "total_precipitation_6hr": "tp",
}

# Long variable name -> directory prefix of the yearly files; the statistics
# loop appends "<year>.nc" to this prefix.
VAR_PATH = {
    # single-level fields
    "10m_u_component_of_wind": "/camdata2/ERA5/monthly/single_level/10m_uwind/",
    "10m_v_component_of_wind": "/camdata2/ERA5/monthly/single_level/10m_vwind/",
    "2m_temperature": "/camdata2/ERA5/monthly/single_level/T2m/",
    "mean_sea_level_pressure": "/camdata2/ERA5/monthly/single_level/SLP/",
    # pressure-level fields
    "geopotential": "/camdata2/ERA5/monthly/pressure/geopotential/",
    "specific_humidity": "/camdata2/ERA5/monthly/pressure/specific_humidity/",
    "temperature": "/camdata2/ERA5/monthly/pressure/temperature/",
    "u_component_of_wind": "/camdata2/ERA5/monthly/pressure/u_wind/",
    "v_component_of_wind": "/camdata2/ERA5/monthly/pressure/v_wind/",
    "vertical_velocity": "/camdata2/ERA5/monthly/pressure/vertical_velocity/",
    # "total_precipitation_6hr": "/camdata2/ERA5/hourly/total_precip/total_precip_",
}

# Variables whose statistics are kept per pressure level; every other entry
# of VAR_LIST is treated as a single-level (surface) field.
PRESSURE_VAR = [
    "geopotential",
    "specific_humidity",
    "temperature",
    "u_component_of_wind",
    "v_component_of_wind",
    "vertical_velocity",
]

# The 37 pressure levels, listed from 1000 down to 1 (presumably hPa).
PRESSURE_LEVEL = [
    1000, 975, 950, 925, 900, 875, 850, 825, 800, 775,
    750, 700, 650, 600, 550, 500, 450, 400, 350, 300,
    250, 225, 200, 175, 150, 125, 100, 70, 50, 30,
    20, 10, 7, 5, 3, 2, 1,
]

# Result containers filled by the statistics loop.
# Surface statistics are stored directly under the long variable name.
SURFACE_VAR_MEAN = dict()
SURFACE_VAR_STD = dict()

# Pressure-level statistics: one independent (not shared) list per variable.
PRESSURE_VAR_MEAN = {name: list() for name in PRESSURE_VAR}
PRESSURE_VAR_STD = {name: list() for name in PRESSURE_VAR}

In [3]:
def _mean_and_std(paths, short_name):
    """Return the mean and standard deviation of one variable across files.

    Parameters
    ----------
    paths : list of str
        NetCDF files to open; they are concatenated along ``time`` in list order.
    short_name : str
        Name of the data variable to reduce (a value of ``VAR_LIST``).

    Returns
    -------
    tuple of numpy.ndarray
        ``(mean, std)`` reduced over longitude, latitude and time. Any other
        dimension of the variable is kept.

    Raises
    ------
    FileNotFoundError
        If ``paths`` is empty (``xr.concat`` would otherwise fail with a
        message that does not name the variable).
    """
    if not paths:
        raise FileNotFoundError(f"no input files found for variable {short_name!r}")

    reduce_dims = ("longitude", "latitude", "time")
    yearly = []
    try:
        for path in paths:
            yearly.append(xr.open_dataset(path, chunks={'time': 'auto'}))
        # Reduce only the requested variable, not every variable in the files.
        field = xr.concat(yearly, dim='time')[short_name]
        # Both reductions live in one Dataset so a single compute() evaluates
        # them from one task graph instead of reading the data twice.
        stats = xr.Dataset(
            {"mean": field.mean(dim=reduce_dims), "std": field.std(dim=reduce_dims)}
        ).compute()
    finally:
        # Results are in memory by now; release the file handles.
        for ds_year in yearly:
            ds_year.close()

    return stats["mean"].values, stats["std"].values


for key, value in VAR_LIST.items():
    print(key)

    if key == "total_precipitation_6hr":
        # Precipitation statistics are disabled. They would need the data
        # accumulated first, i.e.
        # resample(time="6h", closed='right', label='right').sum(),
        # before taking the mean/std. Skip before touching any file.
        continue

    # One file per year for 1979-2015; years without a file on disk are skipped.
    file_pattern = VAR_PATH[key] + "{year}.nc"
    files = [file_pattern.format(year=year) for year in range(1979, 2016)]
    existing_files = [f for f in files if glob.glob(f)]

    mean, std = _mean_and_std(existing_files, value)

    if key in PRESSURE_VAR:
        # Stored as a one-element list because the writer cell reads index [0].
        # Assigning (rather than appending) keeps this cell idempotent: a
        # re-run replaces the previous result instead of hiding the new one
        # behind it.
        PRESSURE_VAR_MEAN[key] = [mean]
        PRESSURE_VAR_STD[key] = [std]
    else:
        SURFACE_VAR_MEAN[key] = mean
        SURFACE_VAR_STD[key] = std


10m_u_component_of_wind
10m_v_component_of_wind
2m_temperature
mean_sea_level_pressure
geopotential
specific_humidity
temperature
u_component_of_wind
v_component_of_wind
vertical_velocity


In [7]:
# Collect every computed statistic into one Dataset and write it to disk.
# Variables are named "<long name>_mean" / "<long name>_std".
ds = xr.Dataset()

# Surface fields: one value per variable, stored without dimensions.
for key in SURFACE_VAR_MEAN:
    ds[f"{key}_mean"] = xr.DataArray(SURFACE_VAR_MEAN[key])
    ds[f"{key}_std"] = xr.DataArray(SURFACE_VAR_STD[key])

# Pressure-level fields: one value per level along a "pressure" dimension.
# NOTE(review): the level labels come from PRESSURE_LEVEL, not from the source
# files — this assumes the files store their levels in that same order; confirm.
pressure_coords = xr.DataArray(PRESSURE_LEVEL, dims=["pressure"])


def _on_pressure_levels(values):
    """Wrap a per-level vector as a DataArray labelled with PRESSURE_LEVEL."""
    return xr.DataArray(values, dims=["pressure"], coords={"pressure": pressure_coords})


for var in PRESSURE_VAR:
    print(var)
    # Index [0]: the statistics loop stores each result inside a list.
    ds[f"{var}_mean"] = _on_pressure_levels(PRESSURE_VAR_MEAN[var][0])
    ds[f"{var}_std"] = _on_pressure_levels(PRESSURE_VAR_STD[var][0])

# Persist the statistics as NetCDF in the current working directory.
ds.to_netcdf("stat_ERA5.nc")

geopotential
specific_humidity
temperature
u_component_of_wind
v_component_of_wind
vertical_velocity


In [48]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Statistics files to compare.
# NOTE(review): ds1 is presumably the output of the writer cell, but that cell
# saves "stat_ERA5.nc" in the working directory while this reads
# "testdata/stats_ERA5.nc" — confirm the file was renamed/moved on purpose.
ds1 = xr.open_dataset("testdata/stats_ERA5.nc")

# Separate mean and standard-deviation files (per level, judging by the names).
ds2_mean, ds2_std = (
    xr.open_dataset(stats_path)
    for stats_path in (
        "testdata/stats_mean_by_level.nc",
        "testdata/stats_stddev_by_level.nc",
    )
)

# Print the raw values of every variable in the standard-deviation file.
for var in ds2_std.data_vars.keys():
    print(f"{var}: {ds2_std[var].values}")

geopotential: [17112.60664237 14927.3830529  13569.13788806 12021.93667118
 11014.84782739  9987.53635965  8028.3607167   6968.46691684
  5891.3230744   5493.19393649  5516.18006813  5709.44148615
  5831.10992954  5873.2232132   5833.65858819  5717.92889441
  5546.00947586  5104.03554327  4619.87146783  4158.27623795
  3739.08438868  3357.50376237  3013.59784776  2698.2886516
  2405.98546406  2136.0905654   1890.45768725  1775.92280741
  1667.27670656  1564.40081849  1468.29212746  1379.5975592
  1298.64948217  1226.65683518  1163.91950284  1111.57901419
  1070.68713485]
specific_humidity: [1.48849409e-07 2.19824872e-07 2.70846700e-07 3.20418690e-07
 3.41756584e-07 3.47264044e-07 3.06770109e-07 3.08641848e-07
 3.60361226e-07 4.06171659e-07 5.68540895e-07 1.18334710e-06
 3.76140966e-06 1.00441224e-05 2.24821285e-05 4.34226183e-05
 7.38970488e-05 1.67277814e-04 3.10717315e-04 5.04287834e-04
 7.53847651e-04 1.07276912e-03 1.44746218e-03 1.76266741e-03
 2.10465153e-03 2.54103764e-03 3.0431