In [10]:
import dask
import xarray as xr
import numpy as np
import xbatcher as xb

In [11]:
GS_PATH = "gs://gcp-public-data-arco-era5/1959-2022-6h-1440x721.zarr/"
LONGITUDE = 1440
LATITUDE = 721
LEVEL = [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
VARS_SURFACE = ["2m_temperature", "10m_u_component_of_wind", "10m_v_component_of_wind", "mean_sea_level_pressure"]
VARS_ATMOSPHERE = ["temperature" ,"u_component_of_wind", "v_component_of_wind", "geopotential", "specific_humidity"]

In [12]:
dask.config.set(scheduler="threads", num_workers=4)

<dask.config.set at 0x2350bd6fb00>

In [13]:
ds = xr.open_zarr(
    'gs://gcp-public-data-arco-era5/ar/full_37-1h-0p25deg-chunk-1.zarr-v3',
    chunks=None,
    storage_options=dict(token='anon'),
)

ds = ds.sel(time=slice(ds.attrs['valid_time_start'], ds.attrs['valid_time_stop']))
ds = ds.sel(level=LEVEL)
ds = ds[VARS_SURFACE + VARS_ATMOSPHERE]
ds


In [21]:
bgen = xb.BatchGenerator(
        ds,
        input_dims={"time": 10, "level": 13, "latitude": 721, "longitude": 1440},
        
    )

data = bgen[73000].load()

In [22]:
array = data.to_stacked_array(
    new_dim="channel",
    sample_dims=["latitude", "longitude", "time"],
).transpose("channel","time", "longitude", "latitude")

array

In [23]:
array.mean(dim=["time", "longitude", "latitude"])

In [24]:
array.std(dim=["time", "longitude", "latitude"])

In [25]:
mean_1 = np.array([2.75967712e+02, -2.22368330e-01, -2.15047508e-01,  1.01246727e+05,
                   2.15070007e+02,  2.10031494e+02,  2.14018921e+02,  2.17501572e+02,
                   2.21371201e+02,  2.26636307e+02,  2.39520569e+02,  2.50521912e+02,
                   2.58944122e+02,  2.65517212e+02,  2.72741272e+02,  2.75425598e+02,
                   2.78671967e+02,  1.03608549e+00,  8.31944084e+00,  1.36120901e+01,
                   1.50306454e+01,  1.41801939e+01,  1.26519327e+01,  9.24522495e+00,
                   6.47516203e+00,  4.48572683e+00,  2.87725925e+00,  9.53606129e-01,
                   8.16841573e-02, -3.67954791e-01,  2.73605585e-01,  3.40733565e-02,
                   2.09475175e-01,  2.44738385e-01,  1.62622541e-01,  1.11954860e-01,
                   -1.92232933e-02,  1.51987211e-03,  1.10736554e-02, -3.60198505e-03,
                   -1.25714377e-01, -1.65739879e-01, -2.39981651e-01,  1.99144484e+05,
                   1.57036328e+05,  1.32374906e+05,  1.14563891e+05,  1.00517969e+05,
                   8.88052266e+04,  6.95796406e+04,  5.38789141e+04,  4.05352695e+04,
                   2.89137070e+04,  1.38578760e+04,  7.17843945e+03,  9.54477600e+02,
                   2.98447389e-06,  2.31290983e-06,  3.84114173e-06,  1.28046395e-05,
                   4.04926068e-05,  9.41455583e-05,  2.98157887e-04,  6.69989036e-04,
                   1.24174904e-03,  2.04507494e-03,  4.04278282e-03,  5.43047814e-03,
                   6.29268773e-03])

mean_2 = np.array([2.76677612e+02,  9.19840336e-02, -2.33745620e-01,  1.00842969e+05,
                   2.14355026e+02,  2.09703049e+02,  2.14147339e+02,  2.17802536e+02,
                   2.21773438e+02,  2.27247162e+02,  2.40236923e+02,  2.51155151e+02,
                   2.59646790e+02,  2.66248077e+02,  2.73468414e+02,  2.76208832e+02,
                   2.79573914e+02,  4.31863117e+00,  9.29177380e+00,  1.37944288e+01,
                   1.46930666e+01,  1.40398073e+01,  1.25427265e+01,  9.46230698e+00,
                   6.82800961e+00,  5.02230406e+00,  3.50000882e+00,  1.44800925e+00,
                   6.45735741e-01, -1.86480805e-02,  5.27674481e-02,  1.06645459e-02,
                   2.63152242e-01,  3.48387212e-01,  1.55034199e-01,  7.27493912e-02,
                   -3.35093886e-02, -9.50008929e-02, -1.01644890e-02, -2.08017249e-02,
                   -7.84878358e-02, -8.55691805e-02, -2.19739988e-01,  1.99044141e+05,
                   1.57025984e+05,  1.32389125e+05,  1.14555406e+05,  1.00488953e+05,
                   8.87489453e+04,  6.94661797e+04,  5.37226719e+04,  4.03435781e+04,
                   2.86885566e+04,  1.35902285e+04,  6.89174902e+03,  6.49629944e+02,
                   2.74857894e-06,  2.45567003e-06,  4.15833392e-06,  1.43458337e-05,
                   4.23810270e-05,  9.80081677e-05,  3.03966255e-04,  6.94804476e-04,
                   1.34054665e-03,  2.14526197e-03,  4.12505725e-03,  5.50352037e-03,
                   6.32317364e-03])

mean_3 = np.array([2.7747659e+02, -1.7726766e-01,  2.0137815e-01,  1.0111635e+05,
                   2.1228047e+02,  2.0927252e+02,  2.1450591e+02,  2.1873245e+02,
                   2.2320924e+02,  2.2897107e+02,  2.4190178e+02,  2.5267509e+02,
                   2.6086682e+02,  2.6702429e+02,  2.7408551e+02,  2.7687726e+02,
                   2.8033862e+02,  6.2629104e+00,  1.0579687e+01,  1.4536409e+01,
                   1.5152886e+01,  1.4408839e+01,  1.2829268e+01,  9.5149441e+00,
                   6.9991465e+00,  5.0437040e+00,  3.2779326e+00,  1.1867758e+00,
                   2.9544556e-01, -2.1634321e-01,  7.7156350e-02,  1.1223329e-02,
                   5.9473015e-02,  1.8773082e-01,  6.8190708e-03, -1.2069984e-01,
                   -1.0721262e-01,  5.9598811e-02,  4.0386770e-02,  7.3744766e-02,
                   1.8809338e-01,  2.0849346e-01,  2.1482959e-01,  1.9969903e+05,
                   1.5792822e+05,  1.3328525e+05,  1.1540271e+05,  1.0125800e+05,
                   8.9434688e+04,  7.0010781e+04,  5.4162055e+04,  4.0710320e+04,
                   2.9003076e+04,  1.3837869e+04,  7.1105156e+03,  8.4248004e+02,
                   2.9039222e-06,  2.5798092e-06,  5.2520227e-06,  1.9172099e-05,
                   5.6120287e-05,  1.2505421e-04,  3.9448868e-04,  8.5212773e-04,
                   1.5386717e-03,  2.4375340e-03,  4.5270547e-03,  5.8460077e-03,
                   6.8327598e-03])

std_1 = np.array([2.01661186e+01, 5.13918447e+00, 4.26606894e+00, 1.30800183e+03,
                  1.11247005e+01, 1.47876205e+01, 8.89158058e+00, 5.49031210e+00,
                  7.16918802e+00, 1.03747082e+01, 1.28821411e+01, 1.33957949e+01,
                  1.33854179e+01, 1.37529011e+01, 1.42817669e+01, 1.49910336e+01,
                  1.69092445e+01, 1.15726576e+01, 1.44870672e+01, 1.71055756e+01,
                  1.84008293e+01, 1.82593422e+01, 1.74101391e+01, 1.46701450e+01,
                  1.23630495e+01, 1.04308863e+01, 9.16172791e+00, 7.78410387e+00,
                  7.40781879e+00, 5.89710855e+00, 9.31402397e+00, 9.11365604e+00,
                  1.07413568e+01, 1.23432732e+01, 1.32342272e+01, 1.29508352e+01,
                  1.08196011e+01, 8.80899334e+00, 7.42852211e+00, 6.53877544e+00,
                  5.87006664e+00, 6.11310673e+00, 5.04436493e+00, 2.72143188e+03,
                  3.94221997e+03, 5.00976807e+03, 5.29980127e+03, 5.13777686e+03,
                  4.72994971e+03, 3.78837183e+03, 2.97914355e+03, 2.32167773e+03,
                  1.79032007e+03, 1.19912183e+03, 1.03115930e+03, 9.92472290e+02,
                  2.65486591e-07, 6.41156987e-07, 2.23797633e-06, 1.57480517e-05,
                  5.58321517e-05, 1.31932771e-04, 4.24062309e-04, 9.30902315e-04,
                  1.56503206e-03, 2.28157546e-03, 3.86485946e-03, 4.90261102e-03,
                  5.71243325e-03])

std_2 = np.array([2.00837173e+01, 5.23634815e+00, 4.39379930e+00, 1.46362659e+03,
                  1.09278088e+01, 1.38894491e+01, 9.14680195e+00, 5.33937979e+00,
                  6.92013121e+00, 9.94387627e+00, 1.23354387e+01, 1.27500248e+01,
                  1.26841450e+01, 1.30903358e+01, 1.37514715e+01, 1.42884674e+01,
                  1.61719608e+01, 1.14741697e+01, 1.12625580e+01, 1.42091408e+01,
                  1.61359730e+01, 1.65070095e+01, 1.58413467e+01, 1.33585901e+01,
                  1.12931147e+01, 9.75179672e+00, 8.71062851e+00, 7.80166721e+00,
                  7.53706264e+00, 5.85454512e+00, 7.57078218e+00, 7.24204683e+00,
                  8.56305981e+00, 1.01543045e+01, 1.14963598e+01, 1.15911074e+01,
                  9.85923386e+00, 8.26035404e+00, 7.16296721e+00, 6.54049540e+00,
                  6.13875341e+00, 6.28153944e+00, 4.99785709e+00, 3.51846606e+03,
                  4.07575122e+03, 5.00873584e+03, 5.33733691e+03, 5.21293604e+03,
                  4.83318262e+03, 3.93635107e+03, 3.16377759e+03, 2.53730786e+03,
                  2.02933032e+03, 1.45418921e+03, 1.26529089e+03, 1.16373450e+03,
                  1.17172959e-07, 4.15399256e-07, 2.16956937e-06, 1.54258523e-05,
                  5.42753405e-05, 1.32956702e-04, 4.28031781e-04, 9.25520028e-04,
                  1.61621789e-03, 2.27551349e-03, 3.87136987e-03, 4.85318387e-03,
                  5.63411089e-03])

std_3 = np.array([2.27328854e+01, 5.37448645e+00, 4.97379637e+00, 1.15717261e+03,
                  4.88185167e+00, 1.10926781e+01, 6.90179682e+00, 4.65013504e+00,
                  6.56796169e+00, 9.47751904e+00, 1.20881672e+01, 1.26128416e+01,
                  1.31490383e+01, 1.51863937e+01, 1.62130547e+01, 1.68763752e+01,
                  1.82538738e+01, 7.22799110e+00, 9.82074833e+00, 1.39986553e+01,
                  1.66079464e+01, 1.68655338e+01, 1.61749916e+01, 1.37194767e+01,
                  1.15212965e+01, 9.98389339e+00, 8.96374798e+00, 7.82630062e+00,
                  7.54775000e+00, 5.98931837e+00, 4.51197481e+00, 6.71417665e+00,
                  9.12931347e+00, 1.20292788e+01, 1.36636715e+01, 1.37198343e+01,
                  1.17533960e+01, 9.79505444e+00, 8.47955513e+00, 7.67996025e+00,
                  7.00397491e+00, 7.02060986e+00, 5.62000513e+00, 2.69287354e+03,
                  3.95461621e+03, 4.90652148e+03, 5.22117871e+03, 5.12247510e+03,
                  4.76975293e+03, 3.90033057e+03, 3.13301099e+03, 2.49366455e+03,
                  1.93856238e+03, 1.26941858e+03, 1.03022961e+03, 8.91611511e+02,
                  1.80326182e-07, 3.68718815e-07, 3.52468760e-06, 2.15964556e-05,
                  7.10573222e-05, 1.62842029e-04, 5.11946098e-04, 1.07153272e-03,
                  1.74916757e-03, 2.57699098e-03, 4.20427555e-03, 5.20208478e-03,
                  6.06187806e-03])

In [26]:
new_mean = np.mean([mean_1, mean_2, mean_3], axis=0)
new_mean

array([ 2.76707305e+02, -1.02550652e-01, -8.24716593e-02,  1.01068682e+05,
        2.13901834e+02,  2.09669021e+02,  2.14224057e+02,  2.18012186e+02,
        2.22117960e+02,  2.27618180e+02,  2.40553091e+02,  2.51450718e+02,
        2.59819244e+02,  2.66263193e+02,  2.73431732e+02,  2.76170563e+02,
        2.79528167e+02,  3.87254235e+00,  9.39696721e+00,  1.39809760e+01,
        1.49588660e+01,  1.42096134e+01,  1.26746424e+01,  9.40749201e+00,
        6.76743938e+00,  4.85057830e+00,  3.21840022e+00,  1.19613039e+00,
        3.40955153e-01, -2.00982027e-01,  1.34509794e-01,  1.86537438e-02,
        1.77366811e-01,  2.60285472e-01,  1.08158604e-01,  2.13348037e-02,
       -5.33151006e-02, -1.12940699e-02,  1.37653121e-02,  1.64470187e-02,
       -5.36961093e-03, -1.42718665e-02, -8.16306830e-02,  1.99295885e+05,
        1.57330177e+05,  1.32683094e+05,  1.14840669e+05,  1.00754974e+05,
        8.89962866e+04,  6.96855338e+04,  5.39212137e+04,  4.05297225e+04,
        2.88684465e+04,  

In [27]:
new_std = np.mean([std_1, std_2, std_3], axis=0)
new_std

array([2.09942404e+01, 5.25000636e+00, 4.54455487e+00, 1.30960034e+03,
       8.97812032e+00, 1.32565826e+01, 8.31339312e+00, 5.15994231e+00,
       6.88576031e+00, 9.93203450e+00, 1.24352490e+01, 1.29195538e+01,
       1.30728671e+01, 1.40098769e+01, 1.47487644e+01, 1.53852921e+01,
       1.71116930e+01, 1.00916061e+01, 1.18567912e+01, 1.51044572e+01,
       1.70482496e+01, 1.72106285e+01, 1.64754925e+01, 1.39160706e+01,
       1.17258202e+01, 1.00555255e+01, 8.94536813e+00, 7.80402390e+00,
       7.49754381e+00, 5.91365735e+00, 7.13226032e+00, 7.68995984e+00,
       9.47791003e+00, 1.15089522e+01, 1.27980862e+01, 1.27539256e+01,
       1.08107437e+01, 8.95480061e+00, 7.69034815e+00, 6.91974370e+00,
       6.33759832e+00, 6.47175201e+00, 5.22074238e+00, 2.97759049e+03,
       3.99086247e+03, 4.97500846e+03, 5.28610563e+03, 5.15772933e+03,
       4.77762842e+03, 3.87501782e+03, 3.09197738e+03, 2.45088338e+03,
       1.91940426e+03, 1.30757654e+03, 1.10889327e+03, 1.01593943e+03,
      