# `xarrayvideo` for the ERA5 dataset

Notebook setup

In [1]:
# Enable IPython's autoreload extension (mode 2) so edits to imported .py files
# are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

# Point sys.breakpointhook back at Python's original hook (sys.__breakpointhook__);
# background for this workaround: https://github.com/chmp/ipytest/issues/80
import sys
sys.breakpointhook = sys.__breakpointhook__

Load required libraries

In [2]:
import xarray as xr
import numpy as np
from pathlib import Path
from xarrayvideo import xarray2video, video2xarray, gap_fill, plot_image, to_netcdf
import matplotlib.pyplot as plt

  from .autonotebook import tqdm as notebook_tqdm


Select the version of the dataset

In [None]:
# ERA5 stores that can be selected; the trailing comment on each line is the
# size noted by the author. Keep exactly one assignment uncommented.
# era5_version = '1959-2023_01_10-full_37-1h-0p25deg-chunk-1.zarr'  # 623Tb
era5_version = (
    '1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr'  # 80Tb
)
# era5_version = '1959-2023_01_10-6h-240x121_equiangular_with_poles_conservative.zarr'  # 2Tb
# era5_version = '1959-2023_01_10-6h-64x32_equiangular_conservative.zarr'  # 163Gb

In [25]:
# Open the selected store from the weatherbench2 bucket
era5 = xr.open_zarr(
    f'gs://weatherbench2/datasets/era5/{era5_version}'
)

# nbytes / 2**30 is a GiB figure, even though the printed label reads "Gb"
size_in_gib = era5.nbytes / 2**30
print(f'Size: {size_in_gib:.3f}Gb')
era5


KeyboardInterrupt



Choose a subset of the dataset

In [None]:
import pandas as pd

# Keep every timestep from `start` onwards (the slice has no upper bound)
start = '2022-07-01'
data_from_start_date = era5.sel({'time': slice(pd.Timestamp(start), None)})

Save it locally, to avoid having to re-download every time

In [None]:
# One-off export of the subset to a local zarr store. Left disabled, presumably
# so the store is not rewritten on every run; uncomment once to create it.
# NOTE(review): this pattern produces 'since_<start>_<era5_version>' in the
# current directory, whereas the local path opened in the next cell is
# '../since_2022-07-01_2022-07-01_...' (date repeated, parent directory).
# Confirm which name actually exists on disk.
#data_from_start_date.to_zarr(f'since_{start}_{era5_version}')

In [None]:
import pandas as pd

# From this cell on, `era5_version` holds the path of the local copy and
# `era5` is re-bound to the dataset opened from that path
era5_version = (
    '../since_2022-07-01_2022-07-01_1959-2023_01_10-wb13-6h-1440x721_with_derived_variables.zarr'
)

era5 = xr.open_dataset(era5_version, engine='zarr')

# nbytes / 2**30 is a GiB figure, even though the printed label reads "Gb"
size_in_gib = era5.nbytes / 2**30
print(f'Size: {size_in_gib:.3f}Gb')
era5

In [None]:
# Quick visual check: relative humidity at time index 50 and level index 12
era5['relative_humidity'].isel(time=50, level=12).plot()
plt.show()

In [None]:
# Names of every entry in the dataset's `variables` mapping
[name for name in era5.variables]

Note that we can automatically get generic conversion rules (which might be suboptimal)

In [None]:
from pprint import pprint as pp
from xarrayvideo import get_recipe

# Ask xarrayvideo for a generic recipe, telling it which dimensions of this
# dataset play the t / x / y / c roles and requesting bits=10
conversion_rules = get_recipe(
    era5,
    t='time',
    x='longitude',
    y='latitude',
    c='level',
    bits=10,
)
pp(conversion_rules)

Compress

In [None]:
#Ideally, we want to manually optimize the recipe
lossy_params = {
    'c:v': 'libx265',  #[libx264, libx265, vp9, ffv1]
    'preset': 'medium',  #Preset for quality/encoding speed tradeoff: quick, medium, slow (better)
    'crf': [0],#,1,3,6,9], #14 default, 11 for higher quality and size
    'x265-params': 'qpmin=0:qpmax=0.001:psy-rd=0:psy-rdoq=0',
    }

conversion_rules= {
    'wind': ( ('10m_u_component_of_wind', '10m_v_component_of_wind', '10m_wind_speed'), ('time', 'longitude', 'latitude'), 
             0, lossy_params, 10),
    # 'wind_speed': ('wind_speed', ('time', 'longitude', 'latitude', 'level'), 0, lossy_params, 10),
    # # 'wind_speed2': ('wind_speed', ('time', 'longitude', 'latitude', 'level'), 12, lossy_params, 10),
    'relative_humidity': ('relative_humidity', ('time', 'longitude', 'latitude', 'level'), 0, lossy_params, 10),
    'wind_speed': ('wind_speed', ('time', 'longitude', 'latitude', 'level'), 0, lossy_params, 10),
    'wind_u': ('u_component_of_wind', ('time', 'longitude', 'latitude', 'level'), 0, lossy_params, 10),
    }

arr_dict= xarray2video(era5.sel(time=slice(pd.Timestamp('2022-11-01'), None)), 
                       era5_version.replace('.zarr', ''), conversion_rules,
                       output_path=output_path, use_ssim=False, compute_stats=True,
                       loglevel='verbose', #verbose, quiet
                       save_dataset=False #Disable saving
                       )  