# Sea Level Rise AR5
Notebook to migrate the AR5 sea level rise NetCDF files to CF-compliant NetCDF and Zarr.

In [1]:
# Optional: code formatter, installed as a JupyterLab extension
#%load_ext lab_black
# Optional: code formatter, installed as a classic Jupyter Notebook extension
%load_ext nb_black

<IPython.core.display.Javascript object>

### Configure OS independent paths

In [2]:
# Import standard packages
import os
import pathlib
import sys
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import netCDF4 as nc
import numpy.ma as ma

# Make the parent of the notebook directory importable, so that the `etl`
# package imported below can be resolved (presumably it lives there -- verify)
cwd = pathlib.Path().resolve()
sys.path.append(os.path.dirname(cwd))

# Get root paths
home = pathlib.Path().home()
root = home.root

# Import custom functionality
from etl import p_drive
from etl.CF_compliancy_checker import check_compliancy, save_compliancy

# Define (local and) remote drives
coclico_data_dir = p_drive.joinpath("11205479-coclico", "data")

# Workaround to the Windows OS (10) udunits error after installation of cfchecker: https://github.com/SciTools/iris/issues/404
# The path below is specific to one Windows Anaconda installation; change it to
# the udunits2.xml file dir in your Python installation.
udunits2_xml = home.joinpath(
    r"Anaconda3\pkgs\udunits2-2.2.28-h892ecd3_0\Library\share\udunits\udunits2.xml"
)
# Only override UDUNITS2_XML_PATH when the file really exists; pointing the
# variable at a missing file would break udunits on machines (or OSes) where
# the default lookup would otherwise have worked.
if udunits2_xml.is_file():
    os.environ["UDUNITS2_XML_PATH"] = str(udunits2_xml)



<IPython.core.display.Javascript object>

In [3]:
# Project-specific input / output locations (edit by hand per dataset)
ds_dir = coclico_data_dir / "18_AR5_SLP_IPCC"
ds_rcp26_path = ds_dir / "total-ens-slr-26-5.nc"
ds_rcp45_path = ds_dir / "total-ens-slr-45-5.nc"
ds_rcp85_path = ds_dir / "total-ens-slr-85-5.nc"
ds_out_file = "total-ens-slr"  # base name (no extension) of the combined output
CF_dir = coclico_data_dir / "CF"  # directory in which the CF check output files are saved

<IPython.core.display.Javascript object>

### Check CF compliancy of the original NetCDF files

In [4]:
# Open the three scenario files (RCP26, RCP45, RCP85) as xarray datasets
ds_26rcp, ds_45rcp, ds_85rcp = (
    xr.open_dataset(nc_path)
    for nc_path in (ds_rcp26_path, ds_rcp45_path, ds_rcp85_path)
)

# Show the original RCP26 dataset for inspection
ds_26rcp

Cannot find the ecCodes library
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(
  new_vars[k] = decode_cf_variable(


<IPython.core.display.Javascript object>

In [5]:
%%capture cap --no-stderr
# check original CF compliancy

check_compliancy(testfile=ds_rcp26_path, working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [6]:
# Store the captured CF check output for the original RCP26 file
save_compliancy(cap, working_dir=CF_dir, testfile=ds_rcp26_path)



<IPython.core.display.Javascript object>

In [7]:
%%capture cap --no-stderr
# check original CF compliancy

check_compliancy(testfile=ds_rcp45_path, working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [8]:
# Store the captured CF check output for the original RCP45 file
save_compliancy(cap, working_dir=CF_dir, testfile=ds_rcp45_path)



<IPython.core.display.Javascript object>

In [9]:
%%capture cap --no-stderr
# check original CF compliancy

check_compliancy(testfile=ds_rcp85_path, working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [10]:
# Store the captured CF check output for the original RCP85 file
save_compliancy(cap, working_dir=CF_dir, testfile=ds_rcp85_path)



<IPython.core.display.Javascript object>

### Make CF compliant alterations to the NetCDF files (dataset dependent)

In [11]:
# NetCDF attribute alterations

# Declare the CF convention version as a global attribute on every scenario dataset
for scenario_ds in (ds_26rcp, ds_45rcp, ds_85rcp):
    scenario_ds.attrs["Conventions"] = "CF-1.8"

<IPython.core.display.Javascript object>

In [12]:
# NetCDF variable and dimension alterations
# NOTE: this cell is not re-runnable on its own output; it expects the datasets
# as opened from file (with the "ens"/"bnds" dimensions and "modelname" variable).


def to_cf_layout(ds):
    """Apply the dimension / variable renames shared by all scenario datasets.

    Parameters
    ----------
    ds : xarray.Dataset
        Scenario dataset that still has the "ens" and "bnds" dimensions and the
        "modelname" and "time_bnds" variables.

    Returns
    -------
    xarray.Dataset
        New dataset with renamed dimensions and variables, and with "ensemble"
        and "time_bnds" promoted to coordinates.
    """
    # rename dimensions (nv = number of vertices)
    ds = ds.rename_dims({"ens": "nensemble", "bnds": "nv"})
    # rename variables
    ds = ds.rename_vars({"modelname": "ensemble"})
    # set some data variables to coordinates to avoid duplication of dimensions in later stage
    ds = ds.set_coords(["ensemble", "time_bnds"])
    # xarray adds a NaN _FillValue to float variables on write by default;
    # setting the encoding to None suppresses that for the bounds variable
    ds.time_bnds.encoding["_FillValue"] = None
    return ds


def keep_reference_ensembles(ds, reference_names):
    """Drop every ensemble member of `ds` whose name is not in `reference_names`.

    The positions to drop are derived from the ensemble names of `ds` itself,
    so the result is correct even when the datasets list their ensemble
    members in a different order or contain different extra members.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset with an "ensemble" coordinate along the "nensemble" dimension.
    reference_names : set of str
        Ensemble names that must be kept.

    Returns
    -------
    xarray.Dataset
        Dataset without the non-reference ensemble members.
    """
    member_names = ds["ensemble"].values.astype("U").tolist()
    drop_idx = [
        idx for idx, name in enumerate(member_names) if name not in reference_names
    ]
    # "nensemble" has no index coordinate, so the labels are integer positions
    return ds.drop_sel(nensemble=drop_idx)


ds_26rcp = to_cf_layout(ds_26rcp)
ds_45rcp = to_cf_layout(ds_45rcp)
ds_85rcp = to_cf_layout(ds_85rcp)

# construct equal dimensions in ensembles: RCP26 is the reference, members that
# are absent from it are removed from RCP45 and RCP85
rcp26_ensembles = set(ds_26rcp["ensemble"].values.astype("U").tolist())
ds_45rcp = keep_reference_ensembles(ds_45rcp, rcp26_ensembles)
ds_85rcp = keep_reference_ensembles(ds_85rcp, rcp26_ensembles)

# info on all attributes
# !ncinfo -v totslr {ds_rcp26_path}

<IPython.core.display.Javascript object>

In [13]:
# Stack the three scenario datasets along a new "nscenarios" dimension
scenario_datasets = [ds_26rcp, ds_45rcp, ds_85rcp]
dataset = xr.concat(scenario_datasets, dim="nscenarios")

# Label the new dimension with byte-string scenario names in a "scenarios" coordinate
scenario_labels = np.array(["RCP26", "RCP45", "RCP85"], dtype="S")
dataset = dataset.assign_coords(scenarios=("nscenarios", scenario_labels))

# Alternative (not used here): pass a named pandas Index to xr.concat so that the
# new dimension and its index values are derived from it, e.g.
#   xr.concat([...], pd.Index(["historical", "rcp45", "rcp85"], name="scenarios"))

# Tip: dataset["scenarios"].values.astype("U") retrieves the scenarios as str

<IPython.core.display.Javascript object>

In [14]:
# re-order the dimensions of the data variables
ds_26rcp = ds_26rcp.transpose("time", "lat", "lon", "nv", "nensemble")
ds_45rcp = ds_45rcp.transpose("time", "lat", "lon", "nv", "nensemble")
ds_85rcp = ds_85rcp.transpose("time", "lat", "lon", "nv", "nensemble")
dataset = dataset.transpose("nscenarios", "time", "lat", "lon", "nv", "nensemble")

# add or change certain variable / coordinate attributes
dataset_attributes = {
    "scenarios": {"long_name": "climate scenarios"}
}  # specify custom (CF convention) attributes

# add / overwrite attributes; a name that is not in the dataset is reported and
# skipped, while any other error is allowed to surface instead of being hidden
for var_name, var_attrs in dataset_attributes.items():
    if var_name not in dataset.variables:
        print(f"Skipping attributes for '{var_name}': not present in the dataset")
        continue
    dataset[var_name].attrs = var_attrs

# add epsg code as global "crs" attribute
ds_26rcp.attrs["crs"] = 4326
ds_45rcp.attrs["crs"] = 4326
ds_85rcp.attrs["crs"] = 4326
dataset.attrs["crs"] = 4326

<IPython.core.display.Javascript object>

In [15]:
# Inspect the combined xarray dataset. Best practice is to have as many bold dimensions as possible (dimension name == coordinate name);
# in this way, the front end can access a variable directly without having to index it first.

dataset
# dataset["nscenarios"]

<IPython.core.display.Javascript object>

In [21]:
# Save the altered datasets as new NetCDF files.
# The per-scenario files are written too, because the CF checks further down
# read "<original name>_CF.nc" for every scenario; without these writes those
# checks would run against missing or stale files.
ds_26rcp.to_netcdf(path=str(ds_rcp26_path).replace(".nc", "_CF.nc"))
ds_45rcp.to_netcdf(path=str(ds_rcp45_path).replace(".nc", "_CF.nc"))
ds_85rcp.to_netcdf(path=str(ds_rcp85_path).replace(".nc", "_CF.nc"))
# combined (all scenarios) file
dataset.to_netcdf(path=ds_dir.joinpath(ds_out_file + "_CF.nc"))

<IPython.core.display.Javascript object>

### Check CF compliancy of the altered NetCDF files

In [22]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=str(ds_rcp26_path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [23]:
# Store the captured CF check output for the altered RCP26 file
rcp26_cf_path = str(ds_rcp26_path).replace(".nc", "_CF.nc")
save_compliancy(cap, testfile=rcp26_cf_path, working_dir=CF_dir)



<IPython.core.display.Javascript object>

In [24]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=str(ds_rcp45_path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [25]:
# Store the captured CF check output for the altered RCP45 file
rcp45_cf_path = str(ds_rcp45_path).replace(".nc", "_CF.nc")
save_compliancy(cap, testfile=rcp45_cf_path, working_dir=CF_dir)



<IPython.core.display.Javascript object>

In [26]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=str(ds_rcp85_path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [27]:
# Store the captured CF check output for the altered RCP85 file
rcp85_cf_path = str(ds_rcp85_path).replace(".nc", "_CF.nc")
save_compliancy(cap, testfile=rcp85_cf_path, working_dir=CF_dir)



<IPython.core.display.Javascript object>

In [28]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=ds_dir.joinpath(ds_out_file + "_CF.nc"), working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [29]:
# Store the captured CF check output for the combined (all scenarios) file
combined_cf_path = ds_dir.joinpath(f"{ds_out_file}_CF.nc")
save_compliancy(cap, testfile=combined_cf_path, working_dir=CF_dir)



<IPython.core.display.Javascript object>

### Write data to Zarr files

In [30]:
# Write the combined dataset to a Zarr store; mode="w" overwrites an existing store
dataset.to_zarr(ds_dir.joinpath(f"{ds_out_file}.zarr"), mode="w")

<xarray.backends.zarr.ZarrStore at 0x25781461c10>

<IPython.core.display.Javascript object>