In [11]:
import xarray as xr
import numpy as np

In [2]:
# this example is about 85MB
!wget http://dap.ceda.ac.uk/thredds/fileServer/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-AM4/amip/r1i1p1f1/Amon/tas/gr1/files/d20180807/tas_Amon_GFDL-AM4_amip_r1i1p1f1_gr1_198001-201412.nc

--2020-07-10 16:02:09--  http://dap.ceda.ac.uk/thredds/fileServer/badc/cmip6/data/CMIP6/CMIP/NOAA-GFDL/GFDL-AM4/amip/r1i1p1f1/Amon/tas/gr1/files/d20180807/tas_Amon_GFDL-AM4_amip_r1i1p1f1_gr1_198001-201412.nc
Resolving dap.ceda.ac.uk (dap.ceda.ac.uk)... 130.246.128.117
Connecting to dap.ceda.ac.uk (dap.ceda.ac.uk)|130.246.128.117|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 87117584 (83M) [application/octet-stream]
Saving to: ‘tas_Amon_GFDL-AM4_amip_r1i1p1f1_gr1_198001-201412.nc’


2020-07-10 16:02:21 (7.57 MB/s) - ‘tas_Amon_GFDL-AM4_amip_r1i1p1f1_gr1_198001-201412.nc’ saved [87117584/87117584]



In [3]:
# Local name of the NetCDF file saved by the wget cell; reused when opening the dataset.
filename = 'tas_Amon_GFDL-AM4_amip_r1i1p1f1_gr1_198001-201412.nc'

# Keeping attributes and CF compliance.

## Viewing attributes
The example netcdf dataset is CF compliant. The [CF conventions](https://cfconventions.org/) are a set of metadata conventions for making files self-describing and easy to load.

In [16]:
# Open the downloaded NetCDF file. use_cftime=True asks xarray to decode the
# time coordinate with cftime objects (see the xarray open_dataset docs).
ds = xr.open_dataset(filename, use_cftime=True)

# Bare expression: shows the dataset summary as the cell output.
ds

In order to be self-describing, the dataset has 'attributes'.

In [7]:
# Dataset-level (global) attributes, returned as a dict.
ds.attrs

{'title': 'NOAA GFDL GFDL-AM4 model output prepared for CMIP6 AMIP',
 'history': 'File was processed by fremetar (GFDL analog of CMOR). TripleID: [exper_id_Pw6DDkBKpw,realiz_id_sd6xUIzFNL,run_id_xt6y8xrBWd]',
 'table_id': 'Amon',
 'contact': 'gfdl.climate.model.info@noaa.gov',
 'comment': '<null ref>',
 'tracking_id': 'hdl:21.14100/3b95ceac-9bd6-42c9-a130-130fc1ba108c',
 'further_info_url': 'https://furtherinfo.es-doc.org/CMIP6.NOAA-GFDL.GFDL-AM4.amip.none.r1i1p1f1',
 'branch_time_in_child': 0.0,
 'branch_method': 'no parent',
 'creation_date': '2018-08-07T17:02:18Z',
 'Conventions': 'CF-1.7 CMIP-6.0 UGRID-1.0',
 'sub_experiment': 'none',
 'frequency': 'monC',
 'forcing_index': 1,
 'physics_index': 1,
 'initialization_index': 1,
 'realization_index': 1,
 'parent_variant_label': 'no parent',
 'parent_experiment_id': 'no parent',
 'data_specs_version': '01.00.27',
 'experiment_id': 'amip',
 'experiment': 'AMIP',
 'activity_id': 'CMIP',
 'source_id': 'GFDL-AM4',
 'source_type': 'AGCM',
 '

The individual variables also have their own attributes, making them self-describing:

In [8]:
# Attributes attached to the longitude coordinate.
ds.lon.attrs

{'long_name': 'Longitude',
 'units': 'degrees_east',
 'bounds': 'lon_bnds',
 'axis': 'X',
 'standard_name': 'longitude'}

In [9]:
# Attributes attached to the `tas` data variable.
ds.tas.attrs

{'long_name': 'Near-Surface Air Temperature',
 'units': 'K',
 'cell_methods': 'area: time: mean',
 'cell_measures': 'area: areacella',
 'standard_name': 'air_temperature',
 'interp_method': 'conserve_order2',
 'original_name': 'tas'}

## Losing attributes
By default, some xarray operations cause attributes to be discarded from variables. These include most statistical operations and interpolation. This is a sensible default behaviour: after such an operation the original metadata may no longer describe the data, so discarding it prevents files from falsely claiming CF compliance.

In [29]:
# 100 evenly spaced latitudes spanning the original latitude range.
new_lat = np.linspace(ds.lat.min(), ds.lat.max(), 100)
# Interpolate the whole dataset onto the new latitude values.
ds_new = ds.interp(lat=new_lat)

`ds_new` still has its dataset-level attributes:

In [30]:
# Compare the interpolated dataset's global attrs against the ORIGINAL dataset's;
# this is what shows that dataset-level attributes survive interp().
ds_new.attrs == ds.attrs

True

But the axis on which we interpolated has lost its attributes:

In [31]:
# Attributes of the latitude coordinate after interpolation.
ds_new.lat.attrs

{}

The other variables still have their attributes:

In [32]:
# Tuple of two checks: longitude attrs and tas attrs, each compared with the original dataset's.
ds_new.lon.attrs == ds.lon.attrs, ds_new.tas.attrs == ds.tas.attrs

(True, True)

Reducing functions cause a dataset to lose all of its attributes.

In [33]:
# NOTE: this rebinds ds_new (previously the interpolated dataset) to the result of mean().
ds_new = ds.mean()
# Dataset-level attributes after the reduction.
ds_new.attrs

{}

## Keeping attributes
xarray provides an option, `keep_attrs`, set through `xr.set_options`, that causes attributes to be kept in more cases.

They will be kept when reducing functions are used:

In [38]:
# Enable keep_attrs only inside this block, then check that the reduced
# dataset carries the same dataset-level attrs as the original.
with xr.set_options(keep_attrs=True):
    # print() is needed: a bare expression inside a with-block is not displayed.
    print(ds.mean().attrs == ds.attrs)

True


But the interpolated coordinate still loses its attributes when `interp` is used:

In [37]:
# Same option, applied to interp: compare the interpolated lat coordinate's
# attrs with those of the original lat coordinate.
with xr.set_options(keep_attrs=True):
    print(ds.interp(lat=new_lat).lat.attrs == ds.lat.attrs)

False
