# CMIP5 Global mean temperatures

Create NetCDF files with the global annual mean temperature of all CMIP5 models (Taylor et al., 2012), one file per scenario. We make use of Jan Sedláček's post-processing of the CMIP5 data (https://data.iac.ethz.ch/atmos/). At IAC the data is stored at: /net/atmos/data/cmip5-ng/.

A list of all models used is given at the end of the notebook (see [Models](#Models)).


In [None]:
from glob import glob
from os import path

import numpy as np
import pandas as pd
import xarray as xr

In [None]:
def cmip5_filename(
    var, time, model, scen, ens, res="g025", folder_root="/net/atmos/data/cmip5-ng/"
):
    """
    List cmip5 filenames matching the given criteria.

    The search pattern is
    ``<folder_root>/<var>/<var>_<time>_<model>_<scen>_<ens>_<res>.nc``.

    Parameters
    ----------
    var : string
        Variable name.
    time : string
        Time resolution, e.g. 'ann', 'seas'.
    model : string
        Models to look for, e.g. '*', 'NorESM1'
    scen : string
        Scenario, e.g. 'rcp85', ...
    ens : string
        Which ensemble members, e.g. '*', 'r1i1p?', 'r1i1p1'
    res : string
        Resolution, 'native' or 'g025'. Optional, default: 'g025'.
    folder_root : string
        Root folder of the cmip5-ng archive. Optional, default:
        '/net/atmos/data/cmip5-ng/'.

    Returns
    -------
    filenames : list of string
        Alphabetically sorted list of all files matching the pattern.

    Raises
    ------
    RuntimeError
        If no file matches the pattern.

    .. note::

       All arguments are inserted into a glob pattern and can therefore
       take wildcards.

    """

    folder = path.join(folder_root, var)

    pattern = "_".join([var, time, model, scen, ens, res])
    pattern = path.join(folder, pattern + ".nc")

    # sort to get a reproducible order, glob returns files in arbitrary order
    filenames = sorted(glob(pattern))

    if not filenames:
        raise RuntimeError(f"No simulations found for pattern: {pattern}")

    return filenames

In [None]:
# List the CMIP5 files with annual mean surface air temperature (tas) for
# rcp85, restricted to the first ensemble member (r1i1p1).

fNs = cmip5_filename("tas", "ann", "*", "rcp85", "r1i1p1", res="g025")

# number of files found
print(len(fNs))

# show only the first four file names
print("\n".join(fNs[:4]))

print("...")

In [None]:
# cosine weighting of the latitude


def cos_wgt(lat):
    """Return the cosine of the latitude ``lat``, given in degrees."""
    lat_in_radians = np.deg2rad(lat)
    return np.cos(lat_in_radians)

In [None]:
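# When this was written, xarray did not have a weighted average function,
# see https://github.com/pydata/xarray/issues/422
# Newer xarray versions provide `DataArray.weighted(...).mean(...)`, which
# could replace the helpers below.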


def _average_da(self, dim=None, axis=None, weights=None, skipna=None, keep_attrs=False):
    """
    weighted average for DataArrays

    Parameters
    ----------
    dim : str or sequence of str, optional
        Dimension(s) over which to apply `average`.
    axis : int or sequence of int, optional
        Axis(es) over which to apply `average`. Only one of the 'dim'
        and 'axis' arguments can be supplied. If neither are supplied, then
        `average` is calculated over axes.
    weights : DataArray, optional
        weights to apply. Shape must be broadcastable to shape of self.
        If None (default), the unweighted mean is returned.
    skipna : bool, optional
        Passed on to the underlying `mean` / `sum` reductions.
    keep_attrs : bool, optional
        If True, the attributes (`attrs`) will be copied from the original
        object to the new one.  If False (default), the new object will be
        returned without attributes.

    Returns
    -------
    reduced : DataArray
        New DataArray with average applied to its data and the indicated
        dimension(s) removed.

    Raises
    ------
    TypeError
        If `weights` is given but is not a DataArray.
    ValueError
        If `weights` has dimensions that are not dimensions of `self`.

    Warns
    -----
    RuntimeWarning
        If the weights of the non-null values sum to zero anywhere.

    """
    # local import: `warnings` is not imported at the top of the file
    import warnings

    if weights is None:
        return self.mean(dim=dim, axis=axis, skipna=skipna, keep_attrs=keep_attrs)

    if not isinstance(weights, xr.DataArray):
        raise TypeError("weights must be a DataArray")

    # check that weights.dims are in DataArray
    invalid = {d for d in weights.dims if d not in self.dims}
    if invalid:
        # dimension names need not be strings; sort for a stable message
        names = " ".join(sorted(str(d) for d in invalid))
        raise ValueError(f"Invalid dims in weights: {names}")

    # only the weights of non-null values enter the normalization
    valid = self.notnull()
    sum_of_weights = weights.where(valid).sum(dim=dim, axis=axis)

    if (sum_of_weights == 0).any():
        warnings.warn("Weights sum to zero, returns 0.", RuntimeWarning, stacklevel=2)

    w = weights / sum_of_weights

    return (self * w).sum(dim=dim, axis=axis, skipna=skipna, keep_attrs=keep_attrs)


def _average_ds(self, dim=None, axis=None, weights=None, keep_attrs=False, skipna=None):
    """
    weighted average for Datasets

    Applies the weighted average for DataArrays (`_average_da`) to every
    data variable of the Dataset.

    Parameters
    ----------
    dim : str or sequence of str, optional
        Dimension(s) over which to apply `average`.
    axis : int or sequence of int, optional
        Axis(es) over which to apply `average`. Only one of the 'dim'
        and 'axis' arguments can be supplied.
    weights : DataArray, optional
        weights to apply. If None (default), the unweighted mean is returned.
    keep_attrs : bool, optional
        If True, the attributes (`attrs`) will be copied from the original
        object to the new one.  If False (default), the new object will be
        returned without attributes.
    skipna : bool, optional
        Passed on to the underlying reductions.

    Returns
    -------
    reduced : Dataset
        New Dataset with average applied to its data variables.

    """

    if weights is None:
        return self.mean(dim=dim, axis=axis, skipna=skipna, keep_attrs=keep_attrs)

    def _reduce(da):
        # forward all options, keep_attrs would otherwise be dropped
        return _average_da(
            da,
            dim=dim,
            axis=axis,
            weights=weights,
            skipna=skipna,
            keep_attrs=keep_attrs,
        )

    # newer xarray versions deprecate `Dataset.apply` in favor of
    # `Dataset.map`; fall back to `apply` where `map` does not exist
    apply_to_vars = getattr(self, "map", None) or self.apply

    return apply_to_vars(_reduce, keep_attrs=keep_attrs)


# monkey patch: make the weighted average available as the `.average` method
# of DataArray and Dataset
setattr(xr.DataArray, "average", _average_da)
setattr(xr.Dataset, "average", _average_ds)

In [None]:
def get_tas_ann(fNs):
    """
    get global annual mean temperature

    Parameters
    ----------
    fNs : list of string
        File names to read, one file per simulation. The model name is taken
        from the third '_'-separated part of each file's basename.

    Returns
    -------
    tas : DataArray
        Cosine-latitude weighted mean of ``tas`` over 'lat' and 'lon' for
        every file, concatenated along the new dimension 'ens'. The model
        names are attached as coordinate 'model' along 'ens'.

    """

    # open first dataset to get lat coordinates; the same weights are used
    # for all files, i.e. all files are assumed to share the same latitudes
    with xr.open_dataset(fNs[0]) as ds_first:
        wgt = cos_wgt(ds_first.lat).load()

    ds_all = []
    for fN in fNs:
        # close each file again after reading to not leak file handles
        with xr.open_dataset(fN) as ds_file:
            # replace the datetime64[ns] object with integers (2000, 2001, etc.)
            ds_mean = ds_file.groupby("year.year", squeeze=False).mean("year")

            ds_mean = ds_mean.average(("lat", "lon"), weights=wgt)

            # make sure the data is in memory before the file is closed
            ds_all.append(ds_mean.load())

    ds = xr.concat(ds_all, "ens")

    # number the simulations consecutively
    ds["ens"] = ("ens", np.arange(len(fNs)))

    # get the model name
    model = [path.basename(fN).split("_")[2] for fN in fNs]

    return ds.assign_coords(model=("ens", model)).tas

In [None]:
# URLs of the CO2 concentration data of each scenario, taken from the
# KNMI climate explorer (https://climexp.knmi.nl)

co2_data_location = {
    "rcp26": "https://climexp.knmi.nl/data/iRCP3PD_CO2.dat",
    "rcp45": "https://climexp.knmi.nl/data/iRCP45_CO2.dat",
    "rcp60": "https://climexp.knmi.nl/data/iRCP6_CO2.dat",
    "rcp85": "https://climexp.knmi.nl/data/iRCP85_CO2.dat",
}


def get_co2(scen):
    """
    Read the CO2 concentrations of one scenario.

    Parameters
    ----------
    scen : string
        Scenario, must be a key of `co2_data_location`, e.g. 'rcp85'.

    Returns
    -------
    co2 : DataArray
        CO2 data named 'co2' with the index 'year' and descriptive attributes.

    """
    # the file is read directly from the climate explorer
    url = co2_data_location[scen]

    # the first four lines are skipped, the first column becomes the index
    table = pd.read_csv(url, header=None, skiprows=4, index_col=0, sep=" ")
    table.index.name = "year"
    table.columns = ["co2"]

    co2 = table.to_xarray()["co2"]

    co2.attrs = {
        "data": "CMIP5 CO2 concentrations",
        "units": "ppm",
        "source": url,
        "reference": "M. Meinshausen, S. Smith et al.",
    }

    return co2

In [None]:
def get_tas_ann_scen(scen, ref_period=(1861, 1880)):
    """
    Create, save and return the global annual mean temperature of a scenario.

    Uses the first ensemble member (r1i1p1) of all models found for `scen`
    on the 'g025' grid and writes the result to ``cmip5_tas_<scen>_ts.nc``
    in the current working directory.

    Parameters
    ----------
    scen : string
        Scenario, e.g. 'rcp85'.
    ref_period : tuple of two int, optional
        First and last year (both inclusive) of the period the anomalies are
        calculated against. Default: (1861, 1880).

    Returns
    -------
    ds : Dataset
        Dataset with the variables 'tas', 'tas_anom' and 'co2', using 'time'
        as the main coordinate.

    """
    print(scen)

    ref_start, ref_end = ref_period

    # get filenames
    fNs = cmip5_filename("tas", "ann", "*", scen, "r1i1p1", res="g025")

    # get time series
    tas = get_tas_ann(fNs)

    # calculate anomaly
    tas_anom = tas - tas.sel(year=slice(ref_start, ref_end)).mean("year")

    ds = tas.to_dataset(name="tas")

    # add anomaly time series
    ds = ds.assign(tas_anom=tas_anom)

    # add global attributes
    ds.attrs = dict(
        data="CMIP5 surface air temperature (tas) global annual mean",
        source="https://data.iac.ethz.ch/atmos/",
        reference="Taylor et al., 2012",
        scenario=scen,
    )

    # add attributes for the variables
    ds.tas.attrs = dict(units="K")
    ds.tas_anom.attrs = dict(
        units="K", postprocessing=f"anomalies wrt. {ref_start}..{ref_end}"
    )

    # get the co2 data
    co2 = get_co2(scen)

    # make sure both datasets span the same years
    ds, co2 = xr.align(ds, co2, join="inner")

    # add the co2 data
    ds = ds.assign(co2=co2)

    # get the time axis: one time stamp per year, starting in the first year
    # NOTE(review): newer pandas versions deprecate the alias "A-JUN" in favor
    # of "YE-JUN" - kept as is to stay compatible with older pandas versions
    year = str(ds.year[0].values)
    periods = len(ds.year)
    time = pd.date_range(year, periods=periods, freq="A-JUN")

    # add it
    ds = ds.assign_coords(time=("year", time))

    # make time the main coord
    ds = ds.swap_dims(dict(year="time"))

    ds.to_netcdf(f"cmip5_tas_{scen}_ts.nc", format="NETCDF4_CLASSIC")

    return ds

In [None]:
# compute (and save) the global mean temperature time series for rcp85
rcp85 = get_tas_ann_scen("rcp85")

In [None]:
# display the resulting Dataset
rcp85

In [None]:
# rcp85.swap_dims(dict(time='year'))

In [None]:
# add the time values as variable `abc` along a dimension `xyz`; the result is
# only displayed, it is not stored
# NOTE(review): looks like a leftover experiment - confirm whether it is needed
rcp85.assign(abc=("xyz", rcp85.time))

In [None]:
# quick visual check: plot tas of all simulations as a 2D field
rcp85.tas.plot.pcolormesh()
# looks good

In [None]:
# plot the tas time series of every simulation as a grey line

for ens in rcp85.ens:
    rcp85.tas.sel(ens=ens).plot.line("0.5")

In [None]:
# plot again, this time the anomalies of every simulation as a grey line

for ens in rcp85.ens:
    rcp85.tas_anom.sel(ens=ens).plot.line("0.5")

In [None]:
# plot the CO2 data of rcp85
rcp85.co2.plot()

In [None]:
# compute (and save) the time series of the remaining scenarios, in this order
rcp26, rcp45, rcp60 = (
    get_tas_ann_scen(scen) for scen in ("rcp26", "rcp45", "rcp60")
)

## Models

Print all models used:

In [None]:
# unique model names of each scenario
m_rcp26 = set(rcp26["model"].values)
m_rcp45 = set(rcp45["model"].values)
m_rcp60 = set(rcp60["model"].values)
m_rcp85 = set(rcp85["model"].values)

In [None]:
# all models that are used in at least one scenario
all_models = frozenset(m_rcp26 | m_rcp45 | m_rcp60 | m_rcp85)

In [None]:
# the iteration order of a set is arbitrary - sort the names to get the same,
# alphabetical list on every run
for model in sorted(all_models):
    print(model)