# GMST datasets, ensembles, detrending

In [None]:
import os
import sys
sys.path.append("..")
import numpy as np
import xarray as xr
import seaborn as sns
import cartopy
import cartopy.crs as ccrs
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
import statsmodels.api as sm

In [None]:
%matplotlib inline
%config InlineBackend.print_figure_kwargs={'bbox_inches':None}
%load_ext autoreload
%autoreload 2

In [None]:
from maps import map_robinson, map_eq_earth
from GMST import GMST_timeseries, GMST_regression, atm_heat_content, GMST_GISTEMP
from paths import path_results, path_samoc, path_data
from plotting import shifted_color_map, discrete_cmap
from constants import abs_zero, cp_air
from timeseries import IterateOutputCESM
from xr_integrate import xr_surf_mean, xr_zonal_mean
from xr_DataArrays import xr_AREA

# Observations & ensemble results
- observations: HadCRUT4 & GISTEMP (`GISTEMP = GMST_GISTEMP()`)
- ensembles: MPI ESM GE, CMIP5

In [None]:
# All series are opened with the time axis left undecoded (raw numbers).
_raw_time = dict(decode_times=False)

# --- observations (anomaly files) ---
hadcrut = xr.open_dataarray(f'{path_data}/HadCRUT/ihad4_krig_v2_0-360E_-90-90N_n_mean1_anom_30.nc', **_raw_time)
gistemp = xr.open_dataarray(f'{path_data}/GISTEMP/igiss_temp_250_0-360E_-90-90N_n_mean1_anom_30.nc', **_raw_time)

# --- CMIP5 GMST series ---
kajtar = xr.open_dataarray(f'{path_data}/CMIP5/Kajtaretal_gmst.mmm.historical+rcp85.nc', **_raw_time)
cmip5 = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_GMST_yrly.nc', **_raw_time)  # first year 1861
cmip5m = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_GMST_monthly.nc', **_raw_time)  # int months since Jan 1861
cmip5a = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_historicalNat_1861_2018.nc', **_raw_time)  # first year 1861

# --- CMIP5 yearly index series ---
tpi1 = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_TPI1_yrly.nc', **_raw_time)
tpi2 = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_TPI2_yrly.nc', **_raw_time)
tpi3 = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_TPI3_yrly.nc', **_raw_time)
amo = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_AMO_yrly.nc', **_raw_time)
som = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_SOM_yrly.nc', **_raw_time)
sst = xr.open_dataarray(f'{path_data}/CMIP5/KNMI_CMIP5_global_SST_yrly.nc', **_raw_time)

# TPI: series 2 minus the average of series 1 and 3
tpi = tpi2 - (tpi1 + tpi3)/2

In [None]:
# Pad the `historicalNat` series to 158 entries: its last three values are
# overwritten with the mean of the five values preceding them, and six more
# copies of that mean are appended at time coordinates 152..157.
cmip5b = cmip5a.copy()
cmip5b_m = cmip5b[-8:-3].mean()
cmip5b.values[-3:] = cmip5b_m
_padding = xr.DataArray(6*[cmip5b_m],
                        coords=[np.arange(152,158)],
                        dims=['time'])
cmip5c = xr.concat([cmip5b, _padding], dim='time')

# Entries 9..157 (plotted below as 1870-2018); every series is centred on its
# own mean. "anthro" is the all-forcing series minus the padded natural one.
_all = cmip5[9:158]
_nat = cmip5c[9:]
_ant = _all - _nat
cmip5_natural = _nat - _nat.mean()
cmip5_anthro = _ant - _ant.mean()
cmip5_all = _all - _all.mean()

cmip5_natural.to_netcdf(f'{path_samoc}/GMST/CMIP5_natural.nc')
cmip5_anthro.to_netcdf(f'{path_samoc}/GMST/CMIP5_anthro.nc')
cmip5_all.to_netcdf(f'{path_samoc}/GMST/CMIP5_all.nc')

# The three curves are drawn with vertical offsets of +0.5, 0 and +1.
_years = np.arange(1870,2019)
plt.figure(figsize=(8,5))
plt.tick_params(labelsize=14)
plt.plot(_years, cmip5_anthro +.5, label=r'MMM$_{anthro.}$ = MMM$_{all}$ - MMM$_{natural}$')
plt.plot(_years, cmip5_natural, label=r'MMM$_{natural}$')
plt.plot(_years, cmip5_all +1, label=r'MMM$_{all}$')
plt.legend(fontsize=14, frameon=False)
plt.xlabel('time [years C.E.]', fontsize=14)
plt.ylabel('MMM temperature anomaly [K]', fontsize=14)
plt.tight_layout()
plt.savefig(f'{path_results}/GMST/CMIP5_MMM', dpi=150)

In [None]:
# Time axis of the monthly CMIP5 series divided by 12; the loader comment gives
# its unit as integer months since Jan 1861, so this is in years. Displayed as
# the cell output only.
cmip5m.time/12

In [None]:
# Write the HadCRUT series, as loaded, to `GMST_yrly_had.nc` under `path_samoc`.
hadcrut.to_netcdf(f'{path_samoc}/GMST/GMST_yrly_had.nc')

In [None]:
# Correlation coefficient between the Kajtar et al. series and the KNMI CMIP5
# yearly series. With index 0 = 1861, `cmip5[19:157]` spans 1880-2017, the same
# years the Kajtar series is plotted against elsewhere in this notebook.
np.corrcoef(kajtar, cmip5[19:157])[0,1]

In [None]:
# Reference-period means of the KNMI CMIP5 yearly series (index 0 = 1861).
# Slices exclude their end index, so these average 1980-2009 and 1950-1979
# (30 entries each).
cmip5_1980_2010 = cmip5[119:149].mean()
cmip5_1950_1980 = cmip5[89:119].mean()

In [None]:
# MPI ESM GE
def add_member_dim(ds):
    """Prepare one ensemble-member dataset for concatenation along `member`.

    Used as the `preprocess` hook of `xr.open_mfdataset`.

    Parameters
    ----------
    ds : xarray Dataset
        Must contain the variables/coordinates `lat`, `lon` and `time_bnds`
        and a `history` attribute.

    Returns
    -------
    xarray Dataset
        `ds` with length-one dimensions squeezed out, `lat`, `lon` and
        `time_bnds` dropped, and a new length-one `member` dimension whose
        coordinate value is the member ID string.
    """
    ds = ds.squeeze()
    ds = ds.drop(['lat', 'lon', 'time_bnds'])
    # The member ID is the three characters found at a fixed offset from the
    # end of the `history` attribute.
    # NOTE(review): this relies on the exact layout of `history` - confirm it
    # holds for every input file.
    member = ds.attrs['history'][-26:-23]
    # Attach the ID as a scalar coordinate, then promote it to a length-one
    # dimension. `set_coords` accepts only the names of existing variables,
    # so the former `set_coords('member', member)` call was not valid.
    ds = ds.assign_coords(member=member).expand_dims('member')
    return ds

# Open all member files (one `member` entry each after preprocessing), add the
# mean over members as `tsurf_mean`, and store the combined dataset.
_member_files = f'{path_data}/hedemannetal/rawdata/tsurf_gm_yy_1850_2015_lkm*.nc'
ds = xr.open_mfdataset(_member_files, preprocess=add_member_dim, concat_dim=None)

ds['tsurf_mean'] = ds['tsurf'].mean(dim='member')
# member mean shifted by -273.15 (plotted in degrees C below)
mpige = ds['tsurf_mean'] - 273.15
ds.to_netcdf(f'{path_samoc}/GMST/GMST_MPI_GE.nc')

In [None]:
# GMST time series: individual MPI ESM members and their mean, the two CMIP5
# means, and the two observational series shifted by `cmip5_1950_1980`.
plt.figure(figsize=(8,5))
plt.tick_params(labelsize=14)
plt.xlabel('time [year C.E.]', fontsize=16)
# raw string: `\c` is not a valid escape sequence in a normal string literal
plt.ylabel(r'GMST [$^\circ$C]', fontsize=16)
years_mpige = np.arange(1850,2016)  # identical for every member, built once
# one thin line per member; the first axis of `tsurf` is the one indexed by `i`
for i in range(ds.tsurf.shape[0]):
    # L1 keeps the handle of the last member line for the legend
    L1, = plt.plot(years_mpige, ds.tsurf[i,:]-273.15,
                   alpha=.2, lw=.5, c='C3', label='MPI ESM ensemble members')
L3, = plt.plot(years_mpige, mpige, lw=2, label='MPI ESM GE mean')
L4, = plt.plot(np.arange(1850,2019), hadcrut+cmip5_1950_1980, label='HadCRUT')
L5, = plt.plot(np.arange(1880,2019), gistemp+cmip5_1950_1980, label='GISTEMP')
L2, = plt.plot(np.arange(1861,2019), cmip5[:158], lw=2, label='CMIP5 single member ensemble mean')
L6, = plt.plot(np.arange(1880,2018), kajtar, label='CMIP5 (Kajtar et al.)')
# Two legends: models upper left, observations lower right. The second
# `plt.legend` call replaces the first, so the first is re-added as an artist.
leg1 = plt.legend(handles=[L2, L6, L1, L3], fontsize=14, frameon=False, loc=2)
plt.legend(handles=[L4, L5], fontsize=14, frameon=False, loc=4)
plt.gca().add_artist(leg1)
plt.tight_layout()
plt.savefig(f'{path_results}/GMST/GMST_tseries_MPIGE_CMIP5_HadCRUT_GISTEMP')

In [None]:
# CMIP5 MMEM series as anomalies relative to the mean of entries 119..148
# (1980-2009 when index 0 is 1861), drawn on a common 1861-2018 axis.
plt.figure(figsize=(8,5))
plt.tick_params(labelsize=14)
plt.xlabel('time [year C.E.]', fontsize=16)
plt.ylabel('CMIP5 MMEM anomalies [K]', fontsize=16)
plt.axhline(0, c='k', lw=.5)

_years = np.arange(1861,2019)
_solid = dict(lw=1.5)
_dashed = dict(lw=1, ls='--')
for _series, _name, _line in (
    (cmip5, 'GMST', _solid),
    (sst, 'global SST', _solid),
    (amo, 'AMO', _solid),
    (som, 'SOM', _solid),
    (tpi, 'TPI', _dashed),
):
    plt.plot(_years, _series[:158] - _series[119:149].mean(), label=_name, **_line)

plt.legend(fontsize=14, frameon=False, ncol=3)
plt.tight_layout()
plt.savefig(f'{path_results}/GMST/CMIP5_MMEMs')

In [None]:
def corr(x,y):
    """Return the correlation coefficient between `x` and `y`.

    Thin wrapper around `np.corrcoef`: for two 1-D inputs it picks the
    off-diagonal entry of the resulting correlation matrix.
    """
    matrix = np.corrcoef(x, y)
    return matrix[0, 1]

# Scatter of three CMIP5 MMEM index anomalies against the GMST anomaly.
# The SST and SOM clouds are shifted by +0.3 and -0.3 for readability.
plt.figure(figsize=(8,5))
plt.tick_params(labelsize=14)
plt.xlabel('CMIP5 MMEM GMST anomalies [K]', fontsize=16)
plt.ylabel('CMIP5 MMEM anomalies [K]', fontsize=16)
plt.xlim((-1,.7))
plt.ylim((-1.2,.8))

# slope-one guide lines; the first and last carry the same +-0.3 shift as the
# corresponding scatter clouds
for _colour, _ends in (('C0', [- .7, 1]), ('C1', [-1, .7]), ('C2', [-1.3, .4])):
    plt.plot([-1,.7], _ends, c=_colour, lw=.5, label='1:1 line')


def _anomaly(series):
    # first 158 entries relative to the mean of entries 119..148
    return series[:158] - series[119:149].mean()


x = _anomaly(cmip5)

y = _anomaly(sst)
r = corr(x, y)
plt.scatter(x, y+.3, label=f'global SST (r={r:5.3f})')

y = _anomaly(amo)
r = corr(x, y)
plt.scatter(x, y, label=f'AMO (r={r:5.3f})')

y = _anomaly(som)
r = corr(x, y)
plt.scatter(x, y-.3, label=f'SOM (r={r:5.3f})')

plt.legend(fontsize=14, frameon=False, ncol=2)
plt.tight_layout()
plt.savefig(f'{path_results}/GMST/GMST_CMIP5_SST_AMO_SOM_corr')

In [None]:
# Scatter of the MPI ESM GE mean anomaly against the two CMIP5 mean anomalies,
# with all series referenced to the same 1980-2009 period.
plt.figure(figsize=(8,5))
plt.tick_params(labelsize=14)
plt.xlabel('CMIP5 ensemble mean anomaly [K]', fontsize=16)
plt.ylabel('MPI ESM GE mean anomaly [K]', fontsize=16)
plt.xlim((-1,.7))
plt.ylim((-1,.7))
plt.plot([-1,1], [-1,1], c='k', lw=.5, label='1:1 line')
# Label-based slices include their end point, so the end label is '2009' to
# cover the same 30 years as `cmip5_1980_2010` (= cmip5[119:149]) and
# `kajtar[100:130]`; an end label of '2010' would average 31 years instead.
mpige_1980_2010 = mpige.sel(time=slice('1980', '2009')).mean()
# KNMI series: first 155 entries (1861-2015); Kajtar series: all but the last
# two entries (1880-2015) - both paired with the matching MPI GE years.
plt.scatter(cmip5[:155]-cmip5_1980_2010       , (mpige-mpige_1980_2010).sel(time=slice('1861', '2015')), alpha=.5, label='KNMI MMEM')
plt.scatter(kajtar[:-2]-kajtar[100:130].mean(), (mpige-mpige_1980_2010).sel(time=slice('1880', '2015')), alpha=.5, label='Kajtar et al. (2019)')

plt.legend(fontsize=14)
plt.tight_layout()
plt.savefig(f'{path_results}/GMST/GMST_MPIGE_CMIP5_corr')

In [None]:
# Plain differences observation minus forced signal, as NumPy arrays. The
# slices trim each pair to its common years (see the year axes used when the
# differences are plotted).
_had, _gis = hadcrut.values, gistemp.values
_mpi, _cm5 = mpige.values, cmip5.values

had_mpige = _had[:-3] - _mpi
had_cmip5 = _had[11:] - _cm5[:158]
gis_mpige = _gis[:-3] - _mpi[30:]
gis_cmip5 = _gis - _cm5[19:158]

In [None]:
# Time series of the four observation-minus-forced differences, each centred
# on its own mean.
plt.figure(figsize=(8,5))
plt.tick_params(labelsize=14)
plt.xlabel('time [years C.E.]', fontsize=16)
plt.ylabel('internal variability [K]', fontsize=16)
plt.axhline(0, c='k', lw=.5)

for _years, _diff, _name in (
    (np.arange(1850,2016), had_mpige, 'HadCRUT$-$MPIGE'),
    (np.arange(1861,2019), had_cmip5, 'HadCRUT$-$CMIP5'),
    (np.arange(1880,2016), gis_mpige, 'GISTEMP$-$MPIGE'),
    (np.arange(1880,2019), gis_cmip5, 'GISTEMP$-$CMIP5'),
):
    plt.plot(_years, _diff-_diff.mean(), label=_name)

plt.legend(fontsize=14, frameon=False, loc=1, ncol=2)
plt.tight_layout()
plt.savefig(f'{path_results}/GMST/GMST_obs-forcing')

In [None]:
def scaled_detrended_gmst(obs, mmem):
    """Subtract the least-squares-scaled `mmem` signal from `obs`.

    Both inputs are centred on their own means; the centred `obs` is then
    regressed on the centred `mmem` with `sm.OLS` (no constant term is added)
    and the fitted multiple of `mmem` is removed.

    Side effects: both centred series are drawn into the current matplotlib
    axes, and the series length, R^2 and fitted parameters are printed.

    Parameters
    ----------
    obs, mmem : equally long 1-D arrays (the callers pass NumPy arrays)

    Returns
    -------
    (residual, a) : the centred `obs` minus `a` times the centred `mmem`, and
        the fitted scaling factor `a`
    """
    assert len(obs)==len(mmem)

    forced_anom = mmem - np.mean(mmem)
    obs_anom = obs - np.mean(obs)
    # diagnostic plot: forced signal first, observations second
    for centred in (forced_anom, obs_anom):
        plt.plot(centred)

    model = sm.OLS(obs_anom, forced_anom).fit()
    print(f'{len(obs)}, R^2: {model.rsquared:4.2e} params: {model.params}\n')

    # single regressor, so the first parameter is the scaling factor
    a = model.params[0]
    residual = obs_anom - a*forced_anom
    return residual, a

In [None]:
# Scaled residuals and scaling factors for the four observation/forcing pairs.
# These assignments replace the plain differences stored under the same names.
_had, _gis = hadcrut.values, gistemp.values
_mpi, _cm5 = mpige.values, cmip5.values

had_mpige, a_hm = scaled_detrended_gmst(_had[:-3], _mpi)
had_cmip5, a_hc = scaled_detrended_gmst(_had[11:], _cm5[:158])
gis_mpige, a_gm = scaled_detrended_gmst(_gis[:-3], _mpi[30:])
gis_cmip5, a_gc = scaled_detrended_gmst(_gis, _cm5[19:158])

In [None]:
# Time series of the four scaled residuals; each legend entry states the
# fitted scaling factor.
plt.figure(figsize=(8,5))
plt.tick_params(labelsize=14)
plt.xlabel('time [years C.E.]', fontsize=16)
plt.ylabel('GMST $-$ GMST$_{forced}$ [K]', fontsize=16)
plt.axhline(0, c='k', lw=.5)

for _years, _residual, _name in (
    (np.arange(1850,2016), had_mpige, f'HadCRUT-{a_hm:4.2f}*MPIGE'),
    (np.arange(1861,2019), had_cmip5, f'HadCRUT-{a_hc:4.2f}*CMIP5'),
    (np.arange(1880,2016), gis_mpige, f'GISTEMP-{a_gm:4.2f}*MPIGE'),
    (np.arange(1880,2019), gis_cmip5, f'GISTEMP-{a_gc:4.2f}*CMIP5'),
):
    plt.plot(_years, _residual, label=_name)

plt.ylim((-.3,.43))
plt.legend(fontsize=14, frameon=False, loc=9, ncol=2)
plt.tight_layout()
plt.savefig(f'{path_results}/GMST/GMST_obs-scaled_forcing')

### making the `GMST_dt_yrly_had.nc` file

In [None]:
# Wrap `had_cmip5` in a DataArray on a yearly 1861-2018 time axis and store it.
_had_detrended = xr.DataArray(
    data=had_cmip5,
    coords={'time':np.arange(1861, 2019)},
    dims=['time'],
)
_had_detrended.to_netcdf(f'{path_samoc}/GMST/GMST_dt_yrly_had.nc')

In [None]:
# Store four more series with a fit removed: `lin_fit` is subtracted for
# ctrl, lpd and lpi, `quad_fit` for rcp.
# NOTE(review): `gmst_*` and `gmst_wt_*` are not defined in the visible part of
# this notebook - presumably created elsewhere; confirm before running the
# notebook top to bottom.
(gmst_ctrl-gmst_wt_ctrl.lin_fit ).to_netcdf(f'{path_samoc}/GMST/GMST_dt_yrly_ctrl.nc')
(gmst_rcp -gmst_wt_rcp .quad_fit).to_netcdf(f'{path_samoc}/GMST/GMST_dt_yrly_rcp.nc' )
(gmst_lpd -gmst_wt_lpd .lin_fit ).to_netcdf(f'{path_samoc}/GMST/GMST_dt_yrly_lpd.nc' )
(gmst_lpi -gmst_wt_lpi .lin_fit ).to_netcdf(f'{path_samoc}/GMST/GMST_dt_yrly_lpi.nc' )

In [None]:
# Histograms of the four observation-minus-forced series, each centred on its
# own mean, on shared 0.05 K wide bins.
# NOTE(review): if the scaled-detrending cell has run, these variables hold the
# scaled residuals while the labels describe plain differences - confirm which
# version is intended here.
plt.figure(figsize=(8,5))
plt.tick_params(labelsize=14)
# The binned quantity is on the x axis and the counts on the y axis; the
# variable name used to be attached to the y axis.
plt.xlabel('internal variability [K]', fontsize=16)
plt.ylabel('number of years', fontsize=16)
plt.axhline(0, c='k', lw=.5)
bins = np.arange(-.45,.5,.05)
plt.axvline(0, c='k', lw=.5)
plt.hist(had_mpige-had_mpige.mean(), bins=bins, alpha=.2, label='HadCRUT$-$MPIGE')
plt.hist(had_cmip5-had_cmip5.mean(), bins=bins, alpha=.2, label='HadCRUT$-$CMIP5' )
plt.hist(gis_mpige-gis_mpige.mean(), bins=bins, alpha=.2, label='GISTEMP$-$MPIGE')
plt.hist(gis_cmip5-gis_cmip5.mean(), bins=bins, alpha=.2, label='GISTEMP$-$CMIP5' )
plt.legend(fontsize=14, frameon=False, loc=2, ncol=1)
plt.tight_layout()

## two-factor detrending