# HadISST3
https://climatedataguide.ucar.edu/climate-data/sst-data-hadsst3

In [None]:
import os
import sys
sys.path.append("..")
import scipy as sp
import numpy as np
import xarray as xr
import seaborn as sns
import cmocean
import cartopy
import cartopy.crs as ccrs
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

In [None]:
%matplotlib inline
%config InlineBackend.print_figure_kwargs={'bbox_inches':None}
%load_ext autoreload
%autoreload 2
%aimport - numpy - scipy - matplotlib.pyplot

In [None]:
from maps import make_map
from paths import file_HadISST, path_results, file_RMASK_ocn_had, path_samoc
from plotting import discrete_cmap, shifted_color_map
from timeseries import deseasonalize
from xr_regression import xr_linear_trends_2D
from xr_DataArrays import xr_AREA

In [None]:
ds = xr.open_dataset(file_HadISST, decode_times=False)  # time in days since 1.1.1870

In [None]:
ds = ds.where(ds['sst'] != -1000.)

In [None]:
ds

In [None]:
ds.sst

## How much data is available

In [None]:
# Map of the fraction of time steps with a valid SST value at each grid point.
fn = f'{path_results}/SST/SST_HadISST_data_fraction'
cmap = plt.get_cmap('viridis', 10)
label = 'fraction of data available'
text1 = 'HadISST'
text2 = '1870-2018'
make_map(
    xa=ds.sst.count(dim='time')/len(ds.time),
    domain='ocn_had',
    proj='rob',
    cmap=cmap,
    minv=0.5,
    maxv=1,
    label=label,
    filename=fn,
    text1=text1,
    text2=text2,
    rects=[],
    sig=None,
    clon=200,
)

In [None]:
ds.sst.count(dim='time').max()

In [None]:
len(ds.time)

In [None]:
ds.sst.count(dim='time').plot()

In [None]:
# number of valid months, shown only where the record has gaps (latitude slice 60 to -60)
n_valid = ds.sst.count(dim='time')
incomplete = n_valid.where(n_valid != len(ds.time))
incomplete.sel({'latitude': slice(60, -60)}).plot(vmin=1500)

In [None]:
# Monthly SST field without gaps.
# Missing values occur in polar latitudes during winter, so they are set to -1.8.
sst_had = ds.sst.where(~np.isnan(ds.sst), -1.8)
sst_had.to_netcdf(f'{path_samoc}/SST/SST_monthly_had.nc')

In [None]:
# Sanity check of the NaN handling on the single grid point nearest to
# latitude 80, longitude 0.
test = ds.sst.sel({'latitude':80, 'longitude':0}, method='nearest')
print(test[5].values)
print(np.dtype(test[5]))

print(np.isnan(test).any().values)
print(test.mean('time').values)

# NaN never compares equal to anything, so `test!=np.nan` is True everywhere:
# nothing is masked and `count` returns the number of non-NaN values.
print(test.where(test!=np.nan).count('time').values)    # 1641
# Only the NaN entries are kept here, and `count` ignores NaNs.
print(test.where(np.isnan(test)).count('time').values)  # 0

print(test.count(dim='time').values)
print(test.time.count(dim='time').values)

test[:30].plot()
# fill the gaps with -1.8, the same replacement used for the full SST field
test = test.where(np.isnan(test)==False, -1.8)
print(np.isnan(test).any().values)
# offset by 1 so the filled series is distinguishable from the original in the plot
(test[:30]+1).plot()
# `test==np.nan` is False everywhere, so every value is masked before counting
print(test.where(test==np.nan).count('time').values)

## yearly means

In [None]:
# Yearly mean SST: mask the -1000 flag values, fill the gaps with -1.8, average
# per calendar year and express the time axis as (year - 1870) * 365.
ds2 = xr.open_dataset(file_HadISST)
ds2 = ds2.where(ds2['sst'] != -1000.).sst
ds2 = (
    ds2.where(~np.isnan(ds2), -1.8)
    .groupby('time.year')
    .mean('time')
    .rename({'year': 'time'})
)
ds2.coords['time'] = (ds2.coords['time'] - 1870) * 365

In [None]:
ds2.to_netcdf(f'{path_samoc}/SST/SST_yrly_had.nc')

In [None]:
ds2[0,:,:].plot()

## MASK

In [None]:
# binary mask: 1 where a grid point has at least one valid SST value in time, 0 otherwise
MASK_np = np.where(ds.sst.count(dim='time')!=0, 1, 0)
# use the first time slice (without its time coordinate) as a template for the mask
MASK = ds.sst[0,:,:].drop(['time']).copy()
MASK.values = MASK_np

In [None]:
MASK

In [None]:
MASK.plot()

In [None]:
xr.open_dataarray(f'{file_RMASK_ocn_had}').plot(vmin=0)

use with caution: there are some interpolation errors

## AREA

In [None]:
xr_AREA('ocn_had').plot()

## Mean and standard deviation 

In [None]:
# time is in days since 1.1.1870, so 110 and 140 years after 1870 select 1980-2010;
# `where(MASK)` blanks the grid points where the mask is 0
sst_1980_2010 = ds.sst.sel(time=slice(110*365.25, 140*365.25)).where(MASK)

In [None]:
# Map of the 1980-2010 time-mean SST.
# Raw string: '\c' is not a valid escape sequence in a normal string literal.
label = r'temperature [$^\circ$C]'
text1 = 'HadISST\nmean'
text2 = '1980-2010'
cmap = discrete_cmap(base_cmap=cmocean.cm.thermal, N=17)
fn = f'{path_results}/SST/SST_HadISST_mean_1980_2010'
make_map(xa=sst_1980_2010.mean(dim='time'),
         domain='ocn_had', proj='rob', cmap=cmap, minv=-2, maxv=32,
         label=label, filename=fn, text1=text1, text2=text2, rects=[], sig=None, clon=200)

In [None]:
# Map of the standard deviation in time of the monthly 1980-2010 SST.
fn = f'{path_results}/SST/SST_HadISST_std_1980_2010'
cmap = discrete_cmap(base_cmap=cmocean.cm.haline, N=12)
label = 'std(temperature) [K]'
text1 = 'HadISST\nmonthly\nstd'
text2 = '1980-2010'
make_map(
    xa=sst_1980_2010.std(dim='time'),
    domain='ocn_had',
    proj='rob',
    cmap=cmap,
    minv=0,
    maxv=6,
    label=label,
    filename=fn,
    text1=text1,
    text2=text2,
    rects=[],
    sig=None,
    clon=200,
)

In [None]:
# Map of the standard deviation in time of the deseasonalized 1980-2010 SST.
fn = f'{path_results}/SST/SST_HadISST_ds_std_1980_2010'
cmap = discrete_cmap(base_cmap=cmocean.cm.haline, N=12)
label = 'std(temperature) [K]'
text1 = 'HadISST\ndeseas.\nstd'
text2 = '1980-2010'
make_map(
    xa=deseasonalize(sst_1980_2010).std(dim='time'),
    domain='ocn_had',
    proj='rob',
    cmap=cmap,
    minv=0,
    maxv=1.2,
    label=label,
    filename=fn,
    text1=text1,
    text2=text2,
    rects=[],
    sig=None,
    clon=200,
)

## 60$^\circ$S-60$^\circ$N mean timeseries

In [None]:
AREA = xr_AREA('ocn_had')

In [None]:
x_AREA = AREA.sel(latitude=slice(60, -60)).where(MASK)

In [None]:
x_area_sum = x_AREA.sum()

In [None]:
x_area_sum

In [None]:
x_AREA.plot()

In [None]:
# area-weighted mean SST over the latitude slice 60 to -60
sst_band = sst_had.sel(latitude=slice(60, -60))
SST_xm = (sst_band*x_AREA).sum(dim=['latitude', 'longitude'])/x_area_sum

In [None]:
SST_xm.plot()
deseasonalize(SST_xm).plot()

In [None]:
SST_xm.to_netcdf(f'{path_samoc}/SST/SST_60S_60N_mean_monthly_had.nc')

## Trends

In [None]:
# difference between the mean of the last 12 and the mean of the first 12 time steps
sst_first = ds.sst[:12,:,:].mean(dim='time')
sst_last = ds.sst[-12:,:,:].mean(dim='time')
(sst_last-sst_first).plot(vmin=-2, vmax=2, cmap='RdBu_r')

In [None]:
def xr_linear_trends_2D1(da, dim_names, with_nans=False):
    """ calculate linear trend of 2D field in time

    The two dimensions in `dim_names` are stacked into a single one, a first
    order polynomial is fitted against the `time` coordinate at every point,
    and the slopes are unstacked back onto the original two dimensions.

    input:
    da        .. 3D xr DataArray with (dim_names) dimensions and a `time` dimension
    dim_names .. tuple of 2 strings: e.g. lat, lon dimension names
    with_nans .. if True, NaNs are dropped from each timeseries before fitting;
                 a timeseries with fewer than two values left gets a NaN trend

    output:
    da_trend  .. slope of linear regression
    """

    def xr_linear_trend(x):
        """ function to compute a linear trend coefficient of a timeseries """
        pf = np.polynomial.polynomial.polyfit(x.time, x, 1)
        # polyfit returns coefficients in increasing order: pf[1] is the slope
        return xr.DataArray(pf[1])

    def xr_linear_trend_with_nans(x):
        """ as `xr_linear_trend`, but ignoring NaNs in the timeseries """
        x = x.dropna(dim='time')  # leaves a timeseries without NaNs unchanged
        if x.size > 1:
            pf = np.polynomial.polynomial.polyfit(x.time, x, 1)
        else:
            # not enough points left to fit a line
            pf = np.array([np.nan, np.nan])
        return xr.DataArray(pf[1])

    (dim1, dim2) = dim_names
    # stack lat and lon into a single dimension called allpoints
    stacked = da.stack(allpoints=[dim1, dim2])
    # apply the function over allpoints to calculate the trend at each point
    trend_function = xr_linear_trend_with_nans if with_nans else xr_linear_trend
    trend = stacked.groupby('allpoints').apply(trend_function)
    # unstack back to lat lon coordinates
    da_trend = trend.unstack('allpoints')
    return da_trend

In [None]:
ds_trend_full1 = xr_linear_trends_2D1(da=ds.sst.where(MASK), 
                                    dim_names=('latitude', 'longitude'), with_nans=True)

In [None]:
%%time
ds_trend_full = xr_linear_trends_2D(da=ds.sst.where(MASK), 
                                    dim_names=('latitude', 'longitude'), with_nans=True)

In [None]:
ds_trend_full1

In [None]:
%%time
# time is in days since 1.1.1870: 110 and 140 years after 1870 select 1980-2010
ds_trend_1980_2010 = xr_linear_trends_2D(da=ds.sst.sel(time=slice(110*365.25, 140*365.25)).where(MASK),
                                         dim_names=('latitude', 'longitude'), with_nans=True)

In [None]:
%%time
# time is in days since 1.1.1870: 98 and 148 years after 1870 select 1968-2018
ds_trend_1968_2018 = xr_linear_trends_2D(da=ds.sst.sel(time=slice(98*365.25, 148*365.25)).where(MASK),
                                         dim_names=('latitude', 'longitude'), with_nans=True)

In [None]:
# remove the length-one dimensions from all three trend fields
ds_trend_full, ds_trend_1980_2010, ds_trend_1968_2018 = (
    trend.squeeze()
    for trend in (ds_trend_full, ds_trend_1980_2010, ds_trend_1968_2018)
)

In [None]:
# Map of the linear SST trend over the full record.
label = 'SST trend [K/century]'
text1 = 'HadISST'
text2 = '1870-2018'
cmap = discrete_cmap(base_cmap=shifted_color_map(start=.33, stop=1, midpoint=.5,
                                                 cmap=cmocean.cm.balance), N=16)
fn = f'{path_results}/SST/SST_HadISST_trend_1870_2018'
# the time axis is in days, so the factor 365.25*100 rescales the trend to per century
make_map(xa=ds_trend_full*365.25*100, domain='ocn_had', proj='rob', cmap=cmap, minv=-.5, maxv=1.5,
         label=label, filename=fn, text1=text1, text2=text2, rects=[], sig=None, clon=200)

In [None]:
# Map of the linear SST trend over 1980-2010.
label = 'SST trend [K/century]'
text1 = 'HadISST'
text2 = '1980-2010'
cmap = discrete_cmap(base_cmap=shifted_color_map(start=.3, stop=1, midpoint=.5,
                                                 cmap=cmocean.cm.balance), N=14)
fn = f'{path_results}/SST/SST_HadISST_trend_1980_2010'
# the time axis is in days, so the factor 365.25*100 rescales the trend to per century
make_map(xa=ds_trend_1980_2010*365.25*100, domain='ocn_had', proj='rob', cmap=cmap, minv=-2, maxv=5,
         label=label, filename=fn, text1=text1, text2=text2, rects=[], sig=None, clon=200)

In [None]:
# Map of the linear SST trend over 1968-2018.
label = 'SST trend [K/century]'
text1 = 'HadISST'
text2 = '1968-2018'
cmap = discrete_cmap(base_cmap=shifted_color_map(start=.3, stop=1, midpoint=.5,
                                                 cmap=cmocean.cm.balance), N=14)
fn = f'{path_results}/SST/SST_HadISST_trend_1968_2018'
# the time axis is in days, so the factor 365.25*100 rescales the trend to per century
make_map(xa=ds_trend_1968_2018*365.25*100, domain='ocn_had', proj='rob', cmap=cmap, minv=-2, maxv=5,
         label=label, filename=fn, text1=text1, text2=text2, rects=[], sig=None, clon=200)

In [None]:
from xr_regression import xr_lintrend

In [None]:
from timeseries import lowpass

In [None]:
# Raw and low-pass filtered SST at the grid point nearest to latitude 0, longitude -120.
sst_point = ds.sst.sel({'latitude':0, 'longitude':-120}, method='nearest')
# time is in days since 1.1.1870; 365.25 days per year matches the time slices
# used for the trend periods
years = ds.time/365.25+1870
plt.plot(years, sst_point)
plt.plot(years, lowpass(sst_point, 120))

# Removing the forced signal

In [None]:
forcing_natural = xr.open_dataarray(f'{path_samoc}/GMST/CMIP5_natural.nc', decode_times=False)
forcing_anthro  = xr.open_dataarray(f'{path_samoc}/GMST/CMIP5_anthro.nc' , decode_times=False)
forcing_all     = xr.open_dataarray(f'{path_samoc}/GMST/CMIP5_all.nc'    , decode_times=False)

In [None]:
# Shift and rescale the time coordinate of each forcing series in place.
# NOTE(review): the offset 9 and factor 365 presumably put the forcings on the same
# (year-1870)*365 axis as the yearly SST field — confirm against the forcing files.
for forcing in [forcing_natural, forcing_anthro, forcing_all]:
    print(len(forcing))
    forcing.coords['time'] = (forcing.time-9)*365

In [None]:
import statsmodels.api as sm

## detrending with one scaled signal (all forcings)

In [None]:
ds2[:, 100, 10].plot()
forcing_all[:].plot()

In [None]:
X = sm.add_constant(forcing_all[:].values)
y = ds2[:, 100, 10].values
model = sm.OLS(y, X).fit()
print(f'all forcing    R^2: {model.rsquared:4.2e} \n params:\n{model.params}\n')

In [None]:
model.summary()

## detrending with two scaled signals (anthropogenic + natural)

In [None]:
import pandas as pd

In [None]:
forcings = forcing_natural.to_dataframe(name='natural').join(
                     [forcing_anthro.to_dataframe( name='anthro'),
                      forcing_all.to_dataframe(name='all')])

In [None]:
forcings.plot()

In [None]:
X = sm.add_constant(forcings[['all']])
y = ds2[:, 100, 10].values
model = sm.OLS(y, X).fit()
print(f'all forcing    R^2: {model.rsquared:4.2e} \n params:\n{model.params}\n')

In [None]:
# Regress the yearly SST at one grid point on the anthropogenic and natural forcing
# signals (plus a constant).
X = sm.add_constant(forcings[['anthro', 'natural']])
y = ds2[:, 100, 10].values
model = sm.OLS(y, X).fit()
# label the printout with the regressors actually used in this fit
print(f'anthro+natural R^2: {model.rsquared:4.2e} \n params:\n{model.params}\n')

In [None]:
model.params['anthro']

In [None]:
model.summary()

In [None]:
# Stack the horizontal dimensions and create two independent fields, shaped like
# one time slice, to hold the regression coefficients.
ds3 = ds2.stack(z=('latitude', 'longitude'))
template = ds3[0,:].squeeze()
ds_anthro, ds_natural = template.copy(), template.copy()

In [None]:
%%time
# Fit the two-signal regression separately at every stacked grid point and store
# the anthropogenic and natural coefficients.
X = sm.add_constant(forcings[['anthro', 'natural']])
for i in range(len(ds3.z)):
    fit = sm.OLS(ds3[:, i].values, X).fit()
    ds_anthro[i] = fit.params['anthro']
    ds_natural[i] = fit.params['natural']

In [None]:
ds_anthro.unstack('z').plot()

In [None]:
ds_natural.unstack('z').plot()

In [None]:
ds_natural.unstack('z').mean(dim='longitude').plot()

In the Pacific there are negative values for the natural forcing coefficient, implying warming for negative forcings ...

In [None]:
# Compare the demeaned yearly SST at the grid point nearest to latitude 0,
# longitude -140 with the natural forcing signal.
ts = ds2.sel({'longitude':-140, 'latitude':0}, method='nearest')
# Not `ts -= ...`: the selection can be a view into `ds2`, and an in-place
# subtraction would then silently change the values stored in `ds2`.
ts = ts - ts.mean()
ts.plot()
forcing_natural.plot()

In [None]:
np.corrcoef(forcing_natural, ts)

Map of the difference between the two methods as $R^2$.

In [None]:
das = xr.open_dataarray(f'{path_samoc}/SST/SST_GMST_sfdt_yrly_had.nc')
dat = xr.open_dataarray(f'{path_samoc}/SST/SST_GMST_tfdt_yrly_had.nc')

In [None]:
((das-dat)**2).sum(dim='time').plot(vmax=5)