# Pacific variability - PDO / IPO

original definition by _Mantua et al. (1997)_

> The leading EOF of monthly SST anomalies over the North Pacific (after removing the global mean SST anomaly) and its associated PC time series are termed the Pacific Decadal Oscillation (PDO)

---

0. create xr DataArrays of monthly Pacific data only  (from rect data for high res)
    1. North of 20 N
    2. North of Equator
    3. North of 38S

1. deseasonalize and detrend monthly SST data  (the emphasis is on consistency with the other data analyses, not necessarily on reproducing the original definition)
    - HadISST:
        1. calculate monthly deviations (i.e. average difference) from annual mean, then remove this seasonal cycle
        2. two factor detrending with natural and anthropogenic forcing estimates at each grid point
    - CESM output:
        1. calculate monthly deviations (i.e. average difference) from annual mean, then remove this seasonal cycle
        2. remove quadratic trend at each grid point  (for different time segments)

2. EOF analysis of data

3. create annual index, lowpass filter index

4. analysis
    - spectra
    - regression patterns

In [None]:
import os
import sys
from tqdm import tqdm
import scipy as sp
import numpy as np
import pandas as pd
import xarray as xr
import cmocean
import cartopy
import cartopy.crs as ccrs
import matplotlib
import statsmodels.api as sm
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

In [None]:
%matplotlib inline
%config InlineBackend.print_figure_kwargs={'bbox_inches':None}
matplotlib.rc_file('../rc_file')
%load_ext autoreload
%autoreload 2
%aimport - numpy - scipy - matplotlib.pyplot

In [None]:
sys.path.append("..")
from paths import path_results, path_samoc, path_prace, file_HadISST
from filters import chebychev
from regions import boolean_mask, global_ocean, gl_ocean_rect, gl_ocean_low, mask_box_in_region
from timeseries import IterateOutputCESM
from xr_DataArrays import xr_AREA
from xr_regression import xr_quadtrend

In [None]:
from ab_derivation_SST import DeriveSST as DS
from bd_analysis_indices import AnalyzeIndex as AI

# 1. data preparation

### concatenate monthly SST fields into single file

In [None]:
# %%time
# lpd:  18 mins, years 154-566, 2.3 GB
# DS().generate_monthly_SST_files('ctrl')  # when all SST rect data available

### load monthly and yearly data

### yearly ocn rect SST

In [None]:
# monthly SST fields of the three data sets: ctrl, lpd and had
# (note that the lpd file is read from `path_samoc`, the other two from `path_prace`)
monthly_ctrl = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_ctrl.nc')
# replace the ctrl time axis by fractional values in steps of 1/12, starting at 1+1/24 and ending before 301
monthly_ctrl = monthly_ctrl.assign_coords(time=np.arange(1+1/24, 301, 1/12))
monthly_lpd  = xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_lpd.nc')  # proper datetime
monthly_had  = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_had.nc')

In [None]:
# %%time
# # 44.3 s
# t_bins = np.arange(0,len(monthly_ctrl)+1,12)
# yrly_ctrl = monthly_ctrl.groupby_bins('time', t_bins, right=False).mean(dim='time')
# yrly_ctrl = yrly_ctrl.assign_coords(time_bins=np.arange(1, 301)).rename({'time_bins':'time'})
# yrly_ctrl.to_netcdf(f'{path_prace}/SST/SST_yrly_rect_ctrl.nc')

In [None]:
# yearly mean SST fields of the three data sets
# the ctrl file is read from the `SST/` subfolder, which is where the
# (commented-out) generation cell above writes `SST_yrly_rect_ctrl.nc`
yrly_ctrl = xr.open_dataarray(f'{path_prace}/SST/SST_yrly_rect_ctrl.nc')
yrly_lpd  = xr.open_dataarray(f'{path_samoc}/SST/SST_yrly_lpd.nc')
yrly_had  = xr.open_dataarray(f'{path_samoc}/SST/SST_yrly_had.nc')

In [None]:
# inspect the time coordinate of the yearly ctrl data
yrly_ctrl.time

# 2. deseasonalize and detrend

## deseasonalize

In [None]:
# every 12th monthly HadISST field, starting with the first one, put on the yearly time axis
first_month_had = monthly_had.isel(time=slice(0,-1,12))
first_month_had = first_month_had.assign_coords(time=yrly_had.time)
# time-mean difference between that month and the corresponding yearly mean
first_month_offset = (first_month_had - yrly_had).mean(dim='time')
first_month_offset.plot()
plt.title('avg January SST diff to yearly mean')

In [None]:
# parallel lists: the same index refers to the same data set (ctrl, lpd, had) in all three
yrlys    = [yrly_ctrl, yrly_lpd, yrly_had]
monthlys = [monthly_ctrl, monthly_lpd, monthly_had]
runs     = ['ctrl', 'lpd', 'had']

In [None]:
# %%time
# # 1min 52s for ctrl
# for i in range(1):
#     print(runs[i])
#     monthly = monthlys[i]
#     yrly    = yrlys[i]
#     assert len(monthly)/len(yrly) == 12.0
#     temp = monthly.copy()
#     for j in tqdm(range(12)):
#         m = monthly.isel(time=slice(j,len(monthly)+1,12))
#         temp[j::12] -= (m-yrly.assign_coords(time=m.time)).mean(dim='time')
#     temp.to_netcdf(f'{path_prace}/SST/SST_monthly_deseasonalized_{runs[i]}.nc')

In [None]:
# deseasonalized monthly SST fields as written by the (commented-out) loop above
# lpd and had are opened without decoding their time axis, ctrl with the default decoding
monthly_ds_ctrl = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_deseasonalized_ctrl.nc')
monthly_ds_lpd  = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_deseasonalized_lpd.nc' , decode_times=False)
monthly_ds_had  = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_deseasonalized_had.nc' , decode_times=False)

In [None]:
# visual check at a single grid point (index 100,100), time steps 900-1099:
# deseasonalized series vs. the original monthly series
monthly_ds_ctrl[900:1100,100,100].plot()
monthly_ctrl[900:1100,100,100].plot()

## detrend

### ctrl/lpd: quadratic detrending

In [None]:
%%time
# 1min 26s for lpd
for i, da  in enumerate([monthly_ds_ctrl, monthly_ds_lpd]):
    print(i)
    if i==1: continue
    (da-xr_quadtrend(da)).to_netcdf(f'{path_prace}/SST/SST_monthly_ds_dt_{["ctrl","lpd"][i]}.nc')

In [None]:
# deseasonalized and quadratically detrended monthly SST fields of ctrl and lpd
monthly_ds_dt_ctrl = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_ds_dt_ctrl.nc')
monthly_ds_dt_lpd  = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_ds_dt_lpd.nc' )

### detrending for different time segments

### had: two-factor detrending with natural and anthropogenic forcing signal

In [None]:
# CMIP5 natural and anthropogenic forcing series (read from the GMST folder, time axis not decoded)
MMM_natural = xr.open_dataarray(f'{path_samoc}/GMST/CMIP5_natural.nc', decode_times=False)
MMM_anthro  = xr.open_dataarray(f'{path_samoc}/GMST/CMIP5_anthro.nc' , decode_times=False)
# repeat every value 12 times (presumably yearly -> monthly; TODO confirm the input is yearly) ...
monthly_MMM_natural = np.repeat(MMM_natural, 12)
monthly_MMM_anthro  = np.repeat(MMM_anthro , 12)
# ... and put the repeated series on the time axis of the monthly HadISST data
monthly_MMM_natural = monthly_MMM_natural.assign_coords(time=monthly_had.time)
monthly_MMM_anthro  = monthly_MMM_anthro .assign_coords(time=monthly_had.time)
monthly_MMM_natural.plot()
monthly_MMM_anthro .plot()

In [None]:
# %%time
# # 04:38
# forcings = monthly_MMM_natural.to_dataframe(name='natural').join(
#             monthly_MMM_anthro.to_dataframe(name='anthro'))

# SST_stacked = monthly_ds_had.stack(z=('latitude', 'longitude'))
# ds_anthro   = SST_stacked[0,:].squeeze().copy()
# ds_natural  = SST_stacked[0,:].squeeze().copy()

# # multiple linear regression
# X = sm.add_constant(forcings[['anthro', 'natural']])
# for i, coordinate in tqdm(enumerate(SST_stacked.z)):
#     y = SST_stacked[:, i].values
#     model = sm.OLS(y, X).fit()
#     ds_anthro[i] = model.params['anthro']
#     ds_natural[i] = model.params['natural']

# beta_anthro  = ds_anthro .unstack('z')
# beta_natural = ds_natural.unstack('z')

# ds = xr.merge([{'forcing_anthro': monthly_MMM_anthro}, {'beta_anthro': beta_anthro}])
# ds.to_netcdf(f'{path_prace}/SST/SST_beta_anthro_MMM_monthly_had.nc')

# ds = xr.merge([{'forcing_natural': monthly_MMM_natural}, {'beta_natural':beta_natural}])
# ds.to_netcdf(f'{path_prace}/SST/SST_beta_natural_MMM_monthly_had.nc')

In [None]:
# the multiple linear regression that produces the coefficient maps is commented out
# above, so the maps are read back from the files that cell wrote
# (each file holds the forcing series and the matching `beta_*` variable)
beta_natural = xr.open_dataset(f'{path_prace}/SST/SST_beta_natural_MMM_monthly_had.nc', decode_times=False).beta_natural
beta_anthro  = xr.open_dataset(f'{path_prace}/SST/SST_beta_anthro_MMM_monthly_had.nc' , decode_times=False).beta_anthro

# regression coefficient maps: natural forcing (left), anthropogenic forcing (right)
f, ax = plt.subplots(1,2, figsize=(12,5))
beta_natural.plot(ax=ax[0])
beta_anthro.plot(ax=ax[1])
ax[0].set_title('natural')
ax[1].set_title('anthropogenic')

In [None]:
%%time
# two-factor detrending of HadISST: subtract the anthropogenic and the natural forcing series,
# each scaled by its grid-point regression coefficient, from the deseasonalized field
# (the SST time axis is first replaced by that of the forcing series, presumably so the terms align)
monthly_ds_dt_had = monthly_ds_had.assign_coords(time=monthly_MMM_anthro.time) \
                    - beta_anthro*monthly_MMM_anthro \
                    - beta_natural*monthly_MMM_natural
# stored under the `tfdt` (rather than `dt`) suffix
monthly_ds_dt_had.to_netcdf(f'{path_prace}/SST/SST_monthly_ds_tfdt_had.nc')

In [None]:
# reload the two-factor detrended HadISST field from disk
monthly_ds_dt_had = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_ds_tfdt_had.nc')

# 3. EOF analysis

## subselect Pacific data

In [None]:
def shift_had(da):
    """ maps lons to [0,360) and orders them ascending to make Pacific contiguous

    input:
    da .. xr DataArray with a `longitude` dimension coordinate

    output:
    copy of `da` with wrapped, ascending longitudes and the data reordered accordingly
    """
    wrapped = da.assign_coords(longitude=(da.longitude+360)%360)
    # sorting instead of rolling by a fixed 180 grid points works for any grid spacing;
    # for an ascending grid of 360 points starting at -180 both give the same result
    return wrapped.sortby('longitude')

def focus_data(da):
    """ trims all-NaN rows and columns, leaving the rectangle around the Pacific data

    The horizontal coordinate names are looked up in `da.coords`:
    (t_lat, t_lon) for ctrl, (nlat, nlon) for lpd, (latitude, longitude) for had.
    Raises a ValueError if none of these latitude names is present.
    """
    coord_pairs = (
        ('t_lat', 't_lon'),         # ctrl
        ('nlat', 'nlon'),           # lpd
        ('latitude', 'longitude'),  # had
    )
    for lat_name, lon_name in coord_pairs:
        if lat_name in da.coords:
            break
    else:
        raise ValueError('xr DataArray does not have the right lat/lon coords.')
    # drop only those latitude/longitude lines that contain no valid data at all
    trimmed = da.dropna(dim=lat_name, how='all')
    return trimmed.dropna(dim=lon_name, how='all')

In [None]:
# visual check: first time step of the detrended HadISST field on the shifted longitude axis
shift_had(monthly_ds_dt_had[0,:,:]).plot()

### Pacific Masks

In [None]:
# detrended monthly fields in the order ctrl, lpd, had (indexed by position in the mask loop below)
monthly_ds_dt = [monthly_ds_dt_ctrl, monthly_ds_dt_lpd, monthly_ds_dt_had]

In [None]:
%%time
# 4min 15s
f, ax = plt.subplots(3,3, figsize=(12,8), sharex='col')
for i, extent in enumerate(['38S', 'Eq', '20N']):
    if extent=='38S':     latS, lonE = -38, 300
    elif extent=='Eq':    latS, lonE =   0, 285
    elif extent=='20N':   latS, lonE =  20, 255
    for j, domain in enumerate(['ocn_rect', 'ocn_low', 'ocn_had']):
        run = ['ctrl', 'lpd', 'had'][j]
        da = monthly_ds_dt[j]
        AREA = xr_AREA(domain=domain)
        Pac_MASK = mask_box_in_region(domain=domain, mask_nr=2,
                                      bounding_lats=(latS,68),
                                      bounding_lons=(110,lonE))
        area = AREA.where(Pac_MASK)
        if j==2:  area = shift_had(area)
        area = focus_data(area)
        area.to_netcdf(f'{path_prace}/geometry/AREA_{extent}_{domain}.nc')
        
        Pac_MASK.plot(ax=ax[i,j])
        print(f'{domain:10}, {extent:10}, {AREA.where(Pac_MASK).sum().values:5.2e}')
        
        da = da.where(Pac_MASK)
        if j==2:  da = shift_had(da)
        da = focus_data(da)
        da.to_netcdf(f'{path_prace}/SST/SST_monthly_ds_dt_{extent}_{run}.nc')

## actual EOF analysis

In [None]:
%%time
# 4:45 for 38S_ctrl, 5:07 for 38S_lpd, 3:42 for 38S_had : total 11:08
# 2:50 for Eq_ctrl,  : total 11:08
# total: 22min 19s
for i, extent in tqdm(enumerate(['38S', 'Eq', '20N'])):
    for j, domain in tqdm(enumerate(['ocn_rect', 'ocn_low', 'ocn_had'])):
        run = ['ctrl', 'lpd', 'had'][j]
        da = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_ds_dt_{extent}_{run}.nc')
        AREA = xr.open_dataarray(f'{path_prace}/geometry/AREA_{extent}_{domain}.nc')
        fn = f'{path_prace}/SST/PMV_EOF_{extent}_{run}.nc'
        AI().EOF_SST_analysis(xa=da, weights=AREA, neofs=1, npcs=1, fn=fn)

In [None]:
# eof, pc = PMV_EOF_indices(run='lpd', extent='20N')

In [None]:
def _open_PMV(extent, run):
    """ opens the EOF analysis file of one extent/run without decoding the time axis """
    return xr.open_dataset(f'{path_prace}/SST/PMV_EOF_{extent}_{run}.nc', decode_times=False)

PMV_38S_ctrl = _open_PMV('38S', 'ctrl')
PMV_38S_lpd  = _open_PMV('38S', 'lpd')
PMV_38S_had  = _open_PMV('38S', 'had')
PMV_Eq_ctrl  = _open_PMV('Eq', 'ctrl')
PMV_Eq_lpd   = _open_PMV('Eq', 'lpd')
PMV_Eq_had   = _open_PMV('Eq', 'had')
PMV_20N_ctrl = _open_PMV('20N', 'ctrl')
PMV_20N_lpd  = _open_PMV('20N', 'lpd')
PMV_20N_had  = _open_PMV('20N', 'had')

In [None]:
# TPI index time series of the three data sets, time axis not decoded
# (these files are not created in the visible part of this notebook)
TPI_ctrl = xr.open_dataarray(f'{path_prace}/SST/TPI_ctrl.nc', decode_times=False)
TPI_lpd  = xr.open_dataarray(f'{path_prace}/SST/TPI_lpd.nc' , decode_times=False)
TPI_had  = xr.open_dataarray(f'{path_prace}/SST/TPI_had.nc' , decode_times=False)

In [None]:
# inspect the undecoded TPI time axis divided by 365 (presumably days -> years; TODO confirm units)
TPI_had.time/365

In [None]:
# show which data directory is in use
path_prace

In [None]:
# quick look at the ctrl 38S PC: unfiltered series and its chebychev-filtered version,
# both with the first and last 7 time steps dropped and the time axis shifted by 100
# NOTE(review): both lines share colour, linestyle and label, and no legend is drawn
plt.plot(PMV_38S_ctrl.time[7:-7]    +100,  PMV_38S_ctrl.pcs[7:-7], c='C0', lw=2, ls='--', label='PC 38S')
plt.plot(PMV_38S_ctrl.time[7:-7]    +100,  chebychev(PMV_38S_ctrl.pcs, 13*12)[7:-7], c='C0', lw=2, ls='--', label='PC 38S')

In [None]:
# overview figure: filtered PCs of the three extents (38S, Eq, 20N) and the filtered TPI
# colour encodes the data set (C0: ctrl, C1: lpd, C2: had), linestyle encodes the index
plt.figure(figsize=(12,5))
plt.tick_params(labelsize=14)
plt.axhline(0, c='k', lw=.5)
# time axes: ctrl PC time is used as is and shifted by +100; lpd and had times are divided
# by 365 (presumably days -> years; TODO confirm), lpd is additionally shifted by +250
# the first and last 7 time steps of every series are not plotted
L11, = plt.plot(PMV_38S_ctrl.time[7:-7]    +100,  chebychev(PMV_38S_ctrl.pcs, 13*12)[7:-7], c='C0', lw=2, ls='--', label='PC 38S')
L12, = plt.plot(PMV_38S_lpd .time[7:-7]/365+250,  chebychev(PMV_38S_lpd .pcs, 13*12)[7:-7], c='C1', lw=2, ls='--' )
L13, = plt.plot(PMV_38S_had .time[7:-7]/365    ,  chebychev(PMV_38S_had .pcs, 13*12)[7:-7], c='C2', lw=2, ls='--' )
L21, = plt.plot(PMV_Eq_ctrl .time[7:-7]    +100,  chebychev(PMV_Eq_ctrl .pcs, 13*12)[7:-7], c='C0', lw=2, ls=':' , label='PC Eq.')
L22, = plt.plot(PMV_Eq_lpd  .time[7:-7]/365+250,  chebychev(PMV_Eq_lpd  .pcs, 13*12)[7:-7], c='C1', lw=2, ls=':'  )
L23, = plt.plot(PMV_Eq_had  .time[7:-7]/365    ,  chebychev(PMV_Eq_had  .pcs, 13*12)[7:-7], c='C2', lw=2, ls=':'  )
L31, = plt.plot(PMV_20N_ctrl.time[7:-7]    +100,  chebychev(PMV_20N_ctrl.pcs, 13*12)[7:-7], c='C0', lw=2, ls='-.' , label='PC 20N')
# the lpd 20N PC is plotted with flipped sign (presumably to match the sign convention of the other PCs; TODO confirm)
L32, = plt.plot(PMV_20N_lpd .time[7:-7]/365+250, -chebychev(PMV_20N_lpd .pcs, 13*12)[7:-7], c='C1', lw=2, ls='-.'  )
L33, = plt.plot(PMV_20N_had .time[7:-7]/365    ,  chebychev(PMV_20N_had .pcs, 13*12)[7:-7], c='C2', lw=2, ls='-.'  )

# TPI: filtered with 13 instead of 13*12 as second chebychev argument, scaled by 5 and offset by +1;
# here the ctrl time axis is divided by 365 as well
L41, = plt.plot(TPI_ctrl.time[7:-7]/365+100, 5*chebychev(TPI_ctrl, 13)[7:-7]+1, c='C0', lw=2, ls='-', label='TPI')
L42, = plt.plot(TPI_lpd .time[7:-7]/365+250, 5*chebychev(TPI_lpd , 13)[7:-7]+1, c='C1', lw=2, ls='-')
L43, = plt.plot(TPI_had .time[7:-7]/365    , 5*chebychev(TPI_had , 13)[7:-7]+1, c='C2', lw=2, ls='-')

plt.xlabel('time [years]')
plt.ylabel('PDO/IPO/TPI indices')
# only the ctrl handles enter the legend, i.e. it explains the linestyles (indices), not the colours
plt.legend(handles=[L11, L21, L31, L41], ncol=4)
plt.tight_layout()
# plt.savefig(f'{path_results}/SST/SST_PMV_ctrl_rcp')

## correlation plots

In [None]:
# %%time
# SST_rect_ctrl = xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_rect_ctrl.nc', decode_times=False)
# SST_rect_rcp  = xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_rect_rcp.nc' , decode_times=False)
# SST_rect_ds_dt_ctrl = lowpass(lowpass(notch(SST_rect_ctrl, 12), 12), 12) - SST_gm_rect_ds_ctrl[:-7]
# SST_rect_ds_dt_rcp  = lowpass(lowpass(notch(SST_rect_rcp , 12), 12), 12) - SST_gm_rect_ds_rcp[:-1]
# SST_rect_ds_dt_ctrl.to_netcdf(f'{path_samoc}/SST/SST_monthly_rect_ds_dt_ctrl.nc')
# SST_rect_ds_dt_rcp .to_netcdf(f'{path_samoc}/SST/SST_monthly_rect_ds_dt_rcp.nc' )

In [None]:
# filtered monthly rect SST fields of ctrl and rcp, as written by the (commented-out) cell above
SST_rect_ds_dt_ctrl = xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_rect_ds_dt_ctrl.nc', decode_times=False)
SST_rect_ds_dt_rcp  = xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_rect_ds_dt_rcp.nc' , decode_times=False)

In [None]:
%%time
# 2:25 min
# lagged regression of the SST fields (first and last 24 time steps, plus 7 more at the end, cut off)
# onto the first PC (last 7 time steps cut off)
# NOTE(review): `lag_linregress_3D`, `Pac_38S_ctrl` and `Pac_38S_rcp` are neither imported nor
# defined in the visible part of this notebook (the EOF results above are named `PMV_*`) -- this
# cell looks like a leftover from an earlier version; confirm before running
# ds_20N_ctrl = lag_linregress_3D(Pac_20N_ctrl.pcs[:-7,0], SST_rect_ds_dt_ctrl[24:-(24+7)], dof_corr=1./(12*13))
ds_38S_ctrl = lag_linregress_3D(Pac_38S_ctrl.pcs[:-7,0], SST_rect_ds_dt_ctrl[24:-(24+7)], dof_corr=1./(12*13))
# ds_20N_rcp  = lag_linregress_3D(-Pac_20N_rcp.pcs[:-7,0], SST_rect_ds_dt_rcp [24:-(24+7)], dof_corr=1./(12*13))
ds_38S_rcp  = lag_linregress_3D(Pac_38S_rcp .pcs[:-7,0], SST_rect_ds_dt_rcp [24:-(24+7)], dof_corr=1./(12*13))


In [None]:
# tag the regression datasets with the first and last year they cover
# NOTE(review): `ds_20N_ctrl` and `ds_20N_rcp` are only assigned in commented-out lines of the
# regression cell above, so these loops raise a NameError unless those lines are re-enabled
for ds in [ds_20N_ctrl, ds_38S_ctrl]:
    ds.attrs['first_year'] = 102
    ds.attrs['last_year']  = 297
for ds in [ds_20N_rcp, ds_38S_rcp]:
    ds.attrs['first_year'] = 2002
    ds.attrs['last_year']  = 2097

In [None]:
# inspect the ctrl 20N regression dataset
# NOTE(review): `ds_20N_ctrl` is only assigned in a commented-out line of the regression cell
ds_20N_ctrl

In [None]:
# regression map of the 20N index (labelled PDO) for ctrl; fn=None, i.e. no file name is passed
# NOTE(review): `regr_map` is not imported in the visible part of this notebook
regr_map(ds=ds_20N_ctrl, index='PDO', run='ctrl', fn=None)

In [None]:
# regression map of the 38S index (labelled IPO) for ctrl; fn=None, i.e. no file name is passed
# NOTE(review): `regr_map` is not imported in the visible part of this notebook
regr_map(ds=ds_38S_ctrl, index='IPO', run='ctrl', fn=None)

In [None]:
# regression map of the 20N index (labelled PDO) for rcp; fn=None, i.e. no file name is passed
# NOTE(review): `regr_map` is not imported in the visible part of this notebook
regr_map(ds=ds_20N_rcp, index='PDO', run='rcp', fn=None)

In [None]:
# regression map of the 38S index (labelled IPO) for rcp; fn=None, i.e. no file name is passed
# NOTE(review): `regr_map` is not imported in the visible part of this notebook
regr_map(ds=ds_38S_rcp, index='IPO', run='rcp', fn=None)

In [None]:
# record the cartopy version this notebook was run with
cartopy.__version__