# Pacific variability - PDO / IPO

original definition by _Mantua et al. (1997)_

> The leading EOF of monthly SST anomalies over the North Pacific (after removing the global mean SST anomaly) and its associated PC time series are termed the Pacific Decadal Oscillation (PDO)

---

0. create xr dataarrays of monthly Pacific data only  (from rect data for high res)
    1. North of 20 N
    2. North of Equator
    3. North of 38S

1. deseasonalize, detrend monthly SST data  (emphasis on consistency with other data analysis and not necessarily original definition)
    - HadISST:
        1. calculate monthly deviations (i.e. average difference) from annual mean, then remove this seasonal cycle
        2. two factor detrending with natural and anthropogenic forcing estimates at each grid point
    - CESM output:
        1. calculate monthly deviations (i.e. average difference) from annual mean, then remove this seasonal cycle
        2. remove quadratic trend at each grid point  (for different time segments)

2. EOF analysis of data

3. create annual index, lowpass filter index

4. analysis
    - spectra
    - regression patterns

In [None]:
import os
import sys
import tqdm
import scipy as sp
import numpy as np
import xarray as xr
import cmocean
import cartopy
import cartopy.crs as ccrs
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches

In [None]:
# Notebook setup: inline figures, matplotlib rc settings, automatic module reloading.
%matplotlib inline
# hand bbox_inches=None to the inline backend when it prints figures
%config InlineBackend.print_figure_kwargs={'bbox_inches':None}
# read matplotlib rc settings from the file one directory up
matplotlib.rc_file('../rc_file')
%load_ext autoreload
%autoreload 2
# NOTE(review): presumably meant to exclude numpy, scipy and matplotlib.pyplot from autoreloading — confirm against the `%aimport` syntax
%aimport - numpy - scipy - matplotlib.pyplot

In [None]:
sys.path.append("..")
# from SST import SST_index, EOF_SST_analysis, PMV_EOF_indices
# from maps import map_robinson, map_eq_earth, rect_polygon, regr_map
# from grid import find_array_idx
from paths import path_results, path_samoc, path_prace, file_HadISST
from regions import boolean_mask, global_ocean, gl_ocean_rect, gl_ocean_low, mask_box_in_region
# from plotting import shifted_color_map, discrete_cmap
from timeseries import IterateOutputCESM #, lowpass, chebychev, notch, deseasonalize
from xr_DataArrays import xr_AREA
# from xr_regression import xr_lintrend, xr_linear_trends_2D, xr_linear_trend, ocn_field_regression, lag_linregress_3D, xr_quadtrend

In [None]:
from ab_derivation_SST import DeriveSST as DS

# data preparation

### yearly ocn rect SST

In [None]:
# %%time
# t_bins = np.arange(0,len(ctrl_monthly)+1,12)
# ctrl_yrly = ctrl_monthly.groupby_bins('time', t_bins, right=False).mean(dim='time')
# ctrl_yrly = ctrl_yrly.assign_coords(time_bins=np.arange(100, 300)).rename({'time_bins':'time'})
# ctrl_yrly.to_netcdf(f'{path_results}/SST_yrly_rect_ctrl.nc')

### concatenate monthly SST fields into single file

In [None]:
# %%time
# lpd:  18 mins, years 154-566, 2.3 GB
# DS().generate_monthly_SST_files('ctrl')  # when all SST rect data available

In [None]:
# Monthly SST fields of the ctrl and lpd runs and of the `had` data set.
ctrl_monthly = (
    xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_ctrl.nc')  # file reportedly spans 100_01-301_07
    # skip the first time step, then keep 200*12 monthly fields
    # (i.e. 100_02-300_01 if the file really starts at 100_01)
    .isel(time=slice(1, 200*12 + 1))
)
lpd_monthly = xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_lpd.nc')
# note: the `had` monthly file is read from path_prace, not path_samoc
had_monthly = xr.open_dataarray(f'{path_prace}/SST/SST_monthly_had.nc')

In [None]:
# Yearly SST fields, opened in the order ctrl, lpd, had.
ctrl_yrly, lpd_yrly, had_yrly = map(
    xr.open_dataarray,
    (
        f'{path_results}/SST_yrly_rect_ctrl.nc',
        f'{path_samoc}/SST/SST_yrly_lpd.nc',
        f'{path_samoc}/SST/SST_yrly_had.nc',
    ),
)

In [None]:
# display the yearly ctrl SST DataArray for inspection
ctrl_yrly

In [None]:
# Parallel lists: entry i of `runs`, `monthlys` and `yrlys` belongs to the same data set.
runs = ['ctrl', 'lpd', 'had']
# transpose the (monthly, yearly) pairs into one list per time resolution
monthlys, yrlys = map(list, zip(
    (ctrl_monthly, ctrl_yrly),
    (lpd_monthly, lpd_yrly),
    (had_monthly, had_yrly),
))

### deseasonalize

In [None]:
# every 12th monthly field starting at index 0, relabelled with the yearly time axis
first_of_year = had_monthly.isel(time=slice(0, -1, 12)).assign_coords(time=had_yrly.time)
# time-mean difference between those fields and the yearly means
mean_offset = (first_of_year - had_yrly).mean(dim='time')
mean_offset.plot()
plt.title('avg January SST diff to yearly mean')

In [None]:
# display the directory prefix used for the deseasonalized output files below
path_prace

In [None]:
%%time
# Remove the mean seasonal cycle from every monthly SST data set and write the
# result to `SST_monthly_deseasonalized_<run>.nc` under path_prace.
for run, monthly, yrly in zip(runs, monthlys, yrlys):
    print(run)
    # each yearly field must be matched by exactly 12 monthly fields
    assert len(monthly) == 12*len(yrly)
    temp = monthly.copy()
    # `tqdm` is imported as a module, so the progress bar is `tqdm.tqdm`
    for j in tqdm.tqdm(range(12)):
        # all fields at position j within the year, one per year
        m = monthly.isel(time=slice(j, None, 12))
        # subtract the time-mean offset of this month from the yearly mean
        temp[j::12] -= (m - yrly.assign_coords(time=m.time)).mean(dim='time')
    temp.to_netcdf(f'{path_prace}/SST/SST_monthly_deseasonalized_{run}.nc')

## Pacific Masks

In [None]:
# Plot the Pacific box mask for every combination of southern boundary (rows)
# and grid domain (columns).
# Each entry: (extent label, southern boundary latitude, eastern boundary longitude).
pacific_boxes = [('38S', -38, 300), ('Eq', 0, 285), ('20N', 20, 255)]
f, ax = plt.subplots(3,3, figsize=(12,8), sharex='col')
for i, (extent, latS, lonE) in enumerate(pacific_boxes):
    for j, domain in enumerate(['ocn_rect', 'ocn_low', 'ocn_had']):
        # the area field was computed here before but never used, so it is no longer built
        Pac_MASK = mask_box_in_region(domain=domain, mask_nr=2, bounding_lats=(latS,68), bounding_lons=(110,lonE))
        Pac_MASK.plot(ax=ax[i,j])
# plt.tight_layout()

# 2. deseasonalize and detrend

### 1. global mean SST time series

- time series generated with `SST_data_generation.py`
- HadISST 60S-60N time series is generated in `SST_obs.ipynb`

### 2.1. creating file with monthly SST fields in North Pacific

In [None]:
# Monthly ctrl SST fields of the three Pacific boxes; time axes are left undecoded.
SST_Pac_38S_ctrl, SST_Pac_Eq_ctrl, SST_Pac_20N_ctrl = (
    xr.open_dataarray(fn, decode_times=False)
    for fn in (
        f'{path_samoc}/SST/SST_monthly_Pac_38S_rect_ctrl.nc',
        f'{path_samoc}/SST/SST_monthly_Pac_Eq_rect_ctrl.nc',
        f'{path_samoc}/SST/SST_monthly_Pac_20N_rect_ctrl.nc',
    )
)

### 2.2.1. deseasonalize monthly SST field

In [None]:
%%time
# 6 min
# Deseasonalize the monthly ctrl SST fields of the three Pacific boxes.
# NOTE(review): `deseasonalize` is not imported in the visible cells (its import
# from `timeseries` is commented out) — confirm where it should come from.
SST_Pac_38S_ds_ctrl = deseasonalize(SST_Pac_38S_ctrl)
SST_Pac_Eq_ds_ctrl  = deseasonalize(SST_Pac_Eq_ctrl )
SST_Pac_20N_ds_ctrl = deseasonalize(SST_Pac_20N_ctrl)

In [None]:
# first 120 time steps at grid index (100, 100): original vs. deseasonalized field
raw_sample = SST_Pac_Eq_ctrl[:120, 100, 100]
ds_sample = SST_Pac_Eq_ds_ctrl[:120, 100, 100]
raw_sample.plot()
ds_sample.plot()

the initial bias due to the arbitrary beginning month is the reason we cut off two years of the data on either side

### 3. EOF analysis

In [None]:
# eof, pc = PMV_EOF_indices(run='lpd', extent='20N')

In [None]:
def _open_eof(fn):
    """Open one stored EOF result file without decoding its time axis."""
    return xr.open_dataset(fn, decode_times=False)


# southern boundary 38S
Pac_38S_ctrl = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_38S_ctrl.nc')
Pac_38S_rcp = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_38S_rcp.nc')
Pac_38S_had = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_38S_had.nc')

# southern boundary at the Equator
Pac_Eq_ctrl = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_Eq_ctrl.nc')
Pac_Eq_rcp = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_Eq_rcp.nc')
Pac_Eq_had = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_Eq_had.nc')

# southern boundary 20N
Pac_20N_ctrl = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_20N_ctrl.nc')
Pac_20N_rcp = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_20N_rcp.nc')
Pac_20N_had = _open_eof(f'{path_results}/SST/SST_PDO_EOF_Pac_20N_had.nc')


In [None]:
%%time
# 12 mins for all
# cut off 2 years on either end due to problems with the erroneous trend introduced by the filtering
# EOF analysis of the prepared Pacific SST fields, one call per Pacific box.
# NOTE(review): neither the `SST_Pac_*_ds_dt_dm_*` arrays nor `EOF_SST_analysis`
# are defined/imported in the visible cells — confirm they exist before running.
for j, run in enumerate(['ctrl', 'rcp', 'had']):
    object_list = [[SST_Pac_38S_ds_dt_dm_ctrl, SST_Pac_Eq_ds_dt_dm_ctrl, SST_Pac_20N_ds_dt_dm_ctrl],
                   [SST_Pac_38S_ds_dt_dm_rcp , SST_Pac_Eq_ds_dt_dm_rcp , SST_Pac_20N_ds_dt_dm_rcp ],
                   [SST_Pac_38S_ds_dt_dm_had , SST_Pac_Eq_ds_dt_dm_had , SST_Pac_20N_ds_dt_dm_had ]
                  ][j]
    if j<2:
        # Only the 'had' run is processed; 'ctrl' and 'rcp' are skipped.
        # The settings that used to follow this `continue` (domain 'ocn_rect',
        # lonE_list [285, 285, 285], area weights restricted to the Pacific mask)
        # could never execute and were removed.
        continue
    domain = 'ocn_had'
    # eastern boundary longitude of the 38S, Eq and 20N boxes
    lonE_list = [300, 285, 255]
    for i, r in enumerate(['Pac_38S', 'Pac_Eq', 'Pac_20N']):
        print(i)
        SST_object = object_list[i]
        # presumably the file the EOF result is stored in — confirm in EOF_SST_analysis
        fn = f'{path_results}/SST/SST_PDO_EOF_{r}_{run}.nc'
        latS = [-38,0,20][i]
        lonE = lonE_list[i]
        print(SST_object)
        Pac_MASK = mask_box_in_region(domain=domain, mask_nr=2, bounding_lats=(latS,68), bounding_lons=(110,lonE))
        # weights are the area of the whole domain; the mask is only applied for the printout
        Pac_area = xr_AREA(domain)
        print(Pac_area.where(Pac_MASK, drop=True))
        # the first and last 24 time steps are dropped before the analysis
        eof, pc  = EOF_SST_analysis(xa=SST_object[24:-24], weights=Pac_area, fn=fn)

In [None]:
# plot the `pcs` variable of the three `had` EOF data sets, 20N first
Pac_20N_had['pcs'].plot()
Pac_Eq_had['pcs'].plot()
Pac_38S_had['pcs'].plot()

In [None]:
# TPI time series of the ctrl and rcp runs; time axes are left undecoded
TPI_ctrl, TPI_rcp = (
    xr.open_dataarray(fn, decode_times=False)
    for fn in (
        f'{path_results}/SST/TPI_ctrl.nc',
        f'{path_results}/SST/TPI_rcp.nc',
    )
)

In [None]:
# Compare the filtered PC time series of the three Pacific EOF domains with the
# filtered TPI, for the ctrl (colour C0) and rcp (colour C1) runs.
# NOTE(review): `chebychev` is not imported in the visible cells (its import from
# `timeseries` is commented out) — confirm where it should come from.
plt.figure(figsize=(12,5))
plt.tick_params(labelsize=14)
plt.axhline(0, c='k', lw=.5)
# x axis: time/12 plus a start offset — presumably months converted to years, TODO confirm units
L1, = plt.plot(Pac_38S_ctrl.time/12+100, chebychev(Pac_38S_ctrl.pcs, 13*12), c='C0', lw=1, ls='--', label='PC 38S')
L2, = plt.plot(Pac_38S_rcp .time/12+300, chebychev(Pac_38S_rcp .pcs, 13*12), c='C1', lw=1, ls='--' )
L3, = plt.plot(Pac_Eq_ctrl .time/12+100, chebychev(Pac_Eq_ctrl .pcs, 13*12), c='C0', lw=1, ls=':' , label='PC Eq.')
L4, = plt.plot(Pac_Eq_rcp  .time/12+300, chebychev(Pac_Eq_rcp  .pcs, 13*12), c='C1', lw=1, ls=':'  )
L5, = plt.plot(Pac_20N_ctrl.time/12+100, chebychev(Pac_20N_ctrl.pcs, 13*12), c='C0', lw=3, ls='-' , label='PC 20N')
# the rcp 20N PC is plotted with flipped sign — presumably to match the ctrl pattern, confirm
L6, = plt.plot(Pac_20N_rcp .time/12+300, -chebychev(Pac_20N_rcp .pcs, 13*12), c='C1', lw=3, ls='-'  )

# TPI is scaled by 5 for plotting; time/365 presumably converts days to years — TODO confirm
L7, = plt.plot(TPI_ctrl.time/365     , 5*chebychev(TPI_ctrl, 13), c='C0', lw=2, ls='-.', label='TPI')
L8, = plt.plot(TPI_rcp .time/365-1700, 5*chebychev(TPI_rcp , 13), c='C1', lw=2, ls='-.')

plt.xlabel('time [years]'             , fontsize=16)
plt.ylabel('PDO/IPO/TPI indices', fontsize=16)
# only the ctrl handles go into the legend, so every index is listed once
plt.legend(handles=[L1, L3, L5, L7],ncol=4, fontsize=16)
plt.tight_layout()
# no file extension given, so matplotlib's default save format applies
plt.savefig(f'{path_results}/SST/SST_PMV_ctrl_rcp')

## correlation plots

In [None]:
# %%time
# SST_rect_ctrl = xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_rect_ctrl.nc', decode_times=False)
# SST_rect_rcp  = xr.open_dataarray(f'{path_samoc}/SST/SST_monthly_rect_rcp.nc' , decode_times=False)
# SST_rect_ds_dt_ctrl = lowpass(lowpass(notch(SST_rect_ctrl, 12), 12), 12) - SST_gm_rect_ds_ctrl[:-7]
# SST_rect_ds_dt_rcp  = lowpass(lowpass(notch(SST_rect_rcp , 12), 12), 12) - SST_gm_rect_ds_rcp[:-1]
# SST_rect_ds_dt_ctrl.to_netcdf(f'{path_samoc}/SST/SST_monthly_rect_ds_dt_ctrl.nc')
# SST_rect_ds_dt_rcp .to_netcdf(f'{path_samoc}/SST/SST_monthly_rect_ds_dt_rcp.nc' )

In [None]:
# stored `SST_monthly_rect_ds_dt_*` fields of the ctrl and rcp runs; time axes are left undecoded
SST_rect_ds_dt_ctrl, SST_rect_ds_dt_rcp = (
    xr.open_dataarray(fn, decode_times=False)
    for fn in (
        f'{path_samoc}/SST/SST_monthly_rect_ds_dt_ctrl.nc',
        f'{path_samoc}/SST/SST_monthly_rect_ds_dt_rcp.nc',
    )
)

In [None]:
%%time
# 2:25 min
# Regress the `SST_rect_ds_dt_*` fields onto column 0 of the stored PCs
# (presumably the leading PC — confirm).
# NOTE(review): `lag_linregress_3D` is not imported in the visible cells (its import
# from `xr_regression` is commented out).
# NOTE(review): the 20N regressions are commented out here, but later cells use
# `ds_20N_ctrl` / `ds_20N_rcp`; those cells raise NameError unless the names are
# defined elsewhere.
# ds_20N_ctrl = lag_linregress_3D(Pac_20N_ctrl.pcs[:-7,0], SST_rect_ds_dt_ctrl[24:-(24+7)], dof_corr=1./(12*13))
ds_38S_ctrl = lag_linregress_3D(Pac_38S_ctrl.pcs[:-7,0], SST_rect_ds_dt_ctrl[24:-(24+7)], dof_corr=1./(12*13))
# ds_20N_rcp  = lag_linregress_3D(-Pac_20N_rcp.pcs[:-7,0], SST_rect_ds_dt_rcp [24:-(24+7)], dof_corr=1./(12*13))
ds_38S_rcp  = lag_linregress_3D(Pac_38S_rcp .pcs[:-7,0], SST_rect_ds_dt_rcp [24:-(24+7)], dof_corr=1./(12*13))


In [None]:
# Tag each regression data set with `first_year` / `last_year` attributes.
# NOTE(review): `ds_20N_ctrl` and `ds_20N_rcp` are only assigned in commented-out
# lines of the visible cells — confirm they exist before running.
for ds in (ds_20N_ctrl, ds_38S_ctrl):
    ds.attrs.update(first_year=102, last_year=297)
for ds in (ds_20N_rcp, ds_38S_rcp):
    ds.attrs.update(first_year=2002, last_year=2097)

In [None]:
# display the 20N ctrl regression result
# NOTE(review): `ds_20N_ctrl` is only assigned in a commented-out line of the visible cells
ds_20N_ctrl

In [None]:
# regression map for the 20N ctrl result, labelled 'PDO'; fn=None (presumably: do not save — confirm)
# NOTE(review): `regr_map` is not imported in the visible cells (its import from `maps` is commented out),
# and `ds_20N_ctrl` is only assigned in a commented-out line
regr_map(ds=ds_20N_ctrl, index='PDO', run='ctrl', fn=None)

In [None]:
# regression map for the 38S ctrl result, labelled 'IPO'; fn=None (presumably: do not save — confirm)
# NOTE(review): `regr_map` is not imported in the visible cells (its import from `maps` is commented out)
regr_map(ds=ds_38S_ctrl, index='IPO', run='ctrl', fn=None)

In [None]:
# regression map for the 20N rcp result, labelled 'PDO'; fn=None (presumably: do not save — confirm)
# NOTE(review): `regr_map` is not imported in the visible cells (its import from `maps` is commented out),
# and `ds_20N_rcp` is only assigned in a commented-out line
regr_map(ds=ds_20N_rcp, index='PDO', run='rcp', fn=None)

In [None]:
# regression map for the 38S rcp result, labelled 'IPO'; fn=None (presumably: do not save — confirm)
# NOTE(review): `regr_map` is not imported in the visible cells (its import from `maps` is commented out)
regr_map(ds=ds_38S_rcp, index='IPO', run='rcp', fn=None)

In [None]:
# display the installed cartopy version
cartopy.__version__