# Spectral decomposition

In this notebook we perform the EOF analysis of the optimal projection patterns obtained with the Gaussian approximation applied to the ERA5 dataset.

In [None]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
%matplotlib widget
matplotlib.rc('font', size=18)
default_colors = plt.rcParams['axes.prop_cycle'].by_key()['color']


import pandas as pd
import xarray as xr

from tqdm.notebook import tqdm

import sys
sys.path.append('../../../Climate-Learning/')

import general_purpose.utilities as ut
import general_purpose.cartopy_plots as cplt
import general_purpose.uplotlib as uplt
# import general_purpose.tables as tbl

# log to stdout
import logging
root_logger = logging.getLogger()
# Use setLevel() instead of assigning `.level`: besides storing the level it
# also invalidates the logging module's internal `isEnabledFor` cache, so
# loggers that were already queried pick up the new threshold.
root_logger.setLevel(logging.INFO)
# Replace whatever handlers are currently installed on the root logger with a
# single handler that writes to stdout (so messages show up in the cell output).
root_logger.handlers = [logging.StreamHandler(sys.stdout)]
# NOTE(review): presumably the indentation unit used by the `ut` logging
# helpers for nested messages — confirm in general_purpose.utilities.
ut.indentation_sep = '  '

# Directory prefix used by the `fig.savefig(f'{HOME}/...')` calls in this notebook.
HOME = '../../'

## Load data

In [None]:
# Add the PLASIM directory of the Climate-Learning repository to the module
# search path (the path is relative to this notebook's working directory);
# presumably this is where the two modules imported below live.
sys.path.append('../../../Climate-Learning/PLASIM/')

import Learn2_new as ln

import probabilistic_regression as pr
# NOTE(review): called only for its side effects; what `enable()` changes is not
# visible from this notebook — confirm in probabilistic_regression.
pr.enable()

In [None]:
## TODO: change mylocal
# Read the run configuration from the JSON file sitting next to this notebook.
config_dict = ut.json2dict('config_T14_tau0_epsilon1.json')

# Re-insert the `year_permutation` entry as a numpy array
# (NOTE(review): presumably it comes out of the JSON file as a plain list — confirm).
year_permutation = np.array(ut.extract_nested(config_dict, 'year_permutation'))
ut.set_values_recursive(config_dict, {'year_permutation': year_permutation})

In [None]:
# Build the trainer from the configuration loaded above.
trainer = ln.Trainer(config=config_dict)

In [None]:
# The return value is bound to `_`, i.e. discarded. The next cell reads
# `trainer.X` and `trainer.Y` — presumably populated by this call; confirm in Learn2_new.
_ = trainer.prepare_data()

In [None]:
# Select one cross-validation fold and split predictors (X) and targets (A)
# into training and validation parts.
fold = 0
nfolds = ut.extract_nested(config_dict, 'nfolds')
X_tr, A_tr, X_va, A_va_ = ln.k_fold_cross_val_split(fold, trainer.X, trainer.Y, nfolds=nfolds)

# Per-feature statistics, computed along the first axis of the *training* set only.
feature_std = np.std(X_tr, axis=0)
feature_mean = np.mean(X_tr, axis=0)

# The reshaper is built from the mask of features with non-zero standard deviation.
# NOTE(review): presumably `reshape` keeps only those features and flattens them
# into one axis — confirm in general_purpose.utilities.
reshaper = ut.Reshaper(feature_std != 0)
X_std = reshaper.reshape(feature_std)
X_mean = reshaper.reshape(feature_mean)

# Standardize both sets with the training statistics.
X_tr = reshaper.reshape(X_tr)
X_tr = (X_tr - X_mean)/X_std
X_va = reshaper.reshape(X_va)
X_va = (X_va - X_mean)/X_std

X_tr.shape, X_va.shape

In [None]:
from scipy import sparse
# W is stored on disk as a scipy sparse matrix; convert it to a dense array so
# that it can be combined with the dense covariance matrix further down
# (`Sigma_XX + eps*W`, `eigvecs.T @ W @ eigvecs`).
W = sparse.load_npz('W.npz').toarray()
W.shape

In [None]:
# Longitude / latitude coordinates saved as .npy files
# (NOTE(review): presumably 1-D coordinate vectors — confirm).
lon = np.load('../../lon.npy')
lat = np.load('../../lat.npy')

# 2-D coordinate grids, passed to the map-plotting routine in the EOF figure.
LON, LAT = np.meshgrid(lon,lat)

## Compute covariance matrix

In [None]:
# Append the target as an extra last column to the predictors, so that a single
# call to np.cov yields both the predictor covariance and the predictor-target
# covariance.
A_column = A_tr.reshape(-1,1)
XAs = np.concatenate([X_tr, A_column], axis=-1)

# np.cov treats rows as variables, hence the transpose (columns of XAs are the variables).
XAs_cov = np.cov(XAs.T)

# Index of the target variable = number of predictor features.
n_features = XAs_cov.shape[0] - 1
Sigma_XX = XAs_cov[:n_features, :n_features]   # predictor-predictor covariance
Sigma_XA = XAs_cov[n_features, :n_features]    # covariance of each predictor with the target

Sigma_XX.shape

## EOF decomposition

In [None]:
# Sigma_XX is a covariance matrix and therefore symmetric: use `eigh`, which
# guarantees real eigenvalues and an orthonormal set of eigenvectors.
# The general-purpose `eig` can return complex values with spurious imaginary
# parts and non-orthogonal eigenvectors for (near-)degenerate eigenvalues,
# which would invalidate the projections onto the EOF basis used below.
# Note: the sign of each eigenvector is arbitrary.
eigvals, eigvecs = np.linalg.eigh(Sigma_XX)

# Order the modes by decreasing eigenvalue (columns of `eigvecs` are the EOFs).
isort = np.argsort(eigvals)[::-1]
eigvals = eigvals[isort]
eigvecs = eigvecs[:,isort]

cs = eigvecs.T @ Sigma_XA # EOF representation of Correlation map

### Figure S6

In [None]:
# 0-based indices of the EOFs to display (EOF number 1, 100, 300 and 1000).
ns = np.array([1, 100, 300, 1000]) - 1

# One title and one map per selected EOF.
titles = []
eof_panels = []
for n in ns:
    titles.append(fr'$n = {n+1}, \lambda_n = {ut.scientific_notation(eigvals[n], 1)}$')
    # NOTE(review): `inv_reshape` presumably maps the flat eigenvector back onto
    # the original grid — confirm in general_purpose.utilities.
    eof_panels.append(reshaper.inv_reshape(eigvecs[:,n]))

# Stack the maps along the last axis, one slice per panel.
eof_maps = np.concatenate(eof_panels, axis=-1)

n_panels = len(ns)
axes = cplt.mfp(
    LON, LAT, eof_maps,
    titles=titles,
    projections=cplt.ccrs.Orthographic(central_latitude=90),
    extents=None,
    one_fig_layout=100 + 10*n_panels,
    figsize=(5*n_panels, 5),
)

# The first returned element exposes `get_figure()`; use it to get a handle on the figure.
fig = axes[0].get_figure()

# fig.savefig(f'{HOME}/eofs.pdf')

### Figure S7

In [None]:
def running_mean(a, win, mode='mirror'):
    """Smooth a 1-D array with a centered moving average of odd width.

    Parameters
    ----------
    a : np.ndarray
        1-D array to smooth.
    win : int
        Width of the averaging window; must be odd.
    mode : {'mirror', 'keep'}
        How the `win//2` points at each end are treated:
        - 'mirror': the array is extended on both sides by reflecting it about
          its first/last element (the end points themselves are not repeated)
          before averaging.
        - 'keep': the first and last `win//2` points are copied unchanged from `a`.

    Returns
    -------
    np.ndarray
        Smoothed array with the same length as `a`.

    Raises
    ------
    AssertionError
        If `win` is even, or (mode 'mirror') if `a` is too short to be mirrored.
    ValueError
        If `mode` is not one of the supported values.
    """
    assert win % 2, 'win must be odd for this algorithm'
    half = win // 2
    kernel = np.ones(win)/win

    if mode == 'mirror':
        a_ = np.concatenate([a[1:half+1][::-1], a, a[-half-1:-1][::-1]])
        assert a_.shape[0] == a.shape[0] + win - 1, f'extended array.shape = {a_.shape}'
        return np.convolve(a_, kernel, mode='valid')

    if mode == 'keep':
        smoothed = np.convolve(a, kernel, mode='valid')
        # For win == 1 there is no tail to copy; `a[-0:]` would be the *whole*
        # array and double the output length, so handle half == 0 explicitly.
        tail = a[-half:] if half else a[:0]
        return np.concatenate([a[:half], smoothed, tail])

    # Previously an unknown mode silently returned None.
    raise ValueError(f"unknown mode {mode!r}: expected 'mirror' or 'keep'")

In [None]:
# Sanity check: display the shape of the predictor-target covariance vector.
Sigma_XA.shape

In [None]:
# Regularization strengths to compare (0 = no regularization).
epss = np.array([0, 0.001, 0.01, 0.1, 1, 1e1, 1e2, 1e3, 1e5])
css = []  # EOF-basis coefficients of the L2-regularized solutions, one per eps
chs = []  # EOF-basis coefficients of the H2-regularized solutions, one per eps

# L2 reg
# In the EOF basis Sigma_XX is diagonal, so (Sigma_XX + eps*I)^{-1} Sigma_XA
# reduces to an element-wise division of the projected covariance `cs`.
for eps in epss:
    css.append(cs/(eigvals + eps))

# H2 reg
# Here the penalty matrix is W, which is not diagonal in the EOF basis, so the
# linear system has to be solved explicitly and then projected onto the EOFs.
# `solve` is used instead of forming the explicit inverse: it is both cheaper
# and numerically more accurate.
for eps in tqdm(epss):
    chs.append(eigvecs.T @ np.linalg.solve(Sigma_XX + eps*W, Sigma_XA))

In [None]:
# Start from a clean figure number 10.
plt.close(10)
fig, ax = plt.subplots(num=10, figsize=(9,7))

win = 11                                          # width of the smoothing window
colors = plt.cm.jet(np.linspace(0,1, len(epss)))  # one color per regularization strength
xexp = 0.5                                        # the x axis is drawn as n**xexp

x = (np.arange(len(cs)) + 1)**xexp

# Solid lines: smoothed spectrum of each L2-regularized solution, normalized to
# its first value. Smoothing is a running mean of log|coefficient|.
for eps, coeffs, color in zip(epss, css, colors):
    log_amplitude = np.log(np.abs(coeffs))
    sp = np.exp(running_mean(log_amplitude, win))
    ax.plot(x, sp/sp[0], color=color, label=fr'$\epsilon = {f"10^{{{np.log10(eps):.0f}}}" if eps else 0}$')

# Black line: same treatment for the unregularized projection `cs`.
log_amplitude = np.log(np.abs(cs))
sp = np.exp(running_mean(log_amplitude, win))
ax.semilogy(x, sp/sp[0], color='black', label='composite')

# Dashed lines: normalized denominators (eigvals + eps) of the L2 solution.
for eps, color in zip(epss, colors):
    shifted = eigvals + eps
    ax.semilogy(x, shifted/(eigvals[0] + eps), linestyle='dashed', color=color)

# plt.plot(x, (eigvals/eigvals[0]), color='green', linestyle='dashed', label=r'$\lambda_n$')

ax.legend(ncol=2)

ax.set_xlabel('$n$')
ax.set_ylabel('Normalized smoothed spectrum')
ax.set_title(f'$L_2$ regularization')

# Ticks are placed at n**xexp but labelled with n itself.
pticks = np.array([1, 10, 50, 200, 500, 1000, 2000, 2816])
ax.set_xticks(pticks**xexp)
ax.set_xticklabels(pticks)
ax.set_xlim(1, 2817**xexp)

ax.grid()

fig.tight_layout()

fig.savefig(f'{HOME}/EOF-L2.pdf')

In [None]:
# Only the diagonal of `We` is used afterwards (dashed lines of the H2 figure).
We = eigvecs.T @ W @ eigvecs # representation of W in the EOF basis

In [None]:
# Start from a clean figure number 10.
plt.close(10)
fig, ax = plt.subplots(num=10, figsize=(9,7))

win = 11                                          # width of the smoothing window
colors = plt.cm.jet(np.linspace(0,1, len(epss)))  # one color per regularization strength
xexp = 0.5                                        # the x axis is drawn as n**xexp

x = (np.arange(len(cs)) + 1)**xexp

# Solid lines: smoothed spectrum of each H2-regularized solution, normalized to
# its first value. Smoothing is a running mean of log|coefficient|.
for eps, coeffs, color in zip(epss, chs, colors):
    log_amplitude = np.log(np.abs(coeffs))
    sp = np.exp(running_mean(log_amplitude, win))
    ax.plot(x, sp/sp[0], color=color, label=fr'$\epsilon = {f"10^{{{np.log10(eps):.0f}}}" if eps else 0}$')

# Black line: same treatment for the unregularized projection `cs`.
log_amplitude = np.log(np.abs(cs))
sp = np.exp(running_mean(log_amplitude, win))
ax.semilogy(x, sp/sp[0], color='black', label='composite')

# Dashed lines: eigenvalues shifted by eps times the diagonal of W in the EOF
# basis, normalized to the first mode.
We_diag = np.diag(We)
for eps, color in zip(epss, colors):
    sp = eigvals + eps*We_diag
    ax.semilogy(x, sp/sp[0], color=color, linestyle='dashed')
    # plt.semilogy(x, (eigvals + eps)/(eigvals[0] + eps), linestyle='dashed', color=colors[i])

# plt.plot(x, (eigvals/eigvals[0]), color='green', linestyle='dashed', label=r'$\lambda_n$')

ax.legend(ncol=2)

ax.set_xlabel('$n$')
ax.set_ylabel('Normalized smoothed spectrum')
ax.set_title(f'$H_2$ regularization')

# Ticks are placed at n**xexp but labelled with n itself.
pticks = np.array([1, 10, 50, 200, 500, 1000, 2000, 2816])
ax.set_xticks(pticks**xexp)
ax.set_xticklabels(pticks)
ax.set_xlim(1, 2817**xexp)

ax.grid()

fig.tight_layout()

fig.savefig(f'{HOME}/EOF-H2.pdf')