# SSA tutorial
_https://www.kaggle.com/jdarcy/introducing-ssa-for-time-series-decomposition#Decomposing-Time-Series-Data-With-Singular-Spectrum-Analysis_

In [None]:
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

from pymssa import MSSA
matplotlib.rc_file('../rc_file')
%matplotlib widget

# get it to work with xarray 

In [None]:
# Build a synthetic series from known ingredients (quadratic trend, two
# sinusoids, uniform noise) so the decomposition can be checked against them.
N = 200  # number of time steps in the toy series
t = np.arange(N)

trend = 0.001 * (t - 100)**2
p1, p2 = 20, 30  # periods of the two sinusoids, in time steps
periodic1 = 2 * np.sin(2*np.pi*t/p1)
periodic2 = 0.75 * np.sin(2*np.pi*t/p2)

np.random.seed(123)  # fixed seed so the noisy series is the same on every run
noise = 2 * (np.random.rand(N) - 0.5)  # rand() is uniform on [0, 1) -> noise on [-1, 1)
F = trend + periodic1 + periodic2 + noise

# Draw the summed series first (thick line), then each ingredient, in the
# same order as the legend entries below.
curves = [
    (F, {"lw": 2.5}),
    (trend, {"alpha": 0.75}),
    (periodic1, {"alpha": 0.75}),
    (periodic2, {"alpha": 0.75}),
    (noise, {"alpha": 0.5}),
]
for series, line_style in curves:
    plt.plot(t, series, **line_style)

plt.legend(["Toy Series ($F$)", "Trend", "Periodic #1", "Periodic #2", "Noise"])
plt.xlabel("$t$")
plt.ylabel("$F(t)$")
plt.title("The Toy Time Series and its Components");

In [None]:
# Wrap the toy series in a 1-D DataArray. The integer time coordinate is sized
# from F itself rather than a hard-coded 200, so it stays valid if N changes.
da = xr.DataArray(data=F, coords={'time': np.arange(len(F))}, dims='time')

In [None]:
# Fit an MSSA model to the 1-D toy series with a 70-step window.
# NOTE(review): n_components=None presumably keeps every component - confirm
# against the pymssa documentation.
mssa_F = MSSA(
    n_components=None,
    window_size=70,
    verbose=True,
)
mssa_F.fit(da)

# The plotting cell below indexes components_ as [series, time step, component].
print(np.shape(mssa_F.components_))

In [None]:
# Left panel: the first 10 components of the toy series.
# Right panel: w-correlation matrix computed from all of its components.
f, ax = plt.subplots(1, 2)

# The toy data holds a single series, so index 0 on the first axis selects it.
toy_components = mssa_F.components_[0, :, :]

for i in range(10):
    ax[0].plot(toy_components[:, i])

# This must use mssa_F (the model fitted to the toy series); `mssa` is only
# created further down, for the 2-D test dataset.
ax[1].imshow(mssa_F.w_correlation(toy_components))

## making a test dataset

In [None]:
# Square N x N grid covering [-1, 1] x [-1, 1]; R is the distance from the
# grid centre.
N = 11
# One shared axis for both directions keeps X and Y numerically identical up
# to transposition, with the end points exactly at -1 and 1.
grid_axis = np.linspace(-1, 1, N)
X, Y = np.meshgrid(grid_axis, grid_axis)
R = np.sqrt(X**2 + Y**2)

# Show the three fields side by side: diverging colour maps for the signed
# coordinates, a sequential one for the non-negative radius.
f, ax = plt.subplots(1, 3)
for i, (field, cmap) in enumerate(zip([X, Y, R], ['RdBu', 'RdBu', 'plasma'])):
    im = ax[i].imshow(field, cmap=cmap)
    plt.colorbar(im, ax=ax[i], orientation='horizontal', fraction=0.25)

In [None]:
def Gaussian_2D(x, y, x0, y0, sigmax, sigmay):
    """Evaluate an unnormalised, axis-aligned 2-D Gaussian bump.

    Computes ``exp(-((x - x0) / sigmax)**2) * exp(-((y - y0) / sigmay)**2)``.
    The peak value is 1 at ``(x0, y0)``. There is no factor 1/2 in the
    exponent, so ``sigmax`` and ``sigmay`` are width scales rather than
    standard deviations.

    Works elementwise: scalars and NumPy arrays are both accepted and follow
    the usual NumPy broadcasting rules.
    """
    dx = (x - x0) / sigmax
    dy = (y - y0) / sigmay
    return np.exp(-dx**2) * np.exp(-dy**2)
# Build T frames of an N x N field: a Gaussian blob (width 0.5) whose centre
# travels on a circle of radius 0.5 around the origin, completing five
# revolutions (10*pi radians) over the T frames.
T = 200
angles = np.linspace(0, 10*np.pi, T)

# Trailing singleton axes make each frame's centre broadcast against the
# (N, N) grid, so one call yields the whole (T, N, N) stack without a Python
# loop over frames and grid points.
centre_x = 0.5 * np.sin(angles)[:, np.newaxis, np.newaxis]
centre_y = 0.5 * np.cos(angles)[:, np.newaxis, np.newaxis]
A = Gaussian_2D(x=X, y=Y, x0=centre_x, y0=centre_y, sigmax=.5, sigmay=.5)

In [None]:
# Strip of N panels showing every 4th frame (frames 0, 4, 8, ...).
f, ax = plt.subplots(1, N, figsize=(12, 3))
for panel_idx, panel in enumerate(ax):
    panel.imshow(A[4 * panel_idx])

In [None]:
# Set up an image artist on a fixed [0, 1] colour scale, then swap its data
# frame by frame.
fig, ax = plt.subplots()
blank_frame = np.zeros((N, N))
im = plt.imshow(blank_frame, vmin=0, vmax=1)


def animate(i):
    """Show frame ``i`` of ``A`` in the image artist and return the artist in a list."""
    im.set_array(A[i])
    return [im]


# Only the first fifth of the frames is animated.
ani = FuncAnimation(fig, animate, frames=T // 5)

In [None]:
# Evaluate the animation as the cell's last expression so the notebook displays it.
ani

## xarray

In [None]:
# Wrap the frame stack in a (time, lat, lon) DataArray with plain integer
# index coordinates. Lengths come from A itself rather than hard-coded
# 200/11/11, so the cell keeps working if T or N change.
n_time, n_lat, n_lon = A.shape
da = xr.DataArray(
    data=A,
    coords={
        'time': np.arange(n_time),
        'lat': np.arange(n_lat),
        'lon': np.arange(n_lon),
    },
    dims=('time', 'lat', 'lon'),
)

In [None]:
# Evaluate the DataArray as the cell's last expression so the notebook shows its summary.
da

In [None]:
# Model for the gridded dataset; same settings as the one used for the toy series.
mssa_settings = {
    'n_components': None,
    'window_size': 70,
    'verbose': True,
}
mssa = MSSA(**mssa_settings)

In [None]:
# Collapse the two spatial dimensions into a single 'allpoints' dimension, so
# every grid point becomes one column of a (time, allpoints) array.
spatial_dims = ('lat', 'lon')
stacked = da.stack(allpoints=spatial_dims)
stacked

# Leftover sketch of a per-point computation followed by unstacking back to
# lat/lon (xr_linear_trend is not defined in this notebook):
# trend = stacked.groupby('allpoints').apply(xr_linear_trend)
# da_trend = trend.unstack('allpoints')

In [None]:
%%time
# Fit the model to the stacked (time, allpoints) data; %%time reports how long the fit takes.
mssa.fit(stacked)

In [None]:
# Evaluate the stacked array again as the cell's last expression to display it after the fit.
stacked

In [None]:
# Shape of the fitted components array; the plotting cell below indexes it as
# [series, time step, component].
np.shape(mssa.components_)

In [None]:
# One row per selected series (stacked grid points 0, 10 and 100):
# left = its first 10 components, right = the w-correlation matrix of all
# its components.
f, ax = plt.subplots(3, 2, figsize=(12, 12))
for j, k in enumerate([0, 10, 100]):
    series_components = mssa.components_[k, :, :]

    for i in range(10):
        ax[j, 0].plot(series_components[:, i])

    # The matrix does not depend on i, so compute and draw it once per row
    # instead of once per plotted component.
    ax[j, 1].imshow(mssa.w_correlation(series_components))

In [None]:
# Explained variance on the left, explained-variance ratio on the right.
f, ax = plt.subplots(1, 2)
variance_ax, ratio_ax = ax
variance_ax.plot(mssa.explained_variance_)
ratio_ax.plot(mssa.explained_variance_ratio_)

In [None]:
# First row of component_ranks_, restricted to its first 20 columns.
# NOTE(review): presumably rows are rank positions and columns are series -
# confirm against the pymssa documentation.
first_rank_row = mssa.component_ranks_[0, :20]
plt.plot(first_rank_row)

In [None]:
# Full component_ranks_ array as an image; the colour scale is capped at 30
# so that low values stay distinguishable.
ranks_image = plt.imshow(mssa.component_ranks_, vmax=30)
plt.colorbar(ranks_image)

## (automatic) grouping of components

## recovering of pattern

# testing performance