In [1]:
import sys, os
sys.path.append(os.path.join(os.path.dirname('.'), '..','src'))

import numpy as np
from basis import Bspline
from fda import fpca, mafr
from utils import GP, Matern, whiteNoise, structNoise
import tqdm
np.random.seed(3) 

# Forecasting Surface displacement Dataset

In this notebook we forecast our surface displacement data set, described [here]. We use the same technique as described in this [notebook]. The only difference is that we now have a single real-world dataset, so the simulations differ only in the realisation of the noise component that is added to it.

[here]: ./data_generation.ipynb
[notebook]: ./simulation.ipynb


# Setup

In the following code block we set up the domain parameters, a basis system for our functional decomposition, and the various inner products needed for the decomposition methodology. These are constant and unaffected by the noise processes, so we can calculate them just once for the whole notebook. In particular, we set our basis to have $25$ basis functions in each dimension of our spatial functional data.

In [2]:
## Domain parameters: two spatial grid sizes and the number of time points.
S1, S2, T = 520, 531, 128
t = np.linspace(0, 1, T)

## Tuning constants.
# NDERIV is the derivative order handed to bs.penalty for the penalty P and
# passed on to fpca; LOG_LAMBDA is the smoothing parameter used by the
# 'reg-*' models (presumably on a log scale, going by the name -- confirm).
NDERIV = 2
LOG_LAMBDA = -6.0

## Maximum forecast size, training data set, and components to use.
STEPS = 25
N_INIT = 90
N_COMP = 3

## Basis system: the same 1-D B-spline basis (25 functions) is evaluated on
## both spatial grids and combined through a Kronecker product.
# NOTE(review): the third Bspline argument (4) is presumably the spline
# order -- confirm against basis.Bspline.
bs = Bspline((-1, 1), 25, 4)
grid_s1 = np.linspace(-1, 1, S1)
grid_s2 = np.linspace(-1, 1, S2)
B = np.kron(bs(grid_s1), bs(grid_s2))


def _kron_sum(mat):
    """Return kron(I, mat) + kron(mat, I), with I the bs.K x bs.K identity."""
    identity = np.eye(bs.K)
    return np.kron(identity, mat) + np.kron(mat, identity)


## Penalties for regularisation and mafr.
# J uses the order-0 penalty, P the order-NDERIV penalty.
J = _kron_sum(bs.penalty(0))
P = _kron_sum(bs.penalty(NDERIV))

## Observed surface displacement data (the single real-world data set).
# NOTE(review): downstream code slices the first axis of Y with time indices,
# so Y is presumably (time x flattened space) -- confirm against the data file.
SIM_PATH = '../data/surf_disp.npz'
data = np.load(SIM_PATH)
Y = data['SD']

## Noise Generation
def generateNoise(label):
    """Draw one noise realisation for the requested noise scenario.

    Parameters
    ----------
    label : str
        'ln' calls ``whiteNoise(5.0, Y.shape)``; 'hn' calls
        ``whiteNoise(20.0, Y.shape)``. Any other value (the notebook uses
        'sn') falls through to ``structNoise``.

    Returns
    -------
    Whatever the selected noise helper returns; the caller adds it to ``Y``.
    """
    if label == 'ln':
        return whiteNoise(5.0, Y.shape)
    if label == 'hn':
        return whiteNoise(20.0, Y.shape)
    # Catch-all branch: every remaining label gets structured noise, generated
    # on the (Y.shape[0], S1, S2) grid.
    struct_shape = (Y.shape[0], S1, S2)
    return structNoise(20.0, struct_shape, l=0.5**2, scale_percent=10)

# Models
The following code block runs the forecasting using each decomposition for each of the low-noise, high-noise and structured-noise scenarios. We repeat this for 100 simulations.

In [3]:
NSIM = 100
noise_labels = ['ln', 'hn', 'sn']
models = ['fpca', 'mafr','reg-fpca','reg-mafr']

# np.savez does not create missing directories. Make sure the output folder
# exists *before* the long-running loop, so a missing folder cannot throw
# away the whole run at the final save step.
os.makedirs('../results', exist_ok=True)

# One (NSIM x STEPS) array per model/noise combination; row i holds the RMSE
# at each forecast step for simulation i.
results = {m+'_'+n: np.zeros((NSIM, STEPS)) for m in models for n in noise_labels}

# Loop-invariant pieces: training/forecast time points and the noise-free
# values over the forecast horizon that the forecasts are scored against.
x_ob = t[:N_INIT]
x_new = t[N_INIT:(N_INIT+STEPS)]
Y_true = Y[N_INIT:(N_INIT+STEPS)].T

for i in tqdm.tqdm(np.arange(NSIM)):
    for label in noise_labels:
        # Fresh noise realisation per simulation and scenario, shared by all
        # four models so they are compared on the same noisy data.
        noise = generateNoise(label)
        Y_e = Y + noise
        Y_train = Y_e[:N_INIT]
        for model in models:
            # 'reg-*' models use LOG_LAMBDA; the others use -14.0
            # (presumably an effectively negligible penalty -- confirm).
            ll = LOG_LAMBDA if model.startswith('reg') else -14.0
            mafr_ind = 'mafr' in model
            Cbar, zeta, scores, w = fpca(Y_train.T, B, P, ll, NDERIV, J, N_COMP)
            if mafr_ind:
                zeta, scores, U = mafr(zeta, scores, P)
            # Forecast each component's score series with its own GP.
            mu_fors = []
            V_fors = []  # diagonal of each GP's V_for; not used in the RMSE below
            for score in scores.T:
                gp = GP(Matern(nu=1.5, rho=1.0, sigma=1.0))
                gp.fit(x_ob, score.T, bounds=[(-6,2), (-6,2), (-6, 2)], n_init=100)
                mu_for, V_for = gp.posterior(x_new)
                mu_fors.append(mu_for)
                V_fors.append(np.diag(V_for))
            # Reconstruct the forecast: B @ zeta applied to the forecast
            # scores, plus B @ Cbar added to every forecast step.
            recon = np.matmul(np.matmul(B, zeta), mu_fors) + np.matmul(B, Cbar)[:, np.newaxis]
            # RMSE per forecast step (mean over axis 0), against the noise-free data.
            results[model+'_'+label][i, :] = np.sqrt(np.mean((Y_true-recon)**2, axis=0))

for key in results.keys():
    np.savez('../results/surf_disp_'+key+'.npz', results=results[key])

100%|██████████| 100/100 [1:16:30<00:00, 45.91s/it]
