In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from skfda.representation.grid import FDataGrid
from skfda.representation.basis import BSplineBasis
from skfda.preprocessing.dim_reduction import FPCA
from skfda.exploratory.visualization import FPCAPlot
from skfda.datasets import fetch_weather

from sklearn.decomposition import PCA

## Load data

In [None]:
# Fetch the weather dataset as a (data, target) pair; `label` holds the
# per-sample class codes used to split the score scatter plots further down.
weather, label = fetch_weather(return_X_y=True)

In [None]:
# Sampling points along the first domain axis of the functional data.
time = weather.grid_points[0]
# Keep only output coordinate 0 of every curve -> 2-D array (samples x grid points).
# Presumably coordinate 0 is temperature, given the name -- confirm against the dataset docs.
temp_array = weather.data_matrix[:, :, 0]

In [None]:
# Functional data for coordinate 0 of the dataset, projected onto a
# 7-function B-spline basis so that the FPCA yields smooth eigenfunctions.
#
# Alternative (rigid eigenfunctions): skip the basis projection and work on
# the raw grid instead:
#     fd_temperatures = FDataGrid(temp_array, np.arange(365))
basis = BSplineBasis(n_basis=7)
fd_raw_grid = weather.coordinates[0]
fd_temperatures = fd_raw_grid.to_basis(basis)

## FPCA

In [None]:
# Number of principal components retained by the functional PCA.
n_components = 4
fpca = FPCA(n_components=n_components)
# Fit on the basis-represented curves and keep the per-sample component
# scores (rows = samples, columns = components).
fpca_temp = fpca.fit_transform(fd_temperatures)

In [None]:
# Plot the fitted principal component functions; the trailing `;` suppresses
# the repr of the returned object in the cell output.
fpca.components_.plot();

In [None]:
# Visualise each component relative to the mean curve, laid out on two rows.
# NOTE(review): `factor=300` looks like a display-only scaling applied to the
# components before they are combined with the mean -- confirm in the FPCAPlot docs.
FPCAPlot(fd_temperatures.mean(), fpca.components_, factor=300, n_rows=2).plot()
# Avoid overlapping subplot titles/labels.
plt.tight_layout()

In [None]:
# Total share of variance captured by the retained FPCA components
# (sum of the per-component ratios).
fpca.explained_variance_ratio_.sum()

In [None]:
# Scatter of the first two FPCA scores, one series per class.
# Iterate over the classes actually present in `label`: the previous
# `range(label.max())` stopped one short and silently dropped the
# highest-numbered class from the plot.
for i in np.unique(label):
    in_class = label == i
    plt.scatter(fpca_temp[in_class, 0], fpca_temp[in_class, 1], label=f'{i}')
plt.xlabel('FPC 1 score')
plt.ylabel('FPC 2 score')
plt.legend();

## Inverse transform

In [None]:
# Rebuild every curve on the original grid as
#     mean(t) + sum_k score_k * eigenfunction_k(t)
# by evaluating the fitted mean and components at the sampling points.
n_points = time.shape[0]
eigenfunc = fpca.components_(time).reshape(n_components, n_points)
fpca_mean = fpca.mean_(time).flatten()
# (samples x components) @ (components x grid points) -> (samples x grid points)
recon = fpca_temp @ eigenfunc + fpca_mean
# Pointwise reconstruction error against the raw sampled values.
resid = temp_array - recon

In [None]:
# Index of the sample to inspect.
ind = 0
# Overlay the observed curve and its FPCA reconstruction.
plt.plot(time, temp_array[ind])
plt.plot(time, recon[ind])

## PCA (same as discretized FPCA)

In [None]:
# Reuse the component count chosen for the FPCA so both decompositions are
# compared on equal terms (previously a separately hard-coded 4).
pca = PCA(n_components=n_components)

In [None]:
# Ordinary PCA on the raw sampled values: each grid point is one feature.
# `pca_temp` holds the per-sample scores.
pca_temp = pca.fit_transform(temp_array)

In [None]:
# Draw each PCA component (one loading value per grid point) against time.
for comp in pca.components_:
    plt.plot(time, comp)

In [None]:
# Total share of variance captured by the retained PCA components.
pca.explained_variance_ratio_.sum()

In [None]:
# Scatter of the first two PCA scores, one series per class.
# Iterate over the classes actually present in `label`: the previous
# `range(label.max())` stopped one short and silently dropped the
# highest-numbered class from the plot.
for i in np.unique(label):
    in_class = label == i
    plt.scatter(pca_temp[in_class, 0], pca_temp[in_class, 1], label=f'{i}')
plt.xlabel('PC 1 score')
plt.ylabel('PC 2 score')
plt.legend();

In [None]:
# Map the PCA scores back to the original feature space...
recon_pca = pca.inverse_transform(pca_temp)
# ...and keep the pointwise reconstruction error.
resid_pca = temp_array - recon_pca

In [None]:
# Index of the sample to inspect.
ind = 0
# Overlay the observed curve and its PCA reconstruction ('C2' is the colour
# also used for PCA in the comparison figure).
plt.plot(time, temp_array[ind])
plt.plot(time, recon_pca[ind], color='C2')

## Compare models

In [None]:
# Three-panel comparison for a single sample:
#   left   - observed curve with both reconstructions,
#   middle - histogram of the residuals,
#   right  - residuals along the time axis.
ind = 0
fig, axs = plt.subplots(1, 3, figsize=[15, 5])
ax_curves, ax_hist, ax_resid = axs

ax_curves.plot(time, temp_array[ind], label='train')
ax_curves.plot(time, recon[ind], label='fpca')
ax_curves.plot(time, recon_pca[ind], '--', label='pca')
ax_curves.legend()

# Same colour/label per model in both residual panels.
for errors, colour, name in ((resid, 'C1', 'fpca'), (resid_pca, 'C2', 'pca')):
    ax_hist.hist(errors[ind], color=colour, alpha=0.5, label=name)
    ax_resid.plot(time, errors[ind], color=colour, alpha=0.5, label=name)
ax_hist.legend()
ax_resid.legend()

In [None]:
# Reconstruction-quality summary over all samples and grid points.
print(f'\t fpca\t\t\t pca')
print(f'mse\t {np.mean(np.square(recon - temp_array))}\t {np.mean(np.square(recon_pca - temp_array))}')
print(f'mae\t {np.mean(np.abs(recon - temp_array))}\t {np.mean(np.abs(recon_pca - temp_array))}')
# np.corrcoef treats every ROW of a 2-D input as its own variable, so
# corrcoef(recon, temp_array)[0, 1] was the correlation between the first two
# reconstructed curves, not between reconstruction and data. Flatten both
# arrays so the coefficient compares reconstruction to data value by value.
print(f'corr\t {np.corrcoef(recon.ravel(), temp_array.ravel())[0, 1]}\t {np.corrcoef(recon_pca.ravel(), temp_array.ravel())[0, 1]}')