# Singular Value Decomposition (SVD)

##### 2.c: Evaluate the reconstruction performance on at least two IPSP datasets that were not part of the training data

1. Load the test datasets and $U$, and subtract the temporal mean from each test dataset
2. Transform the test data into the reduced state with $$ X_{test,reduced} = U_{reduced}^T X_{test} $$
3. Reconstruct the reduced test data $X_{test,reduced}$ with $$ X_{test,reconstr} = U_{reduced} X_{test,reduced} $$
4. Compare the reconstructed test set with the original test set using the MSE and the variance reconstruction

In [None]:
import sys
import os
from os.path import join
# Make the "app" directory importable. It is looked up next to the current
# working directory, i.e. at <cwd>/../app.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
app_dir = os.path.join(parent_dir, "app")
# Register the directory only once so re-running the cell does not grow sys.path.
if sys.path.count(app_dir) == 0:
    sys.path.append(app_dir)

import torch as pt
from os.path import join
import matplotlib.pyplot as plt
from pathlib import Path
import torch.nn.functional as F
import random
import utils.config as config
from importlib import reload
reload(config)

# Seed Python's built-in RNG and set the figure resolution for all plots.
random.seed(10)
plt.rcParams.update({"figure.dpi": 180})

# Input data and results are both kept in an "SVD" subfolder of the project.
DATA_PATH, OUTPUT_PATH = (join(parent_dir, folder, "SVD") for folder in ("data", "output"))

# Column index of the snapshot used in the single-timestep plots, plus the
# same index rescaled with U_inf / (c_mean * timesteps_per_second).
TIMESTEP = config.timestep_reconstruction
TIMESTEP_dimless = (TIMESTEP * config.U_inf) / (config.c_mean * config.timesteps_per_second)
print(TIMESTEP_dimless)

# Remaining values taken from the project configuration.
NEW_RES = config.target_resolution
test_keys = config.test_keys

#### 1. Load test datasets and U, subtract temporal mean from test datasets

In [None]:
# Load the test datasets and remove each dataset's temporal mean.
# The mean is taken over dim=1 and broadcast back over that axis.
# NOTE(review): this assumes a (space, time) layout of the snapshot matrices.
X_test_1 = pt.load(join(DATA_PATH, "X_test_1.pt"))                      # 3.00
X_test_1_temp_mean = X_test_1.mean(dim=1).unsqueeze(-1)
X_test_1_centered = X_test_1 - X_test_1_temp_mean
# Print the minimum first and the maximum second so that the values match the
# "Min-Max" labels.
print("Min-Max of uncentered Test 1:        ", X_test_1.min(), X_test_1.max())
print("Min-Max of centered Test 1:          ", X_test_1_centered.min(), X_test_1_centered.max())
print("Var of uncentered Test 1:            ", pt.var(X_test_1))
print("Var of centered Test 1:              ", pt.var(X_test_1_centered))

X_test_2 = pt.load(join(DATA_PATH, "X_test_2.pt"))                      # 5.00
X_test_2_temp_mean = X_test_2.mean(dim=1).unsqueeze(-1)
X_test_2_centered = X_test_2 - X_test_2_temp_mean

# Load the left singular vectors (loaded from the SVD output folder)
U = pt.load(join(OUTPUT_PATH, "U.pt"))

#### 2 - 4. Reduce and reconstruct the datasets and compute the metrics

In [None]:
# Sweep over an increasing number of left singular vectors
ranks = range(1, 400, 5)

# Metric containers; every entry is a plain Python float
MSE_1 = []
MSE_2 = []
Var1 = []
Var2 = []

# Total variance of the (uncentered) test datasets, used to normalise the MSE
orig_Var1 = pt.var(X_test_1)
orig_Var2 = pt.var(X_test_2)

for rank in ranks:
    # Truncated basis shared by both datasets in this iteration
    U_r = U[:, :rank]

    # Test dataset 1: project onto the basis, map back, compare with the original
    reduced = U_r.T @ X_test_1_centered
    reconstructed = U_r @ reduced
    MSE_1.append(F.mse_loss(reconstructed + X_test_1_temp_mean, X_test_1).item())

    # Variance reconstruction: 1 - MSE / total variance.
    # .item() stores a float instead of a 0-dim tensor, consistent with the
    # MSE lists and plottable regardless of the tensors' device.
    Var1.append((1 - (MSE_1[-1] / orig_Var1)).item())

    # Test dataset 2: same procedure
    reduced = U_r.T @ X_test_2_centered
    reconstructed = U_r @ reduced
    MSE_2.append(F.mse_loss(reconstructed + X_test_2_temp_mean, X_test_2).item())
    Var2.append((1 - (MSE_2[-1] / orig_Var2)).item())


In [None]:
# Two stacked panels sharing the rank axis: MSE on top, variance reconstruction below.
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)

panels = (
    (ax1, "MSE", (MSE_1, MSE_2)),
    (ax2, "Variance Reconstruction (R²)", (Var1, Var2)),
)
dataset_labels = ("Test Dataset 1", "Test Dataset 2")
for axis, title, curves in panels:
    for curve, dataset_label in zip(curves, dataset_labels):
        axis.plot(ranks, curve, label=dataset_label)
    axis.set_title(title)

# Upper panel: logarithmic y-axis with limits from the configuration
ax1.set_yscale("log")
ax1.set_ylim(config.plot_lims_MSE_general)

# Lower panel: carries the shared x-axis label and ticks
ax2.set_xlabel("rank")
ax2.set_ylim(config.plot_lims_R_squarred)
ax2.set_xticks(range(0, 401, 50))

# One figure-level legend; the entries are taken from the lower panel
legend_handles, legend_labels = ax2.get_legend_handles_labels()
fig.legend(legend_handles, legend_labels)
fig.tight_layout()
fig.savefig(join(OUTPUT_PATH, "SVD_MSE_and_Rsquarred_with_rank.png"), bbox_inches="tight")

#### How is the error temporally distributed?


In [None]:
# Ranks to compare. A slice U[:, :rank] with a rank above the number of columns
# of U silently returns all columns, which would draw a curve under a wrong
# "rank" label, so such ranks are dropped up front.
ranks = [rank for rank in (1, 3, 10, 30, 100, 300, 500, 1000) if rank <= U.shape[1]]

fig, ax1 = plt.subplots(1, 1, figsize=(8, 3))
# x-axis values: snapshot index rescaled with U_inf / (c_mean * timesteps_per_second)
timesteps = [(t * config.U_inf) / (config.c_mean * config.timesteps_per_second) for t in range(config.time_steps_per_cond)]

# Only test dataset 1 is evaluated here
for rank in ranks:
    # reduce and reconstruct with the truncated basis
    U_r = U[:, :rank]
    reduced = U_r.T @ X_test_1_centered
    reconstructed = U_r @ reduced + X_test_1_temp_mean
    # average the squared error over dim 0 -> one MSE value per column (snapshot)
    mse = ((X_test_1 - reconstructed) ** 2).mean(0)
    ax1.plot(timesteps, mse, label=f"rank {rank}")

ax1.set_ylabel("MSE")
ax1.set_xlabel(r"$\tau$")
ax1.set_yscale("log")
ax1.set_ylim(config.plot_lims_MSE_temporal)

fig.legend()
fig.tight_layout()
fig.savefig(join(OUTPUT_PATH, "SVD_temporal_MSE_distribution.png"), bbox_inches="tight")



#### How does the spatial error evolve for one specific timestep with increasing rank?

In [None]:
# The coordinate file is stored one level above the SVD data folder.
coords_file = join(Path(DATA_PATH).parent, "coords_interp.pt")
coords = pt.load(coords_file)
# The loaded object unpacks into exactly two entries, used as x and y for the contour plots.
x, y = coords

In [None]:
# Spatial MSE maps: one column per rank, one row per test dataset.
fig, axes = plt.subplots(2, 3, sharey=True)
vmin, vmax = config.plot_lims_MSE_spatial
levels = pt.linspace(vmin, vmax, 120)

# Identical contour settings for every panel so they share one colour scale
contour_opts = dict(vmin=vmin, vmax=vmax, levels=levels, extend="both")

# (original data, centered data, temporal mean) for each figure row
datasets = (
    (X_test_1, X_test_1_centered, X_test_1_temp_mean),
    (X_test_2, X_test_2_centered, X_test_2_temp_mean),
)

for col, rank in enumerate([3, 30, 300]):
    basis = U[:, :rank]

    for row, (X_orig, X_centered, X_mean) in enumerate(datasets):
        # project onto the truncated basis, map back and restore the mean
        coefficients = pt.transpose(basis, 0, 1) @ X_centered
        approximation = basis @ coefficients + X_mean

        # squared error averaged over dim 1, reshaped to the 2D grid
        error_map = ((X_orig - approximation) ** 2).mean(1).unflatten(dim=0, sizes=NEW_RES)
        cont = axes[row][col].contourf(x, y, error_map, **contour_opts)

    # formatting: title on the top row only, no tick labels anywhere
    axes[0][col].set_title("rank = {}".format(rank))
    for panel in axes[:, col]:
        panel.set_aspect("equal")
        panel.set_xticklabels([])
        panel.set_yticklabels([])

for row_axes, row_label in zip(axes, ("Test Dataset 1", "Test Dataset 2")):
    row_axes[0].set_ylabel(row_label)

# add a separate axis on the right for the colorbar
fig.subplots_adjust(right=0.9)
cax = fig.add_axes([0.99, 0.042, 0.03, 0.885])
cbar = fig.colorbar(cont, cax=cax, label="MSE")

fig.tight_layout()
fig.savefig(join(OUTPUT_PATH, "SVD_spatial_MSE_distribution.png"), bbox_inches="tight")

#### What does the reconstructed timestep look like for different ranks?

In [None]:
# Snapshot TIMESTEP of both test datasets (rows): two reconstructions of
# different rank followed by the unmodified data in the last column.
fig, axes = plt.subplots(2, 3, sharey=True)
vmin, vmax = config.plot_lims_cp
levels = pt.linspace(vmin, vmax, 120)

# Identical contour settings for every panel so they share one colour scale
contour_opts = dict(vmin=vmin, vmax=vmax, levels=levels, extend="both")

# (original data, centered data, temporal mean) for each figure row
datasets = (
    (X_test_1, X_test_1_centered, X_test_1_temp_mean),
    (X_test_2, X_test_2_centered, X_test_2_temp_mean),
)

for col, rank in enumerate([30, 300, "experimental"]):
    show_original = rank == "experimental"

    for row, (X_orig, X_centered, X_mean) in enumerate(datasets):
        if show_original:
            # last column: the test data itself, without any reduction
            snapshot = X_orig[:, TIMESTEP]
        else:
            # project onto the truncated basis, map back and restore the mean
            basis = U[:, :rank]
            coefficients = pt.transpose(basis, 0, 1) @ X_centered
            approximation = basis @ coefficients + X_mean
            snapshot = approximation[:, TIMESTEP]

        field = snapshot.unflatten(dim=0, sizes=NEW_RES)
        cont = axes[row][col].contourf(x, y, field, **contour_opts)

    # formatting: title on the top row only, no tick labels anywhere
    if show_original:
        axes[0][col].set_title("Ground Truth")
    else:
        axes[0][col].set_title("rank = {}".format(rank))

    for panel in axes[:, col]:
        panel.set_aspect("equal")
        panel.set_xticklabels([])
        panel.set_yticklabels([])

for row_axes, row_label in zip(axes, ("Test Dataset 1", "Test Dataset 2")):
    row_axes[0].set_ylabel(row_label)

# add a separate axis on the right for the colorbar
fig.subplots_adjust(right=0.9)
cax = fig.add_axes([0.99, 0.042, 0.03, 0.885])
cbar = fig.colorbar(cont, cax=cax, label=r"$c_p$")

fig.tight_layout()
fig.savefig(join(OUTPUT_PATH, "SVD_timestep_reconstruction.png"), bbox_inches="tight")