# Evaluation of Reconstruction Performance

In [None]:
import sys
import os
from os.path import join
# Put the sibling "app" directory (next to the current working directory's
# parent) on the import path, unless it is already there.
parent_dir = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
app_dir = os.path.join(parent_dir, "app")
if app_dir not in sys.path:
    sys.path.append(app_dir)

from pathlib import Path
import torch as pt
from torch.utils.data import Subset
from torch.nn.functional import mse_loss
from CNN_VAE import ConvEncoder, ConvDecoder, Autoencoder
from utils.training_loop import train_cnn_vae
import utils.config as config
import matplotlib.pyplot as plt

# seed PyTorch's random number generator
pt.manual_seed(0)

# render figures at a higher resolution
plt.rcParams["figure.dpi"] = 180

# use GPU if possible
device = pt.device("cuda:0" if pt.cuda.is_available() else "cpu")
print(device)

TIMESTEP = 2

# every path is resolved relative to the parent of the current working directory
_PROJECT_ROOT = Path(os.path.abspath('')).parent
DATA_PATH = _PROJECT_ROOT / "data"
OUTPUT_PATH = _PROJECT_ROOT / "output" / "VAE"
MODEL_PATH = OUTPUT_PATH / "latent_study"

In [None]:
# load test dataset
test_dataset = pt.load(join(DATA_PATH, "test_dataset.pt"))

# split test dataset into the two flow conditions: first half / second half
n_samples = len(test_dataset)
split_index = int(n_samples / 2)
X_test_1 = Subset(test_dataset, list(range(0, split_index)))            # ma0.84 alpha3.00
X_test_2 = Subset(test_dataset, list(range(split_index, n_samples)))    # ma0.84 alpha5.00

# make tensors from datasets: stack all samples along a new dim 3, then drop
# dim 0 if it has size one
samples_1 = [X_test_1[k] for k in range(len(X_test_1))]
samples_2 = [X_test_2[k] for k in range(len(X_test_2))]
X_test_1_tensor = pt.stack(samples_1, dim=3).squeeze(0)
X_test_2_tensor = pt.stack(samples_2, dim=3).squeeze(0)
print(X_test_1_tensor.shape)

#### MSE and Variance Reconstruction with varying number of bottleneck neurons

In [None]:
# function to create VAE model
def make_VAE_model(n_latent: int = 256) -> pt.nn.Module:
    """Build a freshly initialised convolutional VAE and move it to ``device``.

    Resolution and channel counts are taken from ``config``; only the
    bottleneck width is chosen by the caller.

    Args:
        n_latent: Number of bottleneck (latent) neurons, used for both the
            encoder and the decoder. Previously this argument was ignored in
            favour of ``config.latent_size``, so models from the latent-size
            study were all built with the same bottleneck width.

    Returns:
        The ``Autoencoder`` wrapping the encoder/decoder pair, on ``device``.
    """
    encoder = ConvEncoder(
        in_size=config.target_resolution,
        n_channels=config.input_channels,
        n_latent=n_latent,
        variational=True,
        layernorm=True
    )

    decoder = ConvDecoder(
        in_size=config.target_resolution,
        n_channels=config.output_channels,
        n_latent=n_latent,
        layernorm=True,
        squash_output=True
    )

    autoencoder = Autoencoder(encoder, decoder)
    autoencoder.to(device)
    return autoencoder

In [None]:

# scan directory for trained models and extract paths as well as the latent size of the model;
# each sub-directory <name> of MODEL_PATH contributes the path MODEL_PATH/<name>/<name>
dirs = []
for name in os.listdir(MODEL_PATH):
    run_dir = os.path.join(MODEL_PATH, name)
    if os.path.isdir(run_dir):
        dirs.append(os.path.join(run_dir, name))

# the last path component is parsed as an integer latent size and used as sort key
sorted_dirs = sorted(dirs, key=lambda path: int(os.path.basename(path)))
latent_sizes = [int(os.path.basename(path)) for path in sorted_dirs]

In [None]:
# Containers for the metrics; one entry is appended per model, in the order of latent_sizes
MSE_1, MSE_2 = [], []
Var1, Var2 = [], []

# total variance of each test dataset, used as reference for the variance reconstruction
orig_Var1 = pt.var(X_test_1_tensor)
orig_Var2 = pt.var(X_test_2_tensor)

for i, (latent_size, model_dir) in enumerate(zip(latent_sizes, sorted_dirs)):
    print("Computing metrics for autoencoder with latent size ", latent_size)
    # build the model and load the stored state from its directory
    autoencoder = make_VAE_model(latent_size)
    autoencoder.load(model_dir)
    autoencoder.eval()

    # ---- test dataset 1 ----
    # pass every sample through the model one at a time and stack the outputs along dim 3
    with pt.no_grad():
        outputs = [
            autoencoder(X_test_1[k].unsqueeze(0)).squeeze(0).detach()
            for k in range(len(X_test_1))
        ]
        reconstructed = pt.stack(outputs, dim=3).squeeze(0)

    # MSE between original and reconstruction
    MSE_1.append(mse_loss(X_test_1_tensor, reconstructed).item())

    # variance reconstruction in percent: 1 minus the relative variance deficit
    variance_deficit = (orig_Var1 - pt.var(reconstructed)) / orig_Var1
    Var1.append(((1 - variance_deficit) * 100).item())

    # ---- test dataset 2 ----
    with pt.no_grad():
        outputs = [
            autoencoder(X_test_2[k].unsqueeze(0)).squeeze(0).detach()
            for k in range(len(X_test_2))
        ]
        reconstructed = pt.stack(outputs, dim=3).squeeze(0)

    # MSE between original and reconstruction
    MSE_2.append(mse_loss(X_test_2_tensor, reconstructed).item())

    # variance reconstruction in percent: 1 minus the relative variance deficit
    variance_deficit = (orig_Var2 - pt.var(reconstructed)) / orig_Var2
    Var2.append(((1 - variance_deficit) * 100).item())

In [None]:
# Plot both metrics over the latent size (MSE on top, variance reconstruction below)
# and save the figure
fig, (ax1, ax2) = plt.subplots(2, 1, sharex=True)

dataset_labels = ("Test Dataset 1", "Test Dataset 2")
panels = (
    (ax1, "MSE", (MSE_1, MSE_2)),
    (ax2, "Variance Reconstruction in %", (Var1, Var2)),
)
for axis, title, curves in panels:
    for values, curve_label in zip(curves, dataset_labels):
        axis.plot(latent_sizes, values, label=curve_label)
    axis.set_title(title)

# the x axis is shared, so it is only labelled on the lower panel
ax2.set_xlabel("number of bottleneck neurons")
ax2.set_xticks(range(0, 325, 25))

# one figure-level legend, built from the lower panel's curves
handles, labels = ax2.get_legend_handles_labels()
fig.legend(handles, labels)
fig.tight_layout()
fig.savefig(
    join(OUTPUT_PATH, "MSE_and_Variance_with_latent_size.png"),
    bbox_inches="tight",
)

#### Temporal MSE distribution with varying number of bottleneck neurons

In [None]:
# x-axis values for the per-timestep MSE curves
timesteps = range(config.mini_test_per_cond) if config.mini_datset else range(500)

fig, ax1 = plt.subplots(1, 1, figsize=(8, 3))

for i, (latent_size, model_dir) in enumerate(zip(latent_sizes, sorted_dirs)):
    print("Computing metrics for autoencoder with latent size ", latent_size)
    # build the model and load the stored state from its directory
    autoencoder = make_VAE_model(latent_size)
    autoencoder.load(model_dir)
    autoencoder.eval()

    # reconstruct test dataset 1 sample by sample, stacking the outputs along dim 3
    with pt.no_grad():
        outputs = [
            autoencoder(X_test_1[k].unsqueeze(0)).squeeze(0).detach()
            for k in range(len(X_test_1))
        ]
        reconstructed = pt.stack(outputs, dim=3).squeeze(0)

    # squared error averaged over the first two tensor dimensions
    squared_error = (X_test_1_tensor - reconstructed) ** 2
    MSE = squared_error.mean(dim=[0, 1])
    ax1.plot(timesteps, MSE, label="bottleneck neurons {}".format(latent_size))

# ax1.set_title("Test Dataset 1")
ax1.set_xlabel("timestep")
ax1.set_ylabel("MSE")
# ax1.set_yscale("log")

fig.legend()
fig.tight_layout()
fig.savefig(
    join(OUTPUT_PATH, "temporal_MSE_distribution_VAE.png"),
    bbox_inches="tight",
)


#### Spatial MSE distribution with varying number of bottleneck neurons

In [None]:
# Load the stored coordinates and unpack them into their two components
coords_file = join(DATA_PATH, "coords_interp.pt")
coords = pt.load(coords_file)
(xx, yy) = coords

In [None]:
fig, axes = plt.subplots(2, 3, sharey=True)

# Latent sizes selected for the spatial comparison. Each one must be present in
# latent_sizes; otherwise list.index raises a ValueError.
for i, latent_size in enumerate([60, 110, 260]):
    print("Computing metrics for autoencoder with latent size ", latent_size)
    # load model: look the directory up by latent size. `i` is only the position
    # within the selection above, so sorted_dirs[i] would pick the three smallest
    # models instead of the requested ones.
    model_dir = sorted_dirs[latent_sizes.index(latent_size)]
    autoencoder = make_VAE_model(latent_size)
    autoencoder.load(model_dir)
    autoencoder.eval()

    # The global metric lists (MSE_1, Var1) are deliberately not appended to here:
    # they hold one entry per model in latent_sizes and are plotted against it.

    # reconstruct test dataset 1
    # NOTE(review): this result is overwritten below and nothing is drawn on
    # `axes` yet - the cell looks unfinished; confirm the intended plot.
    with pt.no_grad():
        reconstructed = pt.stack(
            [autoencoder(X_test_1[k].unsqueeze(0)).squeeze(0).detach() for k in range(len(X_test_1))],
            dim=3,
        ).squeeze(0)

    # reconstruct test dataset 2
    with pt.no_grad():
        reconstructed = pt.stack(
            [autoencoder(X_test_2[k].unsqueeze(0)).squeeze(0).detach() for k in range(len(X_test_2))],
            dim=3,
        ).squeeze(0)

#### Reconstructed pressure field compared to Ground Truth for two bottleneck sizes