## Imports

In [None]:
import os
from os.path import join
import sys
from pathlib import Path

# Put the sibling "app" directory (one level above the working directory) on
# sys.path so that the project-local imports below can be resolved.
parent_dir = Path(os.path.abspath("")).parent
app_dir = join(parent_dir, "app")
# appending only once keeps sys.path clean when the cell is re-run
if sys.path.count(app_dir) == 0:
    sys.path.append(app_dir)

import torch as pt
from torch.nn.functional import mse_loss
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker 
import matplotlib.animation as animation
from IPython.display import HTML
from flowtorch.analysis import SVD
import numpy as np
from scipy.fft import fft, fftfreq

import utils.config as config
from utils.helper_funcs import load_datasets_end_to_end
from utils.DataWindow import DataWindow_end_to_end
from CNN_VAE.CNN_VAE import ConvDecoder, ConvEncoder
from LSTM.LSTM_model import LSTM
from end_to_end.CNN_VAE_LSTM import autoencoder_LSTM

# render all figures at a higher resolution
plt.rcParams["figure.dpi"] = 180

# compute device: CUDA when available, CPU otherwise
device = pt.device("cuda" if pt.cuda.is_available() else "cpu")
print(device)

# prediction horizon, latent dimension and identifier of the trained model;
# FC_MODEL is split on "_" into four integers further below
PRED_HORIZON = 3
N_LATENT = 64
FC_MODEL = "1_32_64_2"

# input and output locations, all relative to the repository root (parent_dir)
DATA_PATH = join(parent_dir, "data", "end_to_end")
OUTPUT_PATH = join(parent_dir, "output", "end_to_end", "single")
MODEL_PATH = join(OUTPUT_PATH, f"pred_horizon_{PRED_HORIZON}")

## Pipeline Pre-Processing

In [None]:
# Decode the model identifier: four "_"-separated integers, of which the first is
# discarded and the rest are input width, hidden size and number of stacked layers.
_, INPUT_WIDTH, HIDDEN_SIZE, N_STACKED_LAYERS = map(int, FC_MODEL.split("_"))

In [None]:
# Timestep indices to inspect and the factor that converts a timestep count
# into dimensionless time.
TIMESTEP_1, TIMESTEP_2 = INPUT_WIDTH + 9, INPUT_WIDTH + 49
dimless_factor = config.U_inf / (config.c_mean * config.timesteps_per_second)

# number of timesteps covered by the training share (not truncated to int here)
n_train_timesteps = config.single_flow_cond_train_share * config.time_steps_per_cond

# dimensionless time of the train/test split and of the two inspected timesteps
TIMESTEP_dimless_split = round(n_train_timesteps * dimless_factor, 2)
TIMESTEP_dimless_1 = round((TIMESTEP_1 + n_train_timesteps) * dimless_factor, 2)
TIMESTEP_dimless_2 = round((TIMESTEP_2 + n_train_timesteps) * dimless_factor, 2)

# prediction horizons needed to reach timestep 1 and 2
pred_horizon_1 = TIMESTEP_1 - INPUT_WIDTH + 1
pred_horizon_2 = TIMESTEP_2 - INPUT_WIDTH + 1

# horizon spanning everything after the input window of the test share; used for
# comparing latent and full space loss
pred_horizon_total = int(config.time_steps_per_cond - n_train_timesteps - INPUT_WIDTH)

print(f"Test dataset comprises timesteps {int(n_train_timesteps)} - {config.time_steps_per_cond}.")
print(f"The end-to-end model takes the first {INPUT_WIDTH} timesteps as input.\n")
print(f"Predicted timestep 1 (index) is:            {TIMESTEP_1}")
print(f"    which equals a dimensionless time:      {TIMESTEP_dimless_1}")
print(f"    and a prediction horizon of:            {pred_horizon_1}\n")
print(f"Predicted timestep 2 (index) is:            {TIMESTEP_2}")
print(f"    which equals a dimensionless time:      {TIMESTEP_dimless_2}")
print(f"    and a prediction horizon of:            {pred_horizon_2}")

In [None]:
# experimental data, already split into a train and a test part
train, test = load_datasets_end_to_end(DATA_PATH)
print(train.shape, test.shape)

# the coordinate grids and scalers are stored one directory above DATA_PATH
shared_data_dir = Path(DATA_PATH).parent

# coordinate grids
coords = pt.load(join(shared_data_dir, "coords_interp.pt"))
xx, yy = coords

# scalers
# NOTE(review): pt.load unpickles arbitrary objects - only load trusted files.
# Recent PyTorch releases default to weights_only=True, which may reject pickled
# scaler objects; confirm against the installed torch version.
E2E_scaler = pt.load(join(shared_data_dir, "VAE_LSTM_scaler.pt"))
VAE_scaler = pt.load(join(shared_data_dir, "VAE_scaler.pt"))

# results of the sequential model, used as reference in the plots below
orig_loss_seq = pt.load(join(OUTPUT_PATH, "orig_loss_seq_model.pt"))
PDE_seq = pt.load(join(OUTPUT_PATH, "PDE_seq.pt"))

In [None]:
# Wrap the train/test data in a DataWindow to obtain windowed time-series datasets.
data_window = DataWindow_end_to_end(
    train=train,
    test=test,
    input_width=INPUT_WIDTH,
    pred_horizon=pred_horizon_total,
)

# rolling-window index sets over the last axis of the test data
n_test_timesteps = test.shape[2]
input_idx, target_idx = data_window.rolling_window(n_test_timesteps)
target_idx = target_idx.tolist()

print(f"Input indices of first window range from:           {input_idx[0][0]} to {input_idx[0][-1]}")
print(f"Target indices of first window range from:          {target_idx[0][0]} to {target_idx[0][-1]}")
print(f"Number of possible windows:                            {len(input_idx)}")

# windowed test dataset consumed by the reconstruction below
test_windows = data_window.test_dataset

In [None]:
# Build encoder, decoder and LSTM, combine them into the end-to-end model and
# load the stored parameters.

# settings shared by the convolutional encoder and decoder
shared_conv_kwargs = dict(
    in_size=config.target_resolution,
    n_latent=N_LATENT,
    layernorm=True,
)

encoder = ConvEncoder(
    n_channels=config.VAE_input_channels,
    variational=True,
    **shared_conv_kwargs,
)
decoder = ConvDecoder(
    n_channels=config.VAE_output_channels,
    squash_output=True,
    **shared_conv_kwargs,
)
lstm = LSTM(latent_size=N_LATENT, hidden_size=HIDDEN_SIZE, num_layers=N_STACKED_LAYERS)

model = autoencoder_LSTM(encoder=encoder, LSTM=lstm, decoder=decoder)
model.load(join(MODEL_PATH, FC_MODEL))

## Reconstruct dataset

In [None]:
# Autoregressively predict the test set from its first input window and compare
# the prediction with the ground truth, timestep by timestep.
pred_horizon = pred_horizon_total
print(pred_horizon)

with pt.no_grad():
    inputs, targets = test_windows[0]

    # add batch dimension with unsqueeze(0)
    inputs = inputs.unsqueeze(0).to(device)
    # NOTE(review): the model itself is never moved to `device` in this notebook;
    # confirm that model.load() takes care of that when running on a GPU.
    # Bring the prediction back to the CPU: everything downstream (mse_loss against
    # the CPU-resident test data, matplotlib contour plots, .numpy()) needs host memory.
    test_reconstr = model(inputs, pred_horizon).squeeze().detach().cpu()


# convert prediction and ground truth with the same scaler chain
# (presumably: undo the end-to-end scaling, then apply the VAE scaling - TODO confirm)
test_reconstr = VAE_scaler.scale(E2E_scaler.rescale(test_reconstr))
# the first INPUT_WIDTH timesteps are model input and therefore not predicted
test_original = VAE_scaler.scale(E2E_scaler.rescale(test[:,:,INPUT_WIDTH:]))
# full-space MSE for every autoregressive prediction step
orig_loss = [mse_loss(test_original[:, :, step], test_reconstr[:, :, step]) for step in range(pred_horizon_total)]

#### Plot Full Space Loss

In [None]:
# Full-space test MSE over the number of autoregressive predictions:
# end-to-end model vs. sequential model.
color = '#FFA500'   # E2E line colour, reused by the power spectra plot further below
prediction_steps = range(1, pred_horizon_total + 1)

# plt.subplots returns (figure, axes); unpack both instead of binding the tuple
fig, ax = plt.subplots(1, 1, figsize=config.orig_vs_latent_loss_figsize)
ax.plot(prediction_steps, orig_loss, ls=":", label="full space loss E2E", color=color)
ax.plot(prediction_steps, orig_loss_seq, label="full space loss seq.", color="darkolivegreen")
ax.set_ylabel("Test MSE")
ax.set_xlabel("number of autoregressive predictions")
ax.set_yscale("log")
ax.set_ylim(config.plot_lims_orig_vs_latent_loss)
ax.legend(loc="upper right")
# the original referenced plt.tight_layout without calling it, which did nothing
fig.tight_layout()
fig.savefig(join(OUTPUT_PATH, "E2E_single_origvslatentloss.png"), bbox_inches="tight")

#### Create animations of the reconstruction

In [None]:
# pointwise squared error between ground truth and reconstruction
SE = (test_original - test_reconstr).pow(2)

# one row with three panels, filled by the update() callback
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(6, 2.5))

# colour limits from the config and the matching contour levels
n_contour_levels = 120
vmin_cp, vmax_cp = config.plot_lims_cp
vmin_MSE, vmax_MSE = config.plot_lims_MSE_reconstruction
levels_cp = pt.linspace(vmin_cp, vmax_cp, n_contour_levels)
levels_MSE = pt.linspace(vmin_MSE, vmax_MSE, n_contour_levels)

def update(frame):
    """Redraw the three animation panels for a single frame.

    Parameters
    ----------
    frame : int
        Index along the last axis of ``test_original``, ``test_reconstr`` and ``SE``.

    Notes
    -----
    Reads the module-level axes (``ax1``-``ax3``), coordinate grids (``xx``, ``yy``),
    data tensors and contour settings. Nothing is returned; the animation that
    drives this callback is created without blitting, so no artists are needed.
    """
    # (axes, data, title, vmin, vmax, contour levels) for each panel
    panels = (
        (ax1, test_original, "Ground Truth", vmin_cp, vmax_cp, levels_cp),
        (ax2, test_reconstr, "E2E CNN-VAE-LSTM", vmin_cp, vmax_cp, levels_cp),
        (ax3, SE, "Squared Error", vmin_MSE, vmax_MSE, levels_MSE),
    )

    for ax, field, title, vmin, vmax, levels in panels:
        # clear() also wipes title, aspect and ticks, so they are set again every frame
        ax.clear()
        ax.contourf(xx, yy, field[:, :, frame], vmin=vmin, vmax=vmax, levels=levels)
        ax.set_title(title)
        ax.set_aspect("equal")
        # removing the ticks removes their labels as well
        ax.set_xticks([])
        ax.set_yticks([])

# Animate one frame per entry along the last axis of SE (interval=100 between frames),
# store the result as GIF and embed an interactive player in the notebook.
n_frames = SE.shape[2]
ani = animation.FuncAnimation(fig, update, frames=n_frames, interval=100)

gif_path = join(OUTPUT_PATH, "E2E_reconstruction.gif")
ani.save(gif_path, writer="pillow")

# close the figure so that only the player, not a static figure, is displayed
plt.close(fig)
HTML(ani.to_jshtml())

#### Compare Power Spectra of POD Modes

In [None]:
# Flatten the two leading dimensions of the original and reconstructed test data
# so that the last axis is kept as the column axis of a 2-D matrix.
# reshape(-1, n_last) yields the same result as flatten(0, 1) for 3-D input but
# leaves an already flattened 2-D matrix untouched, so re-running this cell is
# harmless (a second flatten(0, 1) would collapse the matrix to 1-D).
test_original = test_original.reshape(-1, test_original.shape[-1])
test_reconstr = test_reconstr.reshape(-1, test_reconstr.shape[-1])

In [None]:
# Compare the power spectra of the leading POD mode coefficients of the original
# and the reconstructed test data.

# SVD of the row-wise mean-centred data matrices; the columns of V are used as
# mode coefficients below
svd_original = SVD(test_original - test_original.mean(dim=1).unsqueeze(-1), rank=1e5)
V_original = svd_original.V

svd_reconstr = SVD(test_reconstr - test_reconstr.mean(dim=1).unsqueeze(-1), rank=1e5)
V_reconstr = svd_reconstr.V

N = test_original.shape[1]
num_modes = 6
n_cols = 2
n_rows = -(-num_modes // n_cols)    # ceil division: enough rows for all modes
# NOTE(review): presumably identical to config.timesteps_per_second - confirm
sample_rate = 2000          # [Hz]
y_lims = config.plot_lims_power_spectra_single
psd_mse = []

# The frequency axis is the same for every mode, so compute it once: scaled with
# c_mean / U_inf and restricted to the first (non-negative) half of the spectrum.
n_samples = V_original.shape[0]
n_half = n_samples // 2
freq = fftfreq(n_samples, d=1/sample_rate) * config.c_mean / config.U_inf
freq = freq[:n_half]

# squeeze=False keeps `ax` two-dimensional for any grid shape
fig, ax = plt.subplots(n_rows, n_cols, figsize=config.power_sepctra_figsize, sharex=True, squeeze=False)
for mode, axis in zip(range(num_modes), ax.flat):
    # retrieve the mode coefficients (ax.flat runs row by row, i.e. mode = row * n_cols + col)
    original_mode_coeffs = V_original[:, mode].numpy()
    reconstr_mode_coeffs = V_reconstr[:, mode].numpy()

    # Compute FFT and PSD
    original_fft = fft(original_mode_coeffs)
    original_psd = np.abs(original_fft)**2 / len(original_fft)
    reconstr_fft = fft(reconstr_mode_coeffs)
    reconstr_psd = np.abs(reconstr_fft)**2 / len(reconstr_fft)

    # the error metric is evaluated on the full (two-sided) spectrum
    psd_mse.append(mse_loss(pt.from_numpy(original_psd), pt.from_numpy(reconstr_psd)))

    # Use only the first half of the spectrum for plotting (discard negative frequency half)
    original_psd = original_psd[:n_half]
    reconstr_psd = reconstr_psd[:n_half]

    # Plot the power spectra
    axis.semilogy(freq, original_psd, linewidth=0.5, color="black", label="Experimental Data")
    axis.semilogy(freq, PDE_seq[mode], linewidth=0.7, color="cornflowerblue", linestyle='dashed', label="seq. CNN-VAE-LSTM")
    axis.semilogy(freq, reconstr_psd, linewidth=1.2, color=color, linestyle='dotted', label="E2E  CNN-VAE-LSTM")
    axis.set_title(f"Mode Coefficient {mode + 1}")
    axis.grid()
    axis.set_yticks([])     # hides the y tick labels as well
    axis.set_ylim(y_lims)

# hide panels that are left over when num_modes does not fill the grid
for unused_axis in ax.flat[num_modes:]:
    unused_axis.set_visible(False)

for bottom_axis in ax[-1, :]:
    bottom_axis.set_xlabel(r"Strouhal number $St$")
handles, labels = ax[0, 0].get_legend_handles_labels()
fig.legend(handles, labels, loc="lower center", bbox_to_anchor=(0.5, -0.05), ncol=3)

# the x-axis is shared, so setting the scale on one panel applies to all of them
ax[-1, -1].set_xscale("log")

# tight_layout recomputes all subplot parameters, which is why the former
# plt.subplots_adjust(bottom=0.5) call had no lasting effect and was dropped;
# bbox_inches="tight" keeps the legend below the axes inside the saved image
fig.tight_layout()
fig.savefig(join(OUTPUT_PATH, "E2E_single_power_spectra.png"), bbox_inches="tight")

print("MSE is:                  ", sum(psd_mse) / len(psd_mse))