In [1]:
# ============================================================
# BOX 1/3 — Reading files + building X (cropped consistently)
# ============================================================
from __future__ import annotations

from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


# Optional MPI (safe in serial): use mpi4py when it can be imported, otherwise
# fall back to single-process defaults. In the cells shown here `rank` is only
# used to restrict console output to rank 0.
try:
    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    rank = comm.rank
except Exception:
    # Any failure while importing or querying MPI leaves comm=None and rank=0.
    comm = None
    rank = 0

# ----------------------------
# USER SETTINGS
# ----------------------------
# Snapshot range to load; both ends are inclusive (END + 1 is passed to range()).
TIME_STEP_START = 200
TIME_STEP_END   = 300

# PHI is formatted as "phi{PHI:.2f}" for the case folder, LAT_SIZE is inserted
# into the "h400x{LAT_SIZE}_ref" sub-folder name, and POST selects between
# "extracted_field_post_{t}.csv" (True) and "extracted_field_{t}.csv" (False).
PHI      = 0.40
LAT_SIZE = "025"
POST     = True

# BASE_DIR  : root folder that contains the phi*/ case folders.
# VAR_NAME  : CSV column that is read as the field value.
# SORT_COLS : CSV columns used both as sort keys and as point coordinates;
#             the first one is the coordinate compared against X_THESHOLD.
# COORD_TOL : 0.0 requires bit-exact coordinate equality between snapshots;
#             a positive value is used as an absolute tolerance instead.
BASE_DIR  = Path("../isocontours")
VAR_NAME  = "T"
SORT_COLS = ["x", "y"]
COORD_TOL = 0.0

# Crop: keep only points whose first sort column is strictly greater than this.
# (The spelling "THESHOLD" is referenced by later cells, so it is left as is.)
X_THESHOLD = 300  # keep only x > threshold

# ----------------------------
# Helpers
# ----------------------------
def _case_suffix(phi: float, lat_size: str) -> str:
    return f"h400x{lat_size}_ref"

def field_csv_path(base_dir: Path, phi: float, lat_size: str, time_step: int, post: bool) -> Path:
    """Build the path of the field CSV for one timestep.

    The layout is ``base_dir / "phi<phi, 2 decimals>" / <case suffix> / <file>``.

    Args:
        base_dir: Root folder of the extracted fields.
        phi: Value formatted with two decimals into the ``phi...`` folder name.
        lat_size: Lattice-size tag forwarded to ``_case_suffix``.
        time_step: Timestep number embedded in the file name.
        post: If true, the ``extracted_field_post_`` file is addressed,
            otherwise the ``extracted_field_`` file.

    Returns:
        The path of the CSV file (its existence is not checked here).
    """
    if post:
        file_name = f"extracted_field_post_{time_step}.csv"
    else:
        file_name = f"extracted_field_{time_step}.csv"

    case_dir = base_dir / f"phi{phi:.2f}" / _case_suffix(phi, lat_size)
    return case_dir / file_name

def read_field_sorted(path: Path, var_name: str, sort_cols: list[str]) -> tuple[np.ndarray, np.ndarray]:
    """Load one field CSV and return its rows in a deterministic order.

    Rows with NaN or +/-inf in any of the sort columns or in ``var_name`` are
    discarded; the remaining rows are sorted by ``sort_cols`` with a stable sort.

    Args:
        path: CSV file to read.
        var_name: Column holding the field value.
        sort_cols: Columns used as sort keys and returned as coordinates.

    Returns:
        ``(coords, values)`` as float64 arrays: ``coords`` has one column per
        entry of ``sort_cols`` and ``values`` has one entry per kept row.

    Raises:
        FileNotFoundError: If ``path`` does not exist.
        ValueError: If any required column is absent from the CSV.
    """
    if not path.exists():
        raise FileNotFoundError(f"Missing file:\n  {path}")

    table = pd.read_csv(path)
    required = sort_cols + [var_name]

    absent = [name for name in required if name not in table.columns]
    if absent:
        raise ValueError(f"{path.name}: missing columns {absent}")

    # Infinite entries are turned into NaN first so that a single dropna
    # removes every unusable row.
    table = table.replace([np.inf, -np.inf], np.nan)
    table = table.dropna(subset=required)

    # mergesort is stable, so rows with equal keys keep their file order.
    table = table.sort_values(sort_cols, kind="mergesort")
    table = table.reset_index(drop=True)

    return (
        table[sort_cols].to_numpy(dtype=np.float64),
        table[var_name].to_numpy(dtype=np.float64),
    )

# ----------------------------
# Build X: (n_points_cropped, n_snaps)
# ----------------------------
# Both ends of the timestep range are inclusive.
times = list(range(TIME_STEP_START, TIME_STEP_END + 1))
if not times:
    # Without this guard times[0] below would fail with an opaque IndexError.
    raise ValueError(
        f"Empty timestep range: TIME_STEP_START={TIME_STEP_START} > TIME_STEP_END={TIME_STEP_END}."
    )

# The first timestep defines the reference grid and the crop mask that is
# applied to every later snapshot.
ref_path = field_csv_path(BASE_DIR, PHI, LAT_SIZE, times[0], POST)
coords_ref_full, snap0_full = read_field_sorted(ref_path, VAR_NAME, SORT_COLS)

x_ref = coords_ref_full[:, 0]
mask_x = x_ref > X_THESHOLD

coords_ref = coords_ref_full[mask_x]
snap0 = snap0_full[mask_x]

n_points = coords_ref.shape[0]
n_snaps = len(times)

if n_points == 0:
    # Fail here with a clear message instead of much later inside the PCA setup.
    x_max = float(x_ref.max()) if x_ref.size else float("nan")
    raise ValueError(
        f"X_THESHOLD={X_THESHOLD} keeps 0/{coords_ref_full.shape[0]} points "
        f"(largest x in {ref_path.name} is {x_max}); lower X_THESHOLD."
    )

if rank == 0:
    print(f"Reference timestep: {times[0]}")
    print(f"X_THESHOLD={X_THESHOLD} -> keeping {n_points}/{coords_ref_full.shape[0]} points")
    print(f"n_points={n_points}, n_snapshots={n_snaps}")
    print(f"Reading: {ref_path}")

snapshots = [snap0]
for t in times[1:]:
    path = field_csv_path(BASE_DIR, PHI, LAT_SIZE, t, POST)
    coords_t_full, snap_t_full = read_field_sorted(path, VAR_NAME, SORT_COLS)

    # Every snapshot must live on the same (sorted) grid as the reference,
    # otherwise the rows of X would mix different spatial points.
    if coords_t_full.shape[0] != coords_ref_full.shape[0]:
        raise ValueError(
            f"Inconsistent n_points (full) at timestep {t}: {coords_t_full.shape[0]} vs {coords_ref_full.shape[0]}"
        )

    if COORD_TOL == 0.0:
        same = np.array_equal(coords_t_full, coords_ref_full)
    else:
        same = np.allclose(coords_t_full, coords_ref_full, atol=COORD_TOL, rtol=0.0)

    if not same:
        raise ValueError(
            f"Coordinates mismatch at timestep {t}.\n"
            f"Set COORD_TOL>0 or interpolate/regrid."
        )

    # Reuse the reference mask so all snapshots are cropped identically.
    snap_t = snap_t_full[mask_x]
    if snap_t.shape[0] != n_points:
        # Defensive invariant; the size check above already implies it.
        raise RuntimeError("Masking produced inconsistent point count. Check X_THESHOLD and sorting.")

    snapshots.append(snap_t)

# The snapshots are already float64, so copy=False avoids duplicating the
# whole matrix while still guaranteeing the dtype.
X = np.stack(snapshots, axis=1).astype(np.float64, copy=False)  # (n_points, n_snaps)


Reference timestep: 200
X_THESHOLD=300 -> keeping 255744/839680 points
n_points=255744, n_snapshots=101
Reading: ..\isocontours\phi0.40\h400x025_ref\extracted_field_post_200.csv


In [2]:
# ============================================================
# BOX 2/3 — Running DeepKoopman (AE + linear latent operator)
# ============================================================
import torch
import torch.nn as nn
import torch.optim as optim

# ----------------------------
# Deep Koopman settings
# ----------------------------
# Requested torch device; replaced by "cpu" below when CUDA is unavailable.
DEVICE = "cuda"
# DT is not referenced by the code visible in this notebook.
DT = 1.0

# When USE_PCA is True the normalized snapshots are reduced to at most
# PCA_DIM components before they are fed to the autoencoder.
USE_PCA = True
PCA_DIM = 24

# Latent dimension and hidden-layer widths of the encoder/decoder MLPs.
Z_DIM = 16
ENC_HIDDEN = (256,)
DEC_HIDDEN = (256,)

# Training schedule: each epoch draws BATCHES_PER_EPOCH random batches of
# BATCH_SIZE windows, each window spanning ROLLOUT_LEN + 1 snapshots.
EPOCHS = 3000
BATCHES_PER_EPOCH = 50
BATCH_SIZE = 16
ROLLOUT_LEN = 10

# Adam hyper-parameters.
LR = 1e-3
WEIGHT_DECAY = 1e-7

# Weights of the loss terms: reconstruction, decoded rollout prediction,
# latent rollout consistency and spectral-radius penalty.
W_RECON = 1.0
W_PRED  = 1.0
W_LAT   = 0.1
W_STAB  = 1e-3

# Seed the global numpy and torch generators used for sampling and init.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Fall back to the CPU when CUDA was requested but is not available.
if DEVICE == "cuda" and not torch.cuda.is_available():
    DEVICE = "cpu"
device = torch.device(DEVICE)

if rank == 0:
    print("Using device:", device)
    if device.type == "cuda":
        print("GPU:", torch.cuda.get_device_name(0))

# Arrange as (T, n_points): one row per snapshot, cast to float32.
X_seq = X.T.astype(np.float32)  # (T, n_points)

# Normalize per spatial DOF over time. The 1e-6 offset keeps the division
# finite for points whose value does not change over the snapshots.
X_mean = X_seq.mean(axis=0, keepdims=True)
X_std  = X_seq.std(axis=0, keepdims=True) + 1e-6
Xn_seq = (X_seq - X_mean) / X_std

# Optional PCA: the component count is capped by the number of snapshots
# and the number of points.
if USE_PCA:
    from sklearn.decomposition import PCA
    pca_ncomp = int(min(PCA_DIM, Xn_seq.shape[0], Xn_seq.shape[1]))
    if pca_ncomp < 1:
        raise ValueError("PCA_DIM too small after min(PCA_DIM, n_snaps, n_points).")
    pca = PCA(n_components=pca_ncomp, svd_solver="randomized", random_state=SEED)
    Xp_seq = pca.fit_transform(Xn_seq)  # (T, pca_dim)
    if rank == 0:
        evr = float(np.sum(pca.explained_variance_ratio_))
        print(f"PCA: n_components={pca_ncomp}, explained_var_sum={evr:.4f}")
    # NOTE(review): the PCA scores go to the network without further rescaling;
    # the logged first-epoch loss (~1.4e4) suggests they are far from unit
    # scale -- consider standardizing them (and undoing that after decoding).
else:
    pca = None
    Xp_seq = Xn_seq

# Network input size and number of snapshots available for rollout windows.
x_dim = Xp_seq.shape[1]
T_total = Xp_seq.shape[0]
# A rollout window covers ROLLOUT_LEN + 1 consecutive snapshots.
if ROLLOUT_LEN >= T_total:
    raise ValueError(f"ROLLOUT_LEN={ROLLOUT_LEN} must be < number of snapshots T={T_total}.")

# Whole (reduced) sequence is kept on the training device.
Xp_torch = torch.from_numpy(Xp_seq).to(device)

def build_mlp(in_dim: int, hidden: tuple[int, ...], out_dim: int) -> nn.Sequential:
    """Assemble a fully connected network with tanh activations.

    Every hidden width contributes a ``Linear`` layer followed by ``Tanh``;
    the final ``Linear`` layer maps to ``out_dim`` without an activation.

    Args:
        in_dim: Number of input features.
        hidden: Widths of the hidden layers, in order (may be empty).
        out_dim: Number of output features.

    Returns:
        The layers wrapped in an ``nn.Sequential``.
    """
    widths = [in_dim, *hidden]
    modules: list[nn.Module] = []
    # Consecutive width pairs give the (fan_in, fan_out) of each hidden layer.
    for fan_in, fan_out in zip(widths[:-1], widths[1:]):
        modules += [nn.Linear(fan_in, fan_out), nn.Tanh()]
    modules.append(nn.Linear(widths[-1], out_dim))
    return nn.Sequential(*modules)

class DeepKoopman(nn.Module):
    """Autoencoder with a learnable linear operator acting on the latent state.

    Attributes:
        enc: MLP mapping inputs of size ``x_dim`` to latents of size ``z_dim``.
        dec: MLP mapping latents of size ``z_dim`` back to size ``x_dim``.
        K: Square ``(z_dim, z_dim)`` parameter, initialised to the identity,
            that advances a latent state by one step.
    """

    def __init__(self, x_dim: int, z_dim: int, enc_hidden: tuple[int, ...], dec_hidden: tuple[int, ...]) -> None:
        super().__init__()
        # Encoder is created before the decoder, as in the original layout,
        # so seeded weight initialisation is unaffected.
        self.enc = build_mlp(in_dim=x_dim, hidden=enc_hidden, out_dim=z_dim)
        self.dec = build_mlp(in_dim=z_dim, hidden=dec_hidden, out_dim=x_dim)
        self.K = nn.Parameter(torch.eye(z_dim))  # linear Koopman operator

    def encode(self, x: torch.Tensor) -> torch.Tensor:
        """Map inputs to latent coordinates with the encoder network."""
        latent = self.enc(x)
        return latent

    def decode(self, z: torch.Tensor) -> torch.Tensor:
        """Map latent coordinates back to input space with the decoder network."""
        recon = self.dec(z)
        return recon

    def step_latent(self, z: torch.Tensor) -> torch.Tensor:
        """Advance row-wise latent states by one step, i.e. ``z @ K^T``."""
        return torch.matmul(z, self.K.transpose(0, 1))

def spectral_radius(K: torch.Tensor) -> torch.Tensor:
    """Return the largest eigenvalue modulus of the square matrix ``K``.

    Args:
        K: Square matrix.

    Returns:
        A real scalar tensor holding ``max_i |lambda_i(K)|``.
    """
    moduli = torch.linalg.eigvals(K).abs()
    return moduli.max().real

def sample_batch_indices(T: int, rollout_len: int, batch_size: int) -> list[int]:
    """Draw random start indices for rollout windows of ``rollout_len + 1`` snapshots.

    Indices are drawn uniformly (with replacement) from numpy's global random
    generator, from ``0`` up to and including ``T - rollout_len - 1``.

    Args:
        T: Number of snapshots in the sequence.
        rollout_len: Number of steps rolled out after the start snapshot.
        batch_size: Number of start indices to draw.

    Returns:
        A list of ``batch_size`` start indices.

    Raises:
        ValueError: If the sequence is too short to hold a single window.
    """
    n_starts = T - rollout_len  # count of admissible window starts
    if n_starts < 1:
        raise ValueError(f"Not enough snapshots: T={T}, rollout_len={rollout_len}")
    drawn = np.random.randint(0, n_starts, size=batch_size)
    return drawn.tolist()

def make_batch(Xp_seq_torch: torch.Tensor, idx0: list[int], rollout_len: int) -> torch.Tensor:
    """Cut consecutive windows out of a sequence and stack them into a batch.

    Args:
        Xp_seq_torch: Sequence tensor indexed by snapshot along its first axis.
        idx0: Start index of each window.
        rollout_len: Each window holds ``rollout_len + 1`` consecutive snapshots.

    Returns:
        The windows stacked along a new leading axis.
    """
    window = rollout_len + 1
    windows = []
    for start in idx0:
        windows.append(Xp_seq_torch[start : start + window])
    return torch.stack(windows, dim=0)  # (B, L+1, x_dim)

# Model, optimizer and the MSE criterion shared by all loss terms.
model = DeepKoopman(x_dim=x_dim, z_dim=Z_DIM, enc_hidden=ENC_HIDDEN, dec_hidden=DEC_HIDDEN).to(device)
opt = optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
mse = nn.MSELoss()

if rank == 0:
    print(f"Training DeepKoopman: x_dim={x_dim}, z_dim={Z_DIM}, T={T_total}, rollout_len={ROLLOUT_LEN}")

for epoch in range(1, EPOCHS + 1):
    model.train()
    losses = []

    for _ in range(BATCHES_PER_EPOCH):
        # Random windows of ROLLOUT_LEN + 1 consecutive snapshots.
        idx0 = sample_batch_indices(T_total, ROLLOUT_LEN, batch_size=BATCH_SIZE)
        Xb = make_batch(Xp_torch, idx0, ROLLOUT_LEN)  # (B, L+1, x_dim)

        B, Lp1, _ = Xb.shape
        L = Lp1 - 1

        # Encode every snapshot of every window (flattened to 2-D for the MLP).
        z_true = model.encode(Xb.reshape(B * (L + 1), x_dim)).reshape(B, L + 1, Z_DIM)

        # Recon: decode the encoded snapshots and compare with the inputs.
        X_recon = model.decode(z_true.reshape(B * (L + 1), Z_DIM)).reshape(B, L + 1, x_dim)
        loss_recon = mse(X_recon, Xb)

        # Latent rollout from z0: apply K repeatedly to the first latent state.
        z_list = [z_true[:, 0, :]]
        for _k in range(L):
            z_list.append(model.step_latent(z_list[-1]))
        z_pred = torch.stack(z_list, dim=1)  # (B, L+1, Z_DIM)

        X_pred = model.decode(z_pred.reshape(B * (L + 1), Z_DIM)).reshape(B, L + 1, x_dim)

        # Step 0 is excluded: it is the start state, not a prediction.
        loss_pred = mse(X_pred[:, 1:, :], Xb[:, 1:, :])
        loss_lat  = mse(z_pred[:, 1:, :], z_true[:, 1:, :])

        # Penalize only the part of the spectral radius that exceeds 1.
        rho = spectral_radius(model.K)
        loss_stab = torch.relu(rho - 1.0) ** 2

        loss = W_RECON * loss_recon + W_PRED * loss_pred + W_LAT * loss_lat + W_STAB * loss_stab

        opt.zero_grad(set_to_none=True)
        loss.backward()
        opt.step()

        losses.append(loss.item())

    # Progress report on rank 0 at the first epoch and every 100th epoch.
    # NOTE(review): the recorded log shows rho(K) staying slightly above 1
    # throughout training -- W_STAB may be too small to enforce stability.
    if rank == 0 and (epoch == 1 or epoch % 100 == 0):
        avg = float(np.mean(losses)) if losses else float("nan")
        with torch.no_grad():
            rho_val = float(spectral_radius(model.K).cpu().item())
        print(f"Epoch {epoch:5d}/{EPOCHS} | loss={avg:.6e} | rho(K)={rho_val:.6f}")

# ----------------------------
# One-step forecast at next timestep
# ----------------------------
model.eval()
t_next = times[-1] + 1  # first timestep after the loaded range

# Encode the last loaded snapshot, advance it once with K and decode.
with torch.no_grad():
    x_last = Xp_torch[-1, :].unsqueeze(0)  # (1, x_dim)
    z_last = model.encode(x_last)          # (1, z_dim)
    z_next = model.step_latent(z_last)     # (1, z_dim)
    x_next_p = model.decode(z_next).cpu().numpy()[0]  # (x_dim,)

# Undo PCA + normalization
if USE_PCA:
    x_next_norm = pca.inverse_transform(x_next_p.reshape(1, -1))[0]
else:
    x_next_norm = x_next_p

x_next_pred = (x_next_norm * X_std.reshape(-1) + X_mean.reshape(-1)).astype(np.float64)  # (n_points,)

# Save prediction next to the input fields of the same case.
suffix = _case_suffix(PHI, LAT_SIZE)
out_dir = BASE_DIR / f"phi{PHI:.2f}" / suffix

out = pd.DataFrame(coords_ref, columns=SORT_COLS)
out[f"{VAR_NAME}_pred"] = x_next_pred
out_path = out_dir / f"deepkoopman_pred_{VAR_NAME}_{t_next}_xgt{int(X_THESHOLD)}.csv"

# Only rank 0 touches the filesystem: under MPI every rank holds the same
# prediction, and concurrent writes to one path could corrupt the file.
# In serial runs rank is 0, so the behaviour is unchanged.
if rank == 0:
    out_dir.mkdir(parents=True, exist_ok=True)
    out.to_csv(out_path, index=False)
    print("Wrote:", out_path)


Using device: cuda
GPU: NVIDIA GeForce RTX 3050 4GB Laptop GPU
PCA: n_components=24, explained_var_sum=0.9407
Training DeepKoopman: x_dim=24, z_dim=16, T=101, rollout_len=10
Epoch     1/3000 | loss=1.405581e+04 | rho(K)=1.133281
Epoch   100/3000 | loss=4.243264e+02 | rho(K)=1.091027
Epoch   200/3000 | loss=4.082211e+01 | rho(K)=1.055555
Epoch   300/3000 | loss=9.759146e+00 | rho(K)=1.045329
Epoch   400/3000 | loss=1.507081e+00 | rho(K)=1.034297
Epoch   500/3000 | loss=9.628716e-01 | rho(K)=1.030237
Epoch   600/3000 | loss=1.960929e+00 | rho(K)=1.031726
Epoch   700/3000 | loss=4.517496e-01 | rho(K)=1.027587
Epoch   800/3000 | loss=6.893076e-01 | rho(K)=1.026291
Epoch   900/3000 | loss=3.002577e-01 | rho(K)=1.025802
Epoch  1000/3000 | loss=3.380327e-01 | rho(K)=1.027940
Epoch  1100/3000 | loss=2.411925e-01 | rho(K)=1.026197
Epoch  1200/3000 | loss=2.979660e-01 | rho(K)=1.026547
Epoch  1300/3000 | loss=2.179507e-01 | rho(K)=1.024914
Epoch  1400/3000 | loss=7.548174e-01 | rho(K)=1.022635
E

In [6]:
# ============================================================
# BOX 3/3 — Plotting + comparison + error maps + saving (NO TITLES)
#   Saves:
#     prediction.png, true.png, predvstruth.png, error.png
# ============================================================
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.tri as mtri

# ---------- where to save figures ----------
# NOTE(review): machine-specific absolute path; adjust it when running elsewhere.
FIG_DIR = Path(r"C:\Users\alexp\Documents\Bachelor Thesis\report_figures\results\Koopman")
FIG_DIR.mkdir(parents=True, exist_ok=True)

# ---------- build triangulation from coords_ref ----------
x_xy = coords_ref[:, 0].astype(float)
y_xy = coords_ref[:, 1].astype(float)
triang = mtri.Triangulation(x_xy, y_xy)

# Best effort: hide very flat triangles of the triangulation. A failure here
# is not fatal, but it is reported instead of being swallowed silently so
# that artefacts in the figures can be traced back to it.
try:
    analyzer = mtri.TriAnalyzer(triang)
    triang.set_mask(analyzer.get_flat_tri_mask(min_circle_ratio=0.02))
except Exception as exc:
    print(
        f"Warning: flat-triangle masking skipped ({type(exc).__name__}: {exc}); "
        "plotting the unmasked triangulation."
    )

def save_tricontour_field(vals, fname, cbar_label, vmin=None, vmax=None, dpi=250):
    """Draw a filled tricontour map of ``vals`` on ``triang`` and save it to ``FIG_DIR``.

    Args:
        vals: Field values, one per node of the module-level triangulation.
        fname: File name of the image, relative to ``FIG_DIR``.
        cbar_label: Label of the horizontal colorbar.
        vmin: Lower colour-scale limit, or None.
        vmax: Upper colour-scale limit, or None.
        dpi: Resolution of the saved image.

    When both limits are given (and ``vmax > vmin``) the 60 contour bands are
    laid out uniformly on ``[vmin, vmax]`` and values outside that interval are
    drawn with the colorbar extensions. Figures saved with the same limits
    therefore share identical levels and colorbars. Otherwise matplotlib
    chooses the levels from the data range, as before.
    """
    fig, ax = plt.subplots(figsize=(7.2, 5.8))

    if vmin is not None and vmax is not None and vmax > vmin:
        # Passing vmin/vmax alone only clamps the colormap; explicit levels
        # are needed for the contour bands themselves to follow the limits.
        levels = np.linspace(vmin, vmax, 61)
        cf = ax.tricontourf(triang, vals, levels=levels, vmin=vmin, vmax=vmax, extend="both")
    else:
        cf = ax.tricontourf(triang, vals, levels=60, vmin=vmin, vmax=vmax)

    ax.set_aspect("equal", adjustable="box")
    ax.set_xlabel("x")
    ax.set_ylabel("y")
    # no title

    cbar = fig.colorbar(cf, ax=ax, orientation="horizontal", pad=0.08, fraction=0.06)
    cbar.set_label(cbar_label)

    fig.tight_layout()
    outpath = FIG_DIR / fname
    fig.savefig(outpath, dpi=dpi, bbox_inches="tight")
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
    print("Saved:", outpath)

def save_scatter_true_vs_pred(true_vals, pred_vals, fname="predvstruth.png", dpi=250):
    """Save a scatter plot of predicted against true values with a y = x guide.

    Args:
        true_vals: Array of reference values (x axis).
        pred_vals: Array of predicted values (y axis).
        fname: File name of the image, relative to ``FIG_DIR``.
        dpi: Resolution of the saved image.
    """
    fig, ax = plt.subplots(figsize=(5, 5))
    ax.scatter(true_vals, pred_vals, s=2)

    # The dashed diagonal spans the combined range of both arrays.
    lower = float(min(true_vals.min(), pred_vals.min()))
    upper = float(max(true_vals.max(), pred_vals.max()))
    diagonal = [lower, upper]
    ax.plot(diagonal, diagonal, linestyle="--")

    ax.set_xlabel(f"{VAR_NAME}_true")
    ax.set_ylabel(f"{VAR_NAME}_pred")
    # no title
    ax.grid(True, alpha=0.3)

    fig.tight_layout()
    target = FIG_DIR / fname
    fig.savefig(target, dpi=dpi, bbox_inches="tight")
    plt.close(fig)
    print("Saved:", target)

# ---------- always save prediction ----------
# Colour limits are the 0.5th and 99.5th percentiles of the prediction, so a
# few extreme values do not dominate the colour scale.
# NOTE(review): figure/CSV writes and prints in this cell are not gated on
# `rank`, unlike the prints in the earlier cells.
vmin_pred = float(np.percentile(x_next_pred, 0.5))
vmax_pred = float(np.percentile(x_next_pred, 99.5))
save_tricontour_field(
    x_next_pred,
    fname="prediction.png",
    cbar_label=f"{VAR_NAME} (pred)",
    vmin=vmin_pred,
    vmax=vmax_pred,
)

# ---------- load TRUE (if exists), then save true + scatter + abs error ----------
path_true = field_csv_path(BASE_DIR, PHI, LAT_SIZE, t_next, POST)

if path_true.exists():
    coords_true_full, snap_true_full = read_field_sorted(path_true, VAR_NAME, SORT_COLS)

    # Full coordinate check first: the true field must live on the same
    # uncropped grid as the reference snapshot before the mask is reused.
    if coords_true_full.shape[0] != coords_ref_full.shape[0]:
        raise ValueError(
            f"True next-step has different full point count: {coords_true_full.shape[0]} vs {coords_ref_full.shape[0]}"
        )

    if COORD_TOL == 0.0:
        same = np.array_equal(coords_true_full, coords_ref_full)
    else:
        same = np.allclose(coords_true_full, coords_ref_full, atol=COORD_TOL, rtol=0.0)

    if not same:
        raise ValueError("True next-step coordinates do not match reference coordinates.")

    # Apply the same crop mask that was used to build X.
    snap_true = snap_true_full[mask_x].astype(np.float64)

    # metrics: signed/absolute point-wise error, RMSE and relative L2 error
    # (the 1e-12 keeps the ratio finite for an all-zero true field).
    err = x_next_pred - snap_true
    abs_err = np.abs(err)
    rmse = float(np.sqrt(np.mean(err**2)))
    rel_l2 = float(np.linalg.norm(err) / (np.linalg.norm(snap_true) + 1e-12))
    print(f"Next-step compare at t={t_next}: RMSE={rmse:.6e}, relL2={rel_l2:.6e}")

    # save error CSV (optional); `out_dir` comes from the forecast cell.
    out_err = pd.DataFrame(coords_ref, columns=SORT_COLS)
    out_err[f"{VAR_NAME}_true"] = snap_true
    out_err[f"{VAR_NAME}_pred"] = x_next_pred
    out_err["err"] = err
    out_err["abs_err"] = abs_err
    err_path = out_dir / f"deepkoopman_err_{VAR_NAME}_{t_next}_xgt{int(X_THESHOLD)}.csv"
    out_err.to_csv(err_path, index=False)
    print("Wrote:", err_path)

    # shared color scale for true/pred visuals
    # NOTE(review): prediction.png above was saved with the prediction-only
    # limits, not with these shared ones.
    vmin_shared = float(min(np.percentile(snap_true, 0.5), np.percentile(x_next_pred, 0.5)))
    vmax_shared = float(max(np.percentile(snap_true, 99.5), np.percentile(x_next_pred, 99.5)))

    save_tricontour_field(
        snap_true,
        fname="true.png",
        cbar_label=f"{VAR_NAME} (true)",
        vmin=vmin_shared,
        vmax=vmax_shared,
    )

    save_scatter_true_vs_pred(
        true_vals=snap_true,
        pred_vals=x_next_pred,
        fname="predvstruth.png",
    )

    # Upper limit is the 99th percentile of the absolute error; the tiny
    # offset keeps it strictly positive when the error is zero everywhere.
    vmax_abs = float(np.percentile(abs_err, 99.0)) + 1e-30
    save_tricontour_field(
        abs_err,
        fname="error.png",
        cbar_label="Absolute error",
        vmin=0.0,
        vmax=vmax_abs,
    )

else:
    print("True next-step file does not exist; only prediction.png was saved.")


Saved: C:\Users\alexp\Documents\Bachelor Thesis\report_figures\results\Koopman\prediction.png
Next-step compare at t=301: RMSE=4.825157e-01, relL2=1.170921e-01
Wrote: ..\isocontours\phi0.40\h400x025_ref\deepkoopman_err_T_301_xgt300.csv
Saved: C:\Users\alexp\Documents\Bachelor Thesis\report_figures\results\Koopman\true.png
Saved: C:\Users\alexp\Documents\Bachelor Thesis\report_figures\results\Koopman\predvstruth.png
Saved: C:\Users\alexp\Documents\Bachelor Thesis\report_figures\results\Koopman\error.png
