In [1]:
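# airfoil_model_torch.py
# PyTorch port of the TF2-compat (custom-BN) architecture.
# NOTE: the training code does `from airfoil_model_torch import AirfoilSurrogate`, so this
#       code must exist as airfoil_model_torch.py on the import path (in a notebook, e.g.
#       by starting this cell with `%%writefile airfoil_model_torch.py`).
# - Residual blocks: (BN -> LeakyReLU -> Conv) x 2 plus a skip path, with (2,1) downsample when requested
# - Depth schedule: 16 -> 32 -> 64 -> 128, with [2,2,2,2] blocks
# - Global average pool, Dense(128) + BN + LeakyReLU, Dense(2) + Sigmoid
# - The model's forward() expects (N, 1, n_points, 2); raw (N, n_points, 2) arrays must be
#   reshaped by the caller before being passed in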

import torch
import torch.nn as nn
import torch.nn.functional as F

def _same_padding_supported():
    """Report whether ``padding='same'`` may be passed to ``nn.Conv2d``.

    This is a constant stub: it performs no version check and always returns
    True, so call sites of the form ``'same' if _same_padding_supported() else 0``
    always select ``'same'``.

    NOTE(review): PyTorch documents ``padding='same'`` as unsupported for
    convolutions with stride != 1 -- confirm that strided call sites do not
    rely on this helper.
    """
    # PyTorch >= 1.10 supports padding='same' for Conv2d; we'll rely on it.
    return True

class BottleResBlock(nn.Module):
    """Pre-activation residual block: (BN -> LeakyReLU -> Conv) twice, plus a skip path.

    Args:
        in_ch: number of input channels.
        out_ch: number of output channels (used by both convolutions).
        kernel_size: (height, width) kernel of both convolutions; an int is
            expanded to a square kernel.
        downsample: when True the first convolution and the skip projection
            use stride (2, 1), halving the height (rounded up) and keeping the width.
        bn_momentum: momentum passed to both BatchNorm2d layers.

    Input is (N, in_ch, H, W); output is (N, out_ch, ceil(H/2), W) when
    ``downsample`` is True, otherwise (N, out_ch, H, W).
    """

    def __init__(self, in_ch, out_ch, kernel_size=(4,2), downsample=False, bn_momentum=0.1):
        super().__init__()
        if isinstance(kernel_size, int):
            kernel_size = (kernel_size, kernel_size)
        kernel_size = tuple(kernel_size)
        stride = (2,1) if downsample else (1,1)

        # Remembered so forward() can compute the explicit padding for conv1.
        self._kernel_size = kernel_size
        self._stride = stride
        self._downsample = bool(downsample)

        # BN -> LReLU -> Conv
        self.bn1 = nn.BatchNorm2d(in_ch, momentum=bn_momentum)
        self.act1 = nn.LeakyReLU(0.2, inplace=True)
        # PyTorch raises ValueError for padding='same' on a strided convolution,
        # so the strided variant is built unpadded and padded by hand in forward().
        self.conv1 = nn.Conv2d(in_ch, out_ch, kernel_size=kernel_size, stride=stride,
                               padding=0 if self._downsample else 'same', bias=False)

        # BN -> LReLU -> Conv
        self.bn2 = nn.BatchNorm2d(out_ch, momentum=bn_momentum)
        self.act2 = nn.LeakyReLU(0.2, inplace=True)
        self.conv2 = nn.Conv2d(out_ch, out_ch, kernel_size=kernel_size, stride=(1,1),
                               padding='same', bias=False)

        # Skip path: 1x1 if stride or channel change
        self.proj = None
        if downsample or in_ch != out_ch:
            self.proj = nn.Conv2d(in_ch, out_ch, kernel_size=1, stride=stride, bias=False)

    @staticmethod
    def _pad_same(x, kernel_size, stride):
        """Zero-pad the last two dims of ``x`` so a conv yields ceil(size / stride) outputs.

        Uses the TensorFlow 'SAME' convention: when the total padding is odd,
        the extra element goes on the bottom/right side.
        """
        pads = []
        # F.pad lists the last dimension first: (W_left, W_right, H_top, H_bottom).
        for size, k, s in zip(reversed(x.shape[-2:]), reversed(kernel_size), reversed(stride)):
            out_size = -(-size // s)  # ceil division
            total = max((out_size - 1) * s + k - size, 0)
            before = total // 2
            pads.extend((before, total - before))
        return F.pad(x, pads)

    def forward(self, x):
        """Apply the residual block to ``x`` of shape (N, in_ch, H, W)."""
        identity = x
        out = self.bn1(x)
        out = self.act1(out)
        if self._downsample:
            out = self._pad_same(out, self._kernel_size, self._stride)
        out = self.conv1(out)

        out = self.bn2(out)
        out = self.act2(out)
        out = self.conv2(out)

        if self.proj is not None:
            identity = self.proj(identity)

        return out + identity

class AirfoilSurrogate(nn.Module):
    def __init__(self, n_points=64, depth=16, bn_momentum=0.1):
        """
        Build the stem, four residual groups, tail normalisation and dense head.

        n_points: number of points along the airfoil curve (height dimension).
                  It is accepted for interface purposes but not used when
                  building layers.
        depth: channel count after the stem; groups use depth, 2x, 4x and 8x.
        bn_momentum: momentum given to every BatchNorm layer.
        Input shape during forward: (N, 1, n_points, 2)
        """
        super().__init__()
        kernel = (4, 2)
        same_pad = 'same' if _same_padding_supported() else 0

        def make_group(in_ch, out_ch, downsample):
            # Two residual blocks; only the first may change channels / stride.
            return nn.Sequential(
                BottleResBlock(in_ch, out_ch, kernel_size=kernel,
                               downsample=downsample, bn_momentum=bn_momentum),
                BottleResBlock(out_ch, out_ch, kernel_size=kernel,
                               downsample=False, bn_momentum=bn_momentum),
            )

        # Stem: single input channel -> depth channels
        self.stem = nn.Conv2d(1, depth, kernel_size=kernel, stride=1,
                              padding=same_pad, bias=False)

        # Residual groups [2,2,2,2]; every group but the first downsamples on entry
        self.group0 = make_group(depth, depth, False)
        self.group1 = make_group(depth, depth*2, True)
        self.group2 = make_group(depth*2, depth*4, True)
        self.group3 = make_group(depth*4, depth*8, True)

        # Tail BN + LeakyReLU applied to the last group's output
        self.tail_bn  = nn.BatchNorm2d(depth*8, momentum=bn_momentum)
        self.tail_act = nn.LeakyReLU(0.2, inplace=True)

        # Global average pool down to a (1,1) spatial map
        self.gap = nn.AdaptiveAvgPool2d((1,1))

        # Dense head: Linear(128) -> BN -> LeakyReLU -> Linear(2); sigmoid is applied in forward
        self.fc1 = nn.Linear(depth*8, 128, bias=False)
        self.fc1_bn = nn.BatchNorm1d(128, momentum=bn_momentum)
        self.fc1_act = nn.LeakyReLU(0.2, inplace=True)
        self.fc2 = nn.Linear(128, 2)

        self._init_weights()

    def _init_weights(self):
        """Re-initialise all conv/linear weights and reset every BatchNorm affine pair."""
        for module in self.modules():
            if isinstance(module, (nn.Conv2d, nn.Linear)):
                # Kaiming-normal, fan-in, with the LeakyReLU negative slope 0.2
                nn.init.kaiming_normal_(module.weight, a=0.2, mode='fan_in',
                                        nonlinearity='leaky_relu')
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            elif isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
                nn.init.ones_(module.weight)
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """
        x: (N, 1, n_points, 2)  # channel-first
        returns: (N, 2) in [0,1] via sigmoid
        """
        feats = self.stem(x)  # -> (N, depth, H, W)
        for group in (self.group0, self.group1, self.group2, self.group3):
            feats = group(feats)

        feats = self.tail_act(self.tail_bn(feats))

        # (N, C, 1, 1) -> (N, C)
        pooled = torch.flatten(self.gap(feats), 1)

        hidden = self.fc1_act(self.fc1_bn(self.fc1(pooled)))
        return torch.sigmoid(self.fc2(hidden))


In [2]:
# train_airfoil_surrogate_torch.py
# Training script that mirrors your TF setup, using num_workers=2 and GPU.
import os
import sys
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader, random_split
from torch.utils.tensorboard import SummaryWriter

from airfoil_model_torch import AirfoilSurrogate

# ===================== HYPERPARAMETERS =====================
# Run configuration handed to main(); keys are grouped by concern.
HYPERPARAMS = dict(
    # Paths (Kaggle-style)
    x_path=r"/kaggle/input/data-mpi/xs_train_MPI.npy",
    y_path=r"/kaggle/input/data-mpi-y/ys_train_MPI.npy",
    test_x_path=r"/kaggle/input/test-mpi/xs_test_MPI.npy",
    test_y_path=r"/kaggle/input/test-mpi-y/ys_test_MPI.npy",

    # Geometry
    n_points=192,          # must match X.shape[1]

    # Train/val
    use_separate_test=True,
    val_split=0.2,         # only consulted when use_separate_test is False
    seed=42,

    # Optimization
    steps=10000,
    batch_size=256,
    lr=1e-4,
    beta1=0.5,             # Adam beta1 ~ TF
    weight_decay=1e-4,     # L2 on weights (BN params and biases are excluded in main)
    save_interval=0,       # not read by main() in this file

    # Output
    outdir="./runs/airfoil_run_torch",

    # DataLoader
    num_workers=2,         # as requested
    pin_memory=True,
)
# ===========================================================

def load_array(path):
    """Load a single array from a ``.npy`` or ``.npz`` file.

    Args:
        path: file location as ``str`` or any ``os.PathLike`` (e.g. ``pathlib.Path``).

    Returns:
        For ``.npy``: a read-only memory-mapped array (``mmap_mode="r"``).
        For ``.npz``: the in-memory array stored under the first of the keys
        ``"X"``, ``"Y"``, ``"arr_0"`` that is present.

    Raises:
        ValueError: if the extension is neither ``.npy`` nor ``.npz``, or the
            ``.npz`` archive contains none of the accepted keys.
    """
    # Accept pathlib.Path as well as str; str inputs pass through unchanged.
    path = os.fspath(path)
    if path.endswith(".npy"):
        return np.load(path, mmap_mode="r")
    if path.endswith(".npz"):
        # NpzFile holds an open file handle; the context manager closes it.
        # Indexing materialises the array, so it stays valid after the close.
        with np.load(path) as data:
            for key in ("X", "Y", "arr_0"):
                if key in data:
                    return data[key]
        raise ValueError(f"No suitable key in {path}: expected 'X'/'Y'")
    raise ValueError(f"Unsupported format for {path}. Use .npy or .npz")

def ensure_dir(path):
    """Create directory ``path`` (and any missing parents); a no-op if it already exists."""
    os.makedirs(name=path, exist_ok=True)

def preprocess_numpy_to_torch(X):
    """
    Convert a coordinate array into a channel-first float32 tensor.

    Input X: array of shape (N, n_points, 2); any dtype castable to float32.
    Returns: torch tensor of dtype float32 shaped (N, 1, n_points, 2) [NCHW].
             When X is already float32 no copy is made, so the tensor shares
             memory with X.
    Raises: ValueError if X is not 3-D or its last dimension is not 2.
    """
    has_expected_layout = X.ndim == 3 and X.shape[2] == 2
    if not has_expected_layout:
        raise ValueError(f"X must be (N, n_points, 2); got {X.shape}")
    as_float32 = X.astype(np.float32, copy=False)
    # Insert the singleton channel axis: (N, n_points, 2) -> (N, 1, n_points, 2)
    with_channel = as_float32[:, np.newaxis]
    return torch.from_numpy(with_channel)

def main(hp):
    torch.manual_seed(hp["seed"])
    np.random.seed(hp["seed"])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if torch.cuda.is_available():
        # Optional: select GPU via environment CUDA_VISIBLE_DEVICES externally
        torch.backends.cudnn.benchmark = True

    ensure_dir(hp["outdir"])
    writer = SummaryWriter(log_dir=os.path.join(hp["outdir"], "logs"))

    # ---------- Load data ----------
    X = load_array(hp["x_path"])
    Y = load_array(hp["y_path"])
    if X.shape[1] != hp["n_points"]:
        raise ValueError(f"n_points ({hp['n_points']}) must match X.shape[1] ({X.shape[1]})")

    if hp["use_separate_test"]:
        X_train_np, Y_train_np = X, Y
        X_test_np  = load_array(hp["test_x_path"])
        Y_test_np  = load_array(hp["test_y_path"])
        if X_test_np.shape[1:] != X.shape[1:]:
            raise ValueError(f"Test X shape {X_test_np.shape} incompatible with train X {X.shape}")
    else:
        # simple split
        N = X.shape[0]
        idx = np.arange(N)
        rng = np.random.RandomState(hp["seed"])
        rng.shuffle(idx)
        split = int(N * (1.0 - hp["val_split"]))
        train_idx, test_idx = idx[:split], idx[split:]
        X_train_np, Y_train_np = X[train_idx], Y[train_idx]
        X_test_np,  Y_test_np  = X[test_idx],  Y[test_idx]

    # ---------- Numpy -> Torch ----------
    X_train = preprocess_numpy_to_torch(np.asarray(X_train_np))
    X_test  = preprocess_numpy_to_torch(np.asarray(X_test_np))
    Y_train = torch.from_numpy(np.asarray(Y_train_np, dtype=np.float32))
    Y_test  = torch.from_numpy(np.asarray(Y_test_np,  dtype=np.float32))

    train_ds = TensorDataset(X_train, Y_train)
    test_ds  = TensorDataset(X_test,  Y_test)

    train_loader = DataLoader(
        train_ds, batch_size=hp["batch_size"], shuffle=True,
        num_workers=hp["num_workers"], pin_memory=hp["pin_memory"], drop_last=False
    )
    test_loader = DataLoader(
        test_ds, batch_size=hp["batch_size"], shuffle=False,
        num_workers=hp["num_workers"], pin_memory=hp["pin_memory"], drop_last=False
    )

    # ---------- Model ----------
    model = AirfoilSurrogate(n_points=hp["n_points"], depth=16, bn_momentum=0.1).to(device)

    # L1 loss to mirror TF reduce_mean(|y_true - y_pred|)
    criterion = nn.L1Loss()

    # Weight decay (L2) on conv/linear weights only (not BN, not biases)
    decay, no_decay = [], []
    for name, p in model.named_parameters():
        if not p.requires_grad:
            continue
        if any(nd in name for nd in ["bn", "bias"]):
            no_decay.append(p)
        else:
            decay.append(p)
    optim = torch.optim.Adam(
        [{"params": decay, "weight_decay": hp["weight_decay"]},
         {"params": no_decay, "weight_decay": 0.0}],
        lr=hp["lr"], betas=(hp["beta1"], 0.999)
    )

    # ---------- Training loop (by steps, like TF) ----------
    global_step = 0
    model.train()
    print("Starting training...")
    while global_step < hp["steps"]:
        for xb, yb in train_loader:
            xb = xb.to(device, non_blocking=True)
            yb = yb.to(device, non_blocking=True)

            pred = model(xb)             # (B, 2) in [0,1]
            loss = criterion(pred, yb)

            optim.zero_grad(set_to_none=True)
            loss.backward()
            optim.step()

            # quick eval on test set occasionally (cheap L1)
            if global_step % 100 == 0:
                model.eval()
                with torch.no_grad():
                    test_losses = []
                    for xbt, ybt in test_loader:
                        xbt = xbt.to(device, non_blocking=True)
                        ybt = ybt.to(device, non_blocking=True)
                        predt = model(xbt)
                        test_losses.append(criterion(predt, ybt).item())
                    test_loss = float(np.mean(test_losses)) if test_losses else float("nan")
                model.train()

                print(f"{global_step}: train {loss.item():.6f}  test {test_loss:.6f}")
                writer.add_scalar("loss/train", loss.item(), global_step)
                writer.add_scalar("loss/test",  test_loss,  global_step)

            global_step += 1
            if global_step >= hp["steps"]:
                break

    # ---------- Save checkpoint ----------
    ckpt_path = os.path.join(hp["outdir"], "model.pt")
    torch.save({
        "model_state_dict": model.state_dict(),
        "n_points": hp["n_points"],
        "depth": 16,
    }, ckpt_path)
    print(f"Model saved to: {ckpt_path}")
    writer.close()

    # Sanity prediction
    model.eval()
    with torch.no_grad():
        xsm = X_test[:min(8, len(X_test))].to(device)
        pred = model(xsm).cpu().numpy()
        print("Sample predictions (first 5):")
        print(pred[:5])

# Run training with the module-level HYPERPARAMS when executed as a script
# (or as a notebook cell, where __name__ is also "__main__").
if __name__ == "__main__":
    main(HYPERPARAMS)


2025-10-18 16:25:25.598640: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-10-18 16:25:25.632230: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2025-10-18 16:25:25.877682: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


ModuleNotFoundError: No module named 'airfoil_model_torch'