In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import griddata
import torch
import torch.nn as nn
import torch.autograd as autograd
import torch.optim as optim
import math
import random

In [2]:
nu = 5e-4            
rho = 1
U_inlet = 1.0

cyl_center = (0.5, 0.5)
cyl_radius = 0.05

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [3]:
import torch
import torch.nn as nn


class ResNetPINN(nn.Module):
    def __init__(self, layers=[3] + [128] * 20 + [3]):
        super().__init__()

        # Input layer
        self.input_layer = nn.Linear(layers[0], layers[1])

        # Hidden layers (residual blocks)
        self.hidden_layers = nn.ModuleList()
        for i in range(1, len(layers) - 2):
            self.hidden_layers.append(nn.Linear(layers[i], layers[i + 1]))

        # Output layer
        self.output_layer = nn.Linear(layers[-2], layers[-1])

        # Activation function
        self.activation = nn.Tanh()

        # Xavier initialization
        for m in self.modules():
            if isinstance(m, nn.Linear):
                nn.init.xavier_normal_(m.weight)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        # Input layer
        h = self.activation(self.input_layer(x))

        # Residual connections through hidden layers
        for layer in self.hidden_layers:
            h_in = h
            h = self.activation(layer(h))

            # Add residual connection (only if dimensions match)
            if h.shape == h_in.shape:
                h = h + h_in

        # Output layer (no activation)
        out = self.output_layer(h)

        return out  # [u, v, p]


In [4]:
data_dir = "/kaggle/input/cfd-flow-pass-a-cylinder-0-01"
t_start = 0
t_end = 501
dt = 0.01

xyt_list = []
uvp_list = []

for i in range(t_start, t_end):
    csv_path = os.path.join(data_dir, f"result_{i}.csv")
    df = pd.read_csv(csv_path)

    # timestep
    t_val = i * dt
    t_column = np.full_like(df["Points:0"].values, fill_value=t_val, dtype=np.float32)

    # (x, y, t)
    xyt = np.stack([
        df["Points:0"].values,
        df["Points:1"].values,
        t_column
    ], axis=1)  

    # (u, v, p)
    uvp = np.stack([
        df["u:0"].values,
        df["u:1"].values,
        df["p"].values
    ], axis=1)  

    xyt_list.append(xyt)
    uvp_list.append(uvp)

xyt_tensor = torch.tensor(np.concatenate(xyt_list, axis=0), dtype=torch.float32)
uvp_tensor = torch.tensor(np.concatenate(uvp_list, axis=0), dtype=torch.float32)

print(xyt_tensor.shape)
print(uvp_tensor.shape)

torch.Size([20806530, 3])
torch.Size([20806530, 3])


In [5]:
time_col = xyt_tensor[:, 2]

# Mask for t = 0
mask_ic = (time_col == 0.0)

# Apply mask
xyt_tensor_ic = xyt_tensor[mask_ic]
uvp_tensor_ic = uvp_tensor[mask_ic]

print(xyt_tensor_ic.shape)
print(uvp_tensor_ic.shape)

torch.Size([41530, 3])
torch.Size([41530, 3])


In [6]:
def sample_points(inputs,
                  outputs,
                  N_per_timestep,
                  seed=None,
                  device="cuda"):

    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    obstacle_center=(0.5, 0.5)
    obstacle_radius=0.05

    # Refinement bounds and percents
    refine_1 = ((0.35, 0.65), (0.35, 0.65))
    percent_1 = 45

    refine_2 = ((0.65, 2.5), (0.35, 0.65))
    percent_2 = 35

    percent_3 = 100 - (percent_1 + percent_2)

    # Convert to numpy
    pts_np = inputs.cpu().numpy()
    uvp_np = outputs.cpu().numpy()

    # Mask out obstacle region
    dx = pts_np[:, 0] - obstacle_center[0]
    dy = pts_np[:, 1] - obstacle_center[1]
    mask = dx**2 + dy**2 >= obstacle_radius**2
    pts_np = pts_np[mask]
    uvp_np = uvp_np[mask]

    # Timesteps
    timesteps = np.unique(pts_np[:, 2])
    timesteps = np.random.choice(timesteps, size=20, replace=False)

    pts_final_list, uvp_final_list = [], []

    for t in timesteps:
        mask_t = np.abs(pts_np[:, 2] - t) < 1e-12
        pts_t, uvp_t = pts_np[mask_t], uvp_np[mask_t]

        # --- region 1
        mask_r1 = (
            (pts_t[:, 0] >= refine_1[0][0]) & (pts_t[:, 0] <= refine_1[0][1]) &
            (pts_t[:, 1] >= refine_1[1][0]) & (pts_t[:, 1] <= refine_1[1][1])
        )
        pts_r1, uvp_r1 = pts_t[mask_r1], uvp_t[mask_r1]

        # --- region 2
        mask_r2 = (
            (pts_t[:, 0] >= refine_2[0][0]) & (pts_t[:, 0] <= refine_2[0][1]) &
            (pts_t[:, 1] >= refine_2[1][0]) & (pts_t[:, 1] <= refine_2[1][1])
        )
        pts_r2, uvp_r2 = pts_t[mask_r2], uvp_t[mask_r2]

        # --- region 3 = rest
        mask_r3 = ~(mask_r1 | mask_r2)
        pts_r3, uvp_r3 = pts_t[mask_r3], uvp_t[mask_r3]

        # How many per region
        N1 = int(N_per_timestep * percent_1 / 100.0)
        N2 = int(N_per_timestep * percent_2 / 100.0)
        N3 = N_per_timestep - N1 - N2

        def sample_region(pts_region, uvp_region, N):
            if len(pts_region) == 0:  # if no points, fallback to random global
                return np.empty((0, 3)), np.empty((0, 3))
            if len(pts_region) < N:
                idx = np.random.choice(len(pts_region), size=N, replace=True)
            else:
                idx = np.random.choice(len(pts_region), size=N, replace=False)
            return pts_region[idx], uvp_region[idx]

        # Sample each region
        sp1, uv1 = sample_region(pts_r1, uvp_r1, N1)
        sp2, uv2 = sample_region(pts_r2, uvp_r2, N2)
        sp3, uv3 = sample_region(pts_r3, uvp_r3, N3)

        pts_final_list.append(np.vstack([sp1, sp2, sp3]))
        uvp_final_list.append(np.vstack([uv1, uv2, uv3]))

    # Concatenate
    pts_final = np.vstack(pts_final_list)
    uvp_final = np.vstack(uvp_final_list)

    # Shuffle
    perm = np.random.permutation(len(pts_final))
    pts_final = pts_final[perm]
    uvp_final = uvp_final[perm]

    # Torch tensors
    points = torch.tensor(pts_final, dtype=torch.float32, device=device)
    uvp = torch.tensor(uvp_final, dtype=torch.float32, device=device)
    u = uvp[:, 0:1]
    v = uvp[:, 1:2]
    p = uvp[:, 2:3]

    return points, u, v, p

In [7]:
import torch
import numpy as np

def sample_collocation_points(N_per_timestep,
                                   seed=None,
                                   device="cuda"):


    total_time=5.0
    dt=0.01
    n_timesteps_per_iter=20

    
    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    # --- Domain and geometry ---
    domain_x, domain_y = (0, 2.5), (0, 1)
    obstacle_center = (0.5, 0.5)
    obstacle_radius2 = 0.05 ** 2

    # --- Refinement regions ---
    refine_1 = ((0.35, 0.65), (0.35, 0.65))
    refine_2 = ((0.65, 2.5), (0.35, 0.65))
    w1, w2 = 0.40, 0.35   # weights
    w3 = 1.0 - (w1 + w2)

    timesteps = np.arange(0, total_time + dt, dt)
    # chosen_timesteps = np.random.choice(timesteps, size=n_timesteps_per_iter, replace=False)
    chosen_timesteps = np.random.choice(timesteps, size=n_timesteps_per_iter, replace=False)

    # --- Vectorized sampling ---
    # Total number of samples overall
    total_pts = N_per_timestep * n_timesteps_per_iter

    # Assign which region each point belongs to
    regions = np.random.choice(3, size=total_pts, p=[w1, w2, w3])

    # Preallocate
    x = np.empty(total_pts, dtype=np.float32)
    y = np.empty(total_pts, dtype=np.float32)
    t = np.repeat(chosen_timesteps, N_per_timestep).astype(np.float32)

    # Region 1
    mask1 = regions == 0
    n1 = mask1.sum()
    x[mask1] = np.random.uniform(*refine_1[0], n1)
    y[mask1] = np.random.uniform(*refine_1[1], n1)

    # Region 2
    mask2 = regions == 1
    n2 = mask2.sum()
    x[mask2] = np.random.uniform(*refine_2[0], n2)
    y[mask2] = np.random.uniform(*refine_2[1], n2)

    # Region 3 (rest of domain)
    mask3 = regions == 2
    n3 = mask3.sum()
    x[mask3] = np.random.uniform(*domain_x, n3)
    y[mask3] = np.random.uniform(*domain_y, n3)

    # --- Remove obstacle points (vectorized) ---
    dx = x - obstacle_center[0]
    dy = y - obstacle_center[1]
    mask_keep = dx**2 + dy**2 >= obstacle_radius2

    x, y, t = x[mask_keep], y[mask_keep], t[mask_keep]

    # If too few points (after obstacle removal), resample fast (no loops)
    n_needed = total_pts - len(x)
    if n_needed > 0:
        xr = np.random.uniform(*refine_2[0], n_needed)
        yr = np.random.uniform(*refine_2[1], n_needed)
        tr = np.random.choice(chosen_timesteps, size=n_needed)
        dxr = xr - obstacle_center[0]
        dyr = yr - obstacle_center[1]
        keep = dxr**2 + dyr**2 >= obstacle_radius2
        x = np.concatenate([x, xr[keep]])
        y = np.concatenate([y, yr[keep]])
        t = np.concatenate([t, tr[keep]])
        x, y, t = x[:total_pts], y[:total_pts], t[:total_pts]

    pts_np = np.stack([x, y, t], axis=1)
    np.random.shuffle(pts_np)

    return torch.tensor(pts_np, dtype=torch.float32, device=device)


In [8]:
def sample_inlet(N_per_timestep,
                 seed=None,
                 device="cuda"):

    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    total_time = 5.0
    dt = 0.01
    y_bounds = (0, 1)
    x_in = 0.0

    timesteps = np.arange(0, total_time + dt, dt)  # all time steps

    all_points = []

    for t in timesteps:
        y = np.random.uniform(y_bounds[0], y_bounds[1], N_per_timestep)
        x = np.full(N_per_timestep, x_in)
        t_vals = np.full(N_per_timestep, t)
        pts_np = np.stack([x, y, t_vals], axis=1)
        all_points.append(pts_np)

    all_points = np.vstack(all_points)

    # Shuffle
    np.random.shuffle(all_points)

    # Convert to torch
    points = torch.tensor(all_points, dtype=torch.float32, device=device)

    return points


In [9]:
def sample_cylinder_surface(N_per_timestep,
                            seed=None,
                            device="cuda"):

    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    total_time = 5.0
    dt = 0.01
    cyl_center = (0.5, 0.5)
    cyl_radius = 0.05

    timesteps = np.arange(0, total_time + dt, dt)
    all_points = []
    cx, cy = cyl_center

    for t in timesteps:
        # Sample angles uniformly around the circle
        theta = np.random.uniform(0, 2*np.pi, N_per_timestep)
        x = cx + cyl_radius * np.cos(theta)
        y = cy + cyl_radius * np.sin(theta)
        t_vals = np.full(N_per_timestep, t)

        pts_np = np.stack([x, y, t_vals], axis=1)
        all_points.append(pts_np)

    all_points = np.vstack(all_points)

    # Shuffle
    np.random.shuffle(all_points)

    # Convert to torch
    points = torch.tensor(all_points, dtype=torch.float32, device=device)

    return points


In [10]:
def sample_top_bottom(N_per_timestep,
                      seed=None,
                      device="cpu"):

    if seed is not None:
        torch.manual_seed(seed)
        np.random.seed(seed)

    total_time = 5.0
    dt = 0.01
    x_bounds = (0, 2.5)
    y_top = 1.0
    y_bot = 0.0

    timesteps = np.arange(0, total_time + dt, dt)

    # Split evenly between top and bottom
    N_top = N_per_timestep // 2
    N_bot = N_per_timestep - N_top  # handle odd numbers

    all_points = []

    for t in timesteps:
        # Top wall
        x_top = np.random.uniform(x_bounds[0], x_bounds[1], N_top)
        y_top_arr = np.full(N_top, y_top)
        t_top = np.full(N_top, t)
        pts_top = np.stack([x_top, y_top_arr, t_top], axis=1)

        # Bottom wall
        x_bot = np.random.uniform(x_bounds[0], x_bounds[1], N_bot)
        y_bot_arr = np.full(N_bot, y_bot)
        t_bot = np.full(N_bot, t)
        pts_bot = np.stack([x_bot, y_bot_arr, t_bot], axis=1)

        all_points.append(pts_top)
        all_points.append(pts_bot)

    all_points = np.vstack(all_points)

    # Shuffle
    np.random.shuffle(all_points)

    # Convert to torch
    points = torch.tensor(all_points, dtype=torch.float32, device=device)

    return points


In [11]:
def sample_initial(N,
                   xyt_tensor_ic, 
                   uvp_tensor_ic,
                   device="cuda"):


    # Cylinder info
    cyl_center = (0.5, 0.5)
    cyl_radius = 0.05

    # Hardcoded bounds
    bound1 = ((0.35, 0.65), (0.35, 0.65))  # region 1 (around cylinder)
    bound2 = ((0.65, 2.5), (0.35, 0.65))   # region 2 (wake)
    
    # Hardcoded percentages
    perc_r1, perc_r2, perc_r3 = 0.5, 0.3, 0.2

    N_r1 = int(N * perc_r1)
    N_r2 = int(N * perc_r2)
    N_r3 = N - N_r1 - N_r2  # ensure exact total

    # Unpack coordinates
    x = xyt_tensor_ic[:, 0]
    y = xyt_tensor_ic[:, 1]

    # Mask out cylinder
    cx, cy = cyl_center
    dx, dy = x - cx, y - cy
    mask_cyl = (dx**2 + dy**2) >= cyl_radius**2
    xyt_valid = xyt_tensor_ic[mask_cyl]
    uvp_valid = uvp_tensor_ic[mask_cyl]

    # --- Region 1 (refinement box around cylinder) ---
    mask_r1 = ((xyt_valid[:, 0] >= bound1[0][0]) & (xyt_valid[:, 0] <= bound1[0][1]) &
               (xyt_valid[:, 1] >= bound1[1][0]) & (xyt_valid[:, 1] <= bound1[1][1]))

    # --- Region 2 (wake region) ---
    mask_r2 = ((xyt_valid[:, 0] >= bound2[0][0]) & (xyt_valid[:, 0] <= bound2[0][1]) &
               (xyt_valid[:, 1] >= bound2[1][0]) & (xyt_valid[:, 1] <= bound2[1][1]))

    # --- Region 3 = rest ---
    mask_r3 = ~(mask_r1 | mask_r2)

    # Helper: sample points from region
    def sample_region(mask, N):
        xyt_reg = xyt_valid[mask]
        uvp_reg = uvp_valid[mask]
        if len(xyt_reg) == 0:
            return None, None
        replace = xyt_reg.shape[0] < N
        idx = torch.randint(0, xyt_reg.shape[0], (N,), device="cpu", dtype=torch.long) if replace else \
                torch.randperm(xyt_reg.shape[0])[:N]

        return xyt_reg[idx].to(device), uvp_reg[idx].to(device)

    # Sample each region
    xyt_r1, uvp_r1 = sample_region(mask_r1, N_r1)
    xyt_r2, uvp_r2 = sample_region(mask_r2, N_r2)
    xyt_r3, uvp_r3 = sample_region(mask_r3, N_r3)

    # Concatenate
    sampled_xyt = torch.cat([t for t in [xyt_r1, xyt_r2, xyt_r3] if t is not None], dim=0)
    sampled_uvp = torch.cat([t for t in [uvp_r1, uvp_r2, uvp_r3] if t is not None], dim=0)

    return sampled_xyt, sampled_uvp


In [12]:
def compute_residuals(model, X):

    nu = 5e-4            
    rho = 1

    X = X.clone().detach().requires_grad_(True)
    out = model(X)
    u = out[:,0:1]
    v = out[:,1:2]
    p = out[:,2:3]

    grads_u = torch.autograd.grad(u, X, grad_outputs=torch.ones_like(u), create_graph=True)[0]
    u_x = grads_u[:,0:1]
    u_y = grads_u[:,1:2]
    u_t = grads_u[:,2:3]

    grads_v = torch.autograd.grad(v, X, grad_outputs=torch.ones_like(v), create_graph=True)[0]
    v_x = grads_v[:,0:1]
    v_y = grads_v[:,1:2]
    v_t = grads_v[:,2:3]

    grads_p = torch.autograd.grad(p, X, grad_outputs=torch.ones_like(p), create_graph=True)[0]
    p_x = grads_p[:,0:1]
    p_y = grads_p[:,1:2]

    # second derivatives
    grads_ux = torch.autograd.grad(grads_u, X, torch.ones_like(grads_u), create_graph=True)[0]
    grads_vx = torch.autograd.grad(grads_v, X, torch.ones_like(grads_v), create_graph=True)[0]
    u_xx, u_yy = grads_ux[:,0:1], grads_ux[:,1:2]
    v_xx, v_yy = grads_vx[:,0:1], grads_vx[:,1:2]

    # continuity
    cont = u_x + v_y

    # momentum equations
    mom_u = u_t + (u * u_x + v * u_y) + (1/rho) * p_x - nu * (u_xx + u_yy)
    mom_v = v_t + (u * v_x + v * v_y) + (1/rho) * p_y - nu * (v_xx + v_yy)


    return cont, mom_u, mom_v

In [13]:
def data_loss(model, pts, u, v, p, device):

    # Model prediction
    pred = model(pts)
    u_pred, v_pred, p_pred = pred[:,0:1], pred[:,1:2], pred[:,2:3]

    mse = nn.MSELoss()
    return mse(u_pred, u) + mse(v_pred, v) + mse(p_pred, p)


In [14]:
os.makedirs('models', exist_ok=True)
Nf, Nf_data, Nic, Nd, Nn, Ni = 40000, 10000, 50000, 60, 700, 150

In [15]:
def compute_residuals_in_batches(model, X, batch_size=10000):
    """
    Compute PDE residuals in mini-batches to avoid CUDA OOM.
    """
    cont_list, mu_list, mv_list = [], [], []
    n = X.size(0)

    for i in range(0, n, batch_size):
        X_batch = X[i:i+batch_size].clone().detach().requires_grad_(True)
        cont, mu, mv = compute_residuals(model, X_batch)

        cont_list.append(cont.detach())
        mu_list.append(mu.detach())
        mv_list.append(mv.detach())

        # free up memory
        del cont, mu, mv, X_batch
        torch.cuda.empty_cache()

    # concatenate results back
    return (torch.cat(cont_list, dim=0),
            torch.cat(mu_list, dim=0),
            torch.cat(mv_list, dim=0))

In [16]:
device = torch.device("cuda")
model = ResNetPINN().to(device)

In [17]:
import torch
import torch.optim as optim
import torch.nn as nn
import os
from torch.cuda.amp import autocast, GradScaler


mse_loss = nn.MSELoss()
mae_loss = nn.L1Loss()

def train_adam(model, Nf, Nf_data, Nic, Nd, Nn, Ni,
               num_iters=40000, print_every=2000, save_every=2000,
               λ_data=1.0, alpha_pde=1.0, seed=None, device="cuda",
               checkpoint_dir="models"):

    print("Start training loop")

    optimizer = optim.Adam(model.parameters(), lr=1e-3)
    scaler = GradScaler()

    start_iter = 0

    # --- Try to resume from the latest checkpoint ---
    latest_ckpt = None
    if os.path.exists(checkpoint_dir):
        ckpts = [f for f in os.listdir(checkpoint_dir) if f.startswith("pinn_checkpoint_")]
        if ckpts:
            latest_ckpt = max(ckpts, key=lambda f: int(f.split("_")[-1].split(".")[0]))

    if latest_ckpt:
        path = os.path.join(checkpoint_dir, latest_ckpt)
        print(f"Loading checkpoint from {path}...")
        checkpoint = torch.load(path, map_location=device)
        model.load_state_dict(checkpoint["model_state"])
        optimizer.load_state_dict(checkpoint["optimizer_state"])
        start_iter = checkpoint["iter"] + 1
        print(f"Resuming training from iteration {start_iter}")


    for it in range(start_iter, num_iters):
        optimizer.zero_grad()

        with autocast():

            # Physics residual loss
            X_f = sample_collocation_points(N_per_timestep=Nf,
                                              seed=None,
                                              device="cuda")
            
            X_f_data, u, v, p = sample_points(inputs=xyt_tensor,
                                              outputs=uvp_tensor,
                                              N_per_timestep=Nf_data,
                                              seed=None,
                                              device="cuda")
        
    
    
            cont, mu, mv = compute_residuals_in_batches(model, X_f, batch_size=10000)
            
            loss_f = (mae_loss(cont, torch.zeros_like(cont)) + mae_loss(mu, torch.zeros_like(mu)) + mae_loss(mv, torch.zeros_like(mv)))
    
    
            # Data loss
            loss_data = data_loss(model, X_f_data, u, v, p, device="cuda")
    
    
            # Initial condition
            X_ic, Y_ic = sample_initial(N=Nic,xyt_tensor_ic=xyt_tensor_ic, uvp_tensor_ic=uvp_tensor_ic, device="cuda")
    
            
            out_ic = model(X_ic)
            loss_ic = (mse_loss(out_ic[:, 0:1], Y_ic[:, 0:1]) + mse_loss(out_ic[:, 1:2], Y_ic[:, 1:2]) + mse_loss(out_ic[:, 2:3], Y_ic[:, 2:3]))
    
    
            # Inlet condition
            X_in = sample_inlet(N_per_timestep=Ni, seed=None, device="cuda")
    
            
            out_in = model(X_in)
            loss_in = (mse_loss(out_in[:, 0:1], torch.ones_like(out_in[:, 0:1])) + mse_loss(out_in[:, 1:2], torch.zeros_like(out_in[:, 1:2])))
    
    
            # Cylinder surface
            X_cyl = sample_cylinder_surface(N_per_timestep=Nd, seed=None, device="cuda")
    
            
            out_cyl = model(X_cyl)
            loss_cyl = (mse_loss(out_cyl[:, 0:1], torch.zeros_like(out_cyl[:, 0:1])) + mse_loss(out_cyl[:, 1:2], torch.zeros_like(out_cyl[:, 1:2])))
        
    
            # Top/bottom boundaries
            X_tb = sample_top_bottom(N_per_timestep=Nn, seed=None, device="cuda")
    
            out_tb = model(X_tb)
            loss_tb = (mse_loss(out_tb[:, 1:2], torch.zeros_like(out_tb[:, 1:2])))
    
    
            # Total loss
            loss = alpha_pde * loss_f + λ_data * loss_data + loss_ic + loss_in + loss_cyl + loss_tb


        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()

        torch.cuda.empty_cache()


        if it % print_every == 0:
            print(f"Adam Iter {it}/{num_iters} - Total loss {loss.item():.6f}\n"
                  f"Data {loss_data.item():.6f} | PDE {loss_f.item():.4e}\n"
                  f"IC {loss_ic.item():.4e}, Inlet {loss_in.item():.4e}\n"
                  f"Cylinder {loss_cyl.item():.4e}, TopBottom {loss_tb.item():.4e}")

         # --- Save checkpoint every save_every iterations ---
        if (it + 1) % save_every == 0:
            os.makedirs(checkpoint_dir, exist_ok=True)
            ckpt_path = os.path.join(checkpoint_dir, f"resnet_pinn_checkpoint_{it+1}.pth")
            torch.save({
                "iter": it,
                "model_state": model.state_dict(),
                "optimizer_state": optimizer.state_dict(),
                "scaler_state": scaler.state_dict()
            }, ckpt_path)
            print(f"Checkpoint saved → {ckpt_path}")

         
            

    print(f"Adam Iter {num_iters-1} - Total loss {loss.item():.6f}\n"
              f"Data {loss_data.item():.6f} | PDE {loss_f.item():.4e}\n"
              f"IC {loss_ic.item():.4e}, Inlet {loss_in.item():.4e}\n"
              f"Cylinder {loss_cyl.item():.4e}, TopBottom {loss_tb.item():.4e}")


    # Final model save
    os.makedirs(checkpoint_dir, exist_ok=True)
    final_path = os.path.join(checkpoint_dir, "resnet_pinn_ns_adam_final.pth")
    torch.save(model.state_dict(), final_path)
    print(f"Final model saved at {final_path}")


In [18]:
train_adam(model, Nf, Nf_data, Nic, Nd, Nn, Ni,
               num_iters=3000, print_every=10, save_every=100,
               λ_data=1.0, alpha_pde=1.0, seed=None, device="cuda",
               checkpoint_dir="models")

Start training loop


  scaler = GradScaler()
  with autocast():


Adam Iter 0/3000 - Total loss 136.875443
Data 44.054211 | PDE 1.2036e+01
IC 3.2296e+01, Inlet 1.7896e+01
Cylinder 2.2676e+01, TopBottom 7.9169e+00
Adam Iter 10/3000 - Total loss 642.663574
Data 184.005219 | PDE 4.6360e+01
IC 9.1406e+01, Inlet 1.7070e+02
Cylinder 1.2545e+02, TopBottom 2.4741e+01
Adam Iter 20/3000 - Total loss 8.557334
Data 2.311998 | PDE 2.6549e+00
IC 1.5267e+00, Inlet 1.4089e+00
Cylinder 3.6732e-01, TopBottom 2.8745e-01
Adam Iter 30/3000 - Total loss 5.392131
Data 1.523679 | PDE 1.6828e+00
IC 3.9847e-01, Inlet 4.4518e-01
Cylinder 7.5360e-01, TopBottom 5.8845e-01
Adam Iter 40/3000 - Total loss 3.175578
Data 0.739210 | PDE 1.6358e+00
IC 3.6271e-01, Inlet 1.9639e-01
Cylinder 1.1812e-01, TopBottom 1.2336e-01
Adam Iter 50/3000 - Total loss 2.281927
Data 0.511267 | PDE 1.2822e+00
IC 3.2813e-01, Inlet 6.4624e-02
Cylinder 7.3811e-02, TopBottom 2.1885e-02
Adam Iter 60/3000 - Total loss 2.363166
Data 0.402008 | PDE 1.6140e+00
IC 1.4531e-01, Inlet 2.2630e-02
Cylinder 1.6382e-01, 

KeyboardInterrupt: 