# Navier-Stokes AI Model Training (GPU)

**AI-HPC Hybrid Project**

## For Kaggle:
1. Add your dataset so that it is mounted at `/kaggle/input/hybrid-ai-hpc/` (it must contain `train_data.npz` and `val_data.npz`)
2. Enable GPU: Settings → Accelerator → GPU
3. Run all cells

**Estimated time: 30-45 minutes for 50 epochs**

In [None]:
# Check GPU availability
import torch
# Report the installed PyTorch build and, when CUDA is usable, the name and
# total memory (in GB, base 1e9) of device 0.
cuda_ready = torch.cuda.is_available()
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {cuda_ready}")
if cuda_ready:
    gpu_props = torch.cuda.get_device_properties(0)
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"Memory: {gpu_props.total_memory / 1e9:.1f} GB")

In [None]:
# ============================================
# CONFIGURATION - CHANGE PATHS HERE FOR KAGGLE
# ============================================

# For Kaggle: point these at the .npz files inside your attached dataset.
TRAIN_DATA_PATH = '/kaggle/input/hybrid-ai-hpc/train_data.npz'
VAL_DATA_PATH = '/kaggle/input/hybrid-ai-hpc/val_data.npz'

# For Colab: upload the files and use local paths instead.
# TRAIN_DATA_PATH = 'train_data.npz'
# VAL_DATA_PATH = 'val_data.npz'

# Training hyper-parameters read by the cells below.
CONFIG = dict(
    epochs=50,                 # number of passes of the training loop
    batch_size=8,              # samples per DataLoader batch
    learning_rate=1e-3,        # initial Adam learning rate
    seq_len=5,                 # input frames per dataset sample
    pred_len=10,               # target frames per dataset sample
    hidden_dims=[64, 64, 64],  # channels of each ConvLSTM layer
)

# Echo the effective settings so they are recorded in the notebook output.
print(
    "Data paths:",
    f"  Train: {TRAIN_DATA_PATH}",
    f"  Val: {VAL_DATA_PATH}",
    sep="\n",
)
print("\nConfiguration:")
print(*(f"  {key}: {value}" for key, value in CONFIG.items()), sep="\n")

## 1. Define Models

In [None]:
import torch
import torch.nn as nn
import numpy as np
from torch.utils.data import Dataset, DataLoader
import matplotlib.pyplot as plt
import os

class ConvLSTMCell(nn.Module):
    """Single convolutional LSTM cell.

    One convolution over the channel-wise concatenation of the input and the
    previous hidden state produces the pre-activations of all four gates.
    """

    def __init__(self, input_dim, hidden_dim, kernel_size=3):
        """Create the gate convolution.

        Args:
            input_dim: Number of channels of the cell input.
            hidden_dim: Number of channels of the hidden and cell states.
            kernel_size: Size of the square convolution kernel.
        """
        super().__init__()
        self.hidden_dim = hidden_dim
        # Padding of kernel_size // 2 keeps height/width unchanged for odd kernels.
        self.conv = nn.Conv2d(
            in_channels=input_dim + hidden_dim,
            out_channels=4 * hidden_dim,
            kernel_size=kernel_size,
            padding=kernel_size // 2,
            bias=True,
        )

    def forward(self, x, hidden_state):
        """Advance the cell by one step.

        Args:
            x: Input tensor; concatenated with the hidden state along dim 1.
            hidden_state: Tuple ``(h, c)`` of the previous hidden and cell states.

        Returns:
            ``(h_next, (h_next, c_next))`` - the new hidden state, followed by
            the updated state tuple to feed into the next call.
        """
        prev_h, prev_c = hidden_state
        stacked = torch.cat([x, prev_h], dim=1)

        # The convolution output holds the four gates back to back along the
        # channel axis, in the order: input, forget, output, candidate.
        in_pre, forget_pre, out_pre, cand_pre = self.conv(stacked).chunk(4, dim=1)
        in_gate = torch.sigmoid(in_pre)
        forget_gate = torch.sigmoid(forget_pre)
        out_gate = torch.sigmoid(out_pre)
        candidate = torch.tanh(cand_pre)

        c_next = forget_gate * prev_c + in_gate * candidate
        h_next = out_gate * torch.tanh(c_next)
        return h_next, (h_next, c_next)

    def init_hidden(self, batch_size, height, width, device):
        """Return an all-zero ``(h, c)`` state tuple on ``device``.

        Both tensors have shape ``(batch_size, hidden_dim, height, width)``.
        """
        state_shape = (batch_size, self.hidden_dim, height, width)
        return (
            torch.zeros(state_shape, device=device),
            torch.zeros(state_shape, device=device),
        )


class ConvLSTM(nn.Module):
    """Multi-layer ConvLSTM for fluid flow prediction.

    A 3x3 convolution lifts the input frame to ``hidden_dims[0]`` channels, a
    stack of ``ConvLSTMCell`` layers processes it, and a 1x1 convolution maps
    the last layer's hidden state back to ``input_dim`` channels. Each
    predicted frame is fed back as the next input (autoregressive rollout).
    """

    def __init__(self, input_dim=3, hidden_dims=None, kernel_size=3):
        """Build the input projection, the cell stack and the output projection.

        Args:
            input_dim: Number of channels of an input (and output) frame.
            hidden_dims: Hidden channel count of each ConvLSTM layer; defaults
                to ``[64, 64, 64]`` when omitted.
            kernel_size: Kernel size passed to every ``ConvLSTMCell``.

        Raises:
            ValueError: If ``hidden_dims`` is empty.
        """
        super().__init__()
        # A None sentinel avoids sharing one mutable default list between
        # instances; the copy decouples the model from the caller's list.
        hidden_dims = [64, 64, 64] if hidden_dims is None else list(hidden_dims)
        if not hidden_dims:
            raise ValueError("hidden_dims must contain at least one layer size")

        self.input_dim = input_dim
        self.hidden_dims = hidden_dims
        self.num_layers = len(hidden_dims)

        self.input_conv = nn.Conv2d(input_dim, hidden_dims[0], 3, padding=1)

        # Layer 0 consumes the output of input_conv (hidden_dims[0] channels);
        # every later layer consumes the hidden state of the layer before it,
        # so its input width is the previous layer's hidden size. With equal
        # layer sizes this yields exactly the same parameter shapes as before.
        layer_inputs = [hidden_dims[0]] + hidden_dims[:-1]
        self.cells = nn.ModuleList(
            ConvLSTMCell(in_dim, out_dim, kernel_size)
            for in_dim, out_dim in zip(layer_inputs, hidden_dims)
        )

        self.output_conv = nn.Conv2d(hidden_dims[-1], input_dim, 1)

    def forward(self, x, future_steps=1, hidden_state=None):
        """Roll the model forward ``future_steps`` frames starting from ``x``.

        Args:
            x: Initial frame, a 4-D tensor ``(batch, channels, height, width)``.
            future_steps: Number of frames to predict.
            hidden_state: Optional list with one ``(h, c)`` tuple per layer.
                When omitted, all states start at zero. When a list is passed
                it is updated in place, so after the call it holds the final
                state of every layer.

        Returns:
            The predicted frames stacked along dim 1, i.e. a tensor of shape
            ``(batch, future_steps, input_dim, height, width)``.
        """
        batch_size, _, height, width = x.shape
        device = x.device

        if hidden_state is None:
            hidden_state = [cell.init_hidden(batch_size, height, width, device)
                            for cell in self.cells]

        outputs = []
        current_input = x

        for _ in range(future_steps):
            h = self.input_conv(current_input)

            for i, cell in enumerate(self.cells):
                h, hidden_state[i] = cell(h, hidden_state[i])

            output = self.output_conv(h)
            outputs.append(output)
            # Autoregressive rollout: the prediction becomes the next input.
            current_input = output

        return torch.stack(outputs, dim=1)


print("Models defined!")

## 2. Dataset

In [None]:
class NumpyDataset(Dataset):
    """Sliding-window dataset over a ``.npz`` file holding ``u``, ``v`` and ``p``.

    The three arrays are stacked along a new axis 1 and cast to float32.
    Sample ``idx`` is the pair ``(frames[idx:idx + seq_len],
    frames[idx + seq_len:idx + seq_len + pred_len])``.
    """

    def __init__(self, data_path, seq_len=5, pred_len=10):
        """Load the arrays from ``data_path`` and compute the number of windows.

        Args:
            data_path: Path of the ``.npz`` archive with keys ``u``, ``v``, ``p``.
            seq_len: Number of frames in each input window.
            pred_len: Number of frames in each target window.
        """
        self.seq_len = seq_len
        self.pred_len = pred_len

        # np.load keeps the archive's file handle open; the context manager
        # closes it once the arrays have been read into memory.
        with np.load(data_path) as data:
            fields = [data[key] for key in ('u', 'v', 'p')]

        self.data = np.stack(fields, axis=1).astype(np.float32)
        # At least one sample is always reported. If the file holds fewer than
        # seq_len + pred_len frames, that single sample is shorter than requested.
        self.num_sequences = max(1, len(self.data) - seq_len - pred_len + 1)

        print(f"Loaded {data_path}: {self.data.shape}")

    def __len__(self):
        """Return the number of (input, target) windows."""
        return self.num_sequences

    def __getitem__(self, idx):
        """Return the ``(input_seq, target_seq)`` tensors of window ``idx``.

        Negative indices count from the end, as for sequences.

        Raises:
            IndexError: If ``idx`` is outside ``[-len(self), len(self))``.
        """
        if idx < 0:
            idx += self.num_sequences
        # Raising IndexError (instead of returning truncated or empty slices)
        # is what lets plain iteration over the dataset terminate.
        if not 0 <= idx < self.num_sequences:
            raise IndexError(
                f"index out of range for dataset with {self.num_sequences} sequences"
            )

        start = idx
        mid = start + self.seq_len
        end = mid + self.pred_len

        input_seq = self.data[start:mid]
        target_seq = self.data[mid:end]

        return torch.from_numpy(input_seq), torch.from_numpy(target_seq)


print("Dataset class defined!")

## 3. Load Data

In [None]:
# Check if files exist
import os

# Report, for each configured dataset file, whether it exists on disk.
print("Checking data files...")
for data_path in (TRAIN_DATA_PATH, VAL_DATA_PATH):
    if not os.path.exists(data_path):
        print(f"  NOT FOUND: {data_path}")
        print("  Please check your dataset path!")
        continue
    print(f"  Found: {data_path}")

In [None]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f"Using device: {device}")

# Build both datasets from the module-level data paths, using the window
# lengths stored in CONFIG.
window_args = (CONFIG['seq_len'], CONFIG['pred_len'])
train_dataset = NumpyDataset(TRAIN_DATA_PATH, *window_args)
val_dataset = NumpyDataset(VAL_DATA_PATH, *window_args)

# Only the training loader shuffles; both share batch size and worker count.
loader_kwargs = {'batch_size': CONFIG['batch_size'], 'num_workers': 2}
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

print(f"\nTrain batches: {len(train_loader)}")
print(f"Val batches: {len(val_loader)}")

## 4. Training

In [None]:
# Build the network on the selected device and report its parameter count.
model = ConvLSTM(input_dim=3, hidden_dims=CONFIG['hidden_dims'])
model = model.to(device)
num_params = sum(param.numel() for param in model.parameters())
print(f"Model parameters: {num_params:,}")

# Training setup: MSE loss, Adam, and a scheduler that multiplies the learning
# rate by 0.5 when the monitored value stops improving (patience=5).
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, factor=0.5, patience=5)

In [None]:
def train_epoch(model, loader, optimizer, criterion, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    For every ``(inputs, targets)`` batch the model is rolled out from the last
    input frame (``inputs[:, -1]``) for ``targets.shape[1]`` steps. The loss is
    back-propagated, gradients are clipped to a total norm of 1.0, and the
    optimizer takes one step.

    Args:
        model: Module called as ``model(initial_state, future_steps=...)``.
        loader: Iterable of ``(inputs, targets)`` tensor batches. It does not
            need to define ``__len__``; batches are counted while iterating.
        optimizer: Optimizer over the model's parameters.
        criterion: Loss callable taking ``(predictions, targets)``.
        device: Device the batches are moved to.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0

    for inputs, targets in loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Only the most recent input frame seeds the rollout.
        initial_state = inputs[:, -1]
        pred_len = targets.shape[1]

        optimizer.zero_grad()
        predictions = model(initial_state, future_steps=pred_len)
        loss = criterion(predictions, targets)
        loss.backward()

        # Clip before stepping so the clipped gradients are the ones applied.
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
        optimizer.step()

        total_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        raise ValueError("train_epoch received a loader that yielded no batches")
    return total_loss / num_batches


def validate(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` and return the mean batch loss.

    The model is put in eval mode and run with gradient tracking disabled. For
    every ``(inputs, targets)`` batch it is rolled out from the last input
    frame (``inputs[:, -1]``) for ``targets.shape[1]`` steps.

    Args:
        model: Module called as ``model(initial_state, future_steps=...)``.
        loader: Iterable of ``(inputs, targets)`` tensor batches. It does not
            need to define ``__len__``; batches are counted while iterating.
        criterion: Loss callable taking ``(predictions, targets)``.
        device: Device the batches are moved to.

    Returns:
        float: Sum of the per-batch losses divided by the number of batches.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0

    with torch.no_grad():
        for inputs, targets in loader:
            inputs = inputs.to(device)
            targets = targets.to(device)

            # Only the most recent input frame seeds the rollout.
            initial_state = inputs[:, -1]
            pred_len = targets.shape[1]

            predictions = model(initial_state, future_steps=pred_len)
            loss = criterion(predictions, targets)
            total_loss += loss.item()
            num_batches += 1

    if num_batches == 0:
        raise ValueError("validate received a loader that yielded no batches")
    return total_loss / num_batches

In [None]:
# Training loop
import time

# Per-epoch loss history and the best validation loss seen so far.
train_losses, val_losses = [], []
best_val_loss = float('inf')

banner = "=" * 60
print("\n" + banner)
print("Starting Training")
print(banner)

start_time = time.time()
num_epochs = CONFIG['epochs']

for epoch in range(1, num_epochs + 1):
    epoch_start = time.time()

    train_loss = train_epoch(model, train_loader, optimizer, criterion, device)
    val_loss = validate(model, val_loader, criterion, device)
    train_losses.append(train_loss)
    val_losses.append(val_loss)

    # The scheduler is stepped before the learning rate is read, so the logged
    # value already reflects any reduction triggered by this epoch.
    scheduler.step(val_loss)
    current_lr = optimizer.param_groups[0]['lr']

    epoch_time = time.time() - epoch_start

    log_line = f"Epoch {epoch:3d}/{num_epochs} | Train: {train_loss:.6f} | Val: {val_loss:.6f} | LR: {current_lr:.2e} | Time: {epoch_time:.1f}s"

    if val_loss < best_val_loss:
        # New best validation loss: checkpoint the model and optimizer state.
        best_val_loss = val_loss
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'val_loss': val_loss,
        }
        torch.save(checkpoint, 'best_model.pth')
        log_line += " | * BEST *"

    print(log_line)

total_time = time.time() - start_time
print("\n" + banner)
print("Training Complete!")
print(f"Total time: {total_time/60:.1f} minutes")
print(f"Best val loss: {best_val_loss:.6f}")
print(banner)

In [None]:
# Persist the per-epoch loss curves as arrays named 'train_loss' and 'val_loss'.
history = {'train_loss': train_losses, 'val_loss': val_losses}
np.savez('history.npz', **history)
print("Training history saved!")

## 5. Visualization

In [None]:
# Plot the loss curves twice: linear y-axis on the left, log y-axis on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))

epochs = range(1, len(train_losses) + 1)

# (axis, y-label, title, y-scale or None to keep the default linear scale)
panels = (
    (ax1, 'MSE Loss', 'Training History', None),
    (ax2, 'MSE Loss (log)', 'Training History (Log Scale)', 'log'),
)
for axis, y_label, title, y_scale in panels:
    axis.plot(epochs, train_losses, 'b-', label='Training', linewidth=2)
    axis.plot(epochs, val_losses, 'r-', label='Validation', linewidth=2)
    axis.set_xlabel('Epoch')
    axis.set_ylabel(y_label)
    axis.set_title(title)
    if y_scale is not None:
        axis.set_yscale(y_scale)
    axis.legend()
    axis.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig('training_history.png', dpi=150)
plt.show()

In [None]:
# Prediction visualization: roll the model out from the first validation
# sample and compare one predicted step against the ground truth, per field.
model.eval()

inputs, targets = val_dataset[0]
inputs = inputs.unsqueeze(0).to(device)
targets = targets.numpy()

with torch.no_grad():
    initial_state = inputs[:, -1]
    # Predict exactly as many steps as the target window holds, so that
    # predictions and targets stay aligned whatever CONFIG['pred_len'] is
    # (the metrics cell below subtracts them element-wise).
    predictions = model(initial_state, future_steps=targets.shape[0])
    predictions = predictions[0].cpu().numpy()

# Show the 5th predicted step when it exists; clamp for shorter horizons.
step = min(4, targets.shape[0] - 1)
fig, axes = plt.subplots(3, 3, figsize=(14, 12))

fields = ['u-velocity', 'v-velocity', 'pressure']
for i, name in enumerate(fields):
    pred = predictions[step, i]
    true = targets[step, i]
    diff = pred - true

    # Truth and prediction share the colour range of the ground truth so the
    # two panels are directly comparable.
    vmin, vmax = true.min(), true.max()

    im1 = axes[i, 0].imshow(true, cmap='RdBu_r', vmin=vmin, vmax=vmax)
    axes[i, 0].set_title(f'{name} (Ground Truth)')
    plt.colorbar(im1, ax=axes[i, 0])

    im2 = axes[i, 1].imshow(pred, cmap='RdBu_r', vmin=vmin, vmax=vmax)
    axes[i, 1].set_title(f'{name} (AI Prediction)')
    plt.colorbar(im2, ax=axes[i, 1])

    # Symmetric range keeps zero error at the centre of the diverging colormap.
    diff_max = max(abs(diff.min()), abs(diff.max()))
    im3 = axes[i, 2].imshow(diff, cmap='RdBu_r', vmin=-diff_max, vmax=diff_max)
    axes[i, 2].set_title(f'{name} (Error)')
    plt.colorbar(im3, ax=axes[i, 2])

plt.suptitle(f'AI vs Ground Truth (Step {step+1})', fontsize=14)
plt.tight_layout()
plt.savefig('field_comparison.png', dpi=150)
plt.show()

In [None]:
# Final metrics: RMSE per field (channels 0, 1, 2 = u, v, p) between the
# `predictions` and `targets` arrays, plus the root of the summed squared RMSEs.
rmse_u, rmse_v, rmse_p = (
    np.sqrt(np.mean((predictions[:, channel] - targets[:, channel])**2))
    for channel in range(3)
)
combined_rmse = np.sqrt(rmse_u**2 + rmse_v**2 + rmse_p**2)

rule = "=" * 50
print("\n" + rule)
print("FINAL METRICS")
print(rule)
print(f"u-velocity RMSE: {rmse_u:.6f}")
print(f"v-velocity RMSE: {rmse_v:.6f}")
print(f"pressure RMSE:   {rmse_p:.6f}")
print(f"\nCombined RMSE:   {combined_rmse:.6f}")
print(rule)

# Map the combined error onto a coarse quality label.
verdict = "Rating: NEEDS IMPROVEMENT"
if combined_rmse < 0.05:
    verdict = "Rating: EXCELLENT"
elif combined_rmse < 0.1:
    verdict = "Rating: GOOD"
print(verdict)

## 6. Save Outputs

On Kaggle, the files written by this notebook are saved under `/kaggle/working/`.

In [None]:
# List output files
import os

# Report the size in KB of every expected artifact that exists in the current
# working directory; missing files are skipped silently.
expected_outputs = ('best_model.pth', 'history.npz', 'training_history.png', 'field_comparison.png')

print("Output files saved:")
for out_name in filter(os.path.exists, expected_outputs):
    size_kb = os.path.getsize(out_name) / 1024
    print(f"  {out_name}: {size_kb:.1f} KB")

print("\nDownload from Kaggle: Output tab → Download All")