Ref Paper: [FOURIER NEURAL OPERATOR FOR PARAMETRIC PARTIAL DIFFERENTIAL EQUATIONS](https://arxiv.org/pdf/2010.08895) 

Ref Code: https://github.com/neuraloperator/neuraloperator/blob/master

# Imports

In [2]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.lines import Line2D
import functools
import operator
import pandas as pd
from tqdm import tqdm
import h5py
import math
import copy
import scipy
import pickle
from timeit import default_timer
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset, Dataset
import torch.nn.functional 
from torch.optim import Adam
from torch.utils.tensorboard import SummaryWriter
from torchsummary import summary
import scipy.io
from importlib import reload
from pathlib import Path
%matplotlib inline

In [3]:
parent_dir = os.path.dirname(os.path.realpath(os.getcwd()))
process_file = os.path.join(parent_dir,"dedalus/data_processing.py")
sys.path.append(process_file)
sys.path.append(parent_dir)

# Utils

In [4]:
units = {
    0: 'B',
    1: 'KiB',
    2: 'MiB',
    3: 'GiB',
    4: 'TiB'
}


def format_mem(x):
    """
    Takes integer 'x' in bytes and returns a number in [0, 1024) and
    the corresponding unit.

    """
    if abs(x) < 1024:
        return round(x, 2), 'B'

    scale = math.log2(abs(x)) // 10
    scaled_x = x / (1024 ** scale)
    unit = units[scale]

    if int(scaled_x) == scaled_x:
        return int(scaled_x), unit

    # rounding leads to 2 or fewer decimal places, as required
    return round(scaled_x, 2), unit


def format_tensor_size(x):
    val, unit = format_mem(x)
    return f'{val}{unit}'


In [5]:
class CudaMemoryDebugger():
    """
    Helper to track changes in CUDA memory.

    """
    DEVICE = 'cuda'
    LAST_MEM = 0
    ENABLED = True


    def __init__(self, print_mem):
        self.print_mem = print_mem
        if not CudaMemoryDebugger.ENABLED:
            return

        cur_mem = torch.cuda.memory_allocated(CudaMemoryDebugger.DEVICE)
        cur_mem_fmt, cur_mem_unit = format_mem(cur_mem)
        print(f'cuda allocated (initial): {cur_mem_fmt:.2f}{cur_mem_unit}')
        CudaMemoryDebugger.LAST_MEM = cur_mem

    def print(self,id_str=None):
        if not CudaMemoryDebugger.ENABLED:
            return

        desc = 'cuda allocated'

        if id_str is not None:
            desc += f' ({id_str})'

        desc += ':'

        cur_mem = torch.cuda.memory_allocated(CudaMemoryDebugger.DEVICE)
        cur_mem_fmt, cur_mem_unit = format_mem(cur_mem)

        diff = cur_mem - CudaMemoryDebugger.LAST_MEM
        if self.print_mem:
            if diff == 0:
                print(f'{desc} {cur_mem_fmt:.2f}{cur_mem_unit} (no change)')

            else:
                diff_fmt, diff_unit = format_mem(diff)
                print(f'{desc} {cur_mem_fmt:.2f}{cur_mem_unit}'
                      f' ({diff_fmt:+}{diff_unit})')

        CudaMemoryDebugger.LAST_MEM = cur_mem


In [6]:
def data_process(file, xStep=1, zStep=1):
    index = 0
    inputs = []
    filename = 'input_data.h5'
    with h5py.File(filename, "w") as data:
        iter_no = 1501
        print( file, iter_no)
        for t in range(iter_no):
            vel_t,b_t, p_t, write_no, iteration, sim_time, time_step, wall_time = rbc_data(file, t, True, True)
            inputs.append(np.concatenate((vel_t[0,::xStep,::zStep], vel_t[1,::xStep,::zStep], b_t, p_t), axis = 0))
            index = index + 1
        data['input'] = inputs
    data.close()
    return np.array(inputs)

In [7]:
def rbc_data( filename, time, tasks=False, scales=False):
    with h5py.File(filename, mode="r") as f:
        b_t = f["tasks/buoyancy"][time]
        vel_t = f["tasks/velocity"][time]
        p_t = f["tasks/pressure"][time]
        iteration = f["scales/iteration"][time]
        sim_time  = f["scales/sim_time"][time]
        time_step = f["scales/timestep"][time]
        wall_time = f["scales/wall_time"][time]
        write_no = f["scales/write_number"][time]

    f.close()
    if tasks and scales:
         return vel_t,b_t, p_t, write_no, iteration, sim_time, time_step, wall_time
    elif tasks:
         return vel_t,b_t, p_t
    elif scales:
         return write_no, iteration, sim_time, time_step, wall_time
    else:
         raise ValueError("Nothing to return!")

In [8]:
class LpLoss(object):
    def __init__(self, d=2, p=2, size_average=True, reduction=True):
        super(LpLoss, self).__init__()

        #Dimension and Lp-norm type are postive
        assert d > 0 and p > 0

        self.d = d
        self.p = p
        self.reduction = reduction
        self.size_average = size_average

    def abs(self, x, y):
        num_examples = x.size()[0]

        #Assume uniform mesh
        h = 1.0 / (x.size()[1] - 1.0)

        all_norms = (h**(self.d/self.p))*torch.norm(x.view(num_examples,-1) - y.view(num_examples,-1), self.p, 1)

        if self.reduction:
            if self.size_average:
                return torch.mean(all_norms)
            else:
                return torch.sum(all_norms)

        return all_norms

    def rel(self, x, y):
        num_examples = x.size()[0]

        diff_norms = torch.norm(x.reshape(num_examples,-1) - y.reshape(num_examples,-1), self.p, 1)
        y_norms = torch.norm(y.reshape(num_examples,-1), self.p, 1)

        if self.reduction:
            if self.size_average:
                return torch.mean(diff_norms/y_norms)
            else:
                return torch.sum(diff_norms/y_norms)

        return diff_norms/y_norms

    def __call__(self, x, y):
        return self.rel(x, y)


# Fourier Neural Operator for 2D spatial + 1D temporal equation

In [9]:
class SpectralConv3d(nn.Module):
    def __init__(self, in_channels, out_channels, modes1, modes2, modes3):
        super(SpectralConv3d, self).__init__()

        """
        3D Fourier layer. It does FFT, linear transform, and Inverse FFT.    
        """

        self.in_channels = in_channels  
        self.out_channels = out_channels
        # Number of Fourier modes to multiply, at most floor(N/2) + 1
        # k_max = 12 in paper 
        self.modes1 = modes1                 
        self.modes2 = modes2
        self.modes3 = modes3
        
        # R
        self.scale = (1 / (in_channels * out_channels))
        self.weights1 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat))
        self.weights2 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat))
        self.weights3 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat))
        self.weights4 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat))

    # Complex multiplication
    def compl_mul3d(self, input, weights):
        # (batch, in_channel, x,y,t ), (in_channel, out_channel, x,y,t) -> (batch, out_channel, x,y,t)
        # summation along in_channel 
        return torch.einsum("bixyz,ioxyz->boxyz", input, weights)

    def forward(self, x):
        # x = [batchsize, width, size_x, size_y, T + padding]
        batchsize = x.shape[0]
        
        #Compute Fourier coeffcients up to factor of e^(- something constant)
        x_ft = torch.fft.rfftn(x, dim=[-3,-2,-1]) 
        # [batchsize, width, size_x, size_y, if (T + padding) is even ((T + padding)/2 +1) else (T + padding)/2 ]
        
        # Multiply relevant Fourier modes (Corners of R) --> R.FFT(x)
        out_ft = torch.zeros(batchsize, self.out_channels, x.size(-3), x.size(-2), x.size(-1)//2 + 1, dtype=torch.cfloat, device=x.device)
        out_ft[:, :, :self.modes1, :self.modes2, :self.modes3] = \
            self.compl_mul3d(x_ft[:, :, :self.modes1, :self.modes2, :self.modes3], self.weights1)  # upper right
        out_ft[:, :, -self.modes1:, :self.modes2, :self.modes3] = \
            self.compl_mul3d(x_ft[:, :, -self.modes1:, :self.modes2, :self.modes3], self.weights2) # upper left
        out_ft[:, :, :self.modes1, -self.modes2:, :self.modes3] = \
            self.compl_mul3d(x_ft[:, :, :self.modes1, -self.modes2:, :self.modes3], self.weights3) # lower right
        out_ft[:, :, -self.modes1:, -self.modes2:, :self.modes3] = \
            self.compl_mul3d(x_ft[:, :, -self.modes1:, -self.modes2:, :self.modes3], self.weights4) # lower left

        #Return to physical space
        x = torch.fft.irfftn(out_ft, s=(x.size(-3), x.size(-2), x.size(-1))) # x = [batchsize, width, size_x, size_y, T + padding]
        return x

class MLP(nn.Module):
    def __init__(self, in_channels, out_channels, mid_channels):
        super(MLP, self).__init__()
        self.mlp1 = nn.Conv3d(in_channels, mid_channels, 1)
        self.mlp2 = nn.Conv3d(mid_channels, out_channels, 1)

    def forward(self, x):
        # input: [batchsize, in_channel=width, size_x, size_y, T + padding]
        # weight: [mid_channel=width, in_channel=width, 1,1,1]
        # output: [batchsize, out_channel=mid_channel, size_x, size_y, T + padding]
        x = self.mlp1(x)
        x = torch.nn.Functional.gelu(x)
        # output: [batchsize, out_channel=mid_channel, size_x, size_y, T + padding]
        x = self.mlp2(x)
        # input: [batchsize, in_channel=mid_channel, size_x, size_y, T + padding]
        # weight: [out_channel=width, mid_channel=width, 1, 1, 1]
        # output: [batchsize, out_channel=width, size_x, size_y, T + padding]
        return x

class FNO3d(nn.Module):
    def __init__(self, modes1, modes2, modes3, width):
        super(FNO3d, self).__init__()

        """
        The overall network. It contains 4 layers of the Fourier layer.
        1. Lift the input to the desire channel dimension by self.fc0 .
        2. 4 layers of the integral operators u' = (W + K)(u).
            W defined by self.w; K defined by self.conv .
        3. Project from the channel space to the output space by self.fc1 and self.fc2 .
        
        input: the solution of the first 10 timesteps + 3 locations (u(1, x, y), ..., u(10, x, y),  x, y, t).
        It's a constant function in time, except for the last index.
        input shape: (batchsize,  x=sizex, y=sizey, t=40, c=13)
        output: the solution of the next 40 timesteps
        output shape: (batchsize, x=sizex, y=sizey, t=40, c=1)
        """

        self.modes1 = modes1
        self.modes2 = modes2
        self.modes3 = modes3
        self.width = width
        self.padding = 6 # pad the domain if input is non-periodic
        
        # x = (batchsize,   x=sizex, y=sizey, t=40, c=13)
        # input channel is 13: the solution of the first 10 timesteps + 3 locations (u(1, x, y), ..., u(10, x, y),  x, y, t)
        self.p = nn.Linear(13, self.width)
        self.conv0 = SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        self.conv1 = SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        self.conv2 = SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        self.conv3 = SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        self.mlp0 = MLP(self.width, self.width, self.width)
        self.mlp1 = MLP(self.width, self.width, self.width)
        self.mlp2 = MLP(self.width, self.width, self.width)
        self.mlp3 = MLP(self.width, self.width, self.width)
        self.w0 = nn.Conv3d(self.width, self.width, 1)
        self.w1 = nn.Conv3d(self.width, self.width, 1)
        self.w2 = nn.Conv3d(self.width, self.width, 1)
        self.w3 = nn.Conv3d(self.width, self.width, 1)
        self.q = MLP(self.width, 1, self.width * 4) # output channel is 1: u(x, y)

    def forward(self, x):
        grid = self.get_grid(x.shape, x.device) # [batchsize,   size_x, size_y, T, c=T_in] ---> [batchsize,   size_x, size_y, T, c=3]
        x = torch.cat((x, grid), dim=-1)        # [batchsize,   size_x, size_y, T, c=T_in+3]
        x = self.p(x)                           
        # input: [batchsize,   size_x, size_y, T, c=T_in+3], 
        # Weight: [width,T_in+3]
        # Output: [batchsize,   size_x, size_y, T, c=width]
        
        x = x.permute(0, 5, 1, 2, 3,4)            # [batchsize,   size_x, size_y, T, c=width] --> [batchsize, width,   size_x, size_y, T]
        x = torch.nn.functional.pad(x, [0,self.padding]) # pad the domain if input is non-periodic, padded along last dim of x
        
        # padding order:(padding_left,padding_right, 
        #                 padding_top,padding_bottom,
        #                 padding_front,padding_back)
                
        # [batchsize, width,   size_x, size_y, T + padding]
        
        x1 = self.conv0(x) # SpectralConv3d(self.width, self.width, self.modes1, self.modes2, self.modes3)
        
        # input: [batchsize, width,   size_x, size_y, T + padding]
        # weight: torch.rand(in_channels=width, out_channels=width, self.modes1, self.modes2, self.modes3, dtype=torch.cfloat)
        # Output: [batchsize, out_channel=width,   size_x, size_y, T + padding]
        
        x1 = self.mlp0(x1) # MLP(self.width, self.width, self.width)
        # input: [batchsize, in_channel=width,   size_x, size_y, T + padding]
        # output: [batchsize, out_channel=width,    size_x, size_y, T + padding]
        
        x2 = self.w0(x)   # nn.Conv3d(self.width, self.width, 1)
        # input: [batchsize, in_channel=width,   size_x, size_y, T + padding]
        # weight: [out_channel=width, in_channel=width, 1, 1,1]
        # output: [batchsize, out_channel=width,   size_x, size_y, T + padding]
        
        x = x1 + x2
        x = torch.nn.functional.gelu(x)

        x1 = self.conv1(x)
        x1 = self.mlp1(x1)
        x2 = self.w1(x)
        x = x1 + x2
        x = torch.nn.functional.gelu(x)

        x1 = self.conv2(x)
        x1 = self.mlp2(x1)
        x2 = self.w2(x)
        x = x1 + x2
        x = torch.nn.functional.gelu(x)

        x1 = self.conv3(x)
        x1 = self.mlp3(x1)
        x2 = self.w3(x)
        x = x1 + x2
        # output: [batchsize, out_channel=width,   size_x, size_y, T + padding]
        
        x = x[..., :-self.padding]
        # output: [batchsize, out_channel=width,   size_x, size_y, T]
        
        x = self.q(x) # MLP(self.width, 1, self.width * 4) # output channel is 1: u(x, y)
        
        # input: [batchsize, in_channel=width,  size_x, size_y, T ]
        # weight: [mid_channel=4*width, in_channel=width, 1,1,1]
        # output: [batchsize, out_channel=mid_channel=4*width,   size_x, size_y, T ]
        # x = self.mlp1(x)
        # x = torch.nn.Functional.gelu(x)
        # output: [batchsize, out_channel=mid_channel=4*width,  size_x, size_y, T]
        # x = self.mlp2(x)
        # input: [batchsize, in_channel=mid_channel=4*width,  size_x, size_y, T]
        # weight: [out_channel=1, mid_channel=4*width, 1, 1, 1]
        # output: [batchsize, out_channel=1,   size_x, size_y, T]
        
        x = x.permute(0, 2, 3,   5, 1) # pad the domain if input is non-periodic
        # output: [batchsize,   size_x, size_y, T, out_channel=1]
        return x


    def get_grid(self, shape, device):
        batchsize, size_x, size_y = shape[0], shape[1], shape[2]
        gridx = torch.tensor(np.linspace(0, 1, size_x), dtype=torch.float)
        gridx = gridx.reshape(1, size_x, 1, 1).repeat([batchsize, 1, size_y, 1])
        gridy = torch.tensor(np.linspace(0, 1, size_y), dtype=torch.float)
        gridy = gridy.reshape(1, 1, size_y, 1).repeat([batchsize, size_x, 1, 1])

        return torch.cat((gridx, gridy, gridz), dim=-1).to(device) # [batchsize,  size_x, size_y, size_z, 3]
        
    
    def print_size(self):
        properties = []

        for param in self.parameters():
            properties.append([list(param.size()+(2,) if param.is_complex() else param.size()), param.numel(), (param.data.element_size() * param.numel())/1000])
            
        elementFrame = pd.DataFrame(properties, columns = ['ParamSize', 'NParams', 'Memory(KB)'])
 
        print(f'Total number of model parameters: {elementFrame["NParams"].sum()} with (~{format_tensor_size(elementFrame["Memory(KB)"].sum()*1000)})')
        return elementFrame
    
    


In [9]:
model3d = FNO3d(8, 8, 8, 20)
model3d.print_size()

Total number of model parameters: 3283881 with (~25.03MiB)


Unnamed: 0,ParamSize,NParams,Memory(KB)
0,"[20, 13]",260,1.04
1,[20],20,0.08
2,"[20, 20, 8, 8, 8, 2]",204800,1638.4
3,"[20, 20, 8, 8, 8, 2]",204800,1638.4
4,"[20, 20, 8, 8, 8, 2]",204800,1638.4
5,"[20, 20, 8, 8, 8, 2]",204800,1638.4
6,"[20, 20, 8, 8, 8, 2]",204800,1638.4
7,"[20, 20, 8, 8, 8, 2]",204800,1638.4
8,"[20, 20, 8, 8, 8, 2]",204800,1638.4
9,"[20, 20, 8, 8, 8, 2]",204800,1638.4


# Fourier Neural Operator 2D Spatial + Recurrent in time

In [8]:
class SpectralConv2d(nn.Module):
    def __init__(self, in_channels, out_channels, modes1, modes2):
        super(SpectralConv2d, self).__init__()

        """
        2D Fourier layer. It does FFT, linear transform, and Inverse FFT.    
        """

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.modes1 = modes1              #Number of Fourier modes to multiply, at most floor(N/2) + 1
        self.modes2 = modes2

        self.scale = (1 / (in_channels * out_channels))
        self.weights1 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, dtype=torch.cfloat))
        self.weights2 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, self.modes2, dtype=torch.cfloat))

    # Complex multiplication
    def compl_mul2d(self, input, weights):
        # (batch, in_channel, x,y ), (in_channel, out_channel, x,y) -> (batch, out_channel, x,y)
        return torch.einsum("bixy,ioxy->boxy", input, weights)

    def forward(self, x):
        batchsize = x.shape[0]
        #Compute Fourier coeffcients up to factor of e^(- something constant)
        x_ft = torch.fft.rfft2(x)

        # Multiply relevant Fourier modes
        out_ft = torch.zeros(batchsize, self.out_channels, x.size(-2), x.size(-1)//2 + 1, dtype=torch.cfloat, device=x.device)
        out_ft[:, :,  :self.modes1, :self.modes2] = \
            self.compl_mul2d(x_ft[:, :, :self.modes1, :self.modes2], self.weights1)
        out_ft[:, :, -self.modes1:, :self.modes2] = \
            self.compl_mul2d(x_ft[:, :, -self.modes1:, :self.modes2], self.weights2)

        #Return to physical space
        x = torch.fft.irfft2(out_ft, s=(x.size(-2), x.size(-1)))
        return x

class MLP(nn.Module):
    def __init__(self, in_channels, out_channels, mid_channels):
        super(MLP, self).__init__()
        self.mlp1 = nn.Conv2d(in_channels, mid_channels, 1)
        self.mlp2 = nn.Conv2d(mid_channels, out_channels, 1)

    def forward(self, x):
        x = self.mlp1(x)
        x = nn.functional.gelu(x)
        x = self.mlp2(x)
        return x

class FNO2d(nn.Module):
    memory = CudaMemoryDebugger(print_mem=True)
    
    def __init__(self, modes1, modes2, width):
        super(FNO2d, self).__init__()

        """
        The overall network. It contains 4 layers of the Fourier layer.
        1. Lift the input to the desire channel dimension by self.fc0 .
        2. 4 layers of the integral operators u' = (W + K)(u).
            W defined by self.w; K defined by self.conv .
        3. Project from the channel space to the output space by self.fc1 and self.fc2 .
        
        input: the solution of the previous 10 timesteps + 2 locations (u(t-10, x, y), ..., u(t-1, x, y),  x, y)
        input shape: (batchsize, x=sizex, y=sizey, c=12)
        output: the solution of the next timestep
        output shape: (batchsize, x=sizex, y=sizey, c=1)
        """

        self.modes1 = modes1
        self.modes2 = modes2
        self.width = width
        self.padding = 8 # pad the domain if input is non-periodic

        self.p = nn.Linear(12, self.width) # input channel is 12: the solution of the previous 10 timesteps + 2 locations (u(t-10, x, y), ..., u(t-1, x, y),  x, y)
        self.conv0 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2)
        self.conv1 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2)
        self.conv2 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2)
        self.conv3 = SpectralConv2d(self.width, self.width, self.modes1, self.modes2)
        self.mlp0 = MLP(self.width, self.width, self.width)
        self.mlp1 = MLP(self.width, self.width, self.width)
        self.mlp2 = MLP(self.width, self.width, self.width)
        self.mlp3 = MLP(self.width, self.width, self.width)
        self.w0 = nn.Conv2d(self.width, self.width, 1)
        self.w1 = nn.Conv2d(self.width, self.width, 1)
        self.w2 = nn.Conv2d(self.width, self.width, 1)
        self.w3 = nn.Conv2d(self.width, self.width, 1)
        self.norm = nn.InstanceNorm2d(self.width)
        self.q = MLP(self.width, 1, self.width * 4) # output channel is 1: u(x, y)

    def forward(self, x):
        grid = self.get_grid(x.shape, x.device)
        x = torch.cat((x, grid), dim=-1)
        x = self.p(x)
        memory.print("after p(x)")
        x = x.permute(0, 3, 1, 2)
        # x = F.pad(x, [0,self.padding, 0,self.padding]) # pad the domain if input is non-periodic

        x1 = self.norm(self.conv0(self.norm(x)))
        x1 = self.mlp0(x1)
        x2 = self.w0(x)
        x = x1 + x2
        x = nn.functional.gelu(x)
        memory.print("after FNO1")

        x1 = self.norm(self.conv1(self.norm(x)))
        x1 = self.mlp1(x1)
        x2 = self.w1(x)
        x = x1 + x2
        x = nn.functional.gelu(x)
        memory.print("after FNO2")
        
        x1 = self.norm(self.conv2(self.norm(x)))
        x1 = self.mlp2(x1)
        x2 = self.w2(x)
        x = x1 + x2
        x = nn.functional.gelu(x)
        memory.print("after FNO3")
        
        x1 = self.norm(self.conv3(self.norm(x)))
        x1 = self.mlp3(x1)
        x2 = self.w3(x)
        x = x1 + x2
        memory.print("after FNO4")
        
        # x = x[..., :-self.padding, :-self.padding] # pad the domain if input is non-periodic
        x = self.q(x)
        memory.print("after q(x)")
        x = x.permute(0, 2, 3, 1)
        return x

    def get_grid(self, shape, device):
        batchsize, size_x, size_y = shape[0], shape[1], shape[2]
        gridx = torch.tensor(np.linspace(0, 1, size_x), dtype=torch.float)
        gridx = gridx.reshape(1, size_x, 1, 1).repeat([batchsize, 1, size_y, 1])
        gridy = torch.tensor(np.linspace(0, 1, size_y), dtype=torch.float)
        gridy = gridy.reshape(1, 1, size_y, 1).repeat([batchsize, size_x, 1, 1])
        return torch.cat((gridx, gridy), dim=-1).to(device)
    
    def print_size(self):
        properties = []

        for param in self.parameters():
            properties.append([list(param.size()+(2,) if param.is_complex() else param.size()), param.numel(), (param.data.element_size() * param.numel())/1000])
            
        elementFrame = pd.DataFrame(properties, columns = ['ParamSize', 'NParams', 'Memory(KB)'])
 
        print(f'Total number of model parameters: {elementFrame["NParams"].sum()} with (~{format_tensor_size(elementFrame["Memory(KB)"].sum()*1000)})')
        return elementFrame
    


cuda allocated (initial): 0.00B


In [11]:
model2d= FNO2d(12,12,20)
model2d.print_size()

Total number of model parameters: 467861 with (~3.54MiB)


Unnamed: 0,ParamSize,NParams,Memory(KB)
0,"[20, 12]",240,0.96
1,[20],20,0.08
2,"[20, 20, 12, 12, 2]",57600,460.8
3,"[20, 20, 12, 12, 2]",57600,460.8
4,"[20, 20, 12, 12, 2]",57600,460.8
5,"[20, 20, 12, 12, 2]",57600,460.8
6,"[20, 20, 12, 12, 2]",57600,460.8
7,"[20, 20, 12, 12, 2]",57600,460.8
8,"[20, 20, 12, 12, 2]",57600,460.8
9,"[20, 20, 12, 12, 2]",57600,460.8


# Data

Data generated using [rbc_simulation.py](dedalus/rbc_simulation.py)

```
srun python data_generation.py \
        --dir_name "$DATA_DIR" \
        --rayleigh 1e7 \
        --res_factor 1 \
        --seed 1000 \
        --sim_time 150 
```

`xStep, zStep = 1` \
Each row in an array is  `np.concatenate((vel_t[0,::xStep,::zStep], vel_t[1,::xStep,::zStep], b_t, p_t), axis = 0)`


In [89]:
root_dir = '/p/project1/cexalab/john2/NeuralOperators/RayleighBernardConvection/data/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1'

In [210]:
from dedalus import data_processing
reload(data_processing)

Setting RayleighNumber=10000000.0, Nx=256 and Nz=64


<module 'dedalus.data_processing' from '/p/project1/cexalab/john2/NeuralOperators/neural_operators/dedalus/data_processing.py'>

In [209]:
def data_arrange(root_dir, mode, samples):
    data  = []
    for i in tqdm(range(1, samples + 1)):
        folder = f'{root_dir}_{mode}_{i}'
        d = data_processing.OutputFiles(folder).data_process()
        data.append(d)
    s = np.array(data).shape
    data = np.array(data).reshape(s[0], s[2], s[3], s[1])
    return data

In [208]:
train_data = data_arrange(root_dir, "train", 100)
test_data = data_arrange(root_dir, "test", 50)
val_data = data_arrange(root_dir, "val", 50)

0 /p/project1/cexalab/john2/NeuralOperators/RayleighBernardConvection/data/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_1/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_1_s1.h5 1501
0 /p/project1/cexalab/john2/NeuralOperators/RayleighBernardConvection/data/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_2/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_2_s1.h5 1501
0 /p/project1/cexalab/john2/NeuralOperators/RayleighBernardConvection/data/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_3/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_3_s1.h5 1501
0 /p/project1/cexalab/john2/NeuralOperators/RayleighBernardConvection/data/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_4/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_4_s1.h5 1501
0 /p/project1/cexalab/john2/NeuralOperators/RayleighBernardConvection/data/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_5/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1_train_5_s1.h5 1501
0 /p/project1/cexalab/john2/NeuralOperators/RayleighBernardC

In [212]:
print(train_data.shape, test_data.shape, val_data.shape)

(100, 1024, 64, 1501) (50, 1024, 64, 1501) (50, 1024, 64, 1501)


In [213]:
filename = '/p/project1/cexalab/john2/NeuralOperators/RayleighBernardConvection/processed_data/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1.h5'

In [214]:
with h5py.File(filename, "w") as data:
    data['train'] = train_data
    data['test'] = test_data
    data['val'] = val_data
    data.close()

In [215]:
with h5py.File(filename, "r") as data:
    print(data['train'].shape)

(100, 1024, 64, 1501)


# 2D+Recurrent Time Config

In [10]:
ntrain = 100
ntest = 50
nval = 50

modes = 12
width = 20

batch_size = 20
learning_rate = 0.001
epochs = 500
iterations = epochs*(ntrain//batch_size)

fno_path = Path(f'{os.getcwd()}/rbc_fno_2d_time_N{ntrain}_epoch{epochs}_m{modes}_w{width}')
fno_path.mkdir(parents=True, exist_ok=True)

path_train_err = Path(f'{fno_path}/results/train.txt')
path_train_err.mkdir(parents=True, exist_ok=True)

path_test_err = Path(f'{fno_path}/results/test.txt')
path_test_err.mkdir(parents=True, exist_ok=True)

path_image = Path(f'{fno_path}/image')
path_image.mkdir(parents=True, exist_ok=True)

# gridx = 4*256
# gridz = 64

xStep = 1
zStep = 1
tStep = 1

start_index = 500
T_in = 10
T = 10

In [10]:
# load data
data_path = '/p/project1/cexalab/john2/NeuralOperators/RayleighBernardConvection/processed_data/RBC2D_NX256_NZ64_TI0_TF150_Pr1_Ra1e7_dt0_1.h5'
reader = h5py.File(data_path, mode="r")
train_a = torch.tensor(reader['train'][:ntrain, ::xStep, ::zStep, start_index: start_index+T_in],dtype=torch.float)
train_u = torch.tensor(reader['train'][:ntrain, ::xStep, ::zStep, start_index+T_in:T+start_index+T_in], dtype=torch.float)

test_a = torch.tensor(reader['test'][:ntest, ::xStep, ::zStep, start_index: start_index+T_in],dtype=torch.float)
test_u = torch.tensor(reader['test'][:ntest, ::xStep, ::zStep, start_index+T_in:T+start_index+T_in],dtype=torch.float)

val_a = torch.tensor(reader['val'][:nval, ::xStep, ::zStep, start_index: start_index+T_in],dtype=torch.float)
val_u = torch.tensor(reader['val'][:nval, ::xStep, ::zStep, start_index+T_in:T+start_index+T_in],dtype=torch.float)

In [22]:
print(train_a.shape, val_a.shape, test_a.shape)
print(train_u.shape, val_u.shape, test_u.shape)
# assert (gridx == train_u.shape[-3])
# assert (gridz == train_u.shape[-2])

torch.Size([100, 1024, 64, 10]) torch.Size([50, 1024, 64, 10]) torch.Size([50, 1024, 64, 10])
torch.Size([100, 1024, 64, 10]) torch.Size([50, 1024, 64, 10]) torch.Size([50, 1024, 64, 10])


In [11]:
train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(train_a, train_u), batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(test_a, test_u), batch_size=batch_size, shuffle=False)
val_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(val_a, val_u), batch_size=batch_size, shuffle=False)

In [12]:
for xx, yy in train_loader:
    print(xx.shape, yy.shape, xx.dtype)
    break

torch.Size([20, 1024, 64, 10]) torch.Size([20, 1024, 64, 10]) torch.float32


In [13]:
import gc
torch.cuda.empty_cache()
gc.collect()

28

In [14]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.cuda.empty_cache()
memory = CudaMemoryDebugger(print_mem=True)

cuda allocated (initial): 0.00B


In [15]:
model2d = FNO2d(modes, modes, width).to(device)
# print(model2d.print_size())
memory.print("after intialization")

cuda allocated (after intialization): 3.55MiB (+3.55MiB)


In [16]:
################################################################
# training and evaluation
################################################################
optimizer = torch.optim.Adam(model2d.parameters(), lr=learning_rate, weight_decay=1e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=iterations)

myloss = LpLoss(size_average=False)
for ep in tqdm(range(1), desc=" Epoch loop"):#epochs
    model2d.train()
    memory.print("after model.train()")
    t1 = default_timer()
    train_l2_step = 0
    train_l2_full = 0
    for xx, yy in train_loader:
        loss = 0
        xx = xx.to(device)
        yy = yy.to(device)
        memory.print("after loading first batch")

        for t in tqdm(range(0,T, tStep), desc="Train loop"):
            y = yy[..., t:t + tStep]
            im = model2d(xx)
            print("ouput:",y.shape,"pred:", im.shape)
            loss += myloss(im.reshape(batch_size, -1), y.reshape(batch_size, -1))

            if t == 0:
                pred = im
            else:
                pred = torch.cat((pred, im), -1)

            xx = torch.cat((xx[..., tStep:], im), dim=-1)

        train_l2_step += loss.item()
        l2_full = myloss(pred.reshape(batch_size, -1), yy.reshape(batch_size, -1))
        train_l2_full += l2_full.item()

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        scheduler.step()
        memory.print("after backwardpass")

    val_l2_step = 0
    val_l2_full = 0
    with torch.no_grad():
        for xx, yy in val_loader:
            loss = 0
            xx = xx.to(device)
            yy = yy.to(device)

            for t in tqdm(range(0, T, tStep), desc="Validation loop"):
                y = yy[..., t:t + tStep]
                im = model2d(xx)
                loss += myloss(im.reshape(batch_size, -1), y.reshape(batch_size, -1))

                if t == 0:
                    pred = im
                else:
                    pred = torch.cat((pred, im), -1)

                xx = torch.cat((xx[..., tStep:], im), dim=-1)
                print(xx.shape)

            val_l2_step += loss.item()
            val_l2_full += myloss(pred.reshape(batch_size, -1), yy.reshape(batch_size, -1)).item()
            memory.print("after val first batch")

    t2 = default_timer()
    print(ep, t2 - t1, train_l2_step / ntrain / (T / tStep), train_l2_full / ntrain, val_l2_step / nval / (T / tStep),
          val_l2_full / nval)

 Epoch loop:   0%|          | 0/1 [00:00<?, ?it/s]

cuda allocated (after model.train()): 3.55MiB (no change)
cuda allocated (after loading first batch): 103.55MiB (+100MiB)



Train loop:   0%|          | 0/10 [00:00<?, ?it/s][A

cuda allocated (after p(x)): 281.68MiB (+178.12MiB)



Train loop:  10%|█         | 1/10 [00:00<00:03,  2.68it/s][A

cuda allocated (after FNO1): 1.25GiB (+1000.89MiB)
cuda allocated (after FNO2): 1.94GiB (+700.89MiB)
cuda allocated (after FNO3): 2.62GiB (+700.89MiB)
cuda allocated (after FNO4): 3.21GiB (+600.89MiB)
cuda allocated (after q(x)): 3.99GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  20%|██        | 2/10 [00:00<00:01,  4.42it/s][A

cuda allocated (after p(x)): 3.96GiB (-35.0MiB)
cuda allocated (after FNO1): 4.94GiB (+1000.89MiB)
cuda allocated (after FNO2): 5.62GiB (+700.89MiB)
cuda allocated (after FNO3): 6.31GiB (+700.89MiB)
cuda allocated (after FNO4): 6.89GiB (+600.89MiB)
cuda allocated (after q(x)): 7.68GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  30%|███       | 3/10 [00:00<00:01,  4.63it/s][A

cuda allocated (after p(x)): 7.65GiB (-30.0MiB)
cuda allocated (after FNO1): 8.63GiB (+1000.89MiB)
cuda allocated (after FNO2): 9.31GiB (+700.89MiB)
cuda allocated (after FNO3): 10.00GiB (+700.89MiB)
cuda allocated (after FNO4): 10.58GiB (+600.89MiB)
cuda allocated (after q(x)): 11.37GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  40%|████      | 4/10 [00:00<00:01,  4.72it/s][A

cuda allocated (after p(x)): 11.34GiB (-35.0MiB)
cuda allocated (after FNO1): 12.31GiB (+1000.89MiB)
cuda allocated (after FNO2): 13.00GiB (+700.89MiB)
cuda allocated (after FNO3): 13.68GiB (+700.89MiB)
cuda allocated (after FNO4): 14.27GiB (+600.89MiB)
cuda allocated (after q(x)): 15.05GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  50%|█████     | 5/10 [00:01<00:01,  4.80it/s][A

cuda allocated (after p(x)): 15.02GiB (-35.0MiB)
cuda allocated (after FNO1): 16.00GiB (+1000.89MiB)
cuda allocated (after FNO2): 16.68GiB (+700.89MiB)
cuda allocated (after FNO3): 17.37GiB (+700.89MiB)
cuda allocated (after FNO4): 17.95GiB (+600.89MiB)
cuda allocated (after q(x)): 18.74GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 18.71GiB (-35.0MiB)



Train loop:  60%|██████    | 6/10 [00:01<00:00,  4.85it/s][A

cuda allocated (after FNO1): 19.68GiB (+1000.89MiB)
cuda allocated (after FNO2): 20.37GiB (+700.89MiB)
cuda allocated (after FNO3): 21.05GiB (+700.89MiB)
cuda allocated (after FNO4): 21.64GiB (+600.89MiB)
cuda allocated (after q(x)): 22.42GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  70%|███████   | 7/10 [00:01<00:00,  4.28it/s][A

cuda allocated (after p(x)): 22.39GiB (-35.0MiB)
cuda allocated (after FNO1): 23.37GiB (+1000.89MiB)
cuda allocated (after FNO2): 24.05GiB (+700.89MiB)
cuda allocated (after FNO3): 24.74GiB (+700.89MiB)
cuda allocated (after FNO4): 25.32GiB (+600.89MiB)
cuda allocated (after q(x)): 26.11GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 26.08GiB (-35.0MiB)
cuda allocated (after FNO1): 27.05GiB (+1000.89MiB)



Train loop:  80%|████████  | 8/10 [00:01<00:00,  4.51it/s][A

cuda allocated (after FNO2): 27.74GiB (+700.89MiB)
cuda allocated (after FNO3): 28.42GiB (+700.89MiB)
cuda allocated (after FNO4): 29.01GiB (+600.89MiB)
cuda allocated (after q(x)): 29.79GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  90%|█████████ | 9/10 [00:02<00:00,  4.59it/s][A

cuda allocated (after p(x)): 29.76GiB (-35.0MiB)
cuda allocated (after FNO1): 30.74GiB (+1000.89MiB)
cuda allocated (after FNO2): 31.42GiB (+700.89MiB)
cuda allocated (after FNO3): 32.11GiB (+700.89MiB)
cuda allocated (after FNO4): 32.69GiB (+600.89MiB)
cuda allocated (after q(x)): 33.48GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop: 100%|██████████| 10/10 [00:02<00:00,  4.35it/s][A

cuda allocated (after p(x)): 33.45GiB (-35.0MiB)
cuda allocated (after FNO1): 34.42GiB (+1000.89MiB)
cuda allocated (after FNO2): 35.11GiB (+700.89MiB)
cuda allocated (after FNO3): 35.79GiB (+700.89MiB)
cuda allocated (after FNO4): 36.38GiB (+600.89MiB)
cuda allocated (after q(x)): 37.17GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])





cuda allocated (after backwardpass): 235.47MiB (-36.94GiB)
cuda allocated (after loading first batch): 285.47MiB (+50.0MiB)



Train loop:   0%|          | 0/10 [00:00<?, ?it/s][A

cuda allocated (after p(x)): 405.47MiB (+120MiB)
cuda allocated (after FNO1): 1.37GiB (+1000.89MiB)



Train loop:  10%|█         | 1/10 [00:00<00:01,  4.94it/s][A


cuda allocated (after FNO2): 2.06GiB (+700.89MiB)
cuda allocated (after FNO3): 2.74GiB (+700.89MiB)
cuda allocated (after FNO4): 3.33GiB (+600.89MiB)
cuda allocated (after q(x)): 4.12GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 4.03GiB (-90.0MiB)
cuda allocated (after FNO1): 5.00GiB (+1000.89MiB)
cuda allocated (after FNO2): 5.69GiB (+700.89MiB)
cuda allocated (after FNO3): 6.37GiB (+700.89MiB)
cuda allocated (after FNO4): 6.96GiB (+600.89MiB)
cuda allocated (after q(x)): 7.75GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])


Train loop:  20%|██        | 2/10 [00:00<00:01,  5.03it/s][A
Train loop:  30%|███       | 3/10 [00:00<00:01,  4.11it/s][A

cuda allocated (after p(x)): 7.72GiB (-30.0MiB)
cuda allocated (after FNO1): 8.69GiB (+1000.89MiB)
cuda allocated (after FNO2): 9.38GiB (+700.89MiB)
cuda allocated (after FNO3): 10.06GiB (+700.89MiB)
cuda allocated (after FNO4): 10.65GiB (+600.89MiB)
cuda allocated (after q(x)): 11.44GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  40%|████      | 4/10 [00:00<00:01,  4.39it/s][A

cuda allocated (after p(x)): 11.40GiB (-35.0MiB)
cuda allocated (after FNO1): 12.38GiB (+1000.89MiB)
cuda allocated (after FNO2): 13.06GiB (+700.89MiB)
cuda allocated (after FNO3): 13.75GiB (+700.89MiB)
cuda allocated (after FNO4): 14.34GiB (+600.89MiB)
cuda allocated (after q(x)): 15.12GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  50%|█████     | 5/10 [00:01<00:01,  4.58it/s][A

cuda allocated (after p(x)): 15.09GiB (-35.0MiB)
cuda allocated (after FNO1): 16.06GiB (+1000.89MiB)
cuda allocated (after FNO2): 16.75GiB (+700.89MiB)
cuda allocated (after FNO3): 17.43GiB (+700.89MiB)
cuda allocated (after FNO4): 18.02GiB (+600.89MiB)
cuda allocated (after q(x)): 18.81GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 18.77GiB (-35.0MiB)



Train loop:  60%|██████    | 6/10 [00:01<00:00,  4.11it/s][A

cuda allocated (after FNO1): 19.75GiB (+1000.89MiB)
cuda allocated (after FNO2): 20.43GiB (+700.89MiB)
cuda allocated (after FNO3): 21.12GiB (+700.89MiB)
cuda allocated (after FNO4): 21.71GiB (+600.89MiB)
cuda allocated (after q(x)): 22.49GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 22.46GiB (-35.0MiB)
cuda allocated (after FNO1): 23.44GiB (+1000.89MiB)



Train loop:  70%|███████   | 7/10 [00:01<00:00,  4.39it/s][A

cuda allocated (after FNO2): 24.12GiB (+700.89MiB)
cuda allocated (after FNO3): 24.80GiB (+700.89MiB)
cuda allocated (after FNO4): 25.39GiB (+600.89MiB)
cuda allocated (after q(x)): 26.18GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 26.14GiB (-35.0MiB)



Train loop:  80%|████████  | 8/10 [00:01<00:00,  4.55it/s][A

cuda allocated (after FNO1): 27.12GiB (+1000.89MiB)
cuda allocated (after FNO2): 27.80GiB (+700.89MiB)
cuda allocated (after FNO3): 28.49GiB (+700.89MiB)
cuda allocated (after FNO4): 29.08GiB (+600.89MiB)
cuda allocated (after q(x)): 29.86GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  90%|█████████ | 9/10 [00:01<00:00,  4.66it/s][A

cuda allocated (after p(x)): 29.83GiB (-35.0MiB)
cuda allocated (after FNO1): 30.81GiB (+1000.89MiB)
cuda allocated (after FNO2): 31.49GiB (+700.89MiB)
cuda allocated (after FNO3): 32.17GiB (+700.89MiB)
cuda allocated (after FNO4): 32.76GiB (+600.89MiB)
cuda allocated (after q(x)): 33.55GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop: 100%|██████████| 10/10 [00:02<00:00,  4.54it/s][A

cuda allocated (after p(x)): 33.51GiB (-35.0MiB)
cuda allocated (after FNO1): 34.49GiB (+1000.89MiB)
cuda allocated (after FNO2): 35.17GiB (+700.89MiB)
cuda allocated (after FNO3): 35.86GiB (+700.89MiB)
cuda allocated (after FNO4): 36.45GiB (+600.89MiB)
cuda allocated (after q(x)): 37.23GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])





cuda allocated (after backwardpass): 235.47MiB (-37.0GiB)
cuda allocated (after loading first batch): 285.47MiB (+50.0MiB)



Train loop:   0%|          | 0/10 [00:00<?, ?it/s][A
Train loop:  10%|█         | 1/10 [00:00<00:01,  4.59it/s][A

cuda allocated (after p(x)): 405.47MiB (+120MiB)
cuda allocated (after FNO1): 1.37GiB (+1000.89MiB)
cuda allocated (after FNO2): 2.06GiB (+700.89MiB)
cuda allocated (after FNO3): 2.74GiB (+700.89MiB)
cuda allocated (after FNO4): 3.33GiB (+600.89MiB)
cuda allocated (after q(x)): 4.12GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  20%|██        | 2/10 [00:00<00:02,  3.97it/s][A

cuda allocated (after p(x)): 4.03GiB (-90.0MiB)
cuda allocated (after FNO1): 5.00GiB (+1000.89MiB)
cuda allocated (after FNO2): 5.69GiB (+700.89MiB)
cuda allocated (after FNO3): 6.37GiB (+700.89MiB)
cuda allocated (after FNO4): 6.96GiB (+600.89MiB)
cuda allocated (after q(x)): 7.75GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  30%|███       | 3/10 [00:00<00:01,  4.27it/s][A

cuda allocated (after p(x)): 7.72GiB (-30.0MiB)
cuda allocated (after FNO1): 8.69GiB (+1000.89MiB)
cuda allocated (after FNO2): 9.38GiB (+700.89MiB)
cuda allocated (after FNO3): 10.06GiB (+700.89MiB)
cuda allocated (after FNO4): 10.65GiB (+600.89MiB)
cuda allocated (after q(x)): 11.44GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  40%|████      | 4/10 [00:00<00:01,  3.91it/s][A

cuda allocated (after p(x)): 11.40GiB (-35.0MiB)
cuda allocated (after FNO1): 12.38GiB (+1000.89MiB)
cuda allocated (after FNO2): 13.06GiB (+700.89MiB)
cuda allocated (after FNO3): 13.75GiB (+700.89MiB)
cuda allocated (after FNO4): 14.34GiB (+600.89MiB)
cuda allocated (after q(x)): 15.12GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  50%|█████     | 5/10 [00:01<00:01,  3.72it/s][A

cuda allocated (after p(x)): 15.09GiB (-35.0MiB)
cuda allocated (after FNO1): 16.06GiB (+1000.89MiB)
cuda allocated (after FNO2): 16.75GiB (+700.89MiB)
cuda allocated (after FNO3): 17.43GiB (+700.89MiB)
cuda allocated (after FNO4): 18.02GiB (+600.89MiB)
cuda allocated (after q(x)): 18.81GiB (+805.88MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  60%|██████    | 6/10 [00:01<00:00,  4.02it/s][A

cuda allocated (after p(x)): 18.77GiB (-35.0MiB)
cuda allocated (after FNO1): 19.75GiB (+1000.89MiB)
cuda allocated (after FNO2): 20.44GiB (+700.89MiB)
cuda allocated (after FNO3): 21.12GiB (+700.89MiB)
cuda allocated (after FNO4): 21.71GiB (+600.89MiB)
cuda allocated (after q(x)): 22.49GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 22.46GiB (-35.0MiB)
cuda allocated (after FNO1): 23.44GiB (+1000.89MiB)



Train loop:  70%|███████   | 7/10 [00:01<00:00,  4.27it/s][A

cuda allocated (after FNO2): 24.12GiB (+700.89MiB)
cuda allocated (after FNO3): 24.80GiB (+700.89MiB)
cuda allocated (after FNO4): 25.39GiB (+600.89MiB)
cuda allocated (after q(x)): 26.18GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  80%|████████  | 8/10 [00:01<00:00,  3.96it/s][A

cuda allocated (after p(x)): 26.14GiB (-35.0MiB)
cuda allocated (after FNO1): 27.12GiB (+1000.89MiB)
cuda allocated (after FNO2): 27.81GiB (+700.89MiB)
cuda allocated (after FNO3): 28.49GiB (+700.89MiB)
cuda allocated (after FNO4): 29.08GiB (+600.89MiB)
cuda allocated (after q(x)): 29.86GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 29.83GiB (-35.87MiB)
cuda allocated (after FNO1): 30.81GiB (+1000.89MiB)



Train loop:  90%|█████████ | 9/10 [00:02<00:00,  4.26it/s][A

cuda allocated (after FNO2): 31.49GiB (+700.89MiB)
cuda allocated (after FNO3): 32.17GiB (+700.89MiB)
cuda allocated (after FNO4): 32.76GiB (+600.89MiB)
cuda allocated (after q(x)): 33.55GiB (+805.88MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 33.51GiB (-35.0MiB)
cuda allocated (after FNO1): 34.49GiB (+1000.89MiB)
cuda allocated (after FNO2): 35.18GiB (+700.89MiB)
cuda allocated (after FNO3): 35.86GiB (+700.89MiB)



Train loop: 100%|██████████| 10/10 [00:02<00:00,  4.18it/s][A

cuda allocated (after FNO4): 36.45GiB (+600.89MiB)
cuda allocated (after q(x)): 37.23GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])





cuda allocated (after backwardpass): 235.47MiB (-37.0GiB)
cuda allocated (after loading first batch): 285.47MiB (+50.0MiB)



Train loop:   0%|          | 0/10 [00:00<?, ?it/s][A

cuda allocated (after p(x)): 405.47MiB (+120MiB)
cuda allocated (after FNO1): 1.37GiB (+1000.89MiB)
cuda allocated (after FNO2): 2.06GiB (+700.89MiB)



Train loop:  10%|█         | 1/10 [00:00<00:02,  3.64it/s][A

cuda allocated (after FNO3): 2.74GiB (+700.89MiB)
cuda allocated (after FNO4): 3.33GiB (+600.89MiB)
cuda allocated (after q(x)): 4.12GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 4.03GiB (-90.0MiB)
cuda allocated (after FNO1): 5.00GiB (+1000.89MiB)
cuda allocated (after FNO2): 5.69GiB (+700.89MiB)



Train loop:  20%|██        | 2/10 [00:00<00:01,  4.31it/s][A

cuda allocated (after FNO3): 6.37GiB (+700.89MiB)
cuda allocated (after FNO4): 6.96GiB (+600.89MiB)
cuda allocated (after q(x)): 7.75GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  30%|███       | 3/10 [00:00<00:01,  4.46it/s][A

cuda allocated (after p(x)): 7.72GiB (-30.0MiB)
cuda allocated (after FNO1): 8.69GiB (+1000.89MiB)
cuda allocated (after FNO2): 9.38GiB (+700.89MiB)
cuda allocated (after FNO3): 10.06GiB (+700.89MiB)
cuda allocated (after FNO4): 10.65GiB (+600.89MiB)
cuda allocated (after q(x)): 11.44GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  40%|████      | 4/10 [00:00<00:01,  4.02it/s][A

cuda allocated (after p(x)): 11.40GiB (-35.0MiB)
cuda allocated (after FNO1): 12.38GiB (+1000.89MiB)
cuda allocated (after FNO2): 13.06GiB (+700.89MiB)
cuda allocated (after FNO3): 13.75GiB (+700.89MiB)
cuda allocated (after FNO4): 14.34GiB (+600.89MiB)
cuda allocated (after q(x)): 15.12GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 15.09GiB (-34.12MiB)



Train loop:  50%|█████     | 5/10 [00:01<00:01,  4.31it/s][A

cuda allocated (after FNO1): 16.07GiB (+1000.89MiB)
cuda allocated (after FNO2): 16.75GiB (+700.89MiB)
cuda allocated (after FNO3): 17.43GiB (+700.89MiB)
cuda allocated (after FNO4): 18.02GiB (+600.89MiB)
cuda allocated (after q(x)): 18.81GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 18.77GiB (-35.0MiB)
cuda allocated (after FNO1): 19.75GiB (+1000.89MiB)
cuda allocated (after FNO2): 20.44GiB (+700.89MiB)



Train loop:  60%|██████    | 6/10 [00:01<00:00,  4.51it/s][A
Train loop:  70%|███████   | 7/10 [00:01<00:00,  4.69it/s][A

cuda allocated (after FNO3): 21.12GiB (+700.89MiB)
cuda allocated (after FNO4): 21.71GiB (+600.89MiB)
cuda allocated (after q(x)): 22.49GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 22.46GiB (-35.0MiB)
cuda allocated (after FNO1): 23.44GiB (+1000.89MiB)
cuda allocated (after FNO2): 24.12GiB (+700.89MiB)
cuda allocated (after FNO3): 24.80GiB (+700.89MiB)
cuda allocated (after FNO4): 25.39GiB (+600.89MiB)
cuda allocated (after q(x)): 26.18GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  80%|████████  | 8/10 [00:01<00:00,  4.70it/s][A

cuda allocated (after p(x)): 26.14GiB (-35.0MiB)
cuda allocated (after FNO1): 27.12GiB (+1000.89MiB)
cuda allocated (after FNO2): 27.81GiB (+700.89MiB)
cuda allocated (after FNO3): 28.49GiB (+700.89MiB)
cuda allocated (after FNO4): 29.08GiB (+600.89MiB)
cuda allocated (after q(x)): 29.86GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 29.83GiB (-35.0MiB)
cuda allocated (after FNO1): 30.81GiB (+1000.89MiB)



Train loop:  90%|█████████ | 9/10 [00:01<00:00,  4.82it/s][A

cuda allocated (after FNO2): 31.49GiB (+700.89MiB)
cuda allocated (after FNO3): 32.18GiB (+700.89MiB)
cuda allocated (after FNO4): 32.76GiB (+600.89MiB)
cuda allocated (after q(x)): 33.55GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop: 100%|██████████| 10/10 [00:02<00:00,  4.39it/s][A

cuda allocated (after p(x)): 33.51GiB (-35.0MiB)
cuda allocated (after FNO1): 34.49GiB (+1000.89MiB)
cuda allocated (after FNO2): 35.18GiB (+700.89MiB)
cuda allocated (after FNO3): 35.86GiB (+700.89MiB)
cuda allocated (after FNO4): 36.45GiB (+600.89MiB)
cuda allocated (after q(x)): 37.23GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])





cuda allocated (after backwardpass): 235.47MiB (-37.0GiB)
cuda allocated (after loading first batch): 285.47MiB (+50.0MiB)



Train loop:   0%|          | 0/10 [00:00<?, ?it/s][A
Train loop:  10%|█         | 1/10 [00:00<00:01,  5.20it/s][A

cuda allocated (after p(x)): 405.47MiB (+120MiB)
cuda allocated (after FNO1): 1.37GiB (+1000.89MiB)
cuda allocated (after FNO2): 2.06GiB (+700.89MiB)
cuda allocated (after FNO3): 2.74GiB (+700.89MiB)
cuda allocated (after FNO4): 3.33GiB (+600.89MiB)
cuda allocated (after q(x)): 4.12GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  20%|██        | 2/10 [00:00<00:01,  4.99it/s][A

cuda allocated (after p(x)): 4.03GiB (-90.0MiB)
cuda allocated (after FNO1): 5.00GiB (+1000.89MiB)
cuda allocated (after FNO2): 5.69GiB (+700.89MiB)
cuda allocated (after FNO3): 6.37GiB (+700.89MiB)
cuda allocated (after FNO4): 6.96GiB (+600.89MiB)
cuda allocated (after q(x)): 7.75GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 7.72GiB (-29.12MiB)
cuda allocated (after FNO1): 8.70GiB (+1000.89MiB)
cuda allocated (after FNO2): 9.38GiB (+700.89MiB)



Train loop:  30%|███       | 3/10 [00:00<00:01,  5.00it/s][A

cuda allocated (after FNO3): 10.06GiB (+700.89MiB)
cuda allocated (after FNO4): 10.65GiB (+600.89MiB)
cuda allocated (after q(x)): 11.44GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])
cuda allocated (after p(x)): 11.40GiB (-35.87MiB)
cuda allocated (after FNO1): 12.38GiB (+1000.89MiB)
cuda allocated (after FNO2): 13.06GiB (+700.89MiB)
cuda allocated (after FNO3): 13.75GiB (+700.89MiB)
cuda allocated (after FNO4): 14.34GiB (+600.89MiB)



Train loop:  40%|████      | 4/10 [00:00<00:01,  4.95it/s][A

cuda allocated (after q(x)): 15.12GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  50%|█████     | 5/10 [00:01<00:01,  4.29it/s][A

cuda allocated (after p(x)): 15.09GiB (-35.0MiB)
cuda allocated (after FNO1): 16.06GiB (+1000.89MiB)
cuda allocated (after FNO2): 16.75GiB (+700.89MiB)
cuda allocated (after FNO3): 17.43GiB (+700.89MiB)
cuda allocated (after FNO4): 18.02GiB (+600.89MiB)
cuda allocated (after q(x)): 18.81GiB (+805.88MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  60%|██████    | 6/10 [00:01<00:00,  4.48it/s][A

cuda allocated (after p(x)): 18.77GiB (-35.0MiB)
cuda allocated (after FNO1): 19.75GiB (+1000.89MiB)
cuda allocated (after FNO2): 20.44GiB (+700.89MiB)
cuda allocated (after FNO3): 21.12GiB (+700.89MiB)
cuda allocated (after FNO4): 21.71GiB (+600.89MiB)
cuda allocated (after q(x)): 22.49GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  70%|███████   | 7/10 [00:01<00:00,  4.61it/s][A

cuda allocated (after p(x)): 22.46GiB (-35.0MiB)
cuda allocated (after FNO1): 23.44GiB (+1000.89MiB)
cuda allocated (after FNO2): 24.12GiB (+700.89MiB)
cuda allocated (after FNO3): 24.80GiB (+700.89MiB)
cuda allocated (after FNO4): 25.39GiB (+600.89MiB)
cuda allocated (after q(x)): 26.18GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  80%|████████  | 8/10 [00:01<00:00,  4.14it/s][A

cuda allocated (after p(x)): 26.14GiB (-35.0MiB)
cuda allocated (after FNO1): 27.12GiB (+1000.89MiB)
cuda allocated (after FNO2): 27.81GiB (+700.89MiB)
cuda allocated (after FNO3): 28.49GiB (+700.89MiB)
cuda allocated (after FNO4): 29.08GiB (+600.89MiB)
cuda allocated (after q(x)): 29.86GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop:  90%|█████████ | 9/10 [00:01<00:00,  4.36it/s][A

cuda allocated (after p(x)): 29.83GiB (-35.87MiB)
cuda allocated (after FNO1): 30.81GiB (+1000.89MiB)
cuda allocated (after FNO2): 31.49GiB (+700.89MiB)
cuda allocated (after FNO3): 32.17GiB (+700.89MiB)
cuda allocated (after FNO4): 32.76GiB (+600.89MiB)
cuda allocated (after q(x)): 33.55GiB (+805.88MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])



Train loop: 100%|██████████| 10/10 [00:02<00:00,  4.38it/s][A

cuda allocated (after p(x)): 33.51GiB (-35.0MiB)
cuda allocated (after FNO1): 34.49GiB (+1000.89MiB)
cuda allocated (after FNO2): 35.18GiB (+700.89MiB)
cuda allocated (after FNO3): 35.86GiB (+700.89MiB)
cuda allocated (after FNO4): 36.45GiB (+600.89MiB)
cuda allocated (after q(x)): 37.23GiB (+805MiB)
ouput: torch.Size([20, 1024, 64, 1]) pred: torch.Size([20, 1024, 64, 1])





cuda allocated (after backwardpass): 235.47MiB (-37.0GiB)



Validation loop:   0%|          | 0/10 [00:00<?, ?it/s][A
Validation loop:  10%|█         | 1/10 [00:00<00:01,  5.20it/s][A

cuda allocated (after p(x)): 345.47MiB (+110.0MiB)
cuda allocated (after FNO1): 545.47MiB (+200MiB)
cuda allocated (after FNO2): 545.47MiB (no change)
cuda allocated (after FNO3): 545.47MiB (no change)
cuda allocated (after FNO4): 545.47MiB (no change)
cuda allocated (after q(x)): 450.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 295.47MiB (-155.0MiB)



Validation loop:  20%|██        | 2/10 [00:00<00:01,  5.05it/s][A

cuda allocated (after FNO1): 495.47MiB (+200MiB)
cuda allocated (after FNO2): 495.47MiB (no change)
cuda allocated (after FNO3): 495.47MiB (no change)
cuda allocated (after FNO4): 495.47MiB (no change)
cuda allocated (after q(x)): 400.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 305.47MiB (-95MiB)
cuda allocated (after FNO1): 505.47MiB (+200MiB)
cuda allocated (after FNO2): 505.47MiB (no change)
cuda allocated (after FNO3): 505.47MiB (no change)
cuda allocated (after FNO4): 505.47MiB (no change)
cuda allocated (after q(x)): 410.47MiB (-95MiB)



Validation loop:  30%|███       | 3/10 [00:00<00:01,  5.05it/s][A

torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 310.47MiB (-100MiB)
cuda allocated (after FNO1): 510.47MiB (+200MiB)
cuda allocated (after FNO2): 510.47MiB (no change)
cuda allocated (after FNO3): 510.47MiB (no change)
cuda allocated (after FNO4): 510.47MiB (no change)



Validation loop:  40%|████      | 4/10 [00:00<00:01,  5.01it/s][A
Validation loop:  50%|█████     | 5/10 [00:00<00:00,  5.06it/s][A

cuda allocated (after q(x)): 415.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 315.47MiB (-100MiB)
cuda allocated (after FNO1): 515.47MiB (+200MiB)
cuda allocated (after FNO2): 515.47MiB (no change)
cuda allocated (after FNO3): 515.47MiB (no change)
cuda allocated (after FNO4): 515.47MiB (no change)
cuda allocated (after q(x)): 420.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])



Validation loop:  60%|██████    | 6/10 [00:01<00:00,  4.98it/s][A

cuda allocated (after p(x)): 320.47MiB (-100MiB)
cuda allocated (after FNO1): 520.47MiB (+200MiB)
cuda allocated (after FNO2): 520.47MiB (no change)
cuda allocated (after FNO3): 520.47MiB (no change)
cuda allocated (after FNO4): 520.47MiB (no change)
cuda allocated (after q(x)): 425.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])



Validation loop:  70%|███████   | 7/10 [00:01<00:00,  4.33it/s][A

cuda allocated (after p(x)): 325.47MiB (-100MiB)
cuda allocated (after FNO1): 525.47MiB (+200MiB)
cuda allocated (after FNO2): 525.47MiB (no change)
cuda allocated (after FNO3): 525.47MiB (no change)
cuda allocated (after FNO4): 525.47MiB (no change)
cuda allocated (after q(x)): 430.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])



Validation loop:  80%|████████  | 8/10 [00:01<00:00,  4.54it/s][A

cuda allocated (after p(x)): 330.47MiB (-100MiB)
cuda allocated (after FNO1): 530.47MiB (+200MiB)
cuda allocated (after FNO2): 530.47MiB (no change)
cuda allocated (after FNO3): 530.47MiB (no change)
cuda allocated (after FNO4): 530.47MiB (no change)
cuda allocated (after q(x)): 435.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])



Validation loop:  90%|█████████ | 9/10 [00:01<00:00,  4.60it/s][A

cuda allocated (after p(x)): 335.47MiB (-100MiB)
cuda allocated (after FNO1): 535.47MiB (+200MiB)
cuda allocated (after FNO2): 535.47MiB (no change)
cuda allocated (after FNO3): 535.47MiB (no change)
cuda allocated (after FNO4): 535.47MiB (no change)
cuda allocated (after q(x)): 440.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 340.47MiB (-100MiB)
cuda allocated (after FNO1): 540.47MiB (+200MiB)
cuda allocated (after FNO2): 540.47MiB (no change)
cuda allocated (after FNO3): 540.47MiB (no change)



Validation loop: 100%|██████████| 10/10 [00:02<00:00,  4.77it/s][A


cuda allocated (after FNO4): 540.47MiB (no change)
cuda allocated (after q(x)): 445.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after val first batch): 235.47MiB (-210MiB)



Validation loop:   0%|          | 0/10 [00:00<?, ?it/s][A
Validation loop:  10%|█         | 1/10 [00:00<00:01,  5.11it/s][A

cuda allocated (after p(x)): 345.47MiB (+110.0MiB)
cuda allocated (after FNO1): 545.47MiB (+200MiB)
cuda allocated (after FNO2): 545.47MiB (no change)
cuda allocated (after FNO3): 545.47MiB (no change)
cuda allocated (after FNO4): 545.47MiB (no change)
cuda allocated (after q(x)): 450.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])



Validation loop:  20%|██        | 2/10 [00:00<00:01,  4.87it/s][A

cuda allocated (after p(x)): 295.47MiB (-155.0MiB)
cuda allocated (after FNO1): 495.47MiB (+200MiB)
cuda allocated (after FNO2): 495.47MiB (no change)
cuda allocated (after FNO3): 495.47MiB (no change)
cuda allocated (after FNO4): 495.47MiB (no change)
cuda allocated (after q(x)): 400.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])



Validation loop:  30%|███       | 3/10 [00:00<00:01,  4.14it/s][A

cuda allocated (after p(x)): 305.47MiB (-95MiB)
cuda allocated (after FNO1): 505.47MiB (+200MiB)
cuda allocated (after FNO2): 505.47MiB (no change)
cuda allocated (after FNO3): 505.47MiB (no change)
cuda allocated (after FNO4): 505.47MiB (no change)
cuda allocated (after q(x)): 410.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])



Validation loop:  40%|████      | 4/10 [00:00<00:01,  4.40it/s][A

cuda allocated (after p(x)): 310.47MiB (-100MiB)
cuda allocated (after FNO1): 510.47MiB (+200MiB)
cuda allocated (after FNO2): 510.47MiB (no change)
cuda allocated (after FNO3): 510.47MiB (no change)
cuda allocated (after FNO4): 510.47MiB (no change)
cuda allocated (after q(x)): 415.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])



Validation loop:  50%|█████     | 5/10 [00:01<00:01,  4.01it/s][A

cuda allocated (after p(x)): 315.47MiB (-100MiB)
cuda allocated (after FNO1): 515.47MiB (+200MiB)
cuda allocated (after FNO2): 515.47MiB (no change)
cuda allocated (after FNO3): 515.47MiB (no change)
cuda allocated (after FNO4): 515.47MiB (no change)
cuda allocated (after q(x)): 420.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 320.47MiB (-100MiB)



Validation loop:  60%|██████    | 6/10 [00:01<00:00,  4.30it/s][A

cuda allocated (after FNO1): 520.47MiB (+200MiB)
cuda allocated (after FNO2): 520.47MiB (no change)
cuda allocated (after FNO3): 520.47MiB (no change)
cuda allocated (after FNO4): 520.47MiB (no change)
cuda allocated (after q(x)): 425.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 325.47MiB (-100MiB)
cuda allocated (after FNO1): 525.47MiB (+200MiB)



Validation loop:  70%|███████   | 7/10 [00:01<00:00,  4.50it/s][A

cuda allocated (after FNO2): 525.47MiB (no change)
cuda allocated (after FNO3): 525.47MiB (no change)
cuda allocated (after FNO4): 525.47MiB (no change)
cuda allocated (after q(x)): 430.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 330.47MiB (-100MiB)
cuda allocated (after FNO1): 530.47MiB (+200MiB)
cuda allocated (after FNO2): 530.47MiB (no change)



Validation loop:  80%|████████  | 8/10 [00:01<00:00,  4.65it/s][A

cuda allocated (after FNO3): 530.47MiB (no change)
cuda allocated (after FNO4): 530.47MiB (no change)
cuda allocated (after q(x)): 435.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 335.47MiB (-100MiB)
cuda allocated (after FNO1): 535.47MiB (+200MiB)



Validation loop:  90%|█████████ | 9/10 [00:01<00:00,  4.72it/s][A

cuda allocated (after FNO2): 535.47MiB (no change)
cuda allocated (after FNO3): 535.47MiB (no change)
cuda allocated (after FNO4): 535.47MiB (no change)
cuda allocated (after q(x)): 440.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after p(x)): 340.47MiB (-100MiB)
cuda allocated (after FNO1): 540.47MiB (+200MiB)



Validation loop: 100%|██████████| 10/10 [00:02<00:00,  4.55it/s][A

cuda allocated (after FNO2): 540.47MiB (no change)
cuda allocated (after FNO3): 540.47MiB (no change)
cuda allocated (after FNO4): 540.47MiB (no change)
cuda allocated (after q(x)): 445.47MiB (-95MiB)
torch.Size([20, 1024, 64, 10])
cuda allocated (after val first batch): 235.47MiB (-210MiB)




Validation loop:   0%|          | 0/10 [00:00<?, ?it/s][A
Validation loop:  10%|█         | 1/10 [00:00<00:02,  3.48it/s][A

cuda allocated (after p(x)): 240.47MiB (+5.0MiB)
cuda allocated (after FNO1): 340.47MiB (+100MiB)
cuda allocated (after FNO2): 340.47MiB (no change)
cuda allocated (after FNO3): 340.47MiB (no change)
cuda allocated (after FNO4): 340.47MiB (no change)
cuda allocated (after q(x)): 292.97MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])
cuda allocated (after p(x)): 187.97MiB (-105.0MiB)



Validation loop:  20%|██        | 2/10 [00:00<00:01,  4.32it/s][A

cuda allocated (after FNO1): 287.97MiB (+100MiB)
cuda allocated (after FNO2): 287.97MiB (no change)
cuda allocated (after FNO3): 287.97MiB (no change)
cuda allocated (after FNO4): 287.97MiB (no change)
cuda allocated (after q(x)): 240.47MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])
cuda allocated (after p(x)): 192.97MiB (-47.5MiB)
cuda allocated (after FNO1): 292.97MiB (+100MiB)



Validation loop:  30%|███       | 3/10 [00:00<00:01,  4.57it/s][A

cuda allocated (after FNO2): 292.97MiB (no change)
cuda allocated (after FNO3): 292.97MiB (no change)
cuda allocated (after FNO4): 292.97MiB (no change)
cuda allocated (after q(x)): 245.47MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])
cuda allocated (after p(x)): 195.47MiB (-50MiB)



Validation loop:  40%|████      | 4/10 [00:00<00:01,  4.71it/s][A

cuda allocated (after FNO1): 295.47MiB (+100MiB)
cuda allocated (after FNO2): 295.47MiB (no change)
cuda allocated (after FNO3): 295.47MiB (no change)
cuda allocated (after FNO4): 295.47MiB (no change)
cuda allocated (after q(x)): 247.97MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])
cuda allocated (after p(x)): 197.97MiB (-50MiB)



Validation loop:  50%|█████     | 5/10 [00:01<00:01,  4.14it/s][A

cuda allocated (after FNO1): 297.97MiB (+100MiB)
cuda allocated (after FNO2): 297.97MiB (no change)
cuda allocated (after FNO3): 297.97MiB (no change)
cuda allocated (after FNO4): 297.97MiB (no change)
cuda allocated (after q(x)): 250.47MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])



Validation loop:  60%|██████    | 6/10 [00:01<00:00,  4.36it/s][A

cuda allocated (after p(x)): 200.47MiB (-50MiB)
cuda allocated (after FNO1): 300.47MiB (+100MiB)
cuda allocated (after FNO2): 300.47MiB (no change)
cuda allocated (after FNO3): 300.47MiB (no change)
cuda allocated (after FNO4): 300.47MiB (no change)
cuda allocated (after q(x)): 252.97MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])



Validation loop:  70%|███████   | 7/10 [00:01<00:00,  4.55it/s][A

cuda allocated (after p(x)): 203.84MiB (-49.12MiB)
cuda allocated (after FNO1): 303.84MiB (+100MiB)
cuda allocated (after FNO2): 303.84MiB (no change)
cuda allocated (after FNO3): 303.84MiB (no change)
cuda allocated (after FNO4): 303.84MiB (no change)
cuda allocated (after q(x)): 256.34MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])



Validation loop:  80%|████████  | 8/10 [00:01<00:00,  4.64it/s][A

cuda allocated (after p(x)): 205.47MiB (-50.88MiB)
cuda allocated (after FNO1): 305.47MiB (+100MiB)
cuda allocated (after FNO2): 305.47MiB (no change)
cuda allocated (after FNO3): 305.47MiB (no change)
cuda allocated (after FNO4): 305.47MiB (no change)
cuda allocated (after q(x)): 257.97MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])



Validation loop:  90%|█████████ | 9/10 [00:02<00:00,  4.21it/s][A

cuda allocated (after p(x)): 208.84MiB (-49.12MiB)
cuda allocated (after FNO1): 308.84MiB (+100MiB)
cuda allocated (after FNO2): 308.84MiB (no change)
cuda allocated (after FNO3): 308.84MiB (no change)
cuda allocated (after FNO4): 308.84MiB (no change)
cuda allocated (after q(x)): 261.34MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])
cuda allocated (after p(x)): 210.47MiB (-50.88MiB)



Validation loop: 100%|██████████| 10/10 [00:02<00:00,  4.39it/s][A
 Epoch loop: 100%|██████████| 1/1 [00:39<00:00, 39.15s/it]

cuda allocated (after FNO1): 310.47MiB (+100MiB)
cuda allocated (after FNO2): 310.47MiB (no change)
cuda allocated (after FNO3): 310.47MiB (no change)
cuda allocated (after FNO4): 310.47MiB (no change)
cuda allocated (after q(x)): 262.97MiB (-47.5MiB)
torch.Size([10, 1024, 64, 10])
cuda allocated (after val first batch): 157.97MiB (-105MiB)
0 39.14516788022593 0.9505900421142577 0.950590991973877 1.1083130798339844 1.1083130645751953





In [23]:
memory.print("after one epoch")

cuda allocated (after one epoch): 157.97MiB (no change)


# Inference Plotting

In [None]:

row = 1
col = 5
i = np.arange(0,64,1)
t = t_start + 1
write_no, iteration, sim_time, time_step, wall_time = rbc_scales('multi', file1, 0, t)
print(k,t,write_no, iteration, sim_time, time_step, wall_time)
print('-------------------------------')
fig = plt.figure(figsize=(16, 10))
ax1 = fig.add_subplot(row, col, 1)
ax1.set_title('Input u(x,z)_{t+1} vs model_{t}')
ax1.plot(x1[:,i,0],color='b',label="u_x")
ax1.plot(x1[:,i,1],color='b',label="u_z")
ax1.plot(z1[:,i,0],'.',color='g',label="u_x")
ax1.plot(z1[:,i,1],'.',color='g',label="u_z")
ax1.grid()

ax2 = fig.add_subplot(row, col, 2)
ax2.set_title(f'Velocity x-component at t={np.round(sim_time,2)}')
ax2.plot(y[:,i,0],color='b',label="Dedalus")
ax2.plot(z[:,i,0],'.',color='g',label="FNO")
ax2.grid()


ax3 = fig.add_subplot(row, col, 3)
ax3.set_title(f'Velocity z-component at t={np.round(sim_time,2)}')
ax3.plot(y[:,i,1],color='b',label="Dedalus")
ax3.plot(z[:,i,1],'.',color='g',label="FNO")
ax3.grid()

ax4 = fig.add_subplot(row, col, 4)
ax4.set_title(f'Buoyancy at t={np.round(sim_time,2)}')
ax4.plot(y[:,i,2],color='b',label="Dedalus")
ax4.plot(z[:,i,2],'.',color='g',label="FNO")
ax4.grid()

ax5 = fig.add_subplot(row, col, 5)
ax5.set_title(f'Pressure at t={np.round(sim_time,2)}')
ax5.plot(y[:,i,3],color='b', label="Dedalus")
ax5.plot(z[:,i,3],'.',color='g', label="FNO")
ax5.grid()


fig.suptitle(f'RBC-2D with {sx}'+r'$\times$'+f'{sz} grid and $Ra=10^4, Pr=0.8$ with inputs at $t={np.round(sim_time1,2)}$',x=0.6, y =0.98)#\n *(FNO=green["."],Dedalus=["-"])')
ded_patch = Line2D([0], [0], label='Dedalus', color='b')
fno_patch = Line2D([0], [0], label='FNO',marker='.', color='g')
fig.legend(handles=[ded_patch, fno_patch], loc="upper right")
fig.tight_layout()
fig.show()