## 1. Set up the experiment

### 1-1. Import modules

In [1]:
import  os, time
from    pathlib             import  Path
from    tqdm.notebook       import  tqdm
import  pickle
import  yaml

import  numpy       as  np
import  torch
from    torch       import  nn
from    torch.utils.data            import  TensorDataset, DataLoader

from    custom_modules.utils                import  get_time_str
from    custom_modules.utils                import  GridGenerator, npzReader, GaussianNormalizer
from    custom_modules.pytorch.neuralop     import  DeepONetUnstructured  as  DeepONet
from    custom_modules.pytorch.torch_utils  import  count_parameters
from    custom_modules.pytorch.gradients    import  *


time_str = get_time_str()

### 1-2. Load the configurations

In [2]:
time_str = get_time_str()
with open("config_train.yaml") as f:
    config      = yaml.load(f, Loader = yaml.FullLoader)
    _exp        = config['experiment']
    _data       = config['pde_dataset']
    _deeponet   = config['deeponet']

### 1-3. Set the experiment

In [3]:
# NOTE Training and data preprocess


BATCH_SIZE      = _exp['batch_size']
NUM_EPOCHS      = _exp['num_epochs']
LEARNING_RATE   = _exp['learning_rate']
TRAIN_SIZE      = _exp['train_size']
VAL_SIZE        = _exp['val_size']
DEVICE          = torch.device(f"cuda:{_exp['cuda_index']}")


RESOLUTION      = _data['resolution']
TRAIN_PATH      = Path(_data['path'])
__RANDOM_CHOICE = np.random.choice(1024, TRAIN_SIZE + VAL_SIZE, replace = False)
TRAIN_MASK      = __RANDOM_CHOICE[:TRAIN_SIZE]
VAL_MASK        = __RANDOM_CHOICE[-VAL_SIZE:]


DOWNSAMPLE      = _data['downsample']
GRID            = (RESOLUTION - 1) // DOWNSAMPLE + 1
grid            = torch.stack(
                        torch.meshgrid(
                            torch.linspace(0, 1, GRID),
                            torch.linspace(0, 1, GRID),
                            indexing = 'ij',
                        ),
                        dim = -1
                    ).requires_grad_()   # Shape: (GRID, GRID, dim_domain = 2)
grid            = grid.reshape(-1, 2).to(DEVICE)

## 2. Preprocess data

### 2-1. Instantiate the storages

In [4]:
train_data: dict[str, torch.Tensor] = {
    'coeff':        None,
    'Kcoeff':       None,
    'Kcoeff_x':     None,
    'Kcoeff_y':     None,
    'sol':          None,
}
val_data: dict[str, torch.Tensor] = {
    'coeff':        None,
    'Kcoeff':       None,
    'Kcoeff_x':     None,
    'Kcoeff_y':     None,
    'sol':          None,
}


normalizer: dict[str, GaussianNormalizer] = {
    'coeff':    None,
    'Kcoeff':   None,
    # FOR THE DERIVATIVES, THE NORMALIZER FOR THE MOLLIFIED COEFFICIENT FUNCTIONS IS USED
    'sol':      None,
}

### 2-2. Load the train data

In [5]:
# Train data
reader = npzReader(TRAIN_PATH)
for k in tqdm(train_data.keys(), desc = "Preprocessing the train data"):   
    # Step 1. Load data
    train_data[k] = torch.from_numpy(reader.get_field(k)[TRAIN_MASK, ::DOWNSAMPLE, ::DOWNSAMPLE])
    train_data[k] = train_data[k].type(torch.float)
    train_data[k] = train_data[k].reshape(TRAIN_SIZE, -1).to(DEVICE)
    
    # Step 2. Normalize data
    normalizer[k] = GaussianNormalizer(train_data[k])
    normalizer[k].to(DEVICE)
    if k not in ('Kcoeff_x', 'Kcoeff_y'):
        train_data[k] = normalizer[k].encode(train_data[k])


train_data['Kcoeff_grad'] = torch.stack([train_data['Kcoeff_x'], train_data['Kcoeff_y']], dim = -1)
train_data['Kcoeff_grad'] = normalizer['coeff'].encode(train_data['Kcoeff_grad'])
train_data.pop('Kcoeff_x')
train_data.pop('Kcoeff_y')
train_data['grid'] = grid.unsqueeze(0).repeat(len(train_data['coeff']), 1, 1)


# Validation data
for cnt, k in tqdm(enumerate(val_data.keys()), desc = "Preprocessing the validation data"):
    # Step 1. Load data
    val_data[k] = torch.from_numpy(reader.get_field(k)[VAL_MASK, ::DOWNSAMPLE, ::DOWNSAMPLE])
    val_data[k] = val_data[k].type(torch.float)
    val_data[k] = val_data[k].reshape(VAL_SIZE, -1).to(DEVICE)
    
    # Step 2. Normalize data (NOTE: Uses the normalizers for the train dataset)
    if k not in ('Kcoeff_x', 'Kcoeff_y'):
        val_data[k] = normalizer[k].encode(val_data[k])


val_data['Kcoeff_grad'] = torch.stack([val_data['Kcoeff_x'], val_data['Kcoeff_y']], dim = -1)
val_data['Kcoeff_grad'] = normalizer['coeff'].encode(val_data['Kcoeff_grad'])
val_data.pop('Kcoeff_x')
val_data.pop('Kcoeff_y')
val_data['grid'] = grid.unsqueeze(0).repeat(len(val_data['coeff']), 1, 1)

print(train_data.keys())
print(val_data.keys())

Preprocessing the train data:   0%|          | 0/5 [00:00<?, ?it/s]

Preprocessing the validation data: 0it [00:00, ?it/s]

dict_keys(['coeff', 'Kcoeff', 'sol', 'Kcoeff_grad', 'grid'])
dict_keys(['coeff', 'Kcoeff', 'sol', 'Kcoeff_grad', 'grid'])


### 2-3. Instantiate dataloaders

In [6]:
train_dataset = TensorDataset(
                    train_data['grid'], train_data['coeff'],
                    train_data['Kcoeff'], train_data['Kcoeff_grad'],
                    train_data['sol']
                )
val_dataset   = TensorDataset(
                    val_data['grid'], val_data['coeff'],
                    val_data['Kcoeff'], val_data['Kcoeff_grad'],
                    val_data['sol']
                )
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
val_loader   = torch.utils.data.DataLoader(  val_dataset,  batch_size = BATCH_SIZE, shuffle = True) 

## 3. Train the model

### 3-1. Initialize the model and instantiate the loss function and the optimizer

In [7]:
deeponet = DeepONet(**_deeponet).to(DEVICE)
print(f"The number of the parameters in the model\n>>> {count_parameters(deeponet)}")
print(deeponet)

for p in deeponet.parameters():
    if p.ndim == 1:
        torch.nn.init.zeros_(p)
    else:
        torch.nn.init.xavier_uniform_(p)

criterion = torch.nn.MSELoss(reduction = 'mean')
optimizer = torch.optim.Adam(params = deeponet.parameters(), lr = _exp['learning_rate'])

The number of the parameters in the model
>>> 1018881
DeepONet(
    unstructured,
    branch=MLP(layer=(961, 512, 512, 256), bias=True, activation=relu),
    trunk =MLP(layer=(2, 256, 256, 256), bias=True, activation=relu),
)


In [8]:
def compute_loss_gov(
        grid:       torch.Tensor,
        pred:       torch.Tensor,
        data:       torch.Tensor,
        data_grad:  torch.Tensor,
        verbose:    bool = False,
    ) -> torch.Tensor:
    """
    Note that the forcing term is set to be the unity.
    
    * `grid` is of shape `(B, num_nodes, dim_space)`
    * `pred` is of shape `(B, num_nodes, channel=1)`
    * `data` is of shape `(B, num_nodes, channel=1)`
    * `data_grad` is of shape `(B, num_nodes, dim_space)`
    """
    forcing_term    = 1
    pred_grad       = compute_grad(pred, grid)
    pred_laplacian  = torch.stack(
                            [
                                compute_grad(pred_grad[..., d], grid)[..., d]
                                for d in range(grid.shape[-1])
                            ], dim = -1
                        ).sum( dim = -1 )
    if verbose:
        print(f"grid.shape\n>>> {grid.shape}")
        print(f"pred.shape\n>>> {pred.shape}")
        print(f"pred_grad.shape\n>>> {pred_grad.shape}")
        print(f"data.shape\n>>> {data.shape}")
        print(f"data_grad.shape\n>>> {data_grad.shape}")
        print(f"pred_laplacian.shape\n>>> {pred_laplacian.shape}")
    
    assert data_grad.shape == pred_grad.shape, \
        f"{data_grad.shape} and {pred_grad.shape}"
    v1 = (data_grad * pred_grad).sum(dim = -1)
    
    assert data.shape == pred_laplacian.shape, \
        f"{data.shape} and {pred_laplacian.shape}"
    v2 = data * pred_laplacian
    
    assert v1.shape == v2.shape
    lhs = -(v1 + v2)
    return torch.pow(lhs - forcing_term, 2).mean()
    

def compute_loss_bc(
        pred:       torch.Tensor,
        target:     torch.Tensor,
        boundary:   torch.Tensor | slice,
    ) -> torch.Tensor:
    return

In [9]:
mygrid = torch.meshgrid(torch.linspace(0, 1, 11), torch.linspace(0, 1, 11), indexing = 'ij')
mygrid = torch.stack(mygrid, dim = -1)
mygrid.requires_grad_()


u = mygrid[..., 0] ** 2 + mygrid[..., 1] ** 3
d1u = compute_grad(u, mygrid)
d2u = torch.stack([compute_grad(d1u[..., d], mygrid)[..., d] for d in range(mygrid.size(-1))], dim = -1)
lapu = d2u.sum(-1)
print(lapu)

tensor([[2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.8000, 4.4000, 5.0000, 5.6000, 6.2000, 6.8000,
         7.4000, 8.0000],
        [2.0000, 2.6000, 3.2000, 3.80

### 3-2. Train the model

In [10]:
train_history = {
    'train_loss':   [],
    'train_error':  [],
    'val_loss':     [],
    'val_error':    [],
    'train_time':   0.0,
}
normalizer['sol'].to(DEVICE)

elapsed_time = time.time()
for epoch in tqdm(range(1, NUM_EPOCHS + 1)):
    # NOTE: Train
    deeponet.train()
    _train_time = time.time()
    train_epoch_loss:  torch.Tensor = 0
    train_epoch_error: torch.Tensor = 0
    
    grid: torch.Tensor; data: torch.Tensor; Kdata: torch.Tensor; Kdata_grad: torch.Tensor; target: torch.Tensor
    for grid, data, Kdata, Kdata_grad, target in train_loader:
        num_data = len(data)
        
        train_pred      = deeponet.forward((data, grid))
        train_loss_data = criterion.forward(train_pred, target)
        
        grid = grid.clone()
        grid.requires_grad_()
        _train_pred_gov = deeponet.forward((data, grid))
        # train_loss_gov  = 0
        train_loss_gov  = compute_loss_gov(grid, _train_pred_gov, Kdata, Kdata_grad)
        # assert False
        train_loss_bc   = 0
        train_loss      = train_loss_data + train_loss_gov + train_loss_bc
        
        optimizer.zero_grad()
        train_loss.backward()
        optimizer.step()
        
        train_epoch_loss = train_epoch_loss + train_loss * num_data
        train_pred  = normalizer['sol'].decode(train_pred)
        target      = normalizer['sol'].decode(target)
        train_epoch_error = train_epoch_error + (
            torch.linalg.norm(train_pred - target) / (1e-8 + torch.linalg.norm(target))
        ) * num_data
    _train_time = time.time() - _train_time
    train_history['train_time'] += _train_time
    train_epoch_loss    = train_epoch_loss / TRAIN_SIZE
    train_epoch_error   = train_epoch_error / TRAIN_SIZE
    train_history['train_loss'].append(train_epoch_loss.item())
    train_history['train_error'].append(train_epoch_error.item())
    
    
    # NOTE: Validation
    deeponet.eval()
    val_epoch_loss:     torch.Tensor = 0
    val_epoch_error:    torch.Tensor = 0
    with torch.no_grad():
        for grid, data, Kdata, Kdata_grad, target in val_loader:
            num_data = len(data)
            
            val_pred = deeponet.forward((data, grid))
            val_loss_data   = criterion.forward(val_pred, target)
            val_loss_gov    = 0     # compute_loss_gov(grid, val_pred, Kdata, Kdata_grad)
            val_loss_bc     = 0
            val_loss = val_loss_data + val_loss_gov + val_loss_bc
            
            val_epoch_loss  = val_epoch_loss + val_loss * num_data
            val_pred = normalizer['sol'].decode(val_pred)
            target   = normalizer['sol'].decode(target)
            val_epoch_error     = val_epoch_error + (
                                        torch.linalg.norm(val_pred - target) / (1e-8 + torch.linalg.norm(target))
                                    ) * num_data
    val_epoch_loss      = val_epoch_loss / VAL_SIZE
    val_epoch_error     = val_epoch_error / VAL_SIZE
    train_history['val_loss'].append(val_epoch_loss.item())
    train_history['val_error'].append(val_epoch_error.item())
    
    if epoch % 10 == 0 or epoch == 1:
        print(f"[ Epoch {epoch} / {NUM_EPOCHS} ]")
        for k in train_history.keys():
            if k == "train_time":
                continue
            print(f"* {k:15s}: {train_history[k][-1]:.4e}")
        print()
    
elapsed_time = time.time() - elapsed_time
print(f"Elapsed time: {int(elapsed_time)} seconds")

  0%|          | 0/300 [00:00<?, ?it/s]

[ Epoch 1 / 300 ]
* train_loss     : 1.6734e+01
* train_error    : 6.4041e-01
* val_loss       : 9.6595e-01
* val_error      : 5.9124e-01

[ Epoch 10 / 300 ]
* train_loss     : 1.6393e+00
* train_error    : 5.8958e-01
* val_loss       : 9.5616e-01
* val_error      : 5.8801e-01

[ Epoch 20 / 300 ]
* train_loss     : 1.5746e+00
* train_error    : 5.8920e-01
* val_loss       : 9.5347e-01
* val_error      : 5.8718e-01

[ Epoch 30 / 300 ]
* train_loss     : 1.5661e+00
* train_error    : 5.8892e-01
* val_loss       : 9.5322e-01
* val_error      : 5.8732e-01



KeyboardInterrupt: 

In [None]:
a = torch.arange(5, dtype = torch.float, requires_grad = True).reshape(-1, 1)
b = torch.cat([a, a ** 2]).sum()
# print(b)
compute_grad(b, a)

### 3-3. Save the model and the train history

In [None]:
deeponet.cpu()

# Save the model
os.makedirs(f"./{time_str}", exist_ok = True)
torch.save(deeponet.state_dict(), f"{time_str}/physics_informed_deeponet_darcy{RESOLUTION}_res{GRID}.pth")

# Save the normalizer, which will also be used in prediction
normalizer['sol'].cpu()
torch.save(normalizer, f"{time_str}/physics_informed_deeponet_darcy{RESOLUTION}_res{GRID}_normalizer.pth")

# Save the history
with open(f"{time_str}/physics_informed_deeponet_darcy{RESOLUTION}_res{GRID}.pickle", "wb") as f:
    pickle.dump(train_history, f)

## End of file