In [1]:
import time
import torch.nn as nn
from pathlib import Path
import torch.optim as optim
from neuralop.models import FNO
from accelerate import Accelerator
from generator import get_dataloader

## Initialization

In [2]:
accelerator = Accelerator()
device = accelerator.device

## Data

In [3]:
data_dir = Path("./data")
training_file = data_dir / "training_data.npz"
testing_file = data_dir / "testing_data.npz"

# prepare data generator for GPU device
get_dataloader = accelerator.prepare(get_dataloader)


# build training data
harmonic_ratio = 0.5
batch_size = 100
train_dataloader = get_dataloader(training_file, batch_size, balance_batches=True, harmonic_ratio=harmonic_ratio)


# build testing data
test_dataloader = get_dataloader(testing_file, batch_size=batch_size)

## Model

In [4]:
MAX_EIGENVALUES = 2

learning_rate = 1e-3
num_epochs = 1

In [5]:
# Initialize the model
fno_model = FNO(
    n_modes=(8, 8),
    hidden_channels=128,
    in_channels=1,
    out_channels=MAX_EIGENVALUES,
    lifting_channels=64,
    projection_channels=64,
    n_layers=64
)

In [6]:
# Create optimizer and loss function
optimizer = optim.Adam(fno_model.parameters(), lr=learning_rate)
loss_fn = nn.MSELoss()


fno_model, optimizer, loss_fn = accelerator.prepare(fno_model, optimizer, loss_fn)

In [7]:
for i,j,k in train_dataloader:
    print(i.unsqueeze(1).shape)
    break

torch.Size([100, 1, 64, 64])


## Training

In [8]:
# batch_potentials, batch_eigenvalues, batch_labels = next(train_dataloader)
# test_potentials, test_eigenvalues, test_labels = next(test_dataloader)

In [9]:
start = time.time()
# Training Loop
losses = []
for epoch in range(num_epochs):
    for batch_potentials, batch_eigenvalues, batch_labels in train_dataloader:
        print(batch_potentials.shape)
        print(batch_eigenvalues.shape)
        print(batch_labels.shape)
        
        # Set the model to training mode
        fno_model.train()

        predicted_eigen_vals = fno_model(batch_potentials.unsqueeze(1).to(device))
        break

    # # Extract the eigenvalues by averaging over the spatial dimensions
    # predicted_eigen_vals = predicted_eigen_vals.mean(dim=[2, 3])  # Shape: [500, 25]

    # # Compute the loss
    # # train_eigen_vals = torch.tensor(train_eigen_vals, dtype=torch.float32).to(device)
    # loss = loss_fn(predicted_eigen_vals, train_eigen_vals)
    # # compute relative error
    # loss = loss -  torch.mean(train_eigen_vals)
    # losses.append(loss)

    # # Backward pass and optimization
    # optimizer.zero_grad()  # Clear previous gradients
    # loss.backward()        # Compute gradients
    # optimizer.step()       # Update model parameters

    # # Print loss every 10 epochs
    # if (epoch + 1) % 10 == 0:
    #     print(f'Epoch [{epoch + 1}/{num_epochs}], Loss: {loss.item():.4f}')

end = time.time()
print(f'Time taken: {end - start}')

torch.Size([100, 64, 64])
torch.Size([100, 2])
torch.Size([100])


RuntimeError: MPS backend out of memory (MPS allocated: 17.20 GB, other allocations: 838.66 MB, max allowed: 18.13 GB). Tried to allocate 206.25 MB on private pool. Use PYTORCH_MPS_HIGH_WATERMARK_RATIO=0.0 to disable upper limit for memory allocations (may cause system failure).