In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy
import torch
import torch.nn as nn

from scipy.interpolate import griddata
from sklearn.datasets import make_regression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.optim import Adam, LBFGS
from torch.optim.lr_scheduler import SequentialLR, LinearLR, StepLR
from torch.utils.data import DataLoader, TensorDataset
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

from actnet.ActNet import ActNet
from actnet.utils import adaptive_gradient_clipping, pinns_loss

## SOTA Comparisons

### 2D Poisson PDE

In [None]:
# --- Settings for the 2D Poisson experiment ---
BATCH_SIZE = 128  # alternative value left by the author: 5000
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
N_EPOCHS_TRAIN, N_EPOCHS_FINE_TUNE = 30000, 100  # Adam epochs, then outer L-BFGS steps
W = 32  # selects the data file (poisson_w{W}.csv) and scales the PDE source term

# Number of boundary points and of interior collocation points drawn for training.
N_u, N_f = 100, 1000

In [None]:
# Load the reference solution (columns "x", "y", "u") for the chosen frequency W.
data = pd.read_csv(f"./data/poisson/poisson_w{W}.csv")

x = torch.Tensor(data["x"])
y = torch.Tensor(data["y"])
u_exact = torch.Tensor(data["u"])

# One row per sample point: column 0 is x, column 1 is y.
X_star = torch.hstack((x.flatten().reshape(-1, 1), y.flatten().reshape(-1, 1)))

# Indices of the points lying on an edge of the square (x = +-1 or y = +-1).
boundary = torch.where(
    (X_star[:, 0] == -1) | (X_star[:, 0] == 1) | (X_star[:, 1] == -1) | (X_star[:, 1] == 1)
)[0]
indices = np.random.choice(boundary.numpy(), N_u, replace=False)
x_train = X_star[indices]
u_train = u_exact[indices]

# Interior = every index that is not a boundary index. np.setdiff1d yields the same sorted
# candidates as testing `i not in boundary` for each i, without rescanning the tensor per index.
interior = np.setdiff1d(np.arange(len(X_star)), boundary.numpy())
indices = np.random.choice(interior, N_f, replace=False)
x_train_f = X_star[indices]

# Only the collocation points are batched; the boundary set is reused whole at every step.
poisson_data = DataLoader(x_train_f, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# ActNet constructor arguments (passed positionally below; see actnet.ActNet for their meaning).
d_in = 2
d_out = 1
d = 256
m = 32
N = 8
L = 4

model_poisson = ActNet(d_in, d_out, d, m, N, L).to(DEVICE)

# Learning-rate schedule: linear warm-up from LR_START to LR_PEAK over 1000 scheduler steps,
# then multiply by 0.75 every 1000 steps.
# LinearLR's arguments are multiplicative *factors* applied to the optimizer's base lr, not
# absolute rates, so the base lr must be the peak value and the warm-up is expressed as a ratio.
LR_START, LR_PEAK = 1e-7, 5e-3
optimizer = Adam(model_poisson.parameters(), lr=LR_PEAK)
scheduler = SequentialLR(
    optimizer,
    [
        LinearLR(optimizer, start_factor=LR_START / LR_PEAK, end_factor=1.0, total_iters=1000),
        StepLR(optimizer, step_size=1000, gamma=0.75),
    ],
    milestones=[1000],
)

In [None]:
writer = SummaryWriter()
model_poisson.train()

# The boundary batch is identical at every step, so it is moved to the device once.
# Targets are flattened to (N_u,) to line up with the flattened predictions below.
inputs = x_train.to(DEVICE)
targets = u_train.to(DEVICE).reshape(-1)

for epoch in tqdm(range(N_EPOCHS_TRAIN)):
    total_loss = 0
    for inputs_f in poisson_data:
        # Collocation points need gradients so derivatives can be taken with respect to them.
        inputs_f = inputs_f.to(DEVICE).requires_grad_(True)

        # reshape(-1) turns an (n, 1) network output into (n,). Without it, multiplying by the
        # (n,) coordinate factors broadcasts to an (n, n) matrix and corrupts loss and derivatives.
        u_pred = model_poisson(inputs).reshape(-1)
        outputs_f = model_poisson(inputs_f).reshape(-1)

        # Hard boundary constraint: the factor (1 - x^2)(1 - y^2) vanishes where x or y is +-1.
        u_pred = u_pred * (1 - inputs[:, 0]**2) * (1 - inputs[:, 1]**2)
        outputs_f = outputs_f * (1 - inputs_f[:, 0]**2) * (1 - inputs_f[:, 1]**2)

        # First derivatives (columns: d/dx, d/dy), then the two pure second derivatives.
        u_grad = torch.autograd.grad(outputs_f, inputs_f, grad_outputs=torch.ones_like(outputs_f), create_graph=True)[0]
        u_xx = torch.autograd.grad(u_grad[:, 0], inputs_f, grad_outputs=torch.ones_like(u_grad[:, 0]), create_graph=True)[0][:, 0]
        u_yy = torch.autograd.grad(u_grad[:, 1], inputs_f, grad_outputs=torch.ones_like(u_grad[:, 1]), create_graph=True)[0][:, 1]

        # PDE residual at the collocation points.
        # NOTE(review): for u = sin(pi W x) sin(pi W y) the Laplacian is -2 pi^2 W^2 sin sin, the
        # opposite sign of the term subtracted here; confirm the convention against the data file.
        f_pred = u_xx + u_yy - 2 * np.pi**2 * W**2 * torch.sin(np.pi * W * inputs_f[:, 0]) * torch.sin(np.pi * W * inputs_f[:, 1])

        loss = pinns_loss(u_pred, targets, f_pred)
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        adaptive_gradient_clipping(model_poisson.parameters())
        optimizer.step()
    # The schedule advances once per epoch, not once per batch.
    scheduler.step()
    writer.add_scalar("Loss/Adam/Poisson", total_loss / len(poisson_data), epoch)
writer.close()

In [None]:
def loss_closure():
    """Closure for L-BFGS: recompute the full-batch Poisson PINN loss and its gradients.

    Reads the notebook globals ``optimizer``, ``writer``, ``epoch``, ``model_poisson``,
    ``x_train``, ``x_train_f`` and ``u_train``. Every call logs the loss under the current
    ``epoch``, so several evaluations within one optimizer step share the same step index.

    Returns:
        The scalar loss tensor, after ``backward()`` has been called on it.
    """
    optimizer.zero_grad()

    inputs, inputs_f = x_train.to(DEVICE), x_train_f.to(DEVICE)
    targets = u_train.to(DEVICE).reshape(-1)
    inputs_f.requires_grad = True

    # reshape(-1) turns an (n, 1) network output into (n,). Without it, multiplying by the
    # (n,) coordinate factors broadcasts to an (n, n) matrix and corrupts loss and derivatives.
    u_pred = model_poisson(inputs).reshape(-1)
    outputs_f = model_poisson(inputs_f).reshape(-1)

    # Hard boundary constraint: the factor (1 - x^2)(1 - y^2) vanishes where x or y is +-1.
    u_pred = u_pred * (1 - inputs[:, 0]**2) * (1 - inputs[:, 1]**2)
    outputs_f = outputs_f * (1 - inputs_f[:, 0]**2) * (1 - inputs_f[:, 1]**2)

    # First derivatives (columns: d/dx, d/dy), then the two pure second derivatives.
    u_xy = torch.autograd.grad(outputs_f, inputs_f, grad_outputs=torch.ones_like(outputs_f), create_graph=True)[0]
    u_xx = torch.autograd.grad(u_xy[:, 0], inputs_f, grad_outputs=torch.ones_like(u_xy[:, 0]), create_graph=True)[0][:, 0]
    u_yy = torch.autograd.grad(u_xy[:, 1], inputs_f, grad_outputs=torch.ones_like(u_xy[:, 1]), create_graph=True)[0][:, 1]

    f_pred = u_xx + u_yy - 2 * np.pi**2 * W**2 * torch.sin(np.pi * W * inputs_f[:, 0]) * torch.sin(np.pi * W * inputs_f[:, 1])

    loss = pinns_loss(u_pred, targets, f_pred)
    writer.add_scalar("Loss/LBFGS/Poisson", loss.item(), epoch)

    loss.backward()
    return loss

# Fine-tuning phase: L-BFGS on all collocation points at once, replacing the Adam optimizer.
optimizer = LBFGS(model_poisson.parameters(), lr=1e-3, max_iter=100)

writer = SummaryWriter()
for epoch in tqdm(range(N_EPOCHS_FINE_TUNE)):
    optimizer.step(loss_closure)
writer.close()

In [None]:
model_poisson.eval()
with torch.no_grad():
    X_eval = X_star.to(DEVICE)
    # Apply the same hard-constraint factor used during training; the raw network output alone
    # is not the trained solution. reshape(-1) keeps the product elementwise.
    u_pred = model_poisson(X_eval).reshape(-1)
    u_pred = u_pred * (1 - X_eval[:, 0]**2) * (1 - X_eval[:, 1]**2)
# Compare on the CPU with matching 1-D shapes; (n, 1) minus (n,) would broadcast to (n, n).
u_pred = u_pred.cpu()
u_ref = u_exact.reshape(-1)

# NOTE(review): these are the mean squared error and its ratio to the mean squared reference
# (no square root is taken).
print("L2 error: ", ((u_pred - u_ref)**2).mean())
print("Relative L2 error: ", ((u_pred - u_ref)**2).mean() / (u_ref**2).mean())

### Allen-Cahn PDE

In [None]:
# --- Settings for the Allen-Cahn experiment ---
BATCH_SIZE = 256  # alternative value left by the author: 10000
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
N_EPOCHS_TRAIN, N_EPOCHS_FINE_TUNE = 100000, 100  # Adam epochs, then outer L-BFGS steps
W = 32

# Number of boundary points and of interior collocation points drawn for training.
N_u, N_f = 100, 1000

In [None]:
# Load the reference solution; the file provides "uu" (solution), "tt" (times) and "x" (positions).
data = scipy.io.loadmat("data/allen_cahn.mat")
# NOTE(review): "uu" is flattened in its stored row-major order, while X_star below is built with
# t varying slowest. Confirm "uu" is laid out as (t, x); otherwise transpose it before flattening.
usol = torch.Tensor(data["uu"].flatten().reshape(-1, 1))

t_star = torch.Tensor(data["tt"][0])
x_star = torch.Tensor(data["x"][0])
# indexing="ij" is the behaviour the bare call already had; stating it avoids the deprecation warning.
TT, XX = torch.meshgrid(t_star, x_star, indexing="ij")

# One row per grid point: column 0 is x, column 1 is t.
X_star = torch.hstack((XX.flatten().reshape(-1, 1), TT.flatten().reshape(-1, 1)))

# Indices of the points with x = +-1, t = 0 or t = 1.
boundary = torch.where(
    (X_star[:, 0] == -1) | (X_star[:, 0] == 1) | (X_star[:, 1] == 0) | (X_star[:, 1] == 1)
)[0]
indices = np.random.choice(boundary.numpy(), N_u, replace=False)
x_train = X_star[indices]
u_train = usol[indices]

# Interior = every index that is not a boundary index. np.setdiff1d yields the same sorted
# candidates as testing `i not in boundary` for each i, without rescanning the tensor per index.
interior = np.setdiff1d(np.arange(len(X_star)), boundary.numpy())
indices = np.random.choice(interior, N_f, replace=False)
x_train_f = X_star[indices]

# Only the collocation points are batched; the boundary set is reused whole at every step.
allen_cahn_data = DataLoader(x_train_f, batch_size=BATCH_SIZE, shuffle=True)

In [None]:
# ActNet constructor arguments (passed positionally below; see actnet.ActNet for their meaning).
d_in = 2
d_out = 1
d = 256
m = 80
N = 8
L = 4

model_allen_cahn = ActNet(d_in, d_out, d, m, N, L).to(DEVICE)

# Learning-rate schedule: linear warm-up from LR_START to LR_PEAK over 1000 scheduler steps,
# then multiply by 0.9 every 1000 steps.
# LinearLR's arguments are multiplicative *factors* applied to the optimizer's base lr, not
# absolute rates, so the base lr must be the peak value and the warm-up is expressed as a ratio.
LR_START, LR_PEAK = 1e-7, 5e-3
optimizer = Adam(model_allen_cahn.parameters(), lr=LR_PEAK)
scheduler = SequentialLR(
    optimizer,
    [
        LinearLR(optimizer, start_factor=LR_START / LR_PEAK, end_factor=1.0, total_iters=1000),
        StepLR(optimizer, step_size=1000, gamma=0.9),
    ],
    milestones=[1000],
)

In [None]:
writer = SummaryWriter()
model_allen_cahn.train()

# The boundary batch is identical at every step, so it is moved to the device once.
# Targets are flattened to (N_u,) to line up with the flattened predictions below.
inputs = x_train.to(DEVICE)
targets = u_train.to(DEVICE).reshape(-1)
x_u, t_u = inputs[:, 0], inputs[:, 1]

for epoch in tqdm(range(N_EPOCHS_TRAIN)):
    total_loss = 0
    for inputs_f in allen_cahn_data:
        # Collocation points need gradients so derivatives can be taken with respect to them.
        inputs_f = inputs_f.to(DEVICE).requires_grad_(True)
        x_f, t_f = inputs_f[:, 0], inputs_f[:, 1]

        # reshape(-1) turns an (n, 1) network output into (n,) so the products below stay
        # elementwise instead of broadcasting to an (n, n) matrix.
        u_pred = model_allen_cahn(inputs).reshape(-1)
        outputs_f = model_allen_cahn(inputs_f).reshape(-1)

        # Hard-constraint ansatz: u = (1 - t) * x^2 cos(pi x) + t * ((1 - x^2) * net - 1).
        # At t = 0 it equals x^2 cos(pi x); at x = +-1 it equals -1 for every t.
        # The first factor must use x^2 (column 0); with t^2 neither property holds.
        u_pred = (1 - t_u) * (x_u**2 * torch.cos(np.pi * x_u)) + t_u * ((1 - x_u**2) * u_pred - 1)
        outputs_f = (1 - t_f) * (x_f**2 * torch.cos(np.pi * x_f)) + t_f * ((1 - x_f**2) * outputs_f - 1)

        # Gradient columns follow the input layout: column 0 is d/dx, column 1 is d/dt.
        u_xt = torch.autograd.grad(outputs_f, inputs_f, grad_outputs=torch.ones_like(outputs_f), create_graph=True)[0]
        u_t = u_xt[:, 1]
        u_xx = torch.autograd.grad(u_xt[:, 0], inputs_f, grad_outputs=torch.ones_like(u_xt[:, 0]), create_graph=True)[0][:, 0]

        # Residual of u_t - 0.0001 u_xx + 5 u^3 - 5 u = 0.
        f_pred = u_t - 0.0001 * u_xx + 5 * (outputs_f**3 - outputs_f)

        loss = pinns_loss(u_pred, targets, f_pred)
        total_loss += loss.item()

        optimizer.zero_grad()
        loss.backward()
        adaptive_gradient_clipping(model_allen_cahn.parameters())
        optimizer.step()
    # The schedule advances once per epoch, not once per batch.
    scheduler.step()
    writer.add_scalar("Loss/Adam/Allen-Cahn", total_loss / len(allen_cahn_data), epoch)
writer.close()

In [None]:
def loss_closure():
    """Closure for L-BFGS: recompute the full-batch Allen-Cahn PINN loss and its gradients.

    Reads the notebook globals ``optimizer``, ``writer``, ``epoch``, ``model_allen_cahn``,
    ``x_train``, ``x_train_f`` and ``u_train``. Every call logs the loss under the current
    ``epoch``, so several evaluations within one optimizer step share the same step index.

    Returns:
        The scalar loss tensor, after ``backward()`` has been called on it.
    """
    optimizer.zero_grad()

    inputs, inputs_f = x_train.to(DEVICE), x_train_f.to(DEVICE)
    targets = u_train.to(DEVICE).reshape(-1)
    inputs_f.requires_grad = True

    x_u, t_u = inputs[:, 0], inputs[:, 1]
    x_f, t_f = inputs_f[:, 0], inputs_f[:, 1]

    # reshape(-1) turns an (n, 1) network output into (n,) so the products below stay
    # elementwise instead of broadcasting to an (n, n) matrix.
    u_pred = model_allen_cahn(inputs).reshape(-1)
    outputs_f = model_allen_cahn(inputs_f).reshape(-1)

    # Hard-constraint ansatz: u = (1 - t) * x^2 cos(pi x) + t * ((1 - x^2) * net - 1).
    # At t = 0 it equals x^2 cos(pi x); at x = +-1 it equals -1 for every t.
    # The first factor must use x^2 (column 0); with t^2 neither property holds.
    u_pred = (1 - t_u) * (x_u**2 * torch.cos(np.pi * x_u)) + t_u * ((1 - x_u**2) * u_pred - 1)
    outputs_f = (1 - t_f) * (x_f**2 * torch.cos(np.pi * x_f)) + t_f * ((1 - x_f**2) * outputs_f - 1)

    # Gradient columns follow the input layout: column 0 is d/dx, column 1 is d/dt.
    u_xt = torch.autograd.grad(outputs_f, inputs_f, grad_outputs=torch.ones_like(outputs_f), create_graph=True)[0]
    u_t = u_xt[:, 1]
    u_xx = torch.autograd.grad(u_xt[:, 0], inputs_f, grad_outputs=torch.ones_like(u_xt[:, 0]), create_graph=True)[0][:, 0]

    # Residual of u_t - 0.0001 u_xx + 5 u^3 - 5 u = 0.
    f_pred = u_t - 0.0001 * u_xx + 5 * (outputs_f**3 - outputs_f)

    loss = pinns_loss(u_pred, targets, f_pred)
    writer.add_scalar("Loss/LBFGS/Allen-Cahn", loss.item(), epoch)

    loss.backward()
    return loss

# Fine-tuning phase: L-BFGS on all collocation points at once, replacing the Adam optimizer.
optimizer = LBFGS(model_allen_cahn.parameters(), lr=1e-3, max_iter=100)

writer = SummaryWriter()
for epoch in tqdm(range(N_EPOCHS_FINE_TUNE)):
    optimizer.step(loss_closure)
writer.close()

In [None]:
model_allen_cahn.eval()
with torch.no_grad():
    X_eval = X_star.to(DEVICE)
    x_eval, t_eval = X_eval[:, 0], X_eval[:, 1]
    raw = model_allen_cahn(X_eval).reshape(-1)
    # The network is trained inside a hard-constraint ansatz, so the prediction must be wrapped
    # the same way; this expression has to mirror the one used in the training cells.
    u_pred = (1 - t_eval) * (x_eval**2 * torch.cos(np.pi * x_eval)) + t_eval * ((1 - x_eval**2) * raw - 1)
# Keep the (n, 1) column layout of usol so later cells can subtract the two directly.
u_pred = u_pred.reshape(-1, 1).cpu()

fig = plt.figure()
ax = fig.add_subplot(111)

# pcolor needs the values on the same 2-D grid as TT / XX, not as a flat column.
mesh = ax.pcolor(TT.numpy(), XX.numpy(), u_pred.reshape(TT.shape).numpy(), cmap="jet")

ax.set_xlabel("$t$", size=20)
ax.set_ylabel("$x$", size=20)
ax.set_title("Prediction", fontsize=20)

# Figure.colorbar requires the mappable it describes; calling it with no argument raises.
fig.colorbar(mesh, ax=ax)

plt.show()

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)

# usol is stored as an (n, 1) column; pcolor needs it on the same 2-D grid as TT / XX.
mesh = ax.pcolor(TT.numpy(), XX.numpy(), usol.reshape(TT.shape).numpy(), cmap="jet")

ax.set_xlabel("$t$", size=20)
ax.set_ylabel("$x$", size=20)
ax.set_title("Reference Solution", fontsize=20)

# Figure.colorbar requires the mappable it describes; calling it with no argument raises.
fig.colorbar(mesh, ax=ax)

plt.show()

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)

# Pointwise absolute error, reshaped from a flat column onto the same 2-D grid as TT / XX.
abs_error = (usol - u_pred).abs().reshape(TT.shape)
mesh = ax.pcolor(TT.numpy(), XX.numpy(), abs_error.numpy(), cmap="jet")

ax.set_xlabel("$t$", size=20)
ax.set_ylabel("$x$", size=20)
ax.set_title("Absolute Error", fontsize=20)

# Figure.colorbar requires the mappable it describes; calling it with no argument raises.
fig.colorbar(mesh, ax=ax)

plt.show()

In [None]:
# Mean squared error over all grid points, and its ratio to the mean squared reference value.
# NOTE(review): no square root is taken, so these are squared quantities despite the labels.
mse = torch.mean((usol - u_pred)**2)
print("L2 Error: ", mse)
print("Relative L2 Error: ", mse / torch.mean(usol**2))

### Burgers PDE

In [None]:
# --- Settings for the Burgers experiment ---
BATCH_SIZE = 128
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
N_EPOCHS_TRAIN, N_EPOCHS_FINE_TUNE = 10000, 100
W = 32

# Number of boundary/initial points and of collocation points drawn for training.
N_u, N_f = 100, 1000

In [None]:
# Load the reference solution; the file provides "t", "x" and "usol".
data = scipy.io.loadmat("./data/burgers_shock.mat")

t = torch.Tensor(data["t"].flatten())
x = torch.Tensor(data["x"].flatten())
# Transposed so that, flattened, it lines up with the t-major grid built below
# (assumes "usol" is stored as (x, t) - TODO confirm against the file).
Exact = torch.Tensor(data["usol"]).T

# indexing="ij" is the behaviour the bare call already had; stating it avoids the deprecation warning.
T, X = torch.meshgrid(t, x, indexing="ij")
# One row per grid point: column 0 is x, column 1 is t.
X_star = torch.hstack((X.flatten().reshape(-1, 1), T.flatten().reshape(-1, 1)))
u_star = Exact.flatten()


# Supervised points: the initial time t = 0 and the two spatial ends of the domain.
boundary = torch.where(
    (X_star[:, 1] == 0) | (X_star[:, 0] == x.min()) | (X_star[:, 0] == x.max())
)[0]
indices = np.random.choice(boundary.numpy(), N_u, replace=False)
x_train = X_star[indices, 0].reshape(-1, 1)
t_train = X_star[indices, 1].reshape(-1, 1)
u_train = u_star[indices].reshape(-1, 1)


# Collocation points are drawn from the whole grid (boundary points are not excluded).
indices = np.random.choice(X_star.shape[0], N_f, replace=False)
x_train_f = X_star[indices, 0].reshape(-1, 1)
t_train_f = X_star[indices, 1].reshape(-1, 1)


# The supervised points are also appended to the collocation set.
x_train_f = torch.vstack((x_train_f, x_train))
t_train_f = torch.vstack((t_train_f, t_train))


def prepare(*tensors):
    """Lazily yield each argument as a float tensor placed on DEVICE.

    The result is a generator, so it is meant to be unpacked immediately by the caller.
    """
    for tensor in tensors:
        yield torch.Tensor(tensor).float().to(DEVICE)

# Supervised points and their reference values.
x_train, t_train, u_train = prepare(x_train, t_train, u_train)
# Collocation points.
x_train_f, t_train_f = prepare(x_train_f, t_train_f)

# Full evaluation grid, split into its x and t columns.
x_star, t_star = prepare(*X_star.split((1, 1), dim=1))

In [None]:
# ActNet constructor arguments (passed positionally below; see actnet.ActNet for their meaning).
d_in, d_out = 2, 1
d, m, N, L = 256, 32, 8, 4

model_burgers = ActNet(d_in, d_out, d, m, N, L)
model_burgers = model_burgers.to(DEVICE)

In [None]:
def loss_closure():
    """Closure for L-BFGS: recompute the full-batch Burgers PINN loss and its gradients.

    Reads the notebook globals ``optimizer``, ``writer``, ``epoch``, ``model_burgers`` and the
    prepared training tensors. Returns the scalar loss after ``backward()`` has been called.
    """
    optimizer.zero_grad()

    # Supervised points with their reference values.
    inputs_x = x_train.to(DEVICE)
    inputs_t = t_train.to(DEVICE)
    targets = u_train.to(DEVICE)

    # Collocation points; gradients are needed with respect to both coordinates.
    inputs_f_x = x_train_f.to(DEVICE)
    inputs_f_t = t_train_f.to(DEVICE)
    inputs_f_x.requires_grad = True
    inputs_f_t.requires_grad = True

    u_pred = model_burgers(torch.cat([inputs_x, inputs_t], dim=1))
    outputs_f = model_burgers(torch.cat([inputs_f_x, inputs_f_t], dim=1))

    # First derivatives in t and x from a single autograd call, then the second x-derivative.
    df_dt, df_dx = torch.autograd.grad(
        outputs_f,
        (inputs_f_t, inputs_f_x),
        grad_outputs=torch.ones_like(outputs_f),
        create_graph=True,
    )
    df_dx2 = torch.autograd.grad(
        df_dx,
        inputs_f_x,
        grad_outputs=torch.ones_like(df_dx),
        create_graph=True,
    )[0]

    # Residual of u_t + u u_x - (0.01 / pi) u_xx = 0.
    f_pred = df_dt + outputs_f * df_dx - (0.01 / np.pi) * df_dx2

    loss = pinns_loss(u_pred, targets, f_pred)
    writer.add_scalar("Loss/LBFGS/Burgers", loss.item(), epoch)

    loss.backward()
    return loss

# This experiment is trained with L-BFGS only.
optimizer = LBFGS(model_burgers.parameters(), lr=1e-3, max_iter=100)

writer = SummaryWriter()
model_burgers.train()
for epoch in tqdm(range(N_EPOCHS_FINE_TUNE)):
    optimizer.step(loss_closure)
writer.close()

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)

# Evaluate on the full grid in eval mode and without building an autograd graph.
model_burgers.eval()
with torch.no_grad():
    u_pred = model_burgers(torch.cat([x_star, t_star], dim=1)).cpu()

# Interpolate the flat predictions back onto the (t, x) mesh; explicit NumPy arrays are passed
# so scipy does not have to convert torch tensors implicitly.
U_pred = griddata(X_star.numpy(), u_pred.flatten().numpy(), (X.numpy(), T.numpy()), method="cubic")

h = ax.imshow(
    U_pred.T,
    interpolation="nearest",
    cmap="jet",
    extent=[float(t.min()), float(t.max()), float(x.min()), float(x.max())],
    origin="lower",
    aspect="auto",
)

ax.set_xlabel("$t$", size=20)
ax.set_ylabel("$x$", size=20)
# The former ax.legend(...) call was dropped: the axes hold no labelled artists, so it only
# produced a warning and drew nothing.
ax.set_title("Prediction", fontsize=20)
ax.tick_params(labelsize=15)
cbar = fig.colorbar(h)

plt.show()

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)

# Reference solution drawn with t on the horizontal axis and x on the vertical axis.
h = ax.imshow(
    Exact.T.numpy(),
    interpolation="nearest",
    cmap="jet",
    extent=[float(t.min()), float(t.max()), float(x.min()), float(x.max())],
    origin="lower",
    aspect="auto",
)

ax.set_xlabel("$t$", size=20)
ax.set_ylabel("$x$", size=20)
# The former ax.legend(...) call was dropped: the axes hold no labelled artists, so it only
# produced a warning and drew nothing.
ax.set_title("Reference Solution", fontsize=20)
ax.tick_params(labelsize=15)
cbar = fig.colorbar(h)

plt.show()

In [None]:
fig = plt.figure()
ax = fig.add_subplot(111)

# Pointwise absolute difference between the interpolated prediction and the reference.
h = ax.imshow(
    np.abs((U_pred.T - Exact.T.numpy())),
    interpolation="nearest",
    cmap="jet",
    extent=[float(t.min()), float(t.max()), float(x.min()), float(x.max())],
    origin="lower",
    aspect="auto",
)

ax.set_xlabel("$t$", size=20)
ax.set_ylabel("$x$", size=20)
# The former ax.legend(...) call was dropped: the axes hold no labelled artists, so it only
# produced a warning and drew nothing.
ax.set_title("Absolute Error", fontsize=20)
ax.tick_params(labelsize=15)
cbar = fig.colorbar(h)

plt.show()

In [None]:
# Mean squared error over the grid, and its ratio to the mean squared reference value.
# NOTE(review): no square root is taken, so these are squared quantities despite the labels.
squared_error = (U_pred.T - Exact.T.numpy()).flatten()**2
print("L2 Error: ", np.mean(squared_error))
print("Relative L2 Error: ", np.mean(squared_error) / np.mean(Exact.T.numpy().flatten()**2))

## Influence of the Hyperparameters

In [None]:
def test_model(d_in, d_out, d, m, N, L, batch_size, n_epochs_train, n_epochs_fine_tune, N_u, N_f, lambda_u, lambda_f, w=16):
    """Train an ActNet PINN on the 2D Poisson benchmark and score it against the reference.

    The model is trained with Adam (linear warm-up, then step decay), fine-tuned with L-BFGS on
    all collocation points, and finally evaluated on every point of the reference data set.

    Args:
        d_in, d_out, d, m, N, L: forwarded unchanged to the ``ActNet`` constructor.
        batch_size: number of collocation points per Adam step.
        n_epochs_train: number of Adam epochs.
        n_epochs_fine_tune: number of outer L-BFGS steps.
        N_u: number of boundary points sampled for the data term.
        N_f: number of interior collocation points sampled for the residual term.
        lambda_u, lambda_f: forwarded to ``pinns_loss`` (presumably the weights of the data and
            residual terms; confirm in ``actnet.utils``).
        w: frequency of the benchmark; selects ``poisson_w{w}.csv`` and scales the source term.
            Defaults to 16, the data file this function has always loaded.

    Returns:
        A 0-dim CPU tensor: mean squared error divided by the mean squared reference value.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    data = pd.read_csv(f"./data/poisson/poisson_w{w}.csv")

    x = torch.Tensor(data["x"])
    y = torch.Tensor(data["y"])
    u_exact = torch.Tensor(data["u"]).reshape(-1)

    # One row per sample point: column 0 is x, column 1 is y.
    X_star = torch.hstack((x.reshape(-1, 1), y.reshape(-1, 1)))

    boundary = torch.where(
        (X_star[:, 0] == -1) | (X_star[:, 0] == 1) | (X_star[:, 1] == -1) | (X_star[:, 1] == 1)
    )[0]
    indices = np.random.choice(boundary.numpy(), N_u, replace=False)
    # The boundary batch never changes, so it lives on the device for the whole run.
    inputs = X_star[indices].to(device)
    targets = u_exact[indices].to(device)

    # Interior = all indices minus the boundary ones (sorted, same candidates as a per-index scan).
    interior = np.setdiff1d(np.arange(len(X_star)), boundary.numpy())
    indices = np.random.choice(interior, N_f, replace=False)
    x_train_f = X_star[indices]

    poisson_data = DataLoader(x_train_f, batch_size=batch_size, shuffle=True)

    model = ActNet(d_in, d_out, d, m, N, L).to(device)

    def constrained_output(points):
        """Network output times (1 - x^2)(1 - y^2), flattened to shape (n_points,).

        reshape(-1) keeps the product elementwise; an (n, 1) output times an (n,) factor would
        otherwise broadcast to an (n, n) matrix.
        """
        return model(points).reshape(-1) * (1 - points[:, 0]**2) * (1 - points[:, 1]**2)

    def pinn_loss(points_f):
        """PINN loss for the fixed boundary batch plus the given collocation points."""
        # A fresh leaf tensor on the right device, so derivatives can be taken w.r.t. it.
        points_f = points_f.to(device).clone().requires_grad_(True)

        u_pred = constrained_output(inputs)
        outputs_f = constrained_output(points_f)

        u_grad = torch.autograd.grad(outputs_f, points_f, grad_outputs=torch.ones_like(outputs_f), create_graph=True)[0]
        u_xx = torch.autograd.grad(u_grad[:, 0], points_f, grad_outputs=torch.ones_like(u_grad[:, 0]), create_graph=True)[0][:, 0]
        u_yy = torch.autograd.grad(u_grad[:, 1], points_f, grad_outputs=torch.ones_like(u_grad[:, 1]), create_graph=True)[0][:, 1]

        # The source term uses the same frequency as the loaded data file, not a notebook global.
        f_pred = u_xx + u_yy - 2 * np.pi**2 * w**2 * torch.sin(np.pi * w * points_f[:, 0]) * torch.sin(np.pi * w * points_f[:, 1])

        # Both training phases use the caller's loss weights.
        return pinns_loss(u_pred, targets, f_pred, lambda_u, lambda_f)

    # --- Phase 1: Adam with warm-up from lr_start to lr_peak, then x0.75 every 1000 epochs ---
    # LinearLR takes multiplicative factors of the base lr, so the base lr is the peak value.
    lr_start, lr_peak = 1e-7, 5e-3
    optimizer = Adam(model.parameters(), lr=lr_peak)
    scheduler = SequentialLR(
        optimizer,
        [
            LinearLR(optimizer, start_factor=lr_start / lr_peak, end_factor=1.0, total_iters=1000),
            StepLR(optimizer, step_size=1000, gamma=0.75),
        ],
        milestones=[1000],
    )

    model.train()
    for epoch in tqdm(range(n_epochs_train)):
        for batch_f in poisson_data:
            loss = pinn_loss(batch_f)

            optimizer.zero_grad()
            loss.backward()
            adaptive_gradient_clipping(model.parameters())
            optimizer.step()
        scheduler.step()

    # --- Phase 2: L-BFGS on all collocation points at once ---
    optimizer = LBFGS(model.parameters(), lr=1e-3, max_iter=100)
    writer = SummaryWriter()

    def loss_closure():
        """Re-evaluate the full-batch loss for L-BFGS and log it under the current epoch."""
        optimizer.zero_grad()
        loss = pinn_loss(x_train_f)
        writer.add_scalar("Loss/LBFGS/Poisson", loss.item(), epoch)
        loss.backward()
        return loss

    for epoch in tqdm(range(n_epochs_fine_tune)):
        optimizer.step(loss_closure)
    writer.close()

    # --- Evaluation: same constrained output as in training, compared on the CPU in 1-D ---
    model.eval()
    with torch.no_grad():
        u_pred = constrained_output(X_star.to(device)).cpu()
    return ((u_pred - u_exact)**2).mean() / (u_exact**2).mean()

## Loss Function Components

### Influence of the components

### Simple problem without physics information

In [None]:
# Per-epoch loss history for each model, filled in by the training cells below.
# Each key gets its own fresh list, and the two dicts share no list objects.
train_losses = {name: [] for name in ("Perceptron", "MLP", "ActNet")}
test_losses = {name: [] for name in ("Perceptron", "MLP", "ActNet")}

In [None]:
# Synthetic one-feature regression problem with a small amount of noise.
X, y = make_regression(n_samples=5000, n_features=1, noise=0.2)

# Standardise the feature; the scaler is fitted on the full data set before the split.
scaler = StandardScaler()
X = scaler.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

# Features as float32 tensors; targets as float32 column vectors.
X_train = torch.as_tensor(X_train, dtype=torch.float32)
X_test = torch.as_tensor(X_test, dtype=torch.float32)
y_train = torch.as_tensor(y_train, dtype=torch.float32).reshape(-1, 1)
y_test = torch.as_tensor(y_test, dtype=torch.float32).reshape(-1, 1)

# Training batches are shuffled; test batches keep their order.
train_reg_lin_data = DataLoader(TensorDataset(X_train, y_train), batch_size=300, shuffle=True)
test_reg_lin_data = DataLoader(TensorDataset(X_test, y_test), batch_size=300, shuffle=False)

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

plt.scatter(X, y)
plt.title("Simple Linear Regression Data")
plt.xlabel("Feature")
plt.ylabel("Target")
plt.show()

#### Perceptron

In [None]:
# Baseline: a single linear unit mapping one feature to one output.
perceptron = nn.Linear(in_features=1, out_features=1)
perceptron = perceptron.to(device)

criterion = nn.MSELoss()
optimizer = Adam(perceptron.parameters(), lr=1e-2)

In [None]:
# Train for 250 epochs; after each epoch record the mean batch loss on train and test data.
for epoch in tqdm(range(250)):
    total_loss = 0
    for inputs, targets in train_reg_lin_data:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = criterion(perceptron(inputs), targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    train_losses["Perceptron"].append(total_loss / len(train_reg_lin_data))

    # Evaluation pass: no gradients, no parameter updates.
    with torch.no_grad():
        total_loss = sum(
            criterion(perceptron(inputs.to(device)), targets.to(device)).item()
            for inputs, targets in test_reg_lin_data
        )
    test_losses["Perceptron"].append(total_loss / len(test_reg_lin_data))

#### MLP

In [None]:
# Small fully connected network: 1 -> 8 -> 8 -> 1 with tanh after each hidden layer.
mlp_layers = [
    nn.Linear(1, 8),
    nn.Tanh(),
    nn.Linear(8, 8),
    nn.Tanh(),
    nn.Linear(8, 1),
]
mlp = nn.Sequential(*mlp_layers).to(device)

criterion = nn.MSELoss()
optimizer = Adam(mlp.parameters(), lr=1e-2)

In [None]:
# Train for 250 epochs; after each epoch record the mean batch loss on train and test data.
for epoch in tqdm(range(250)):
    total_loss = 0
    for inputs, targets in train_reg_lin_data:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = criterion(mlp(inputs), targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    train_losses["MLP"].append(total_loss / len(train_reg_lin_data))

    # Evaluation pass: no gradients, no parameter updates.
    with torch.no_grad():
        total_loss = sum(
            criterion(mlp(inputs.to(device)), targets.to(device)).item()
            for inputs, targets in test_reg_lin_data
        )
    test_losses["MLP"].append(total_loss / len(test_reg_lin_data))

#### ActNet

In [None]:
# ActNet constructor arguments (passed positionally below; see actnet.ActNet for their meaning).
d_in, d_out = 1, 1
d, m, N, L = 128, 128, 8, 2

actnet = ActNet(d_in, d_out, d, m, N, L)
actnet = actnet.to(device)

criterion = nn.MSELoss()
optimizer = Adam(actnet.parameters(), lr=1e-2)

In [None]:
actnet.train()
# Train for 250 epochs; after each epoch record the mean batch loss on train and test data.
for epoch in tqdm(range(250)):
    total_loss = 0
    for inputs, targets in train_reg_lin_data:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()
        loss = criterion(actnet(inputs), targets)
        loss.backward()
        # Gradients are clipped before every update, unlike in the two baselines above.
        adaptive_gradient_clipping(actnet.parameters(), clip_factor=1e-5)
        optimizer.step()

        total_loss += loss.item()
    train_losses["ActNet"].append(total_loss / len(train_reg_lin_data))

    # Evaluation pass: no gradients, no parameter updates (the model stays in train mode).
    with torch.no_grad():
        total_loss = sum(
            criterion(actnet(inputs.to(device)), targets.to(device)).item()
            for inputs, targets in test_reg_lin_data
        )
    test_losses["ActNet"].append(total_loss / len(test_reg_lin_data))

#### Loss Functions

In [None]:
model_names = list(train_losses)
n_models = len(model_names)

# One panel per model, side by side, each showing the train and test loss histories.
fig, axes = plt.subplots(1, n_models, figsize=(6*n_models, 5), facecolor="#f5f5f5", squeeze=False)
for ax, model_name in zip(axes[0], model_names):
    ax.plot(train_losses[model_name], label="Apprentissage")
    ax.plot(test_losses[model_name], label="Test")
    ax.set_title(f"{model_name} Loss Values")
    ax.set_xlabel("Epoch")
    ax.set_ylabel("Loss")
    ax.legend()
plt.show()

In [None]:
# Report the last recorded train and test loss of every model.
for model_name in model_names:
    final_train = train_losses[model_name][-1]
    final_test = test_losses[model_name][-1]
    print(f"{model_name} Loss - Train: {final_train} - Test: {final_test}")