All tests for part (d), 

In [3]:
def get_activation(name):
    if isinstance(name, nn.Module):
        return name
    name = name.lower()
    if name == "tanh":
        return nn.Tanh()
    if name == "relu":
        return nn.ReLU()
    if name == "sigmoid":
        return nn.Sigmoid()
    if name == "gelu":
        return nn.GELU()
    if name == "elu":
        return nn.ELU
    if name == "leaky":
        return nn.LeakyReLU
    raise ValueError(f"Unknown activation: {name}")

class PINN(nn.Module):
    """
    PINN for u_t = u_xx on x in [0,1], t in [0,T],
    with u(x,0) = sin(pi x), u(0,t) = u(1,t) = 0.

    Trial solution:
        g(x,t) = (1 - t) sin(pi x) + x (1 - x) t N_theta(x,t)
    """

    def __init__(self, layers, activation="tanh", device=None):
        super().__init__()

        if device is None:
            device = "cuda" if torch.cuda.is_available() else "cpu"
        self.device = torch.device(device)

        act = get_activation(activation)

        modules = []
        for in_dim, out_dim in zip(layers[:-2], layers[1:-1]):
            modules.append(nn.Linear(in_dim, out_dim))
            modules.append(act)
        modules.append(nn.Linear(layers[-2], layers[-1]))
        self.net = nn.Sequential(*modules)

        self.to(self.device)

    # Raw network N_theta(x,t)
    def network(self, x, t):
        inp = torch.cat([x, t], dim=1)  # (N,2)
        return self.net(inp)            # (N,1)

    # Initial condition u(x,0) = sin(pi x)
    def u0(self, x):
        return torch.sin(torch.pi * x)

    # Trial solution g(x,t)
    def trial_solution(self, x, t):
        N = self.network(x, t)
        return (1.0 - t) * self.u0(x) + x * (1.0 - x) * t * N

    # Right-hand side f(x,t) = 0 here
    def f(self, x, t):
        return torch.zeros_like(x)

    # PDE residual r = g_t - g_xx - f
    def residual(self, x, t):
        x = x.clone().detach().to(self.device).requires_grad_(True)
        t = t.clone().detach().to(self.device).requires_grad_(True)

        g = self.trial_solution(x, t)

        g_t = torch.autograd.grad(
            g, t,
            grad_outputs=torch.ones_like(g),
            create_graph=True,
            retain_graph=True,
        )[0]

        g_x = torch.autograd.grad(
            g, x,
            grad_outputs=torch.ones_like(g),
            create_graph=True,
            retain_graph=True,
        )[0]

        g_xx = torch.autograd.grad(
            g_x, x,
            grad_outputs=torch.ones_like(g_x),
            create_graph=True,
            retain_graph=True,
        )[0]

        r = g_t - g_xx - self.f(x, t)
        return r

    def loss(self, x_coll, t_coll):
        r = self.residual(x_coll, t_coll)
        return torch.mean(r**2)

    def train_pinn(self, x_coll, t_coll,
                   epochs=5000, lr=1e-3,
                   optimizer_cls=optim.Adam,
                   verbose_every=500):
        x_coll = x_coll.reshape(-1, 1).to(self.device)
        t_coll = t_coll.reshape(-1, 1).to(self.device)

        optimizer = optimizer_cls(self.parameters(), lr=lr)

        for epoch in range(epochs):
            optimizer.zero_grad()
            loss = self.loss(x_coll, t_coll)
            loss.backward()
            optimizer.step()

            if verbose_every and epoch % verbose_every == 0:
                print(f"Epoch {epoch:5d} | Loss = {loss.item():.3e}")

        return self

NameError: name 'nn' is not defined

In [4]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt


# ---------- Helper: activation chooser ----------

def get_activation(name):
    """
    Return a PyTorch activation module given a name or module.

    Parameters
    ----------
    name : str or nn.Module
        Name of the activation ("tanh", "relu", "sigmoid", "gelu") or a
        pre-instantiated torch.nn.Module.

    Returns
    -------
    nn.Module
        Activation function module.
    """
    if isinstance(name, nn.Module):
        return name
    name = name.lower()
    if name == "elu":
        return nn.ELU()
    if name == "tanh":
        return nn.Tanh()
    if name == "relu":
        return nn.ReLU()
    if name == "sigmoid":
        return nn.Sigmoid()
    if name == "gelu":
        return nn.GELU()
    raise ValueError(f"Unknown activation: {name}")


# ---------- PINN class (CPU-only, Jensen-style trial solution) ----------

class PINN(nn.Module):
    """
    PINN for the 1D heat equation

        u_t = u_xx,  x in [0,1],  t in [0,T],

    with u(x,0) = sin(pi x), u(0,t) = u(1,t) = 0.

    Trial solution:
        g(x,t) = (1 - t) sin(pi x) + x (1 - x) t N_θ(x,t),
    where N_θ is a fully-connected neural network.
    """

    def __init__(self, layers, activation="tanh"):
        """
        Initialize the PINN model.

        Parameters
        ----------
        layers : list[int]
            Layer sizes, e.g. [2, 20, 20, 1].
        activation : str or nn.Module, default "tanh"
            Activation function for hidden layers.
        """
        super().__init__()

        act = get_activation(activation)

        modules = []
        for in_dim, out_dim in zip(layers[:-2], layers[1:-1]):
            modules.append(nn.Linear(in_dim, out_dim))
            modules.append(act)
        modules.append(nn.Linear(layers[-2], layers[-1]))
        self.net = nn.Sequential(*modules)

    def network(self, x, t):
        """
        Raw network N_θ(x,t).

        Parameters
        ----------
        x, t : torch.Tensor of shape (N,1)

        Returns
        -------
        torch.Tensor of shape (N,1)
        """
        inp = torch.cat([x, t], dim=1)
        return self.net(inp)

    def u0(self, x):
        """
        Initial condition u(x,0) = sin(pi x).
        """
        return torch.sin(torch.pi * x)

    def trial_solution(self, x, t):
        """
        Trial solution g(x,t) that satisfies IC and BC.

        g(x,t) = (1 - t) u0(x) + x (1 - x) t N_θ(x,t)
        """
        N = self.network(x, t)
        return (1.0 - t) * self.u0(x) + x * (1.0 - x) * t * N

    def f(self, x, t):
        """
        Right-hand side f(x,t) in u_t = u_xx + f. Here f = 0.
        """
        return torch.zeros_like(x)

    def residual(self, x, t):
        """
        PDE residual r(x,t) = g_t - g_xx - f at collocation points.

        Parameters
        ----------
        x, t : torch.Tensor of shape (N,1)

        Returns
        -------
        torch.Tensor of shape (N,1)
        """
        x = x.clone().detach().requires_grad_(True)
        t = t.clone().detach().requires_grad_(True)

        g = self.trial_solution(x, t)

        g_t = torch.autograd.grad(
            g, t,
            grad_outputs=torch.ones_like(g),
            create_graph=True,
            retain_graph=True,
        )[0]

        g_x = torch.autograd.grad(
            g, x,
            grad_outputs=torch.ones_like(g),
            create_graph=True,
            retain_graph=True,
        )[0]

        g_xx = torch.autograd.grad(
            g_x, x,
            grad_outputs=torch.ones_like(g_x),
            create_graph=True,
            retain_graph=True,
        )[0]

        r = g_t - g_xx - self.f(x, t)
        return r

    def loss(self, x_coll, t_coll):
        """
        Mean squared PDE residual over collocation points.
        """
        r = self.residual(x_coll, t_coll)
        return torch.mean(r**2)

    def train_pinn(self, x_coll, t_coll,
                   epochs=2000, lr=1e-3,
                   optimizer_cls=optim.Adam,
                   verbose_every=0):
        """
        Train the PINN on given collocation points.

        Parameters
        ----------
        x_coll, t_coll : torch.Tensor of shape (N,) or (N,1)
        epochs : int, default 2000
        lr : float, default 1e-3
        optimizer_cls : optimizer class, default Adam
        verbose_every : int, default 0
            If > 0, print loss every verbose_every epochs.
        """
        x_coll = x_coll.reshape(-1, 1)
        t_coll = t_coll.reshape(-1, 1)

        optimizer = optimizer_cls(self.parameters(), lr=lr)

        for epoch in range(epochs):
            optimizer.zero_grad()
            loss = self.loss(x_coll, t_coll)
            loss.backward()
            optimizer.step()

            if verbose_every and epoch % verbose_every == 0:
                print(f"Epoch {epoch:5d} | Loss = {loss.item():.3e}")

        return self


# ---------- Collocation + analytical solution ----------

def make_collocation(Nx=40, Nt=40, T=0.3):
    """
    Uniform collocation points on [0,1] × [0,T].
    """
    x = torch.linspace(0.0, 1.0, Nx)
    t = torch.linspace(0.0, T, Nt)
    X, Tm = torch.meshgrid(x, t, indexing="ij")
    x_coll = X.reshape(-1, 1)
    t_coll = Tm.reshape(-1, 1)
    return x_coll, t_coll


def analytical_u(x, t_scalar):
    """
    Analytical solution for u_t = u_xx with u(x,0) = sin(pi x):

        u(x,t) = exp(-pi^2 t) sin(pi x).
    """
    if isinstance(x, torch.Tensor):
        x_np = x.detach().cpu().numpy().flatten()
    else:
        x_np = np.asarray(x).flatten()
    return np.exp(-np.pi**2 * t_scalar) * np.sin(np.pi * x_np)


# ---------- Sweep over activations / depths / widths ----------

def run_architecture_sweep():
    """
    Train many PINNs with different (activation, depth, width) and
    plot only the resulting MSE values in a bar chart.
    """
    # where to evaluate MSE
    t_eval = 0.1

    # collocation grid
    x_coll, t_coll = make_collocation(Nx=40, Nt=40, T=0.3)

    # test grid at fixed time
    x_test = torch.linspace(0, 1, 200).reshape(-1, 1)

    # architecture / activation choices
    activations = ["tanh", "relu", "sigmoid", "gelu"]
    depths = [1, 2, 3]       # number of hidden layers
    widths = [10, 20, 50]    # neurons per hidden layer

    results = []  # list of dicts: { "label", "activation", "depth", "width", "mse" }

    for act in activations:
        for depth in depths:
            for width in widths:
                layers = [2] + [width] * depth + [1]
                label = f"{act}-L{depth}-W{width}"
                print(f"\n=== Training {label} ===")

                model = PINN(layers=layers, activation=act)
                model.train_pinn(x_coll, t_coll,
                                 epochs=2000, lr=1e-3,
                                 optimizer_cls=optim.Adam,
                                 verbose_every=0)

                with torch.no_grad():
                    t_test = torch.full_like(x_test, t_eval)
                    u_pinn = model.trial_solution(x_test, t_test).cpu().numpy().flatten()

                u_exact = analytical_u(x_test, t_eval)
                mse = np.mean((u_pinn - u_exact)**2)
                print(f"{label}: MSE = {mse:.3e}")

                results.append({
                    "label": label,
                    "activation": act,
                    "depth": depth,
                    "width": width,
                    "mse": mse
                })

    # ---------- Plot MSE only ----------

    # Make a separate bar plot per activation for clarity
    for act in activations:
        sub = [r for r in results if r["activation"] == act]
        if not sub:
            continue

        labels = [f"L{r['depth']}-W{r['width']}" for r in sub]
        mses = [r["mse"] for r in sub]

        x_pos = np.arange(len(sub))

        plt.figure(figsize=(8, 4))
        plt.bar(x_pos, mses)
        plt.xticks(x_pos, labels, rotation=45, ha="right")
        plt.ylabel("MSE at t = {:.2f}".format(t_eval))
        plt.title(f"PINN MSE for activation = {act}")
        plt.tight_layout()
        plt.show()


if __name__ == "__main__":
    run_architecture_sweep()



=== Training tanh-L1-W10 ===
tanh-L1-W10: MSE = 4.331e-02

=== Training tanh-L1-W20 ===
tanh-L1-W20: MSE = 3.117e-02

=== Training tanh-L1-W50 ===
tanh-L1-W50: MSE = 4.871e-03

=== Training tanh-L2-W10 ===
tanh-L2-W10: MSE = 3.987e-02

=== Training tanh-L2-W20 ===
tanh-L2-W20: MSE = 3.766e-02

=== Training tanh-L2-W50 ===
tanh-L2-W50: MSE = 1.827e-05

=== Training tanh-L3-W10 ===
tanh-L3-W10: MSE = 3.815e-02

=== Training tanh-L3-W20 ===
tanh-L3-W20: MSE = 1.899e-03

=== Training tanh-L3-W50 ===


KeyboardInterrupt: 

In [7]:
import torch
import time

# analytical solution
def analytical_diffusion(x, t):
    return torch.exp(-torch.pi**2 * t) * torch.sin(torch.pi * x)

def get_activation(name):
    """
    Return a PyTorch activation module given a name or module.

    Parameters
    ----------
    name : str or nn.Module
        Name of the activation ("tanh", "relu", "sigmoid", "gelu") or a
        pre-instantiated torch.nn.Module.

    Returns
    -------
    nn.Module
        Activation function module.
    """
    if isinstance(name, nn.Module):
        return name
    name = name.lower()
    if name == "leaky":
        return nn.LeakyReLU()
    if name == "elu":
        return nn.ELU()
    if name == "tanh":
        return nn.Tanh()
    if name == "relu":
        return nn.ReLU()
    if name == "sigmoid":
        return nn.Sigmoid()
    if name == "gelu":
        return nn.GELU()
    raise ValueError(f"Unknown activation: {name}")

# test / collocation points
N_test = 100
x_test = torch.rand(N_test, 1)
t_test = torch.rand(N_test, 1)
u_exact = analytical_diffusion(x_test, t_test)

learning_rates = [1e-1, 1e-2, 1e-3, 1e-4]
epoch_list     = [1000, 2000, 5000]
activations    = ["elu", "tanh", "relu", "gelu", "sigmoid", "leaky"]
node_sizes     = [10, 20, 50,100,200]
hidden_layers  = [1, 2, 3,4,5]


for lr in learning_rates:
    print(f"\n=== Learning rate: {lr} ===")

    for epochs in epoch_list:
        print(f"  Epochs: {epochs}")

        for act in activations:
            for nodes in node_sizes:
                for layers in hidden_layers:

                    layer_config = [2] + [nodes] * layers + [1]

                    model = PINN(
                        layers=layer_config,
                        activation=act,
                        
                    )

                    start_time = time.time()

                    model.train_pinn(
                        x_coll=x_test,
                        t_coll=t_test,
                        epochs=epochs,
                        lr=lr,
                        verbose_every=None
                    )

                    with torch.no_grad():
                        u_pred = model.trial_solution(x_test, t_test)
                        mse = torch.mean((u_pred - u_exact)**2).item()

                    elapsed = time.time() - start_time

                    print(
                        f"MSE: {mse:.3e} | "
                        f"LR={lr}, epochs={epochs}, "
                        f"layers={layers}, nodes={nodes}, act={act} | "
                        f"time={elapsed:.2f}s"
                    )

                    model.train_pinn(
                        x_coll=x_test,
                        t_coll=t_test,
                        epochs=epochs,
                        lr=lr,
                        verbose_every=None
                    )

                    with torch.no_grad():
                        u_pred = model.trial_solution(x_test, t_test)
                        mse = torch.mean((u_pred - u_exact)**2).item()

                    elapsed = time.time() - start_time

                    print(
                        f"MSE: {mse:.3e} | "
                        f"LR={lr}, epochs={epochs}, "
                        f"layers={layers}, nodes={nodes}, act={act} | "
                        f"time={elapsed:.2f}s"
                    )



=== Learning rate: 0.1 ===
  Epochs: 1000
MSE: 1.001e-04 | LR=0.1, epochs=1000, layers=1, nodes=10, act=elu | time=0.55s
MSE: 9.565e-05 | LR=0.1, epochs=1000, layers=1, nodes=10, act=elu | time=1.13s
MSE: 4.420e-05 | LR=0.1, epochs=1000, layers=2, nodes=10, act=elu | time=0.75s
MSE: 1.099e-05 | LR=0.1, epochs=1000, layers=2, nodes=10, act=elu | time=1.39s
MSE: 1.025e-05 | LR=0.1, epochs=1000, layers=3, nodes=10, act=elu | time=0.81s
MSE: 3.662e-05 | LR=0.1, epochs=1000, layers=3, nodes=10, act=elu | time=1.65s
MSE: 4.468e-05 | LR=0.1, epochs=1000, layers=4, nodes=10, act=elu | time=0.98s
MSE: 4.314e-02 | LR=0.1, epochs=1000, layers=4, nodes=10, act=elu | time=2.00s
MSE: 2.094e-04 | LR=0.1, epochs=1000, layers=5, nodes=10, act=elu | time=1.20s
MSE: 4.314e-02 | LR=0.1, epochs=1000, layers=5, nodes=10, act=elu | time=2.41s
MSE: 1.481e-04 | LR=0.1, epochs=1000, layers=1, nodes=20, act=elu | time=0.61s
MSE: 4.619e-05 | LR=0.1, epochs=1000, layers=1, nodes=20, act=elu | time=1.21s
MSE: 1.43