In [7]:
import torch
import torch.nn as nn
import torch.optim as optim

In [8]:
def elu(x, alpha=1.0):
    """Apply the Exponential Linear Unit (ELU) activation element-wise.

    ELU(x) = x                        if x >= 0
           = alpha * (exp(x) - 1)     otherwise

    The computation is fully vectorised, so it accepts tensors of any shape
    (including 0-dim scalars and batched / multi-dimensional inputs) and stays
    differentiable under autograd.

    Args:
        x: Input tensor. Expected to be a floating-point tensor.
        alpha: Scale applied to the negative branch. Defaults to 1.0.

    Returns:
        A new tensor with the same shape as ``x`` holding ELU(x).
    """
    # torch.where evaluates BOTH branches for every element. Clamping the
    # exponent input to <= 0 keeps exp() from overflowing to inf on large
    # positive entries, which would otherwise leak NaN into the gradient of
    # the unselected branch. For x < 0 the clamp is the identity, so the
    # selected values are unchanged.
    negative_branch = alpha * (torch.exp(torch.clamp(x, max=0.0)) - 1)
    return torch.where(x >= 0, x, negative_branch)

In [10]:
# Define the custom network using the ELU function
class TwoLayerELUNetwork(nn.Module):
    """Two chained scalar-weight layers, each followed by an ELU activation.

    The forward pass computes ``a1 = elu(w1 * x)`` and then
    ``a2 = elu(w2 * a1)`` and hands back both activations.
    """

    def __init__(self):
        super().__init__()
        # One learnable single-element weight per layer, starting at 2.0 and 3.0.
        self.w1 = nn.Parameter(torch.tensor([2.0]))
        self.w2 = nn.Parameter(torch.tensor([3.0]))

    def forward(self, x):
        """Run both layers on ``x`` and return ``(a1, a2)``.

        ``a1`` is the activation after the first layer and ``a2`` the
        activation after the second layer.
        """
        # Layer 1: scale the input by w1, then apply ELU.
        hidden = elu(self.w1 * x)
        # Layer 2: scale the hidden activation by w2, then apply ELU.
        output = elu(self.w2 * hidden)
        return hidden, output

# Build a fresh model so the weights start at their initial values (2.0 and 3.0)
# and no gradients from an earlier run of this cell are carried over.
model = TwoLayerELUNetwork()

# Single training example: input x = -1.0 and target y = -1.4255.
# x is created with requires_grad=True, so backward() will also populate
# x.grad; only the weight gradients are printed below.
x = torch.tensor([-1.0], requires_grad=True)
y_true = torch.tensor([-1.4255])

# Forward pass: the model returns both the first-layer activation (a1)
# and the second-layer activation, which is used as the prediction.
a1, y_pred = model(x)

# Mean Squared Error between the prediction and the target
loss_fn = nn.MSELoss()
loss = loss_fn(y_pred, y_true)

# Backward pass: fills in .grad on model.w1 and model.w2
loss.backward()

# Report the activations, the loss, and dLoss/dw1, dLoss/dw2
print(f"a1: {a1.item()}")
print(f"y_pred: {y_pred.item()}")
print(f"Loss (MSE): {loss.item()}")
print(f"Gradient with respect to w1: {model.w1.grad.item()}")
print(f"Gradient with respect to w2: {model.w2.grad.item()}")

a1: -0.8646647334098816
y_pred: -0.9252790212631226
Loss (MSE): 0.2502210736274719
Gradient with respect to w1: -0.030350569635629654
Gradient with respect to w2: -0.06463716924190521
