In [1]:
import torch
import torch.nn as nn

In [2]:
class DeepNet(nn.Module):
    """Deep stack of equally sized ``Linear`` + ``Sigmoid`` stages.

    Every stage is a square ``nn.Linear(width, width)`` followed by
    ``nn.Sigmoid``, so the last dimension of the input must equal ``width``
    and the output has the same shape as the input.

    Args:
        nlayers: Number of Linear+Sigmoid stages to stack. A value of zero
            (or less) produces an empty ``nn.Sequential``, which returns its
            input unchanged.
        width: Number of input and output features of every linear layer.
            Defaults to 10, the size that was previously hard-coded.
    """

    def __init__(self, nlayers: int, width: int = 10):
        super().__init__()
        layers = []
        for _ in range(nlayers):
            # Build Linear before Sigmoid so parameters are created (and
            # randomly initialised) in the same order as before; a seeded
            # run therefore yields the same weights.
            layers.extend((nn.Linear(width, width), nn.Sigmoid()))
        self.layers = nn.Sequential(*layers)

    def forward(self, x):
        """Apply all stages in sequence to ``x`` and return the result."""
        return self.layers(x)

In [11]:
# Experiment configuration, gathered in one place so it is easy to tune.
SEED = 42
BATCH_SIZE = 32
N_FEATURES = 10  # DeepNet's linear layers are 10 -> 10
N_LAYERS = 50
LEARNING_RATE = 0.01

# Seed first: the random data and the model's initial weights below all draw
# from torch's global generator, in exactly this order.
torch.manual_seed(SEED)

# Random inputs and random regression targets of identical shape.
input_tensor = torch.randn(BATCH_SIZE, N_FEATURES)
target = torch.randn(BATCH_SIZE, N_FEATURES)

# Model, mean-squared-error loss, and plain SGD over all model parameters.
model = DeepNet(N_LAYERS)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=LEARNING_RATE)

In [13]:
n_epochs = 50

# Opt-in early stop. It is off by default so every epoch is logged; set
# `stop_on_vanishing = True` to break out as soon as the first layer's average
# gradient magnitude drops below `vanishing_threshold`.
stop_on_vanishing = False
vanishing_threshold = 1e-6

for epoch in range(n_epochs):
    # forward pass
    outputs = model(input_tensor)
    loss = criterion(outputs, target)

    # backward pass: clear stale gradients, backpropagate, update parameters
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report the average gradient magnitude of the first layer's weights,
    # i.e. the layer furthest from the loss. The gradient computed by
    # backward() is still available on `.grad` after the optimizer step.
    gradients = model.layers[0].weight.grad
    avg_gradient_magnitude = gradients.abs().mean().item()
    print(f"{epoch+1:2d}/{n_epochs:2d} | Loss: {loss.item()} | Average Gradient Magnitude: {avg_gradient_magnitude}")

    # Stop if gradients are vanishing (only when explicitly enabled above)
    if stop_on_vanishing and avg_gradient_magnitude < vanishing_threshold:
        print("Gradients are vanishing!")
        break

 1/50 | Loss: 1.1513307094573975 | Average Gradient Magnitude: 2.802596928649634e-45
 2/50 | Loss: 1.15116286277771 | Average Gradient Magnitude: 2.802596928649634e-45
 3/50 | Loss: 1.1509954929351807 | Average Gradient Magnitude: 2.802596928649634e-45
 4/50 | Loss: 1.1508281230926514 | Average Gradient Magnitude: 2.802596928649634e-45
 5/50 | Loss: 1.150660753250122 | Average Gradient Magnitude: 2.802596928649634e-45
 6/50 | Loss: 1.1504935026168823 | Average Gradient Magnitude: 2.802596928649634e-45
 7/50 | Loss: 1.1503263711929321 | Average Gradient Magnitude: 2.802596928649634e-45
 8/50 | Loss: 1.150159478187561 | Average Gradient Magnitude: 2.802596928649634e-45
 9/50 | Loss: 1.14999258518219 | Average Gradient Magnitude: 2.802596928649634e-45
10/50 | Loss: 1.1498258113861084 | Average Gradient Magnitude: 2.802596928649634e-45
11/50 | Loss: 1.1496589183807373 | Average Gradient Magnitude: 2.802596928649634e-45
12/50 | Loss: 1.1494923830032349 | Average Gradient Magnitude: 2.802596