In [1]:
# ReLU heuristics for avoiding bad local minima: He (Kaiming) initialization + Adam
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
# Define the neural network model
class NeuralNet(nn.Module):
    """One-hidden-layer MLP: Linear -> ReLU -> Linear.

    ``forward`` returns the raw output of the second linear layer; no
    softmax or other activation is applied to it here.

    Args:
        input_size: Number of input features per sample.
        hidden_size: Number of units in the hidden layer.
        output_size: Number of output units.
    """

    def __init__(self, input_size: int, hidden_size: int, output_size: int) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, output_size)

        # He initialization for fc1: its output goes through ReLU, so the
        # ReLU gain (sqrt(2)) is the right variance correction.
        nn.init.kaiming_normal_(self.fc1.weight, nonlinearity='relu')
        # fc2 is NOT followed by a ReLU (its output is returned as-is), so the
        # ReLU gain would over-scale it. Use the linear gain (1) instead, which
        # keeps the fan-in scaling without the extra sqrt(2) factor.
        nn.init.kaiming_normal_(self.fc2.weight, nonlinearity='linear')

    def forward(self, x):
        """Apply fc1 -> ReLU -> fc2 to ``x`` and return the result."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [3]:
# Network dimensions
input_size = 784  # Example for MNIST dataset (28x28 images)
hidden_size = 500
output_size = 10  # Number of classes for MNIST (digits 0-9)

# Seed torch's global RNG so that weight initialization and the randomly
# generated batches are the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED);  # trailing ';' hides the returned Generator repr in the cell output

In [4]:
# Instantiate the network from the configured layer sizes
model = NeuralNet(
    input_size=input_size,
    hidden_size=hidden_size,
    output_size=output_size,
)

In [5]:
# Training objective: cross-entropy between the model outputs and class-index targets
criterion = nn.CrossEntropyLoss()
# Update rule: Adam over every parameter of `model`, learning rate 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [9]:
# Dummy training loop: each iteration draws a fresh random batch with random
# labels, so it only exercises the forward / backward / update cycle.
num_epochs = 5
batch_size = 64  # single source of truth for the batch dimension of inputs and targets
for epoch in range(num_epochs):
    # Generate random input and target data
    inputs = torch.randn(batch_size, input_size)
    targets = torch.randint(0, output_size, (batch_size,))

    # Forward pass
    outputs = model(inputs)
    loss = criterion(outputs, targets)

    # Backward and optimize. Gradients are cleared first because
    # backward() accumulates into the existing .grad buffers.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/5], Loss: 2.9275
Epoch [2/5], Loss: 2.7781
Epoch [3/5], Loss: 2.8883
Epoch [4/5], Loss: 2.7750
Epoch [5/5], Loss: 2.7542


In [10]:
# Final status message marking the end of the notebook run
print("Training complete.")

Training complete.
