In [4]:
import torch
import torch.nn as nn
from collections import OrderedDict

# Seed PyTorch's global RNG before any layer is constructed. nn.Linear draws
# its initial weights from that RNG, so without a fixed seed the losses and
# accuracies printed by this notebook change on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

# Define the network architectures.
# Three fully connected classifiers of increasing depth and width. Each one
# takes 2 input features and produces 2 outputs squashed into (0, 1) by a
# final Sigmoid:
#   model 1: 2 -> 2 -> 2          (one hidden layer)
#   model 2: 2 -> 4 -> 4 -> 2     (two hidden layers)
#   model 3: 2 -> 8 -> 8 -> 8 -> 2 (three hidden layers)
# The OrderedDict keys become the sub-module names reported by
# named_parameters() (e.g. "hidden1.weight").
models = [
    nn.Sequential(OrderedDict([
        ('hidden', nn.Linear(2, 2)),
        ('relu', nn.ReLU()),
        ('output', nn.Linear(2, 2)),
        ('sigmoid', nn.Sigmoid())
    ])),
    nn.Sequential(OrderedDict([
        ('hidden1', nn.Linear(2, 4)),
        ('relu1', nn.ReLU()),
        ('hidden2', nn.Linear(4, 4)),
        ('relu2', nn.ReLU()),
        ('output', nn.Linear(4, 2)),
        ('sigmoid', nn.Sigmoid())
    ])),
    nn.Sequential(OrderedDict([
        ('hidden1', nn.Linear(2, 8)),
        ('relu1', nn.ReLU()),
        ('hidden2', nn.Linear(8, 8)),
        ('relu2', nn.ReLU()),
        ('hidden3', nn.Linear(8, 8)),
        ('relu3', nn.ReLU()),
        ('output', nn.Linear(8, 2)),
        ('sigmoid', nn.Sigmoid())
    ]))
]

# Loss function and optimizers.
# Binary cross-entropy is shared by all networks; every network gets its own
# plain SGD optimizer (learning rate 0.1) bound to that network's parameters,
# so training one model never touches another model's weights.
criterion = nn.BCELoss()
optimizers = list(
    torch.optim.SGD(params=net.parameters(), lr=0.1)
    for net in models
)

# Training data as one truth table: each row pairs the two input bits with
# the two target bits. For these four rows the first target column equals
# (a AND b) and the second equals (a XOR b).
_truth_table = [
    # inputs   targets
    ([0, 0], [0, 0]),
    ([0, 1], [0, 1]),
    ([1, 0], [0, 1]),
    ([1, 1], [1, 0]),
]
data_in = torch.tensor(
    [bits for bits, _ in _truth_table], dtype=torch.float32
)
data_target = torch.tensor(
    [wanted for _, wanted in _truth_table], dtype=torch.float32
)

# Train each model for NUM_EPOCHS epochs, evaluate it on the training data,
# and remember which model performed best so that its weights can be printed
# afterwards.
NUM_EPOCHS = 500  # full-batch gradient steps per model
LOG_EVERY = 100   # print the loss every LOG_EVERY epochs

# Best model seen so far. The score is (accuracy, -final loss) so that tuples
# compare "higher accuracy first, then lower loss" and ties in accuracy are
# broken by the lower final loss instead of by position in the list.
best_index = None
best_score = None

for i, model in enumerate(models):
    optimizer = optimizers[i]  # hoisted: the same optimizer is used every epoch
    print(f"Training model {i+1}")
    for epoch in range(NUM_EPOCHS):
        optimizer.zero_grad()
        output = model(data_in)
        loss = criterion(output, data_target)
        loss.backward()
        optimizer.step()

        if epoch % LOG_EVERY == 0:
            # Loss computed before this epoch's parameter update.
            print(f"Epoch {epoch}, loss={loss.item()}")

    # Evaluate the trained model (no gradients needed).
    with torch.no_grad():
        output = model(data_in)
        final_loss = criterion(output, data_target).item()
        # Threshold each sigmoid output at 0.5; accuracy is the fraction of
        # individual output elements that match the target.
        prediction = (output > 0.5).float()
        accuracy = (prediction == data_target).float().mean()
        print(f"Accuracy for model {i+1}: {accuracy.item()}")

    score = (accuracy.item(), -final_loss)
    if best_score is None or score > best_score:
        best_index, best_score = i, score

# Print the weights for each layer of the best model. The best model is
# selected from the measured results rather than assumed to be the last one.
if best_index is not None:
    print(f"Weights for model {best_index+1}:")
    for name, param in models[best_index].named_parameters():
        print(f"{name}: {param.data}")

Training model 1
Epoch 0, loss=0.6795583963394165
Epoch 100, loss=0.6103525757789612
Epoch 200, loss=0.5882846713066101
Epoch 300, loss=0.5604674816131592
Epoch 400, loss=0.534103274345398
Accuracy for model 1: 0.75
Training model 2
Epoch 0, loss=0.6761232018470764
Epoch 100, loss=0.6289888024330139
Epoch 200, loss=0.6230872869491577
Epoch 300, loss=0.6118887066841125
Epoch 400, loss=0.5795202851295471
Accuracy for model 2: 0.875
Training model 3
Epoch 0, loss=0.6515295505523682
Epoch 100, loss=0.6217636466026306
Epoch 200, loss=0.5804995894432068
Epoch 300, loss=0.4641929864883423
Epoch 400, loss=0.26186180114746094
Accuracy for model 3: 0.875
Weights for model 3:
hidden1.weight: tensor([[-0.5175, -0.2207],
        [-0.1876,  0.7522],
        [ 0.8334,  0.1684],
        [ 1.1078,  1.0758],
        [-0.5161, -0.3950],
        [-0.4586, -0.8747],
        [-0.0274, -0.3290],
        [ 0.3292, -0.3390]])
hidden1.bias: tensor([ 0.8984,  0.3162, -0.0693,  0.1876,  0.0694,  1.3614, -0.0155, 