In [10]:
import torch
import torch.nn as nn
from collections import OrderedDict
import torch.optim as optim

In [11]:
model1 = nn.Sequential(OrderedDict([
    ('hidden', nn.Sequential(nn.Linear(2, 2), nn.Sigmoid())),
    ('output', nn.Sequential(nn.Linear(2, 1), nn.Sigmoid()))]))

model2 = nn.Sequential(OrderedDict([
    ('hidden', nn.Sequential(nn.Linear(2, 4), nn.Sigmoid())),
    ('output', nn.Sequential(nn.Linear(4, 1), nn.Sigmoid()))]))

model3 = nn.Sequential(OrderedDict([
    ('hidden', nn.Sequential(nn.Linear(2, 8), nn.Sigmoid())),
    ('output', nn.Sequential(nn.Linear(8, 1), nn.Sigmoid()))]))

In [12]:
print(model1)
print(model2)
print(model3)

Sequential(
  (hidden): Sequential(
    (0): Linear(in_features=2, out_features=2, bias=True)
    (1): Sigmoid()
  )
  (output): Sequential(
    (0): Linear(in_features=2, out_features=1, bias=True)
    (1): Sigmoid()
  )
)
Sequential(
  (hidden): Sequential(
    (0): Linear(in_features=2, out_features=4, bias=True)
    (1): Sigmoid()
  )
  (output): Sequential(
    (0): Linear(in_features=4, out_features=1, bias=True)
    (1): Sigmoid()
  )
)
Sequential(
  (hidden): Sequential(
    (0): Linear(in_features=2, out_features=8, bias=True)
    (1): Sigmoid()
  )
  (output): Sequential(
    (0): Linear(in_features=8, out_features=1, bias=True)
    (1): Sigmoid()
  )
)


In [13]:
data_in = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
print(data_in)

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])


In [19]:
data_target = torch.tensor([[0], [1], [1], [1]], dtype=torch.float32)
print(data_target)

tensor([[0.],
        [1.],
        [1.],
        [1.]])


In [20]:
criterion = nn.BCELoss() 
optimizer = optim.SGD(model1.parameters(), lr=0.1)

In [21]:
# Train the model
import time
def train_model(model, criterion, optimizer, X, Y, epochs=1000):
    start_time = time.time()
    for epoch in range(epochs):
        optimizer.zero_grad()
        outputs = model(X)
        loss = criterion(outputs, Y)
        loss.backward()
        optimizer.step()
        if (epoch+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

    end_time = time.time()  
    training_time = end_time - start_time 
    return training_time

models = [model1, model2, model3]
for i, model in enumerate(models, 1):
    print(f"Evaluating Model {i}:")
    training_time = train_model(model, criterion, optimizer, data_in, data_target)

    with torch.no_grad():
        predicted = model(data_in).round().squeeze()
        accuracy = (predicted == data_target.squeeze()).sum().item() / len(data_target)
    
    print(f"Model {i} Training Time: {training_time:.4f} seconds")
    print(f"Model {i} Accuracy: {accuracy:.4f}\n")

Evaluating Model 1:
Epoch [100/1000], Loss: 0.5391
Epoch [200/1000], Loss: 0.5157
Epoch [300/1000], Loss: 0.4807
Epoch [400/1000], Loss: 0.4324
Epoch [500/1000], Loss: 0.3732
Epoch [600/1000], Loss: 0.3097
Epoch [700/1000], Loss: 0.2494
Epoch [800/1000], Loss: 0.1979
Epoch [900/1000], Loss: 0.1571
Epoch [1000/1000], Loss: 0.1261
Model 1 Training Time: 1.7757 seconds
Model 1 Accuracy: 1.0000

Evaluating Model 2:
Epoch [100/1000], Loss: 0.6918
Epoch [200/1000], Loss: 0.6918
Epoch [300/1000], Loss: 0.6918
Epoch [400/1000], Loss: 0.6918
Epoch [500/1000], Loss: 0.6918
Epoch [600/1000], Loss: 0.6918
Epoch [700/1000], Loss: 0.6918
Epoch [800/1000], Loss: 0.6918
Epoch [900/1000], Loss: 0.6918
Epoch [1000/1000], Loss: 0.6918
Model 2 Training Time: 1.4680 seconds
Model 2 Accuracy: 0.7500

Evaluating Model 3:
Epoch [100/1000], Loss: 0.7106
Epoch [200/1000], Loss: 0.7106
Epoch [300/1000], Loss: 0.7106
Epoch [400/1000], Loss: 0.7106
Epoch [500/1000], Loss: 0.7106
Epoch [600/1000], Loss: 0.7106
Epoc

In [22]:
# print model wights
import numpy as np
models = [model1, model2, model3]
best_model = models[np.argmin([criterion(model(data_in), data_target).item() for model in models])]
for name, param in best_model.named_parameters():
    if param.requires_grad:
        print(f'Layer: {name} | Size: {param.size()}')
        print(param.data)

Layer: hidden.0.weight | Size: torch.Size([2, 2])
tensor([[-1.2698, -1.3064],
        [ 2.8888,  2.7685]])
Layer: hidden.0.bias | Size: torch.Size([2])
tensor([ 0.3376, -1.4623])
Layer: output.0.weight | Size: torch.Size([1, 2])
tensor([[-2.3113,  4.4462]])
Layer: output.0.bias | Size: torch.Size([1])
tensor([-0.6172])
