In [1]:
import torch
import torch.nn as nn
from collections import OrderedDict

In [2]:
model1 = nn.Sequential(OrderedDict([
    ('hidden_net', nn.Linear(2,32)),
    ('hidden_act', nn.Sigmoid()),
    ('output_net', nn.Linear(32,2)),
    ('output_act', nn.Sigmoid())
]))
model2 = nn.Sequential(OrderedDict([
    ('hidden_net', nn.Linear(2,8)),
    ('hidden_act', nn.ReLU()),
    ('output_net', nn.Linear(8,2)),
    ('output_act', nn.Sigmoid())
]))
model3 = nn.Sequential(OrderedDict([
    ('hidden_net', nn.Linear(2,16)),
    ('hidden_act', nn.Sigmoid()),
    ('output_net', nn.Linear(16,2)),
    ('output_act', nn.Sigmoid())
]))

In [3]:
print(model1)
print(model2)
print(model3)

Sequential(
  (hidden_net): Linear(in_features=2, out_features=32, bias=True)
  (hidden_act): Sigmoid()
  (output_net): Linear(in_features=32, out_features=2, bias=True)
  (output_act): Sigmoid()
)
Sequential(
  (hidden_net): Linear(in_features=2, out_features=8, bias=True)
  (hidden_act): ReLU()
  (output_net): Linear(in_features=8, out_features=2, bias=True)
  (output_act): Sigmoid()
)
Sequential(
  (hidden_net): Linear(in_features=2, out_features=16, bias=True)
  (hidden_act): Sigmoid()
  (output_net): Linear(in_features=16, out_features=2, bias=True)
  (output_act): Sigmoid()
)


In [4]:
data_in = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float)
print(data_in)

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])


In [5]:
data_target = torch.tensor([[0, 0], [0, 1], [0, 1], [1, 0]], dtype=torch.float)
print(data_target)

tensor([[0., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.]])


In [6]:
criterion1 = nn.MSELoss()
optimizer1 = torch.optim.Adam(model1.parameters(), lr=0.01)
criterion2 = nn.CrossEntropyLoss()
optimizer2 = torch.optim.SGD(model2.parameters(), lr=0.01, momentum=0.9)
criterion3 = nn.L1Loss()
optimizer3 = torch.optim.SGD(model3.parameters(), lr=0.01, momentum=0.9)

In [7]:
def train(model, inputs, outputs, criterion, optimizer):
    for epoch in range(100):
        optimizer.zero_grad()
        loss = criterion(model(inputs), outputs)
        loss.backward()
        optimizer.step()

In [8]:
for model in [model1, model2, model3]:
    if model == model1:
        criterion = criterion1
        optimizer = optimizer1
    if model == model2:
        criterion = criterion2
        optimizer = optimizer2
    if model == model3:
        criterion = criterion3
        optimizer = optimizer3
    train(model, data_in, data_target, criterion, optimizer)
    outputs = model(data_in)
    predicted = (outputs >=0.5).float()
    print(predicted)
    accuracy = (predicted == data_target).float().mean()
    print(f'Training Accuracy: {accuracy.item()*100}')

tensor([[0., 0.],
        [0., 1.],
        [0., 1.],
        [1., 0.]])
Training Accuracy: 100.0
tensor([[0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.]])
Training Accuracy: 62.5
tensor([[0., 1.],
        [0., 1.],
        [0., 1.],
        [0., 1.]])
Training Accuracy: 62.5


In [9]:
print('Weight of network1 :\n',model1[0].weight)
print('Weight of network2 :\n',model2[0].weight)
print('Weight of network3 :\n',model3[0].weight)

Weight of network1 :
 Parameter containing:
tensor([[-1.5832,  0.5899],
        [-0.5482, -1.2400],
        [-0.8949, -0.1814],
        [-0.4022, -1.3632],
        [-0.2604, -1.4232],
        [-0.6266, -1.4479],
        [-1.2226, -1.8005],
        [-1.3858, -0.8732],
        [-1.0015, -1.2994],
        [-1.4872,  1.0885],
        [-1.3479, -0.9300],
        [ 1.2788,  0.5540],
        [-1.2339, -0.9198],
        [-1.5657, -1.6871],
        [-1.3676, -0.2621],
        [ 1.1781,  0.7543],
        [ 1.5876, -1.8456],
        [-1.3765, -0.4899],
        [-0.1504, -0.3625],
        [ 1.0624,  1.3164],
        [ 1.1051,  0.8770],
        [ 0.4576,  1.0997],
        [ 0.7400, -1.8451],
        [ 0.2642,  1.0642],
        [-1.4382, -1.3577],
        [-0.0282,  1.0266],
        [ 1.1077,  0.4716],
        [ 1.0138,  0.2779],
        [ 1.5380,  0.5831],
        [-0.9831, -1.4486],
        [-1.4253, -0.9675],
        [ 0.7420,  1.4859]], requires_grad=True)
Weight of network2 :
 Parameter contain