## A.I. Assignment 5

## Learning Goals

By the end of this lab, you should be able to:
* Get more familiar with tensors in pytorch 
* Create a simple multilayer perceptron model with pytorch
* Visualise the parameters


### Task

Build a fully connected feed forward network that adds two bits. Determine the a propper achitecture for this network (what database you use for this problem? how many layers? how many neurons on each layer? what is the activation function? what is the loss function? etc)

Create at least 3 such networks and compare their performance (how accurate they are?, how farst they are trained to get at 1 accuracy?)

Display for the best one the weights for each layer.


In [2]:
import torch
import torch.nn as nn
from collections import OrderedDict
if torch.cuda.is_available():
    my_device = torch.device('cuda')
else:
    my_device = torch.device('cpu')
print('Device: {}'.format(my_device))

x = torch.rand(2, 2, device=my_device)
print(x)

Device: cpu
tensor([[0.2363, 0.7747],
        [0.1751, 0.0125]])


In [151]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np

class AdderModel(nn.Module):
    def __init__(self):
        super(AdderModel, self).__init__()
        self.fc1 = nn.Linear(2, 2)  # Input layer: 2 neurons, Hidden layer: 2 neurons
        self.fc2 = nn.Linear(2, 2)  # Hidden layer: 2 neurons, Output layer: 2 neurons

    def forward(self, x):
        x = torch.relu(self.fc1(x))  # ReLU activation for the hidden layer
        x = torch.sigmoid(self.fc2(x))  # Sigmoid activation for the output layer
        return x

# Define dataset
X_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y_train = torch.tensor([[0, 0], [0, 1], [0, 1], [1, 0]], dtype=torch.float32)

# Instantiate model
model = AdderModel()

# Define loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross-Entropy Loss

# Try different learning rates
learning_rates = [0.1, 0.01, 0.001]
for lr in learning_rates:
    print(f"\nTraining with learning rate: {lr}")

    # Initialize model weights using Xavier initialization
    def weights_init(m):
        if isinstance(m, nn.Linear):
            nn.init.xavier_uniform_(m.weight)
    model.apply(weights_init)

    optimizer = optim.Adam(model.parameters(), lr=lr)  # Adam optimizer

    # Training
    num_epochs = 5000  # Increased number of epochs
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        # Apply L2 regularization
        l2_reg = None
        for param in model.parameters():
            if l2_reg is None:
                l2_reg = param.norm(2)
            else:
                l2_reg = l2_reg + param.norm(2)
        loss = loss + 0.001 * l2_reg  # Regularization strength: 0.001

        loss.backward()
        
        # Gradient Clipping to prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Max norm: 1.0

        optimizer.step()

        if (epoch+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    # Test the model
    with torch.no_grad():
        predictions = model(X_train)
        predicted_labels = torch.round(predictions)  # Round the predictions to get binary labels (0 or 1)
        correct = (predicted_labels == y_train).sum().item()
        total = y_train.numel()  # Total number of elements in y_train
        accuracy = correct / total
        print(f"Accuracy: {accuracy * 100:.2f}%")



Training with learning rate: 0.1
Epoch [100/5000], Loss: 0.0344
Epoch [200/5000], Loss: 0.0291
Epoch [300/5000], Loss: 0.0281
Epoch [400/5000], Loss: 0.0277
Epoch [500/5000], Loss: 0.0275
Epoch [600/5000], Loss: 0.0273
Epoch [700/5000], Loss: 0.0272
Epoch [800/5000], Loss: 0.0272
Epoch [900/5000], Loss: 0.0272
Epoch [1000/5000], Loss: 0.0272
Epoch [1100/5000], Loss: 0.0272
Epoch [1200/5000], Loss: 0.0272
Epoch [1300/5000], Loss: 0.0272
Epoch [1400/5000], Loss: 0.0272
Epoch [1500/5000], Loss: 0.0272
Epoch [1600/5000], Loss: 0.0272
Epoch [1700/5000], Loss: 0.0272
Epoch [1800/5000], Loss: 0.0272
Epoch [1900/5000], Loss: 0.0272
Epoch [2000/5000], Loss: 0.0272
Epoch [2100/5000], Loss: 0.0272
Epoch [2200/5000], Loss: 0.0272
Epoch [2300/5000], Loss: 0.0272
Epoch [2400/5000], Loss: 0.0272
Epoch [2500/5000], Loss: 0.0272
Epoch [2600/5000], Loss: 0.0272
Epoch [2700/5000], Loss: 0.0272
Epoch [2800/5000], Loss: 0.0272
Epoch [2900/5000], Loss: 0.0273
Epoch [3000/5000], Loss: 0.0272
Epoch [3100/500

In [265]:
# Attempt 1
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict

X_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y_train = torch.tensor([[0, 0], [0, 1], [0, 1], [1, 0]], dtype=torch.float32)

model = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(2, 2)),
    ('relu', nn.ReLU()),  # ReLU activation
    ('fc2', nn.Linear(2, 2)),
    ('sigmoid', nn.Sigmoid())  # Sigmoid activation
]))

def weights_init(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
        
model.apply(weights_init)

criterion = nn.MSELoss()

learning_rates = [0.1, 0.01, 0.001]
for lr in learning_rates:
    print(f"\nTraining with learning rate: {lr}")

    optimizer = optim.Adam(model.parameters(), lr=lr)

    num_epochs = 1000
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        l2_reg = None
        for param in model.parameters():
            if l2_reg is None:
                l2_reg = param.norm(2)
            else:
                l2_reg = l2_reg + param.norm(2)
        loss = loss + 0.001 * l2_reg

        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)

        optimizer.step()

        if (epoch+1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    with torch.no_grad():
        predictions = model(X_train)
        predicted_labels = torch.round(predictions)
        correct = (predicted_labels == y_train).sum().item()
        total = y_train.numel()
        accuracy = correct / total
        print(f"Accuracy: {accuracy * 100:.2f}%")



Training with learning rate: 0.1
Epoch [100/1000], Loss: 0.1738
Epoch [200/1000], Loss: 0.0253
Epoch [300/1000], Loss: 0.0244
Epoch [400/1000], Loss: 0.0242
Epoch [500/1000], Loss: 0.0241
Epoch [600/1000], Loss: 0.0240
Epoch [700/1000], Loss: 0.0240
Epoch [800/1000], Loss: 0.0240
Epoch [900/1000], Loss: 0.0240
Epoch [1000/1000], Loss: 0.0241
Accuracy: 100.00%

Training with learning rate: 0.01
Epoch [100/1000], Loss: 0.0240
Epoch [200/1000], Loss: 0.0239
Epoch [300/1000], Loss: 0.0239
Epoch [400/1000], Loss: 0.0240
Epoch [500/1000], Loss: 0.0240
Epoch [600/1000], Loss: 0.0240
Epoch [700/1000], Loss: 0.0240
Epoch [800/1000], Loss: 0.0240
Epoch [900/1000], Loss: 0.0240
Epoch [1000/1000], Loss: 0.0239
Accuracy: 100.00%

Training with learning rate: 0.001
Epoch [100/1000], Loss: 0.0239
Epoch [200/1000], Loss: 0.0239
Epoch [300/1000], Loss: 0.0239
Epoch [400/1000], Loss: 0.0239
Epoch [500/1000], Loss: 0.0239
Epoch [600/1000], Loss: 0.0239
Epoch [700/1000], Loss: 0.0239
Epoch [800/1000], Lo

In [285]:
# Attempt 2, best one
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict

X_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y_train = torch.tensor([[0, 0], [0, 1], [0, 1], [1, 0]], dtype=torch.float32)

new_model = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(2, 3)),
    ('relu', nn.ReLU()),
    ('fc2', nn.Linear(3, 3)),
    ('relu', nn.Tanh()),
    ('fc3', nn.Linear(3, 2)),
    ('sigmoid', nn.Sigmoid())
]))

criterion = nn.MSELoss()

learning_rates = [0.1, 0.01, 0.001]
for lr in learning_rates:
    print(f"\nTraining with learning rate: {lr}")

    optimizer = optim.Adam(model.parameters(), lr=lr)

    num_epochs = 50
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        loss.backward()
        
        optimizer.step()

        if (epoch+1) % 5 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    with torch.no_grad():
        predictions = model(X_train)
        predicted_labels = torch.round(predictions)
        correct = (predicted_labels == y_train).sum().item()
        total = y_train.numel()
        accuracy = correct / total
        print(f"Accuracy: {accuracy * 100:.2f}%")
        print("Model Parameters:")
        for name, param in new_model.named_parameters():
            if param.requires_grad:
                print(name, param.data)




Training with learning rate: 0.1
Epoch [5/50], Loss: 0.0000
Epoch [10/50], Loss: 0.0000
Epoch [15/50], Loss: 0.0000
Epoch [20/50], Loss: 0.0000
Epoch [25/50], Loss: 0.0000
Epoch [30/50], Loss: 0.0000
Epoch [35/50], Loss: 0.0000
Epoch [40/50], Loss: 0.0000
Epoch [45/50], Loss: 0.0000
Epoch [50/50], Loss: 0.0000
Accuracy: 100.00%
Model Parameters:
fc1.weight tensor([[-0.3826, -0.5174],
        [ 0.3124, -0.2854],
        [ 0.0867,  0.5934]])
fc1.bias tensor([-0.4885,  0.1310,  0.1114])
fc2.weight tensor([[-0.0334, -0.5586,  0.1379],
        [ 0.2959,  0.0308,  0.4329],
        [ 0.4772, -0.1475, -0.3903]])
fc2.bias tensor([-0.3393,  0.0645,  0.2708])
fc3.weight tensor([[ 0.2204,  0.3110, -0.5230],
        [ 0.0096, -0.2126, -0.4429]])
fc3.bias tensor([-0.5058,  0.0303])

Training with learning rate: 0.01
Epoch [5/50], Loss: 0.0000
Epoch [10/50], Loss: 0.0000
Epoch [15/50], Loss: 0.0000
Epoch [20/50], Loss: 0.0000
Epoch [25/50], Loss: 0.0000
Epoch [30/50], Loss: 0.0000
Epoch [35/50], Los

In [234]:
# Attempt 3
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict

X_train = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
y_train = torch.tensor([[0, 0], [0, 1], [0, 1], [1, 0]], dtype=torch.float32)


model = nn.Sequential(OrderedDict([
    ('fc1', nn.Linear(2, 5)),
    ('relu', nn.ReLU()),
    ('fc2', nn.Linear(5, 2)),
    ('sigmoid', nn.Sigmoid())
]))

def weights_init(m):
    if isinstance(m, nn.Linear):
        nn.init.xavier_uniform_(m.weight)
model.apply(weights_init)

criterion = nn.BCELoss()

learning_rates = [0.1, 0.01, 0.001]
for lr in learning_rates:
    print(f"\nTraining with learning rate: {lr}")

    optimizer = optim.Adam(model.parameters(), lr=lr)

    num_epochs = 100
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = model(X_train)
        loss = criterion(outputs, y_train)

        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # Max norm: 1.0

        optimizer.step()

        if (epoch+1) % 5 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

    with torch.no_grad():
        predictions = model(X_train)
        predicted_labels = torch.round(predictions)
        correct = (predicted_labels == y_train).sum().item()
        total = y_train.numel()
        accuracy = correct / total
        print(f"Accuracy: {accuracy * 100:.2f}%")



Training with learning rate: 0.1
Epoch [5/100], Loss: 0.5790
Epoch [10/100], Loss: 0.4498
Epoch [15/100], Loss: 0.3305
Epoch [20/100], Loss: 0.1968
Epoch [25/100], Loss: 0.1056
Epoch [30/100], Loss: 0.0553
Epoch [35/100], Loss: 0.0295
Epoch [40/100], Loss: 0.0170
Epoch [45/100], Loss: 0.0104
Epoch [50/100], Loss: 0.0071
Epoch [55/100], Loss: 0.0051
Epoch [60/100], Loss: 0.0041
Epoch [65/100], Loss: 0.0034
Epoch [70/100], Loss: 0.0029
Epoch [75/100], Loss: 0.0026
Epoch [80/100], Loss: 0.0023
Epoch [85/100], Loss: 0.0021
Epoch [90/100], Loss: 0.0020
Epoch [95/100], Loss: 0.0018
Epoch [100/100], Loss: 0.0017
Accuracy: 100.00%

Training with learning rate: 0.01
Epoch [5/100], Loss: 0.0016
Epoch [10/100], Loss: 0.0014
Epoch [15/100], Loss: 0.0012
Epoch [20/100], Loss: 0.0011
Epoch [25/100], Loss: 0.0010
Epoch [30/100], Loss: 0.0009
Epoch [35/100], Loss: 0.0008
Epoch [40/100], Loss: 0.0007
Epoch [45/100], Loss: 0.0006
Epoch [50/100], Loss: 0.0006
Epoch [55/100], Loss: 0.0005
Epoch [60/100],

In [16]:
import pandas as pd
df_bits = pd.read_csv("bit_test.csv")
#X = data['input1'].astype(float)
#y = data['input2'].astype(float)
X=df_bits[['input1','input2']]
#print(X)
Y=df_bits[['output1','output2']]

##df_bits.dropna(axis=1,inplace=True)
#df_bits.isnull().sum()
#print(df_bits.head())
#print(data['input1'])
data_in = torch.tensor(X[['input1', 'input2']].values).float()
data_out = torch.tensor(Y[['output1', 'output2']].values).float()
print(data_in)
print(data_out)

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])
tensor([[0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.]])


In [5]:
## your code here
model1 = nn.Sequential(OrderedDict([
    ('hidden', nn.Linear(2,2)),
    ('activation', nn.ReLU()),
     ('output', nn.Linear(2,2))
]))

In [72]:
print(model1)

Sequential(
  (hidden): Linear(in_features=2, out_features=2, bias=True)
  (activation): ReLU()
  (output): Linear(in_features=2, out_features=2, bias=True)
)


In [73]:
# your code here
#data_in = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]]).float()
print(data_in)

tensor([[0., 0.],
        [0., 1.],
        [1., 0.],
        [1., 1.]])


In [74]:
# your code here
#data_target = torch.tensor( ...
print(data_out)

tensor([[0., 1.],
        [0., 1.],
        [0., 1.],
        [1., 0.]])


In [99]:
# your code here
criterion = nn.MSELoss()
optimizer = optim.SGD(model1.parameters(), lr=0.1)

In [299]:
import torch.nn as nn
import torch.optim as optim

model1 = nn.Sequential(OrderedDict([
    ('hidden', nn.Linear(2,5)),
    ('activation', nn.ReLU()),
     ('output', nn.Linear(5,2))
]))

criterion = nn.MSELoss()
optimizer = optim.SGD(model1.parameters(), lr=0.2)

num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass
    outputs = model1(data_in)
    loss = criterion(outputs, data_out)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


predicted_labels = torch.round(torch.sigmoid(model1(data_in))).detach().numpy()
accuracy = (predicted_labels == data_out.detach().numpy()).mean()
print('Accuracy:', accuracy.item())


Epoch [100/1000], Loss: 0.0037
Epoch [200/1000], Loss: 0.0000
Epoch [300/1000], Loss: 0.0000
Epoch [400/1000], Loss: 0.0000
Epoch [500/1000], Loss: 0.0000
Epoch [600/1000], Loss: 0.0000
Epoch [700/1000], Loss: 0.0000
Epoch [800/1000], Loss: 0.0000
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000
Accuracy: 0.75


In [278]:
## your code here
model2 = nn.Sequential(OrderedDict([
    ('hidden', nn.Linear(2,5)),
    ('activation', nn.Tanh()),
     ('output', nn.Linear(5,2))
]))


criterion = nn.MSELoss()
optimizer = optim.SGD(model2.parameters(), lr=0.9)

num_epochs = 2000
for epoch in range(num_epochs):
    # Forward pass
    outputs = model2(data_in)
    loss = criterion(outputs, data_out)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


predicted_labels = torch.round(torch.sigmoid(model2(data_in))).detach().numpy()
accuracy = (predicted_labels == data_out.detach().numpy()).mean()
print('Accuracy:', accuracy.item())


Epoch [100/2000], Loss: 0.0072
Epoch [200/2000], Loss: 0.0007
Epoch [300/2000], Loss: 0.0027
Epoch [400/2000], Loss: 0.0000
Epoch [500/2000], Loss: 0.0000
Epoch [600/2000], Loss: 0.0000
Epoch [700/2000], Loss: 0.0000
Epoch [800/2000], Loss: 0.0000
Epoch [900/2000], Loss: 0.0000
Epoch [1000/2000], Loss: 0.0000
Epoch [1100/2000], Loss: 0.0000
Epoch [1200/2000], Loss: 0.0000
Epoch [1300/2000], Loss: 0.0000
Epoch [1400/2000], Loss: 0.0000
Epoch [1500/2000], Loss: 0.0000
Epoch [1600/2000], Loss: 0.0000
Epoch [1700/2000], Loss: 0.0000
Epoch [1800/2000], Loss: 0.0000
Epoch [1900/2000], Loss: 0.0000
Epoch [2000/2000], Loss: 0.0000
Accuracy: 0.875


In [286]:
model3 = nn.Sequential(OrderedDict([
    ('hidden', nn.Linear(2, 4)),          # Adjust hidden layer size as needed
    ('activation', nn.ReLU()),             # Use ReLU activation in the hidden layer
    ('output', nn.Linear(4, 2)),          # Output layer with 2 neurons for 2-bit output
    ('softmax', nn.Softmax(dim=1))        # Apply Softmax activation to output layer
]))

criterion = nn.MSELoss()
optimizer = optim.SGD(model3.parameters(), lr=0.002)

num_epochs = 1000
for epoch in range(num_epochs):
    # Forward pass
    outputs = model3(data_in)
    loss = criterion(outputs, data_out)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')


predicted_labels = torch.round(torch.sigmoid(model3(data_in))).detach().numpy()
accuracy = (predicted_labels == data_out.detach().numpy()).mean()
print('Accuracy:', accuracy.item())


Epoch [100/1000], Loss: 0.1713
Epoch [200/1000], Loss: 0.1691
Epoch [300/1000], Loss: 0.1670
Epoch [400/1000], Loss: 0.1649
Epoch [500/1000], Loss: 0.1629
Epoch [600/1000], Loss: 0.1609
Epoch [700/1000], Loss: 0.1589
Epoch [800/1000], Loss: 0.1569
Epoch [900/1000], Loss: 0.1550
Epoch [1000/1000], Loss: 0.1531
Accuracy: 0.5


In [328]:
model4 = nn.Sequential(OrderedDict([
    ('hidden', nn.Linear(2, 3)),          # Adjust hidden layer size as needed
    ('activation', nn.ELU()),             # Use ELU activation in the hidden layer
    ('output', nn.Linear(3, 2))           # Output layer with 2 neurons for 2-bit output
]))

criterion = nn.MSELoss()

# Define the optimizer
optimizer = optim.SGD(model4.parameters(), lr=0.5)

# Convert input and output data to torch tensors
data_in = torch.tensor(X[['input1', 'input2']].values, dtype=torch.float32)
data_out = torch.tensor(Y[['output1', 'output2']].values, dtype=torch.float32)

num_epochs = 2000
for epoch in range(num_epochs):
    # Forward pass
    outputs = model4(data_in)
    loss = criterion(outputs, data_out)

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print the loss every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

# Evaluate the trained model
with torch.no_grad():
    predicted_labels = torch.round(torch.sigmoid(model4(data_in))).detach().numpy()
    accuracy = (predicted_labels == data_out.detach().numpy()).mean()
    print('Accuracy:', accuracy)

Epoch [100/2000], Loss: 0.0629
Epoch [200/2000], Loss: 0.0842
Epoch [300/2000], Loss: 0.0621
Epoch [400/2000], Loss: 0.0607
Epoch [500/2000], Loss: 0.0571
Epoch [600/2000], Loss: 0.0485
Epoch [700/2000], Loss: 0.0072
Epoch [800/2000], Loss: 0.0006
Epoch [900/2000], Loss: 0.0000
Epoch [1000/2000], Loss: 0.0000
Epoch [1100/2000], Loss: 0.0000
Epoch [1200/2000], Loss: 0.0000
Epoch [1300/2000], Loss: 0.0000
Epoch [1400/2000], Loss: 0.0000
Epoch [1500/2000], Loss: 0.0000
Epoch [1600/2000], Loss: 0.0000
Epoch [1700/2000], Loss: 0.0000
Epoch [1800/2000], Loss: 0.0000
Epoch [1900/2000], Loss: 0.0000
Epoch [2000/2000], Loss: 0.0000
Accuracy: 0.625


In [339]:
import torch
import torch.nn as nn
import torch.optim as optim
from collections import OrderedDict

# Define dataset (inputs and outputs for adding two bits)
data_in = torch.tensor([[0, 0], [0, 1], [1, 0], [1, 1]], dtype=torch.float32)
data_out = torch.tensor([[0, 0], [0, 1], [0, 1], [1, 0]], dtype=torch.float32)

# Define model architectures
models = []

# Model 1
model1 = nn.Sequential(OrderedDict([
    ('hidden', nn.Linear(2, 5)),
    ('activation', nn.ReLU()),
    ('output', nn.Linear(5, 2))
]))
models.append(model1)

# Model 2
model2 = nn.Sequential(OrderedDict([
    ('hidden1', nn.Linear(2, 4)),
    ('activation1', nn.Tanh()),
    ('hidden2', nn.Linear(4, 4)),
    ('activation2', nn.Tanh()),
    ('output', nn.Linear(4, 2))
]))
models.append(model2)

# Model 3
model3 = nn.Sequential(OrderedDict([
    ('hidden1', nn.Linear(2, 3)),
    ('activation1', nn.Sigmoid()),
    ('hidden2', nn.Linear(3, 3)),
    ('activation2', nn.Sigmoid()),
    ('hidden3', nn.Linear(3, 3)),
    ('activation3', nn.Sigmoid()),
    ('output', nn.Linear(3, 2))
]))
models.append(model3)

# Define criterion (loss function) and optimizer
criterion = nn.MSELoss()
optimizers = [optim.SGD(model.parameters(), lr=0.2) for model in models]

# Train each model
for i, model in enumerate(models):
    print(f"Training Model {i+1}")
    optimizer = optimizers[i]
    for epoch in range(1000):
        # Forward pass
        outputs = model(data_in)
        loss = criterion(outputs, data_out)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Print the loss every 100 epochs
        if (epoch + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/1000], Loss: {loss.item():.4f}')

    # Evaluate the trained model
    with torch.no_grad():
        predicted_labels = torch.round(torch.sigmoid(model(data_in))).detach().numpy()
        accuracy = (predicted_labels == data_out.detach().numpy()).mean()
        print(f'Model {i+1} Accuracy:', accuracy.item())

# Display the weights for each layer of the best-performing model (highest accuracy)
best_model_idx = max(range(len(models)), key=lambda i: accuracy)
best_model = models[best_model_idx]
for name, param in best_model.named_parameters():
    print(f'Layer: {name}, Weights: {param.data}')


Training Model 1
Epoch [100/1000], Loss: 0.0085
Epoch [200/1000], Loss: 0.0001
Epoch [300/1000], Loss: 0.0000
Epoch [400/1000], Loss: 0.0000
Epoch [500/1000], Loss: 0.0000
Epoch [600/1000], Loss: 0.0000
Epoch [700/1000], Loss: 0.0000
Epoch [800/1000], Loss: 0.0000
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000
Model 1 Accuracy: 0.625
Training Model 2
Epoch [100/1000], Loss: 0.1104
Epoch [200/1000], Loss: 0.0094
Epoch [300/1000], Loss: 0.0003
Epoch [400/1000], Loss: 0.0000
Epoch [500/1000], Loss: 0.0000
Epoch [600/1000], Loss: 0.0000
Epoch [700/1000], Loss: 0.0000
Epoch [800/1000], Loss: 0.0000
Epoch [900/1000], Loss: 0.0000
Epoch [1000/1000], Loss: 0.0000
Model 2 Accuracy: 0.75
Training Model 3
Epoch [100/1000], Loss: 0.2187
Epoch [200/1000], Loss: 0.2187
Epoch [300/1000], Loss: 0.2187
Epoch [400/1000], Loss: 0.2187
Epoch [500/1000], Loss: 0.2187
Epoch [600/1000], Loss: 0.2186
Epoch [700/1000], Loss: 0.2186
Epoch [800/1000], Loss: 0.2186
Epoch [900/1000], Loss: 0.2186
E