In [1]:
#pip install torch

In [2]:
import torch as th
import torch.nn as nn
import torch.optim as optim
from random import uniform
import torch.nn.functional as F

## DATASET

The dataset consists of all binary vectors with six elements, where a value of 1 is assigned to each symmetric vector. The primary challenge in creating the dataset lies in its compatibility with the utilized libraries. Indeed, one of the initial obstacles was understanding the appropriate format for generating the dataset to make it usable with PyTorch.

In [3]:
# Generate data: all possible 2^6 binary vectors
data = th.tensor([[i >> d & 1 for d in range(6)] for i in range(64)], dtype=th.float32)
target = th.tensor([int(all(data[i] == data[i].flip(0))) for i in range(64)], dtype=th.float32)

symmetric_data = data[target == 1]
non_symmetric_data = data[target == 0]

# Determine the length of the minority class
num_symmetric = symmetric_data.shape[0]
num_non_symmetric = non_symmetric_data.shape[0]

# Oversample the minority class to match the number of samples in the majority class
oversampled_symmetric_data = symmetric_data.repeat(num_non_symmetric // num_symmetric + 1, 1)[:num_non_symmetric]
balanced_data = th.cat([non_symmetric_data, oversampled_symmetric_data], dim=0)

# Create corresponding target labels for the balanced dataset
balanced_target = th.cat([th.zeros(num_non_symmetric), th.ones(num_non_symmetric)])

# Shuffle the balanced dataset
indices = th.randperm(balanced_data.shape[0])
balanced_data = balanced_data[indices]
balanced_target = balanced_target[indices]

56
56


## SYMMETRIC WEIGHTS

The first experiment involves developing a neural network where the weights are initialized symmetrically. This choice was made based on insights from the paper:

"The key property of this solution is that for a given hidden unit, weights that are symmetric about the middle of the input vector are equal in magnitude and opposite in sign. So if a symmetrical pattern is presented, both hidden units will receive a net input of 0 from the input units, and, because the hidden units have a negative bias, both will be off. In this case the output unit, having a positive bias, will be on."
[Source: Rumelhart, D. E., Hinton, G. E., & Williams, R. J. (1986). Learning representations by back-propagating errors. Nature, 323(6088), 533-536.](https://bucket.ballarin.cc/papers/oth/rumelhart1986.pdf)

The network was constructed as faithfully as possible to that described in the paper, precisely to verify the possibility of reproducing its results.



In [4]:
class SymmetryClassifier(nn.Module):
    def __init__(self):
        super(SymmetryClassifier, self).__init__()
        self.fc1 = nn.Linear(6, 2)
        self.fc2 = nn.Linear(2, 1)
        
        # Initialize the weights of the first linear layer
        self.fc1.weight.data.uniform_(-3, 3)
        
        # Initialize the weights of the second linear layer
        self.fc2.weight.data.uniform_(-3, 3)
        
        # Symmetry in the weights of the first layer
        with th.no_grad():
            weights = self.fc1.weight.data
            size = weights.size(1)
            for i in range(size // 2):
                weights[:, i] = -weights[:, size - i - 1]

    def forward(self, x):
        x = self.fc1(x)
        x = F.sigmoid(x)  # Sigmoid activation function
        x = self.fc2(x)
        return F.sigmoid(x)  # Sigmoid to output a probability


In [5]:
# Same as the paper
epochs = 1425 
epsilon = 0.1  
alpha = 0.9

In [6]:
model = SymmetryClassifier()
criterion = th.nn.MSELoss()   # Same as the paper
optimizer = th.optim.SGD(model.parameters(), lr=epsilon, momentum=alpha)

primo strato: tensor([[-0.4658, -2.6870,  2.7859, -2.7859,  2.6870,  0.4658],
        [-0.3963, -0.3970, -0.9405,  0.9405,  0.3970,  0.3963]])


In [7]:
# Training loop (on the balanced dataset)
for epoch in range(epochs):
    # Forward pass
    outputs = model(balanced_data).squeeze() 
    loss = criterion(outputs, balanced_target.float())

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print loss at every 100th step
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [100/1425], Loss: 0.2464
Epoch [200/1425], Loss: 0.2309
Epoch [300/1425], Loss: 0.2218
Epoch [400/1425], Loss: 0.2155
Epoch [500/1425], Loss: 0.2104
Epoch [600/1425], Loss: 0.2047
Epoch [700/1425], Loss: 0.1957
Epoch [800/1425], Loss: 0.1806
Epoch [900/1425], Loss: 0.1613
Epoch [1000/1425], Loss: 0.1424
Epoch [1100/1425], Loss: 0.1225
Epoch [1200/1425], Loss: 0.1029
Epoch [1300/1425], Loss: 0.0864
Epoch [1400/1425], Loss: 0.0747


In [8]:
# Inspect the parameters
for name, param in model.named_parameters():
    print(name, param)

fc1.weight Parameter containing:
tensor([[-2.1843, -5.7853,  3.3429, -3.2803,  5.8479,  2.2512],
        [-2.0021, -4.3957,  2.0563, -2.0158,  4.4361,  2.0419]],
       requires_grad=True)
fc1.bias Parameter containing:
tensor([-2.0909,  1.4148], requires_grad=True)
fc2.weight Parameter containing:
tensor([[-6.1428,  5.7764]], requires_grad=True)
fc2.bias Parameter containing:
tensor([-2.6991], requires_grad=True)


In [9]:
# Assess the model testing it on the entire dataset (unbalanced)
with th.no_grad():
    outputs = model(data).squeeze()
    predictions = (outputs >= 0.5).float()
    accuracy = (predictions == target).float().mean()
    Tpr = ((predictions == 1) & (target == 1)).float().sum()/((target == 1)).float().sum()
    Fpr = ((predictions == 1) & (target == 0)).float().sum()/((target == 0)).float().sum()
    Tnr = ((predictions == 0) & (target == 0)).float().sum()/((target == 0)).float().sum()
    Fnr = ((predictions == 0) & (target == 1)).float().sum()/((target == 1)).float().sum()

    print(f'TPR: {Tpr}, FPR: {Fpr}, TNR: {Tnr}, FNR: {Fnr}')
    print(f'Accuracy: {accuracy.item()}')

TPR: 1.0, FPR: 0.1071428582072258, TNR: 0.8928571343421936, FNR: 0.0
Accuracy: 0.90625


As evident from various metrics, the network performs well although not at the same level as described in the paper. Nevertheless, it is notable that the symmetrically initialized weights remain symmetric throughout training on the data.

## NON-SYMMETRIC WEIGHTS

The second experiment involves constructing a neural network identical to the previous one, with the only difference being the weight initialization. In this case, the weights are uniformly distributed between -3 and 3, but they are not symmetrically chosen.

In [28]:
class SymmetryClassifier_not_sim(nn.Module):
    def __init__(self):
        super(SymmetryClassifier_not_sim, self).__init__()
        self.fc1 = nn.Linear(6, 2)
        self.fc2 = nn.Linear(2, 1)
        
        # Initialize the weights of the first linear layer
        self.fc1.weight.data.uniform_(-3, 3)
        self.fc1.bias.data.zero_()  
        # Initialize the weights of the second linear layer
        self.fc2.weight.data.uniform_(-3, 3)
        self.fc2.bias.data.zero_()  

    def forward(self, x):
        x = self.fc1(x)
        x = F.sigmoid(x)  # Sigmoid activation function
        x = self.fc2(x)
        return F.sigmoid(x)  # Sigmoid to output a probability

In [29]:
# Same as the paper
epochs = 1425 
epsilon = 0.1
alpha = 0.9

In [30]:
model_not_sim = SymmetryClassifier_not_sim()
criterion = th.nn.MSELoss()
optimizer = th.optim.SGD(model_not_sim.parameters(), lr=epsilon, momentum=alpha)

In [31]:
# Training loop (on balanced dataset)
for epoch in range(epochs):
    # Forward pass
    outputs = model_not_sim(balanced_data).squeeze() 
    loss = criterion(outputs, balanced_target.float())

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print loss at every 100th step
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [100/1425], Loss: 0.2481
Epoch [200/1425], Loss: 0.2354
Epoch [300/1425], Loss: 0.2214
Epoch [400/1425], Loss: 0.2103
Epoch [500/1425], Loss: 0.2018
Epoch [600/1425], Loss: 0.1954
Epoch [700/1425], Loss: 0.1907
Epoch [800/1425], Loss: 0.1869
Epoch [900/1425], Loss: 0.1836
Epoch [1000/1425], Loss: 0.1802
Epoch [1100/1425], Loss: 0.1765
Epoch [1200/1425], Loss: 0.1726
Epoch [1300/1425], Loss: 0.1692
Epoch [1400/1425], Loss: 0.1664


In [32]:
# Inspect the parameters
for name, param in model_not_sim.named_parameters():
    print(name, param)

fc1.weight Parameter containing:
tensor([[-2.0423, -0.7728,  4.8907, -4.8292, -1.3198,  2.1148],
        [-0.0623,  4.6020, -2.8319,  2.7310,  4.6137, -0.0325]],
       requires_grad=True)
fc1.bias Parameter containing:
tensor([-1.9602, -0.6527], requires_grad=True)
fc2.weight Parameter containing:
tensor([[-5.4238, -3.2359]], requires_grad=True)
fc2.bias Parameter containing:
tensor([3.2188], requires_grad=True)


It is noticeable that the weights of the first linear layer are semi-symmetric, while those of the second layer are not. This leads to incorrect classifications.

In [33]:
# Assess the model testing it on the entire dataset (unbalanced)
with th.no_grad():
    outputs = model_not_sim(data).squeeze()
    predictions = (outputs >= 0.5).float()
    accuracy = (predictions == target).float().mean()
    Tpr = ((predictions == 1) & (target == 1)).float().sum()/((target == 1)).float().sum()
    Fpr = ((predictions == 1) & (target == 0)).float().sum()/((target == 0)).float().sum()
    Tnr = ((predictions == 0) & (target == 0)).float().sum()/((target == 0)).float().sum()
    Fnr = ((predictions == 0) & (target == 1)).float().sum()/((target == 1)).float().sum()

    print(f'TPR: {Tpr}, FPR: {Fpr}, TNR: {Tnr}, FNR: {Fnr}')
    print(f'Accuracy: {accuracy.item()}')

TPR: 0.5, FPR: 0.1785714328289032, TNR: 0.8214285969734192, FNR: 0.5
Accuracy: 0.78125


The evaluation metrics of this network exhibit significantly different values compared to those of the previous network. Specifically, in addition to an accuracy value of 78%, it shows very low values for both TPR and FNR, both at 50%. This outcome suggests that if the network is not initialized with symmetric weights, it fails to render all its weights symmetric within the number of epochs stated in the paper. Further tests were conducted by increasing the number of epochs even by tenfold, but the accuracy described in the paper was never achieved.

## PAPER VALUES

In the third experiment, the exact neural network described in the paper was reconstructed, with weights initialized to the same values as in the original study.

In [34]:
class SymmetryClassifier_paper(nn.Module):
    def __init__(self):
        super(SymmetryClassifier_paper, self).__init__()
        self.fc1 = nn.Linear(6, 2)
        self.fc2 = nn.Linear(2, 1)
    
        # Initialize the weights of the first linear layer with the same values of the paper
        self.fc1.weight.data = th.tensor([[-14.2, 3.6, -7.1, 7.1, -3.6, 14.2],
                            [14.2, -3.6, 7.2, -7.2, 3.6, -14.2]])
        self.fc1.bias.data = th.tensor([-1.1, -1.1])

        # Initialize the weights of the second linear layer with the same values of the paper
        weights2 = self.fc2.weight.data
        weights2 = th.tensor([[-8.8, -8.8]])
        self.fc2.bias.data = th.tensor([6.4])

    def forward(self, x):
        x = self.fc1(x)
        x = F.sigmoid(x)  # Sigmoid activation function
        x = self.fc2(x)
        return F.sigmoid(x)  # Sigmoid to output a probability

In [35]:
# As the paper
epochs = 1425
epsilon = 0.1
alpha = 0.9

In [36]:
model_paper = SymmetryClassifier_paper()
criterion = th.nn.MSELoss()
optimizer = th.optim.SGD(model_paper.parameters(), lr=epsilon, momentum=alpha)

In [37]:
# Training loop (on balanced dataset)
for epoch in range(epochs):
    # Forward pass
    outputs = model_paper(balanced_data).squeeze() 
    loss = criterion(outputs, balanced_target.float())

    # Backward pass and optimization
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Print loss at every 100th step
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}')

Epoch [100/1425], Loss: 0.4971
Epoch [200/1425], Loss: 0.4951
Epoch [300/1425], Loss: 0.4850
Epoch [400/1425], Loss: 0.0782
Epoch [500/1425], Loss: 0.0334
Epoch [600/1425], Loss: 0.0223
Epoch [700/1425], Loss: 0.0167
Epoch [800/1425], Loss: 0.0133
Epoch [900/1425], Loss: 0.0110
Epoch [1000/1425], Loss: 0.0094
Epoch [1100/1425], Loss: 0.0082
Epoch [1200/1425], Loss: 0.0072
Epoch [1300/1425], Loss: 0.0065
Epoch [1400/1425], Loss: 0.0059


In [38]:
# Inspect the parameters
for name, param in model_paper.named_parameters():
    print(name, param)

fc1.weight Parameter containing:
tensor([[-14.5961,   3.6689,  -7.3702,   7.3394,  -3.7267,  14.5730],
        [ 14.6330,  -3.7290,   7.3828,  -7.3984,   3.6939, -14.6457]],
       requires_grad=True)
fc1.bias Parameter containing:
tensor([-2.4297, -2.4741], requires_grad=True)
fc2.weight Parameter containing:
tensor([[-7.0244, -7.0425]], requires_grad=True)
fc2.bias Parameter containing:
tensor([3.5520], requires_grad=True)


In [39]:
# Assess the model testing it on the entire dataset (unbalanced)
with th.no_grad():
    outputs = model_paper(data).squeeze()
    predictions = (outputs >= 0.5).float()
    accuracy = (predictions == target).float().mean()
    Tpr = ((predictions == 1) & (target == 1)).float().sum()/((target == 1)).float().sum()
    Fpr = ((predictions == 1) & (target == 0)).float().sum()/((target == 0)).float().sum()
    Tnr = ((predictions == 0) & (target == 0)).float().sum()/((target == 0)).float().sum()
    Fnr = ((predictions == 0) & (target == 1)).float().sum()/((target == 1)).float().sum()

    print(f'TPR: {Tpr}, FPR: {Fpr}, TNR: {Tnr}, FNR: {Fnr}')
    print(f'Accuracy: {accuracy.item()}')

TPR: 1.0, FPR: 0.0, TNR: 1.0, FNR: 0.0
Accuracy: 1.0


In this case, we were able to achieve perfect classification results.