In [1]:
import numpy as np
import torch
from torch import Tensor
from torch import tensor
from torch import nn
from matplotlib import pyplot as plt
import matplotlib as mpl
import torchvision
import json

In [12]:
data = torch.tensor([
    [0,0],
    [0,1],
    [1,0],
    [1,1]
]).to(dtype=torch.float32)
label = torch.tensor([
    [0],
    [1],
    [1],
    [0]
]).to(dtype=torch.float32)

Model

In [52]:
class XORNet(nn.Module):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.lin = nn.Linear(2,1,bias=False)
    def forward(self,x):
        x = self.lin(x)
        return torch.abs(x).clamp(0,1)
    def sample_weight_state(self):
        return self.lin.weight

Gradient Descend family
can only solve 50% of the time, require good weight initialization

In [14]:
model = XORNet()
optimizer = torch.optim.SGD(model.parameters())
mse = torch.nn.MSELoss()
max_epochs = 100
for epoch in range(max_epochs):
    pred =  model(data)
    loss = mse(pred,label)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    print(loss.item())

0.3616286516189575
0.3615608215332031
0.36149322986602783
0.36142581701278687
0.3613585829734802
0.3612915277481079
0.3612246513366699
0.36115798354148865
0.3610914349555969
0.3610251545906067
0.3609590232372284
0.36089304089546204
0.36082723736763
0.3607616424560547
0.3606962263584137
0.36063098907470703
0.3605659306049347
0.3605010509490967
0.3604363799095154
0.36037182807922363
0.3603074550628662
0.3602433204650879
0.3601793050765991
0.36011549830436707
0.36005187034606934
0.35998833179473877
0.3599250614643097
0.35986194014549255
0.35979899764060974
0.35973620414733887
0.35967355966567993
0.3596111536026001
0.3595488667488098
0.35948681831359863
0.3594248592853546
0.3593630790710449
0.35930150747299194
0.3592400848865509
0.3591788411140442
0.3591177761554718
0.35905683040618896
0.35899603366851807
0.35893547534942627
0.358875036239624
0.3588147759437561
0.3587546944618225
0.35869476199150085
0.3586350083351135
0.35857537388801575
0.3585159480571747
0.35845667123794556
0.35839754343

Genetic algorithm(better than gradient descend)
can solve nearly every time, requiring 100 generations (or around 700 generations with worst weight initialization)

In [53]:
def mutate(net:XORNet,mutation_power = 0.1):
    cloned = XORNet()
    cloned.load_state_dict(net.state_dict())
    with torch.no_grad():
        cloned.lin.weight.copy_(cloned.lin.weight + mutation_power * (torch.rand_like(cloned.lin.weight) *2-1))
    return cloned

In [69]:
def calculate_fitness(net):
    inputs = torch.tensor([[0.,0.],[0.,1.],[1.,0.],[1.,1.]])
    targets = torch.tensor([[0.],[1.],[1.],[0.]])
    outputs = net(inputs)
    return nn.MSELoss()(outputs, targets).item()

In [None]:
max_generation = 500 
agent_count = 100
elite_size = int(agent_count * 0.2)
temperature = 10
model = XORNet()
#test with gradient descend worst case (model already at local minima)
with torch.no_grad():
    model.lin.weight.copy_(torch.tensor([0.5,0.5]))

for generation in range(max_generation):
    agents = []
    fitnesses = []

    for _ in range(agent_count):
        mutated = mutate(model)
        loss = calculate_fitness(mutated)
        fitness = 1 / (loss + 1e-8)
        agents.append(mutated)
        fitnesses.append(fitness)

    # Select elites
    elite_indices = sorted(range(agent_count), key=lambda i: fitnesses[i], reverse=True)[:elite_size]
    elites = [agents[i] for i in elite_indices]

    # Boltzmann selection for next model
    probs = torch.tensor(fitnesses) / temperature
    probs = torch.softmax(probs, dim=0)
    selected_idx = torch.multinomial(probs, 1).item()
    model = agents[selected_idx]

    print(f"Generation {generation}: Best fitness = {fitnesses[elite_indices[0]]:.4f}") #more fitness the better

Generation 0: Best fitness = 2.9633
Generation 1: Best fitness = 3.1356
Generation 2: Best fitness = 2.9938
Generation 3: Best fitness = 2.8532
Generation 4: Best fitness = 3.0691
Generation 5: Best fitness = 2.9571
Generation 6: Best fitness = 2.9087
Generation 7: Best fitness = 2.8715
Generation 8: Best fitness = 2.9271
Generation 9: Best fitness = 2.9871
Generation 10: Best fitness = 2.9488
Generation 11: Best fitness = 2.8602
Generation 12: Best fitness = 2.8822
Generation 13: Best fitness = 2.9796
Generation 14: Best fitness = 2.9838
Generation 15: Best fitness = 2.9665
Generation 16: Best fitness = 2.9717
Generation 17: Best fitness = 2.9984
Generation 18: Best fitness = 2.9996
Generation 19: Best fitness = 2.9900
Generation 20: Best fitness = 2.9802
Generation 21: Best fitness = 2.9946
Generation 22: Best fitness = 3.1516
Generation 23: Best fitness = 3.1402
Generation 24: Best fitness = 3.4188
Generation 25: Best fitness = 3.4732
Generation 26: Best fitness = 3.3048
Generation 

KeyboardInterrupt: 

In [87]:
model(data)

tensor([[0.0000],
        [1.0000],
        [1.0000],
        [0.0040]], grad_fn=<ClampBackward1>)