In [17]:
%matplotlib inline
import matplotlib
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import *

In [145]:
"""
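Even the simple XOR problem is an optimization over 8 dimensions (the 8 weights of the two 2x2 linear layers, not counting their biases):
- This model does not always learn
- The outcome depends heavily on the initialization
- Mini-batching does not help here: there are only 4 inputs, so a batch of 4 is the whole dataset and the gradient is exact rather than a stochastic estimate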
"""

class XORModel(nn.Module):
    """Two-layer perceptron (2 -> 2 -> 2) producing one score per XOR class."""

    def __init__(self):
        super().__init__()
        layers = [nn.Linear(2, 2), nn.ReLU(), nn.Linear(2, 2)]
        self.model = nn.Sequential(*layers)
        self.softmax = nn.Softmax(dim=-1)

    def init_weights(self):
        """Re-draw the weight matrix of every linear layer with Xavier-normal values.

        Only ``weight`` is touched; biases keep the values ``nn.Linear`` gave them.
        """
        linear_layers = [m for m in self.model.modules() if isinstance(m, nn.Linear)]
        for layer in linear_layers:
            nn.init.xavier_normal_(layer.weight)

    def forward(self, x, with_softmax=True):
        """Return class probabilities, or the raw logits when ``with_softmax`` is False.

        The input is cast to float first, so integer tensors are accepted.
        """
        logits = self.model(x.float())
        return self.softmax(logits) if with_softmax else logits

def train(xs, ys, epochs=1000, lr=1e-1, batch_size=4, log_every=100):
    """Train a freshly initialized ``XORModel`` as a two-class classifier.

    Args:
        xs: input tensor, one sample per row (the model casts it to float).
        ys: tensor of integer class indices, one per row of ``xs``.
        epochs: number of full passes over the data.
        lr: learning rate handed to Adam.
        batch_size: mini-batch size of the shuffled ``DataLoader``.
        log_every: print the loss summed over an epoch every this many
            epochs; ``0`` or ``None`` disables the printing.

    Returns:
        The trained ``XORModel``.
    """
    data_loader = DataLoader(TensorDataset(xs, ys), batch_size=batch_size, shuffle=True)

    model = XORModel()
    model.init_weights()

    # CrossEntropyLoss applies log-softmax itself, so it is fed raw logits.
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        cumulative_loss = 0.
        for inputs, expected in data_loader:
            optimizer.zero_grad()
            logits = model(inputs, with_softmax=False)
            loss = criterion(logits, expected)
            loss.backward()
            optimizer.step()
            cumulative_loss += loss.item()
        if log_every and epoch % log_every == 0:
            print(cumulative_loss)
    return model

# XOR truth table: every row of xs is an input pair, ys holds the class index
# (0 or 1) of its XOR. torch.tensor creates plain data tensors that do not
# require gradients.
xs = torch.tensor(
    [[0, 0], [1, 0], [0, 1], [1, 1]],
    dtype=torch.long,
)
ys = torch.tensor([0, 1, 1, 0], dtype=torch.long)
model = train(xs, ys)

0.7094878554344177
0.003522902727127075
0.0013992488384246826
0.0007776021957397461
0.0005016326904296875
0.00035318732261657715
0.0002639591693878174
0.0002047419548034668
0.00016415119171142578
0.00013449788093566895


In [149]:
def classify(model, xs):
    """Predict a class index for every input row.

    Args:
        model: callable accepting ``(inputs, with_softmax=True)`` and returning
            one score per class along the last dimension.
        xs: nested list or tensor of inputs; converted to a long tensor.

    Returns:
        Long tensor holding the argmax over the last dimension of the scores.
    """
    # as_tensor accepts nested lists and existing tensors alike, whereas
    # torch.tensor copy-constructs (and warns) when handed a tensor.
    inputs = torch.as_tensor(xs, dtype=torch.long)
    # Pure inference: there is no need to record an autograd graph.
    with torch.no_grad():
        scores = model(inputs, with_softmax=True)
    return torch.argmax(scores, dim=-1)

# Evaluate the trained network on the four XOR inputs (targets: 0, 1, 1, 0).
xor_inputs = [[0, 0], [1, 0], [0, 1], [1, 1]]
classify(model, xor_inputs)

tensor([0, 1, 1, 0])

In [176]:
"""
Reducing the number of dimensions by using a single sigmoid output trained with binary cross-entropy (nn.BCEWithLogitsLoss on the raw logit) instead of a two-class softmax
"""

class XORModel(nn.Module):
    """Two-layer perceptron (2 -> 2 -> 1) producing a single XOR score per sample."""

    def __init__(self):
        super().__init__()
        self.model = nn.Sequential(nn.Linear(2, 2), nn.ReLU(), nn.Linear(2, 1))
        self.sigmoid = nn.Sigmoid()

    def init_weights(self):
        """Re-initialize the weight matrix of every linear layer with Xavier-normal values.

        Only ``weight`` is touched; biases keep the values ``nn.Linear`` gave them.
        """
        for module in self.model.modules():
            if isinstance(module, nn.Linear):
                torch.nn.init.xavier_normal_(module.weight)

    def forward(self, x, with_sigmoid=True):
        """Score a batch of inputs.

        Args:
            x: tensor whose last dimension has size 2; it is cast to float.
            with_sigmoid: when True return probabilities, otherwise the raw
                logits (the form ``nn.BCEWithLogitsLoss`` expects).

        Returns:
            Tensor with the trailing size-1 output axis removed, i.e. one value
            per input row.
        """
        x = x.float()
        # The last layer yields shape (..., 1); drop that axis so the output has
        # the same shape as one-value-per-sample targets, which
        # nn.BCEWithLogitsLoss requires.
        x = self.model(x).squeeze(-1)
        if with_sigmoid:
            x = self.sigmoid(x)
        return x

def train(xs, ys, epochs=1000, lr=1e-1, batch_size=4, log_every=100):
    """Train a freshly initialized ``XORModel`` as a binary classifier.

    Args:
        xs: input tensor, one sample per row (the model casts it to float).
        ys: tensor of 0/1 targets, one per row of ``xs``.
        epochs: number of full passes over the data.
        lr: learning rate handed to Adam.
        batch_size: mini-batch size of the shuffled ``DataLoader``.
        log_every: print the loss summed over an epoch every this many
            epochs; ``0`` or ``None`` disables the printing.

    Returns:
        The trained ``XORModel``.
    """
    data_loader = DataLoader(TensorDataset(xs, ys), batch_size=batch_size, shuffle=True)

    model = XORModel()
    model.init_weights()

    criterion = nn.BCEWithLogitsLoss() # Numerically stable version of nn.Sigmoid() + nn.BCELoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        cumulative_loss = 0.
        for inputs, expected in data_loader:
            optimizer.zero_grad()
            # BCEWithLogitsLoss needs logits and targets of identical shape and
            # floating-point targets, so align both before computing the loss.
            logits = model(inputs, with_sigmoid=False).reshape(expected.shape)
            loss = criterion(logits, expected.to(logits.dtype))
            loss.backward()
            optimizer.step()
            cumulative_loss += loss.item()
        if log_every and epoch % log_every == 0:
            print(cumulative_loss)
    return model

# XOR truth table: every row of xs is an input pair, ys holds its XOR as a float
# because the binary cross-entropy loss takes floating-point targets.
# torch.tensor creates plain data tensors that do not require gradients.
xs = torch.tensor(
    [[0, 0], [1, 0], [0, 1], [1, 1]],
    dtype=torch.long,
)
ys = torch.tensor([0, 1, 1, 0], dtype=torch.float)
model2 = train(xs, ys)

0.7019362449645996
0.005526186432689428
0.00216383533552289
0.0011967032914981246
0.0007712696678936481
0.0005452843615785241
0.00040775196976028383
0.000318303209496662
0.00025565517717041075
0.0002100435522152111


In [177]:
def classify(model, xs):
    """Threshold the model's outputs at 0.5, one decision per input row.

    Args:
        model: callable mapping a long tensor of inputs to one value per row
            (shape ``(N,)`` or ``(N, 1)``).
        xs: nested list or tensor of inputs; converted to a long tensor.

    Returns:
        List of Python bools, True where the model output is >= 0.5.
    """
    # as_tensor accepts nested lists and existing tensors alike, whereas
    # torch.tensor copy-constructs (and warns) when handed a tensor.
    inputs = torch.as_tensor(xs, dtype=torch.long)
    # Pure inference: there is no need to record an autograd graph.
    with torch.no_grad():
        outputs = model(inputs)
    # Vectorized threshold; flatten so (N, 1) and (N,) outputs both give N bools.
    return (outputs >= 0.5).reshape(-1).tolist()

# Evaluate the trained network on the four XOR inputs (targets: 0, 1, 1, 0).
xor_inputs = [[0, 0], [1, 0], [0, 1], [1, 1]]
classify(model2, xor_inputs)

[False, True, True, False]