# XOR ANN

## Model

In [165]:
import torch.nn as nn

class XOR_Classifier(nn.Module):
    """Small feed-forward network: linear layer -> tanh -> linear layer.

    The value returned by ``forward`` is the raw output of the second linear
    layer; no activation is applied to it inside this module.
    """

    def __init__(self, num_inputs, num_hidden, num_outputs):
        """Create the layers of the network.

        Args:
            num_inputs: Number of input features of the first linear layer.
            num_hidden: Number of units between the two linear layers.
            num_outputs: Number of output features of the second linear layer.
        """
        super().__init__()
        self.linear1 = nn.Linear(in_features=num_inputs, out_features=num_hidden)
        self.activation = nn.Tanh()
        self.linear2 = nn.Linear(in_features=num_hidden, out_features=num_outputs)

    def forward(self, x):
        """Return the network output for the input ``x``."""
        hidden = self.activation(self.linear1(x))
        return self.linear2(hidden)

## Data

In [166]:
import torch
import torch.utils.data as data

class XOR_Dataset(data.Dataset):
    """In-memory dataset of noisy 2-D XOR points with binary labels."""

    def __init__(self, size, std=0.1):
        """
        Inputs:
            size - number of data points
            std  - standard deviation of noise
        """
        super().__init__()
        self.size = size
        self.std = std
        self.generate_cont_xor()

    def generate_cont_xor(self):
        """Sample points and labels and store them as ``self.data`` / ``self.label``."""
        # Each point starts on one of the four corners of the unit square.
        corners = torch.randint(low=0, high=2, size=(self.size, 2), dtype=torch.float32)
        # Labels come from the clean corners (before noise): 1 when exactly
        # one coordinate is 1, i.e. when the two coordinates differ.
        targets = (corners[:, 0] != corners[:, 1]).long()
        # Gaussian noise scaled by ``std`` is added in place.
        noise = torch.randn(corners.shape)
        corners += self.std * noise

        self.data = corners
        self.label = targets

    def __len__(self):
        """Return the number of data points."""
        return self.size

    def __getitem__(self, idx):
        """Return the ``(point, label)`` pair stored at ``idx``."""
        return self.data[idx], self.label[idx]
    

## Training

In [167]:
def train_model(model, optimizer, data_loader, loss_module, num_epochs=100):
    """Train ``model`` in place by iterating over ``data_loader`` for ``num_epochs`` epochs.

    Args:
        model: Module to train; it is switched to training mode.
        optimizer: Optimizer whose ``zero_grad``/``step`` are called per batch.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        loss_module: Callable computing the loss from the squeezed model
            output and the labels converted to float.
        num_epochs: Number of passes over ``data_loader``.
    """
    model.train()

    for _ in range(num_epochs):
        for batch_inputs, batch_labels in data_loader:
            # Clear gradients left over from the previous step.
            optimizer.zero_grad()

            # Model output is [Batch size, 1]; drop dim 1 to get [Batch size].
            outputs = model(batch_inputs).squeeze(dim=1)
            batch_loss = loss_module(outputs, batch_labels.float())

            batch_loss.backward()
            optimizer.step()

## Evaluation

In [168]:
import matplotlib.pyplot as plt
import numpy as np

def plot_decision_boundary(model, data_loader):
    """Plot the model output over a dense 2-D grid together with the data points.

    The model is switched to eval mode, evaluated on a grid (step 0.01) that
    covers the range of the data plus a margin of 0.1 on each side, and the
    raw model output is drawn as a filled contour plot. The data points are
    scattered on top, coloured by their label.

    Args:
        model: Module mapping a float tensor with two columns to one value
            per row.
        data_loader: Loader whose ``dataset`` exposes ``data`` and ``label``
            tensors; only the first two columns of ``data`` are plotted.
    """
    model.eval()

    data_inputs = data_loader.dataset.data.numpy()
    data_labels = data_loader.dataset.label.numpy()

    x_min, x_max = data_inputs[:, 0].min() - 0.1, data_inputs[:, 0].max() + 0.1
    y_min, y_max = data_inputs[:, 1].min() - 0.1, data_inputs[:, 1].max() + 0.1

    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01),
                         np.arange(y_min, y_max, 0.01))

    grid_points = torch.from_numpy(np.c_[xx.ravel(), yy.ravel()]).float()
    # Pure inference: no autograd graph is needed for the (large) grid.
    with torch.no_grad():
        Z = model(grid_points).reshape(xx.shape)

    plt.contourf(xx, yy, Z.numpy(), cmap=plt.cm.Spectral, alpha=0.8) # type: ignore
    plt.scatter(data_inputs[:, 0], data_inputs[:, 1], c=data_labels, s=40, cmap=plt.cm.Spectral) # type: ignore
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

    plt.show()

def eval_model(model, data_loader, use_plots):
    """Print the classification accuracy of ``model`` on ``data_loader``.

    The model output is squeezed along dim 1, passed through a sigmoid and
    thresholded at 0.5 to obtain 0/1 predictions, which are compared with the
    labels. If the loader yields no samples, a placeholder line is printed
    instead of dividing by zero and no plot is produced.

    Args:
        model: Module to evaluate; it is switched to eval mode.
        data_loader: Iterable yielding ``(inputs, labels)`` batches.
        use_plots: If true, additionally call ``plot_decision_boundary``.
    """
    model.eval() # Set model to eval mode
    num_correct, num_seen = 0, 0

    with torch.no_grad(): # Deactivate gradients for the following code
        for data_inputs, data_labels in data_loader:
            # Model output is [Batch size, 1]; drop dim 1 to get [Batch size]
            logits = model(data_inputs).squeeze(dim=1)
            probs = torch.sigmoid(logits) # Sigmoid to map predictions between 0 and 1
            pred_labels = (probs >= 0.5).long() # Binarize predictions to 0 and 1

            # Accumulate plain Python ints (correct = TP+TN, seen = TP+TN+FP+FN)
            # so the counters do not silently turn into tensors.
            num_correct += (pred_labels == data_labels).sum().item()
            num_seen += data_labels.shape[0]

    if num_seen == 0:
        # Nothing was evaluated: accuracy is undefined and there is nothing to plot.
        print("accuracy: n/a (no samples)")
        return

    print(f"accuracy: {100.0 * num_correct / num_seen:4.2f}%")
    if use_plots:
        plot_decision_boundary(model, data_loader)

## Adjustable ANN

In [170]:
class XOR:
    """Convenience wrapper that builds, trains and evaluates an ``XOR_Classifier``.

    ``train`` creates fresh training and test ``XOR_Dataset`` instances and
    stores them as ``self.dataset`` and ``self.test_dataset``; ``eval``
    reports the accuracy on both.
    """

    def __init__(self, num_hidden):
        """Create a classifier with 2 inputs, ``num_hidden`` hidden units and 1 output."""
        self.model = XOR_Classifier(num_inputs=2, num_hidden=num_hidden, num_outputs=1)

    def train(self, std, num_epochs, train_size=2500, test_size=1500,
              batch_size=128, lr=0.1):
        """Generate new datasets and train the model with SGD.

        Args:
            std: Standard deviation of the noise in both datasets.
            num_epochs: Number of passes over the training set.
            train_size: Number of training points.
            test_size: Number of test points.
            batch_size: Batch size of the (shuffled) training loader.
            lr: Learning rate of the SGD optimizer.
        """
        self.dataset = XOR_Dataset(size=train_size, std=std)
        self.test_dataset = XOR_Dataset(size=test_size, std=std)

        train_model(
            self.model,
            torch.optim.SGD(self.model.parameters(), lr=lr),
            data.DataLoader(self.dataset, batch_size=batch_size, shuffle=True),
            nn.BCEWithLogitsLoss(),
            num_epochs=num_epochs,
        )

    def eval(self, use_plots=False, batch_size=128):
        """Print the accuracy on the training and the test data.

        Args:
            use_plots: Forwarded to ``eval_model`` for both datasets.
            batch_size: Batch size of the evaluation loaders.

        Raises:
            RuntimeError: If ``train`` has not been called yet, so there are
                no datasets to evaluate on.
        """
        train_set = getattr(self, "dataset", None)
        test_set = getattr(self, "test_dataset", None)
        if train_set is None or test_set is None:
            raise RuntimeError(
                "XOR.eval() called before XOR.train(): no datasets to evaluate on"
            )

        print("Training data ", end="")
        eval_model(
            self.model,
            data.DataLoader(train_set, batch_size=batch_size, shuffle=False),
            use_plots,
        )

        print("Test data ", end="")
        eval_model(
            self.model,
            data.DataLoader(test_set, batch_size=batch_size, shuffle=False, drop_last=False),
            use_plots,
        )