### Binary Complete Neural Network
This notebook is made to test the conception of complete binary model and compare it with fully-precision model on low-dimensional data

Importing all needed libraries

In [1]:
import numpy as np
import pandas as pd
import torch

from torch import nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from tqdm import tqdm
from torchsummary import summary

from IRNet_complete.models import ZhegalkinLinearModel

Specifying device and switching multiprocessing method from 'fork' to 'spawn' as it works better on UNIX-based systems

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
torch.multiprocessing.set_start_method('spawn', force=True)

Loading Iris dataset of flower classification

In [3]:
data = load_iris(as_frame=True)
dataset = data.frame
target_names = data.target_names
dataset.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [4]:
target_names

array(['setosa', 'versicolor', 'virginica'], dtype='<U10')

Splitting dataset into train and test sets

In [5]:
targets = dataset['target']
features = dataset.drop(['target'], axis=1)

In [6]:
train_x, test_x, train_y, test_y = train_test_split(features.to_numpy(), targets.to_numpy(), test_size=0.2)

Specifying and training StandardScaler and applying it to data

In [7]:
std_scaler = StandardScaler()
prep_train_x = std_scaler.fit_transform(train_x)
prep_test_x = std_scaler.transform(test_x)

Specifying dataset

In [8]:
class DatasetIris(Dataset):
    def __init__(self, features, targets):
        self.features = features
        self.targets = targets

    def __getitem__(self, idx):
        return self.features[idx], self.targets[idx]

    def __len__(self):
        return len(self.features)

In [9]:
train_dataset = DatasetIris(prep_train_x, train_y)
test_dataset = DatasetIris(prep_test_x, test_y)

Specifying dataloader

In [10]:
BATCH_SIZE = 8

def collate_fn(batch):
    batch_features = []
    batch_targets = []
    for row in batch:
        batch_features.append(row[0])
        batch_targets.append(row[1])
    batch_features = torch.Tensor(batch_features)
    batch_targets = torch.LongTensor(batch_targets)
    return batch_features, batch_targets

train_dataloader = DataLoader(train_dataset, collate_fn=collate_fn, shuffle=True, batch_size=BATCH_SIZE)
test_dataloader = DataLoader(test_dataset, collate_fn=collate_fn, shuffle=True, batch_size=BATCH_SIZE)

In [11]:
loss = torch.nn.CrossEntropyLoss()

In [12]:
history = {
    'loss' : [float('inf')],
}

def train(model, dataloader):
    optimizer = torch.optim.Adam(model.parameters())
    model.train()
    accuracy = 0.0
    pbar = tqdm(dataloader, desc=f"loss: {history['loss'][-1]:.2f}, accuracy: {accuracy:.2f}")
    for features_batch, targets_batch in pbar:
        features_batch, targets_batch = features_batch.to(device, non_blocking=True), targets_batch.to(device, non_blocking=True)

        optimizer.zero_grad()
        pred = model(features_batch)
        cur_loss = loss(pred, targets_batch)
        cur_loss.backward()
        optimizer.step()

        with torch.no_grad():
            pred_softmax = nn.Softmax()(pred)
            model_answer = torch.argmax(pred_softmax, axis=-1)
        accuracy = ((model_answer-targets_batch) == 0).sum()/len(targets_batch)

        history['loss'].append(cur_loss.item())
        pbar.set_description(f"loss: {history['loss'][-1]:.2f}, accuracy: {accuracy:.2f}")

def test(model, dataloader):
    model.eval()
    cur_loss = float('inf')
    accuracy = 0.0
    pbar = tqdm(dataloader, desc=f"test_loss: {cur_loss:.2f}, test_acc : {accuracy:.2f}")
    for features_batch, targets_batch in pbar:
        features_batch, targets_batch = features_batch.to(device, non_blocking=True), targets_batch.to(device, non_blocking=True)

        with torch.no_grad():
            pred = model(features_batch)
            cur_loss = loss(pred, targets_batch)
            pred_softmax = nn.Softmax()(pred)
            model_answer = torch.argmax(pred_softmax, axis=-1)

        accuracy = ((model_answer-targets_batch) == 0).sum()/len(targets_batch)

        pbar.set_description(f"test_loss: {cur_loss:.2f}, test_acc : {accuracy:.2f}")


Specifying, training and evaluating fully-precision model

In [13]:
baseline_model = nn.Sequential(nn.Linear(4, 16), nn.Linear(16, 16), nn.Linear(16, 16), nn.Linear(16, 3)).to(device)

In [14]:
EPOCHS = 10

for i in range(EPOCHS):
    train(baseline_model, train_dataloader)

  batch_features = torch.Tensor(batch_features)
  return self._call_impl(*args, **kwargs)
loss: 0.93, accuracy: 0.62: 100%|██████████| 15/15 [00:01<00:00,  9.18it/s]
loss: 0.74, accuracy: 0.88: 100%|██████████| 15/15 [00:00<00:00, 984.16it/s]
loss: 0.83, accuracy: 0.75: 100%|██████████| 15/15 [00:00<00:00, 1091.57it/s]
loss: 0.40, accuracy: 0.88: 100%|██████████| 15/15 [00:00<00:00, 1072.93it/s]
loss: 0.62, accuracy: 0.75: 100%|██████████| 15/15 [00:00<00:00, 1184.25it/s]
loss: 0.44, accuracy: 0.75: 100%|██████████| 15/15 [00:00<00:00, 1211.62it/s]
loss: 0.25, accuracy: 1.00: 100%|██████████| 15/15 [00:00<00:00, 1151.42it/s]
loss: 0.35, accuracy: 0.75: 100%|██████████| 15/15 [00:00<00:00, 1089.11it/s]
loss: 0.49, accuracy: 0.75: 100%|██████████| 15/15 [00:00<00:00, 1109.39it/s]
loss: 0.32, accuracy: 0.88: 100%|██████████| 15/15 [00:00<00:00, 945.22it/s]


In [15]:
test(baseline_model, test_dataloader)

test_loss: 0.39, test_acc : 0.83: 100%|██████████| 4/4 [00:00<00:00, 1645.79it/s]


In [16]:
summary(baseline_model, (1, 4))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                [-1, 1, 16]              80
            Linear-2                [-1, 1, 16]             272
            Linear-3                [-1, 1, 16]             272
            Linear-4                 [-1, 1, 3]              51
Total params: 675
Trainable params: 675
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


Specifying, training and evaluation Zhegalkin linear binary model

In [17]:
zhegalkin_model = ZhegalkinLinearModel(4, 16, 3).to(device)

In [18]:
for i in range(EPOCHS):
    train(zhegalkin_model, train_dataloader)

loss: 0.32, accuracy: 0.00:   0%|          | 0/15 [00:00<?, ?it/s]

tensor([[-1.,  1.,  1.,  ..., -1.,  1.,  1.],
        [-1.,  1.,  1.,  ...,  1.,  1., -1.],
        [ 1., -1.,  1.,  ...,  1.,  1.,  1.],
        ...,
        [ 1.,  1., -1.,  ..., -1., -1.,  1.],
        [ 1., -1.,  1.,  ...,  1.,  1., -1.],
        [ 1.,  1.,  1.,  ..., -1., -1., -1.]], device='cuda:0',
       grad_fn=<MulBackward0>)


loss: 0.32, accuracy: 0.00:   0%|          | 0/15 [00:01<?, ?it/s]


RuntimeError: mat1 and mat2 must have the same dtype, but got Bool and Float

In [19]:
test(zhegalkin_model, test_dataloader)

test_loss: 0.65, test_acc : 0.83: 100%|██████████| 4/4 [00:00<00:00, 332.55it/s]


In [22]:
summary(zhegalkin_model, (1, 4))

RuntimeError: mat1 and mat2 shapes cannot be multiplied (2x16 and 136x16)

This proofs that Zhegalkin-based linear model can achieve high accuracy on low dimensional dataset and predict non-linear relations