# Project Pokrovskiy Sviatoslav

## Paper: BinaryConnect: Training Deep Neural Networks with binary weights during propagations

The main point of the selected article that all my privious expirience using NN was finished at the moment of training then my computer could not make necessary calculations. I keen on studing varios way to using the deep leaning metods on not such power devices.
In this part of project i try to understand the approach of BinaryConnect and use it in fasion MNIST dataset.
The authors use library PyLearn2 wich doesn't work on python3 and higher. It was a great problem for me, but i use their code for understading using of BinaryConnect.
Here i use pytorch.

### Customization the layers and conv2d

In [1]:
import torch
from torch.nn import Module, Conv2d, Linear
from torch.nn.functional import linear, conv2d

def Binarize(tensor,quant_mode='det'):
    if quant_mode=='det':
        return tensor.sign()
    if quant_mode=='bin':
        return (tensor>=0).type(type(tensor))*2-1
    else:
        return tensor.add_(1).div_(2).add_(torch.rand(tensor.size()).add(-0.5)).clamp_(0,1).round().mul_(2).add_(-1)


class BNNLinear(Linear):

    def __init__(self, *kargs, **kwargs):
        super(BNNLinear, self).__init__(*kargs, **kwargs)
        self.register_buffer('weight_org', self.weight.data.clone())

    def forward(self, input):

        if (input.size(1) != 784) and (input.size(1) != 3072):
            input.data=Binarize(input.data)
            
        self.weight.data=Binarize(self.weight_org)
        out = linear(input, self.weight)

        if not self.bias is None:
            self.bias.org=self.bias.data.clone()
            out += self.bias.view(1, -1).expand_as(out)

        return out
    

class BNNConv2d(Conv2d):

    def __init__(self, *kargs, **kwargs):
        super(BNNConv2d, self).__init__(*kargs, **kwargs)
        self.register_buffer('weight_org', self.weight.data.clone())

    def forward(self, input):
        if input.size(1) != 3:
            input.data = Binarize(input.data)
        
        self.weight.data=Binarize(self.weight_org)
        

        out = conv2d(input, self.weight, None, self.stride,
                                   self.padding, self.dilation, self.groups)

        if not self.bias is None:
            self.bias.org=self.bias.data.clone()
            out += self.bias.view(1, -1, 1, 1).expand_as(out)

        return out
    

### Customization the model of BNN

In [60]:
import torch.nn as nn

class BNNCaffenet(nn.Module):

    def __init__(self, num_classes=10):
        super(BNNCaffenet, self).__init__()
 
        self.features = nn.Sequential(
                
                BNNConv2d(1, 96, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(96),
                nn.Hardtanh(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True),
                
                BNNConv2d(96, 192, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(192),
                nn.Hardtanh(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True),
                
                BNNConv2d(192, 288, kernel_size=3, stride=1, padding=1, bias=False),
                nn.BatchNorm2d(288),
                nn.Hardtanh(inplace=True),
                nn.MaxPool2d(kernel_size=2, stride=2, padding=0, ceil_mode=True),
                
                nn.Flatten(),
                nn.BatchNorm1d(4608),
                nn.Hardtanh(inplace=True),
                BNNLinear(4608, num_classes),
                nn.BatchNorm1d(num_classes, affine=False),
                nn.LogSoftmax(dim=1),
        )

    def forward(self, x):
        return self.features(x)


    def init_w(self):
        # weight initialization
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)
        return


def bnn_caffenet(num_classes=10):
    return BNNCaffenet(num_classes)


### Customization the classifier of the model and saving the best model

In [3]:
import os
import numpy as np
from torch import save, no_grad
from tqdm import tqdm
import shutil


class BnnClassifier():
    def __init__(self, model, train_loader=None, test_loader=None, device=None):
        super().__init__()
        self.model = model
        self.train_loader = train_loader
        self.test_loader = test_loader
        self.device = device


    @staticmethod
    def save_checkpoint(state, is_best, checkpoint):
        head, tail = os.path.split(checkpoint)
        if not os.path.exists(head):
            os.makedirs(head)

        filename = os.path.join(head, '{0}_checkpoint.pth.tar'.format(tail))
        save(state, filename)
        if is_best:
            shutil.copyfile(filename, os.path.join(head,
                '{0}_best.pth.tar'.format(tail)))

        return

    def test(self, criterion):
        self.model.eval()
        top1 = 0
        test_loss = 0.

        with no_grad():
            for data, target in tqdm(self.test_loader):
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                test_loss += criterion(output, target).item()
                pred = output.argmax(dim=1, keepdim=True)      
                top1 += pred.eq(target.view_as(pred)).sum().item()

        top1_acc = 100. * top1 / len(self.test_loader.sampler)

        return top1_acc


    def top1_accuracy(self):
        return top1_accuracy(self.model, self.test_loader, self.device)


    def train_step(self, criterion, optimizer):
        losses = []
        for data, target in tqdm(self.train_loader,
                total=len(self.train_loader)):
            data, target = data.to(self.device), target.to(self.device)
            output = self.model(data)
            loss = criterion(output, target)
            losses.append(loss.item())
            optimizer.zero_grad()
            loss.backward()
            for p in self.model.modules():
                if hasattr(p, 'weight_org'):
                    p.weight.data.copy_(p.weight_org)
            optimizer.step()
            for p in self.model.modules():
                if hasattr(p, 'weight_org'):
                    p.weight_org.data.copy_(p.weight.data.clamp_(-1,1))
        return losses

    def train(self, criterion, optimizer, epochs, scheduler,
            checkpoint=None):

        if checkpoint is None:
            raise ValueError('Specify a valid checkpoint')

        
        best_accuracy = 0.

        losses = []
        accuracies = []



        for epoch in range(1, epochs+1):
            self.model.train()
            epoch_losses = self.train_step(criterion, optimizer)
            losses += epoch_losses
            epoch_losses = np.array(epoch_losses)
            lr = optimizer.param_groups[0]['lr']  
            test_accuracy = self.test(criterion)
            accuracies.append(test_accuracy)
            if scheduler:     
                scheduler.step()
            is_best = test_accuracy > best_accuracy
            if is_best:
                best_accuracy = test_accuracy
            
            print('Train Epoch {0}\t Loss: {1:.6f}\t Test Accuracy {2:.3f} \t lr: {3:.4f}'
                    .format(epoch, epoch_losses.mean(), test_accuracy, lr))
            print('Best accuracy: {:.3f} '.format(best_accuracy))

            self.save_checkpoint({
                'epoch': epoch+1,
                'state_dict': self.model.state_dict(),
                'best_accuracy': best_accuracy,
                'optimizer': optimizer.state_dict(),
                'criterion': criterion,
                }, is_best, checkpoint)

        return

### Configurable parameters

In [64]:
# LR 
lr = .001
print("LR = "+str(lr))

# Steps
steps = [80,200]
print("Steps = "+str(steps))

# Gamma
gamma = 0.1
print("Gamma = "+str(gamma))

# Num_epochs
epochs = 15
print("num_epochs = "+str(epochs))

# Checkpoint
checkpoint = "results/bnn_caffenet_fasion_MNIST"
print("Checkpoint = "+str(checkpoint))
 

LR = 0.001
Steps = [80, 200]
Gamma = 0.1
num_epochs = 15
Checkpoint = results/bnn_caffenet_fasion_MNIST


### Realization of the model

In [65]:
import torch
import torchvision
import torchvision.transforms as transforms
import importlib


device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_set = torchvision.datasets.FashionMNIST("./data", download=True, transform=
                                                transforms.Compose([transforms.ToTensor()]))
test_set = torchvision.datasets.FashionMNIST("./data", download=True, train=False, transform=
                                               transforms.Compose([transforms.ToTensor()]))

train_loader = torch.utils.data.DataLoader(train_set, 
                                           batch_size=100)
test_loader = torch.utils.data.DataLoader(test_set,
                                          batch_size=100)

model = BNNCaffenet()
model.to(device)


classification = BnnClassifier(model, train_loader, test_loader, device)

criterion = torch.nn.CrossEntropyLoss()
criterion.to(device)

if hasattr(model, 'init_w'):
    model.init_w()


optimizer = torch.optim.SGD(model.parameters(), lr=lr,momentum=0.9, weight_decay=1e-5)
print(optimizer)

scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, steps, gamma=gamma)

classification.train(criterion, optimizer, epochs, scheduler, checkpoint)

download ok
data ok
SGD (
Parameter Group 0
    dampening: 0
    differentiable: False
    foreach: None
    lr: 0.001
    maximize: False
    momentum: 0.9
    nesterov: False
    weight_decay: 1e-05
)


100%|█████████████████████████████████████████| 600/600 [05:51<00:00,  1.71it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.34it/s]


Train Epoch 1	 Loss: 1.361141	 Test Accuracy 67.190 	 lr: 0.0010
Best accuracy: 67.190 


100%|█████████████████████████████████████████| 600/600 [05:28<00:00,  1.83it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.59it/s]


Train Epoch 2	 Loss: 1.084982	 Test Accuracy 71.770 	 lr: 0.0010
Best accuracy: 71.770 


100%|█████████████████████████████████████████| 600/600 [05:48<00:00,  1.72it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.69it/s]


Train Epoch 3	 Loss: 1.015165	 Test Accuracy 73.440 	 lr: 0.0010
Best accuracy: 73.440 


100%|█████████████████████████████████████████| 600/600 [05:59<00:00,  1.67it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.64it/s]


Train Epoch 4	 Loss: 0.972730	 Test Accuracy 74.250 	 lr: 0.0010
Best accuracy: 74.250 


100%|█████████████████████████████████████████| 600/600 [05:44<00:00,  1.74it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.30it/s]


Train Epoch 5	 Loss: 0.945572	 Test Accuracy 75.370 	 lr: 0.0010
Best accuracy: 75.370 


100%|█████████████████████████████████████████| 600/600 [05:43<00:00,  1.75it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.43it/s]


Train Epoch 6	 Loss: 0.923917	 Test Accuracy 76.900 	 lr: 0.0010
Best accuracy: 76.900 


100%|█████████████████████████████████████████| 600/600 [05:54<00:00,  1.69it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.59it/s]


Train Epoch 7	 Loss: 0.902923	 Test Accuracy 77.060 	 lr: 0.0010
Best accuracy: 77.060 


100%|█████████████████████████████████████████| 600/600 [05:44<00:00,  1.74it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.99it/s]


Train Epoch 8	 Loss: 0.887622	 Test Accuracy 78.170 	 lr: 0.0010
Best accuracy: 78.170 


100%|█████████████████████████████████████████| 600/600 [05:51<00:00,  1.71it/s]
100%|█████████████████████████████████████████| 100/100 [00:08<00:00, 11.12it/s]


Train Epoch 9	 Loss: 0.876877	 Test Accuracy 77.490 	 lr: 0.0010
Best accuracy: 78.170 


100%|█████████████████████████████████████████| 600/600 [05:41<00:00,  1.76it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.48it/s]


Train Epoch 10	 Loss: 0.863229	 Test Accuracy 78.700 	 lr: 0.0010
Best accuracy: 78.700 


100%|█████████████████████████████████████████| 600/600 [05:46<00:00,  1.73it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 11.06it/s]


Train Epoch 11	 Loss: 0.851570	 Test Accuracy 79.330 	 lr: 0.0010
Best accuracy: 79.330 


100%|█████████████████████████████████████████| 600/600 [05:47<00:00,  1.73it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.92it/s]


Train Epoch 12	 Loss: 0.843434	 Test Accuracy 79.350 	 lr: 0.0010
Best accuracy: 79.350 


100%|█████████████████████████████████████████| 600/600 [05:46<00:00,  1.73it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 11.02it/s]


Train Epoch 13	 Loss: 0.837319	 Test Accuracy 80.100 	 lr: 0.0010
Best accuracy: 80.100 


100%|█████████████████████████████████████████| 600/600 [05:47<00:00,  1.73it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 10.85it/s]


Train Epoch 14	 Loss: 0.828038	 Test Accuracy 80.350 	 lr: 0.0010
Best accuracy: 80.350 


100%|█████████████████████████████████████████| 600/600 [05:57<00:00,  1.68it/s]
100%|█████████████████████████████████████████| 100/100 [00:09<00:00, 11.09it/s]


Train Epoch 15	 Loss: 0.822933	 Test Accuracy 81.040 	 lr: 0.0010
Best accuracy: 81.040 


In the future i plan to finish visaulisation of the result for presentation of the project and using the model with more number of epochs in order to find the optimal model settings.