# In [0]:
# -*- coding: utf-8 -*-
"""
Created on Sat Apr 25 19:12:42 2020

@author: Jana
"""
import torch
import torch.nn as nn
from torch.nn import init
import math
import matplotlib.pyplot as plt
# Use the first CUDA device when one is available and fall back to the CPU
# otherwise, so the module stays usable on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Tensor type that inputs are cast to before being fed to a model.
dtype = torch.cuda.FloatTensor if device.type == "cuda" else torch.FloatTensor
def dist_matplt(tensor, tup_shape, title):
    """Show a 50-bin histogram of the first row of ``tensor``.

    The tensor is reshaped to ``(-1, prod(tup_shape))`` and only row 0 of
    that 2-D layout is plotted.

    Args:
        tensor: Tensor whose values are plotted. It may live on any device
            and may require grad; it is detached and copied to the CPU.
        tup_shape: Iterable of ints; their product is the row length.
        title: Title given to the matplotlib figure.
    """
    plt.title(title)
    row_len = 1
    for dim in tup_shape:
        row_len *= dim
    # .cpu() is required before .numpy() for tensors stored on a GPU, and
    # reshape (unlike view) also accepts non-contiguous tensors.
    rows = tensor.detach().cpu().reshape(-1, row_len).numpy()
    plt.hist(rows[0], bins=50)
    plt.show()
class AverageMeter(object):
    """Keep the most recent value of a metric together with its running mean.

    ``name`` labels the metric and ``fmt`` is the format spec (for example
    ``':.4e'``) applied to both the current value and the average in
    ``__str__``.
    """

    def __init__(self, name, fmt=':f'):
        self.name, self.fmt = name, fmt
        self.reset()

    def reset(self):
        """Set the current value, average, sum and count back to zero."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as an observation that counts ``n`` times."""
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count

    def __str__(self):
        # Build e.g. "{name} {val:.4e} ({avg:.4e})" and fill it from the
        # instance attributes.
        template = '{{name}} {{val{spec}}} ({{avg{spec}}})'.format(spec=self.fmt)
        return template.format(**vars(self))


class ProgressMeter(object):
    """Print one tab-separated progress line built from a list of meters.

    ``num_batches`` fixes the width and the total shown in the
    ``[current/total]`` counter, ``meters`` are rendered with ``str`` and
    ``prefix`` is put in front of the counter.
    """

    def __init__(self, num_batches, meters, prefix=""):
        self.batch_fmtstr = self._get_batch_fmtstr(num_batches)
        self.meters = meters
        self.prefix = prefix

    def display(self, batch):
        """Print the prefix, the batch counter and every meter for ``batch``."""
        header = self.prefix + self.batch_fmtstr.format(batch)
        print('\t'.join([header] + [str(meter) for meter in self.meters]))

    def _get_batch_fmtstr(self, num_batches):
        # The counter is padded to as many characters as the total needs.
        width = len(str(num_batches // 1))
        counter = '{:%dd}' % width
        return '[{}/{}]'.format(counter, counter.format(num_batches))
def binarize(quant_mode='det'):
    """Build an autograd function that quantizes a tensor to -1/+1.

    Args:
        quant_mode: ``'det'`` applies ``input.sign()`` (so exact zeros stay
            zero). Any other value selects stochastic rounding: the input is
            mapped to ``(input + 1) / 2``, uniform noise from ``[-0.5, 0.5)``
            is added, and the result is clamped to ``[0, 1]``, rounded and
            mapped back to ``{-1, +1}``.

    Returns:
        A callable ``f(tensor) -> tensor``. Its backward pass forwards the
        incoming gradient where ``-1 < input < 1`` and zeroes it elsewhere.
    """

    class sign(torch.autograd.Function):
        @staticmethod
        def forward(ctx, input):
            ctx.save_for_backward(input)
            if quant_mode == 'det':
                return input.sign()
            # Work on fresh tensors only: the input is both the caller's data
            # (typically a weight) and the tensor saved for backward, so it
            # must not be modified in place. rand_like keeps the noise on the
            # same device and dtype as the input.
            noise = torch.rand_like(input).sub_(0.5)
            prob = input.add(1).div_(2).add_(noise).clamp_(0, 1)
            return prob.round_().mul_(2).sub_(1)

        @staticmethod
        def backward(ctx, grad_output):
            input, = ctx.saved_tensors
            saturated = input.ge(1) | input.le(-1)
            return grad_output.masked_fill(saturated, 0)

    # autograd Functions are used through the class; instantiating them is
    # deprecated.
    return sign.apply

class weight_quantize_fn(nn.Module):
    """Module wrapper around the function returned by ``binarize()``.

    The quantizer is created with ``binarize``'s default mode and applied
    unchanged to whatever tensor is passed to ``forward``.
    """

    def __init__(self):
        super().__init__()
        self.Binarize = binarize()

    def forward(self, x):
        """Return the quantized version of ``x``."""
        return self.Binarize(x)
def keep_elements_dict(vals, kwargs):
    """Delete, in place, every entry of ``kwargs`` whose key is not in ``vals``.

    Nothing is returned; the dictionary passed in is modified.
    """
    # Collect the keys first so the dict is not resized while iterating it.
    unwanted = [key for key in kwargs if key not in vals]
    for key in unwanted:
        del kwargs[key]
class BinConv2d(nn.Conv2d):
    """``nn.Conv2d`` whose weights, and optionally inputs, are quantized.

    Args:
        in_channels, out_channels, kernel_size: Forwarded to ``nn.Conv2d``.
        full_precision: When true the raw weights and inputs are used; when
            false both go through ``weight_quantize_fn`` before convolving.
        **kwargs: ``stride``, ``padding``, ``padding_mode``, ``dilation`` and
            ``groups`` are forwarded to ``nn.Conv2d``. ``binarize_input``
            (default ``True``) controls whether the input is quantized in
            the non-full-precision path. Every other keyword is dropped.

    The bias parameter created by ``nn.Conv2d`` is initialised but not
    applied: ``forward`` convolves with ``bias=None``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, full_precision=False, **kwargs):
        # Read our own option before the filter below removes it.
        binarize_input = kwargs.get('binarize_input', True)
        keep_elements_dict(
            ['stride', 'padding', 'kernel_size', 'padding_mode', 'dilation', 'groups'],
            kwargs)
        super(BinConv2d, self).__init__(in_channels, out_channels, kernel_size, **kwargs)
        self.full_precision = full_precision
        self.binarize_input = binarize_input
        # The base constructor has already run reset_parameters once; the
        # explicit call is kept so the random-number consumption is unchanged.
        self.reset_parameters()
        self.Binarize = weight_quantize_fn()

    def forward(self, input):
        """Convolve ``input`` with the (possibly quantized) weights.

        The weights used and the result are also stored on the module as
        ``self.Modweight`` and ``self.out``.
        """
        if self.full_precision:
            self.Modweight = self.weight
        else:
            self.Modweight = self.Binarize(self.weight)
            if self.binarize_input:
                input = self.Binarize(input)
        padding = self.padding
        if self.padding_mode != 'zeros':
            # conv2d itself only zero-pads, so other modes are applied
            # explicitly. F.pad takes (left, right, top, bottom).
            pad_h, pad_w = padding
            input = nn.functional.pad(
                input, (pad_w, pad_w, pad_h, pad_h), mode=self.padding_mode)
            padding = 0
        # Honour the geometry the layer was configured with instead of
        # silently falling back to conv2d's defaults.
        self.out = nn.functional.conv2d(
            input, self.Modweight, None,
            self.stride, padding, self.dilation, self.groups)
        return self.out

    def reset_parameters(self):
        """Initialise the weights and, when present, the bias."""
        self.kaming_uniform()
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    # Weight initialisation helpers
    def kaming_uniform(self):
        """Kaiming-uniform initialisation with ``a = sqrt(5)``."""
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))

    def uniform(self):
        """Uniform initialisation on ``[0, 1]``."""
        init.uniform_(self.weight, a=0.0, b=1.0)

    def xavier_normal(self):
        """Xavier-normal initialisation."""
        nn.init.xavier_normal_(self.weight)

    def kaming_normal(self):
        """Kaiming-normal initialisation in ``fan_out`` mode."""
        nn.init.kaiming_normal_(self.weight, mode='fan_out')
            
class BinLinear(nn.Linear):
    """``nn.Linear`` whose weights, and optionally inputs, are quantized.

    Args:
        in_channels: Number of input features.
        out_channels: Number of output features.
        full_precision: When true the raw weights and inputs are used; when
            false both go through ``weight_quantize_fn``.
        **kwargs: Only ``binarize_input`` (default ``True``) is read; it
            controls whether the input is quantized in the
            non-full-precision path. Other keywords are ignored.

    The bias parameter created by ``nn.Linear`` is initialised but not
    applied: ``forward`` calls ``linear`` with ``bias=None``.
    """

    def __init__(self, in_channels, out_channels, full_precision=False, **kwargs):
        super(BinLinear, self).__init__(in_channels, out_channels)
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.full_precision = full_precision
        self.binarize_input = kwargs.get('binarize_input', True)
        # The base constructor has already run reset_parameters once; the
        # explicit call is kept so the random-number consumption is unchanged.
        self.reset_parameters()
        self.Binarize = weight_quantize_fn()

    def forward(self, input):
        """Apply the linear map with the (possibly quantized) weights.

        The weights used and the result are also stored on the module as
        ``self.Modweight`` and ``self.out``.
        """
        if self.full_precision:
            self.Modweight = self.weight
        else:
            self.Modweight = self.Binarize(self.weight)
            if self.binarize_input:
                input = self.Binarize(input)
        self.out = nn.functional.linear(input, self.Modweight, None)
        return self.out

    def reset_parameters(self):
        """Initialise the weights and, when present, the bias."""
        self.kaming_uniform()
        if self.bias is not None:
            fan_in, _ = init._calculate_fan_in_and_fan_out(self.weight)
            bound = 1 / math.sqrt(fan_in)
            init.uniform_(self.bias, -bound, bound)

    # Weight initialisation helpers
    def kaming_uniform(self):
        """Kaiming-uniform initialisation with ``a = sqrt(5)``."""
        init.kaiming_uniform_(self.weight, a=math.sqrt(5))

    def uniform(self):
        """Uniform initialisation on ``[0, 1]``."""
        init.uniform_(self.weight, a=0.0, b=1.0)

    def xavier_normal(self):
        """Xavier-normal initialisation."""
        nn.init.xavier_normal_(self.weight)

    def kaming_normal(self):
        """Kaiming-normal initialisation in ``fan_out`` mode."""
        nn.init.kaiming_normal_(self.weight, mode='fan_out')


class Net(nn.Module):
    """Four-layer fully connected classifier for 28x28 inputs.

    The input is flattened to 784 features, passed through three
    ``BinLinear`` + ``Hardtanh`` stages of width ``2048 * infl_ratio`` and a
    final ``BinLinear`` producing 10 outputs, followed by log-softmax.
    Every ``BinLinear`` is created with ``full_precision=True``.

    ``bn1``-``bn3`` and ``drop`` are registered on the module but are not
    applied in ``forward``.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.infl_ratio = 1
        hidden = 2048 * self.infl_ratio
        self.fc1 = BinLinear(784, hidden, True)
        self.htanh1 = nn.Hardtanh()
        self.bn1 = nn.BatchNorm1d(hidden)
        self.fc2 = BinLinear(hidden, hidden, True)
        self.htanh2 = nn.Hardtanh()
        self.bn2 = nn.BatchNorm1d(hidden)
        self.fc3 = BinLinear(hidden, hidden, True)
        self.htanh3 = nn.Hardtanh()
        self.bn3 = nn.BatchNorm1d(hidden)
        self.fc4 = BinLinear(hidden, 10, True)
        # The activations reaching this layer are always (batch, 10), so the
        # class dimension is 1. Passing it explicitly avoids the deprecated
        # implicit-dim behaviour.
        self.logsoftmax = nn.LogSoftmax(dim=1)
        self.drop = nn.Dropout(0.5)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 10)``."""
        x = x.view(-1, 28 * 28)
        x = self.htanh1(self.fc1(x))
        x = self.htanh2(self.fc2(x))
        x = self.htanh3(self.fc3(x))
        x = self.fc4(x)
        return self.logsoftmax(x)

class VGG_Cifar10(nn.Module):
    """VGG-style network built from ``BinConv2d`` and ``BinLinear`` layers.

    ``features`` stacks six unpadded 3x3 convolutions (128, 128, 256, 256,
    512, 512 channels, the first five scaled by ``infl_ratio``), each
    followed by batch norm and ReLU, with 2x2 max pooling after the second
    and fourth convolution. ``forward`` flattens the result to 512 values
    per sample, so the spatial size must have shrunk to 1x1 by then (this is
    the case for 32x32 inputs). ``classifier`` is three ``BinLinear`` layers
    ending in log-softmax. All layers are created with
    ``full_precision=True``.

    Args:
        num_classes: Size of the final linear layer.

    Attributes:
        regime: Epoch-indexed optimizer settings stored on the model; this
            class itself does not read them.
    """

    def __init__(self, num_classes=10):
        super(VGG_Cifar10, self).__init__()
        self.infl_ratio = 1
        self.features = nn.Sequential(
            BinConv2d(3, 128 * self.infl_ratio, kernel_size=3, full_precision=True),
            nn.BatchNorm2d(128 * self.infl_ratio),
            nn.ReLU(inplace=True),

            BinConv2d(128 * self.infl_ratio, 128 * self.infl_ratio, kernel_size=3, full_precision=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(128 * self.infl_ratio),
            nn.ReLU(inplace=True),

            BinConv2d(128 * self.infl_ratio, 256 * self.infl_ratio, kernel_size=3, full_precision=True),
            nn.BatchNorm2d(256 * self.infl_ratio),
            nn.ReLU(inplace=True),

            BinConv2d(256 * self.infl_ratio, 256 * self.infl_ratio, kernel_size=3, full_precision=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(256 * self.infl_ratio),
            nn.ReLU(inplace=True),

            BinConv2d(256 * self.infl_ratio, 512 * self.infl_ratio, kernel_size=3, full_precision=True),
            nn.BatchNorm2d(512 * self.infl_ratio),
            nn.ReLU(inplace=True),

            BinConv2d(512 * self.infl_ratio, 512, kernel_size=3, full_precision=True),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
        )
        self.classifier = nn.Sequential(
            BinLinear(512 * 1 * 1, 1024, True),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            BinLinear(1024, 1024, True),
            nn.BatchNorm1d(1024),
            nn.ReLU(inplace=True),
            BinLinear(1024, num_classes, True),  # full precision
            # The classifier always sees (batch, features) tensors, so the
            # class dimension is 1. Passing it explicitly avoids the
            # deprecated implicit-dim behaviour.
            nn.LogSoftmax(dim=1),
        )

        self.regime = {
            0: {'optimizer': 'Adam', 'betas': (0.9, 0.999), 'lr': 5e-3},
            40: {'lr': 1e-3},
            80: {'lr': 5e-4},
            100: {'lr': 1e-4},
            120: {'lr': 5e-5},
            140: {'lr': 1e-5}
        }

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, num_classes)``."""
        x = self.features(x)
        x = x.view(-1, 512 * 1 * 1)
        x = self.classifier(x)
        return x
#Accuracy
def accuracy(output, target, topk=(1,)):
    """Computes the accuracy over the k top predictions for the specified values of k.

    Args:
        output: Score tensor of shape ``(batch, classes)``.
        target: Tensor holding one class index per sample.
        topk: Values of ``k`` to evaluate; the largest must not exceed the
            number of classes.

    Returns:
        A list with one single-element tensor per entry of ``topk``, holding
        the percentage of samples whose target is among the ``k``
        highest-scoring classes.
    """
    with torch.no_grad():
        maxk = max(topk)
        batch_size = target.size(0)
        _, pred = output.topk(maxk, 1, True, True)
        pred = pred.t()
        correct = pred.eq(target.view(1, -1).expand_as(pred))
        res = []
        for k in topk:
            # ``correct`` can inherit the transposed memory layout of
            # ``pred``. reshape handles that, whereas view raises for k > 1.
            correct_k = correct[:k].reshape(-1).float().sum(0, keepdim=True)
            res.append(correct_k.mul_(100.0 / batch_size))
        return res
from collections import Counter

def no_of_params(weights):
    """Print how often each distinct value occurs in ``weights``.

    Args:
        weights: Tensor of any shape, on any device.

    Returns:
        The ``Counter`` that was printed, mapping each value to its number
        of occurrences.
    """
    # .cpu() makes this work for tensors stored on a GPU as well.
    counter = Counter(weights.detach().cpu().reshape(-1).tolist())
    print(counter)
    return counter
def train(train_loader, model, criterion, optimizer, epoch):
    """Train ``model`` for one epoch and log the epoch averages.

    Progress is printed every 100 batches. At the end the row
    ``[epoch, mean loss, mean top-1, mean top-5]`` is stored in the
    module-level ``trainframe`` under index ``epoch``.

    Args:
        train_loader: Iterable of ``(images, target)`` batches.
        model: Network to optimise; switched to training mode.
        criterion: Loss called as ``criterion(output, target)``.
        optimizer: Optimizer stepped once per batch.
        epoch: Epoch number used for logging and as the row index.
    """
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(train_loader),
        [losses, top1, top5],
        prefix="Epoch: [{}]".format(epoch))
    # switch to train mode
    model.train()
    for i, (images, target) in enumerate(train_loader):
        batch_size = images.size(0)
        # Move the labels to the device once; both the loss and the accuracy
        # computation need them there.
        target = target.to(device)

        # compute output
        output = model(images.type(dtype))
        loss = criterion(output, target)

        # measure accuracy and record loss
        acc1, acc5 = accuracy(output, target, topk=(1, 5))
        losses.update(loss.item(), batch_size)
        top1.update(acc1[0], batch_size)
        top5.update(acc5[0], batch_size)

        # compute gradient and take an optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        if i % 100 == 0:
            progress.display(i)
    print("epoch : {}, accuray: {},Loss : {} ".format(epoch, top1.avg, losses.avg))
    # float() accepts both the tensor averages produced during a normal epoch
    # and the plain 0 left in the meters when the loader was empty.
    trainframe.loc[epoch] = [epoch, losses.avg, float(top1.avg), float(top5.avg)]

def validate(val_loader, model, criterion, epoch):
    """Evaluate ``model`` on ``val_loader`` and log the averages.

    Progress is printed every 100 batches. At the end the row
    ``[epoch, mean loss, mean top-1, mean top-5]`` is stored in the
    module-level ``testframe`` under index ``epoch``.

    Args:
        val_loader: Iterable of ``(images, target)`` batches.
        model: Network to evaluate; switched to evaluation mode.
        criterion: Loss called as ``criterion(output, target)``.
        epoch: Epoch number used for logging and as the row index.

    Returns:
        The running top-1 average held by the meter (``top1.avg``).
    """
    losses = AverageMeter('Loss', ':.4e')
    top1 = AverageMeter('Acc@1', ':6.2f')
    top5 = AverageMeter('Acc@5', ':6.2f')
    progress = ProgressMeter(
        len(val_loader),
        [losses, top1, top5],
        prefix='Test: ')
    # switch to evaluate mode
    model.eval()

    with torch.no_grad():
        for i, (images, target) in enumerate(val_loader):
            batch_size = images.size(0)
            # Move the labels to the device once; both the loss and the
            # accuracy computation need them there.
            target = target.to(device)

            # compute output
            output = model(images.type(dtype))
            loss = criterion(output, target)

            # measure accuracy and record loss
            acc1, acc5 = accuracy(output, target, topk=(1, 5))
            losses.update(loss.item(), batch_size)
            top1.update(acc1[0], batch_size)
            top5.update(acc5[0], batch_size)
            if i % 100 == 0:
                progress.display(i)
        # float() accepts both the tensor averages produced during a normal
        # pass and the plain 0 left in the meters when the loader was empty.
        testframe.loc[epoch] = [epoch, losses.avg, float(top1.avg), float(top5.avg)]
        # TODO: this should also be done with the ProgressMeter
        print("Test epoch : {}, accuray: {},Loss : {} ".format(epoch, top1.avg, losses.avg))
        return top1.avg
def adjust_optimizer(optimizer, epoch, config):
    """Reconfigures the optimizer according to epoch and config dict.

    Args:
        optimizer: The optimizer currently in use.
        epoch: Current epoch; every entry of ``config`` whose key is in
            ``0..epoch`` is applied in ascending order, so later entries
            override earlier ones.
        config: Maps an epoch number to a settings dict. The optional
            ``'optimizer'`` entry names a ``torch.optim`` class (see the
            table below); every other entry that matches a key of a
            parameter group overwrites that key.

    Returns:
        The optimizer to use from now on. It is the object passed in unless
        the settings asked for a different optimizer class.

    Raises:
        KeyError: If ``'optimizer'`` names a class that is not in the table.
    """
    optimizers = {
        'SGD': torch.optim.SGD,
        'ASGD': torch.optim.ASGD,
        'Adam': torch.optim.Adam,
        'Adamax': torch.optim.Adamax,
        'Adagrad': torch.optim.Adagrad,
        'Adadelta': torch.optim.Adadelta,
        'Rprop': torch.optim.Rprop,
        'RMSprop': torch.optim.RMSprop
    }

    def modify_optimizer(optimizer, setting):
        if 'optimizer' in setting:
            optimizer_cls = optimizers[setting['optimizer']]
            # Rebuild only when the class really changes. Recreating an
            # optimizer of the same class on every call would throw away its
            # accumulated state (momentum buffers, Adam moments, ...).
            if type(optimizer) is not optimizer_cls:
                optimizer = optimizer_cls(optimizer.param_groups)
        for param_group in optimizer.param_groups:
            for key in param_group:
                if key in setting:
                    param_group[key] = setting[key]
        return optimizer

    for e in range(epoch + 1):  # run over all epochs - sticky setting
        if e in config:
            optimizer = modify_optimizer(optimizer, config[e])

    return optimizer
import os
def save_checkpoint(state, is_best, path='.', filename='checkpoint.pth.tar', save_all=False):
    """Serialise ``state`` to ``path/filename`` with ``torch.save``.

    Args:
        state: Object to save (typically a dict holding a state dict).
        is_best: Accepted for interface compatibility; currently unused.
        path: Target directory; created if it does not exist yet.
        filename: File name inside ``path``.
        save_all: Accepted for interface compatibility; currently unused.
    """
    # torch.save does not create missing directories. An empty path means
    # "current directory" for os.path.join and must not reach makedirs.
    if path:
        os.makedirs(path, exist_ok=True)
    filename = os.path.join(path, filename)
    torch.save(state, filename)
if __name__ == '__main__':
    # Script entry point: trains VGG_Cifar10 on CIFAR-10 and, after every
    # epoch, evaluates it, writes a checkpoint and exports the metric tables.
    # Everything stays at module level on purpose: train() and validate()
    # read dtype, device, trainframe and testframe as module globals.
    import pandas as pd
    import torch.optim as optim
    import torchvision
    from torchvision import transforms

    # Use the first GPU when there is one, otherwise run on the CPU.
    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
    if use_cuda:
        torch.backends.cudnn.benchmark = True

    best_prec1 = 0
    # One row per epoch, filled in by train() and validate() respectively.
    trainframe = pd.DataFrame(columns=['epoch', 'loss', 'accuracy1', 'accuracy5'])
    testframe = pd.DataFrame(columns=['epoch', 'loss', 'accuracy1', 'accuracy5'])

    print('==> Preparing data..')
    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
    ])

    trainset = torchvision.datasets.CIFAR10(
        root='C:\\Users\\ajana\\data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(
        trainset, batch_size=128, shuffle=True, num_workers=1)

    testset = torchvision.datasets.CIFAR10(
        root='C:\\Users\\ajana\\data', train=False, download=True, transform=transform_test)
    test_loader = torch.utils.data.DataLoader(
        testset, batch_size=1000, shuffle=False, num_workers=1)

    start_epoch = 1
    epochs = 121
    model = VGG_Cifar10()
    criterion = nn.CrossEntropyLoss()
    criterion.type(dtype)
    model.type(dtype)
    lr = 0.01
    # Adam with a fixed learning rate; the epoch-based schedule stored in
    # model.regime (see adjust_optimizer) is not applied by this script.
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(start_epoch, epochs):
        train(train_loader, model, criterion, optimizer, epoch)
        acc1 = validate(test_loader, model, criterion, epoch)
        is_best = acc1 > best_prec1
        best_prec1 = max(acc1, best_prec1)

        save_checkpoint({
            'epoch': epoch,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
        }, is_best, path="drive/My Drive/Model Results/", filename='checkpoint_cifar10)_0.01.pth.tar')
        trainframe.to_excel("drive/My Drive/Model Results/Cifar10_train_0.01.xlsx")
        testframe.to_excel("drive/My Drive/Model Results/Cifar10_test_0.01.xlsx")

    # Kept inside the guard: the frames only exist when run as a script.
    print(trainframe, testframe)
