<a href="https://colab.research.google.com/github/Pranavug/tp/blob/master/vggnet_quantized_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
 # Clone the PyTorch_CIFAR10 repository into the current directory (the next cell changes into it).
 !git clone https://github.com/huyvnphan/PyTorch_CIFAR10.git

In [0]:
# Make the cloned repository the working directory; every relative path used below resolves from here.
%cd PyTorch_CIFAR10/

/content/PyTorch_CIFAR10


In [0]:
# Extract ../state_dicts.zip (one level above the repo) into the current directory.
# NOTE(review): the download cell below writes state_dicts.zip into the *current* directory,
# not the parent — confirm which archive this cell is meant to unpack.
!unzip ../state_dicts.zip -d .

In [0]:
import requests, zipfile, os
from tqdm import tqdm

def main():
    """Download the pretrained-weights archive and unpack it.

    Streams the zip at ``url`` into ``state_dicts.zip`` in the current working
    directory while showing a tqdm progress bar, checks that the number of
    bytes received matches the ``content-length`` header (when the server sent
    one), then extracts the archive into ``./cifar10_models``.

    Raises:
        requests.HTTPError: if the server answers with an error status code.
        IOError: if the number of bytes received differs from ``content-length``.
    """
    url = "https://rutgers.box.com/shared/static/y9wi8ic7bshe2nn63prj9vsea7wibd4x.zip"
    block_size = 2**20  # read the body in 1 MiB chunks
    path_to_zip_file = os.path.join(os.getcwd(), 'state_dicts.zip')
    directory_to_extract_to = os.path.join(os.getcwd(), 'cifar10_models')

    # Streaming, so we can iterate over the response instead of holding the
    # whole body in memory. The context manager releases the connection even
    # if the download is interrupted.
    with requests.get(url, stream=True, timeout=60) as r:
        # Fail early on 4xx/5xx instead of saving an error page as a "zip".
        r.raise_for_status()

        # Total size in bytes (0 when the server did not announce it).
        total_size = int(r.headers.get('content-length', 0))

        # The counter is fed byte counts, so the base unit is bytes; tqdm adds
        # the binary prefix itself (Ki/Mi/Gi). Using 'MiB' as the unit here
        # would render nonsense such as "MMiB/s".
        with open(path_to_zip_file, 'wb') as f, \
                tqdm(total=total_size or None, unit='iB', unit_scale=True,
                     unit_divisor=1024) as t:
            for data in r.iter_content(block_size):
                t.update(len(data))
                f.write(data)
            received = t.n

    if total_size != 0 and received != total_size:
        raise IOError(
            'Error, something went wrong: received {} of {} bytes'.format(
                received, total_size))

    print('Download successful. Unzipping file.')
    with zipfile.ZipFile(path_to_zip_file, 'r') as zip_ref:
        zip_ref.extractall(directory_to_extract_to)
    print('Unzip file successful!')
        
# Run the download and extraction as soon as this cell executes.
main()

100%|██████████| 2.46G/2.46G [03:16<00:00, 12.5MMiB/s]


Download successful. Unzipping file.
Unzip file successful!


In [0]:
import torch
import torch.nn as nn
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _single, _pair, _triple, _list_with_default

# Names exported by `from <module> import *` when this code lives in a .py module.
# NOTE(review): inside a notebook this list has no effect, and not every name listed
# is defined by this cell — confirm it is still wanted.
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]

# Layer layouts consumed by make_layers(): an int appends a 3x3 convolution with
# that many output channels, 'M' appends a 2x2 max-pool with stride 2.
# The keys are looked up by _vgg() via its `cfg` argument.
cfgs = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

In [0]:
class VGG(nn.Module):
    """VGG-style classifier.

    The input goes through ``features``, is adaptively average-pooled to a
    7x7 map, flattened per sample, and fed to a three-layer fully connected
    head with ReLU and dropout between the layers.

    Args:
        features: module applied first to the input batch (for example the
            result of ``make_layers``); the head expects it to emit 512 channels.
        num_classes: output width of the last linear layer.
        init_weights: when true, ``_initialize_weights`` runs after construction.
    """

    def __init__(self, features, num_classes=10, init_weights=True):
        super().__init__()
        hidden_units = 4096
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        head_layers = [
            nn.Linear(512 * 7 * 7, hidden_units),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(hidden_units, hidden_units),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(hidden_units, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the classifier output for a batch ``x``."""
        pooled = self.avgpool(self.features(x))
        # Keep the batch dimension, collapse everything else.
        flattened = pooled.view(pooled.size(0), -1)
        return self.classifier(flattened)

    def _initialize_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear sub-module in place."""
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.BatchNorm2d):
                # Identity affine transform: scale 1, shift 0.
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
                continue
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)

def make_layers(cfg, batch_norm=False):
    """Turn a layout list into an ``nn.Sequential`` feature extractor.

    Args:
        cfg: iterable whose entries are either ``'M'`` (append a 2x2 max-pool
            with stride 2) or an int (append a 3x3, padding-1 convolution with
            that many output channels followed by an in-place ReLU).
        batch_norm: when true, a ``BatchNorm2d`` is inserted between every
            convolution and its ReLU.

    Returns:
        nn.Sequential holding the layers in layout order.
    """
    modules = []
    prev_channels = 3  # the first convolution takes 3 input channels
    for entry in cfg:
        if entry == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(prev_channels, entry, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(entry))
        modules.append(nn.ReLU(inplace=True))
        # The next convolution consumes what this one produced.
        prev_channels = entry
    return nn.Sequential(*modules)

In [0]:
class myLinear(nn.Module):
    """Linear layer whose weights are softly snapped to learnable levels.

    On every forward pass each stored weight ``w`` is replaced by
    ``sum_k softmax_k(-beta * (w - alpha_k)**2) * alpha_k``, i.e. a
    softmax-weighted average of the quantisation levels ``alpha`` in which
    levels close to ``w`` dominate. Larger ``beta`` makes the assignment
    harder. ``weight``, ``bias`` and ``alpha`` are all trainable parameters.

    Args:
        in_features: size of the last dimension of the input.
        out_features: size of the last dimension of the output.
        bias: when false, no additive bias is created or applied.
        n_quants: number of quantisation levels, initialised evenly spaced
            over [-0.2, 0.2].
        beta: sharpness of the soft assignment.
    """

    def __init__(self, in_features, out_features, bias=True, n_quants=64, beta=100):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        # @TODO: change initialization method to one mentioned
        # in PyTorch implementation
        l = 0.1
        self.weight = torch.nn.Parameter(l*torch.randn(out_features, in_features))
        if bias:
            self.bias = torch.nn.Parameter(l*torch.randn(out_features))
        else:
            # Honour bias=False: register the slot as empty so `self.bias`
            # exists but contributes nothing to parameters() or state_dict().
            self.register_parameter('bias', None)
        self.alpha = torch.nn.Parameter(torch.linspace(-2*l, 2*l, n_quants)) # quant params
        self.beta = beta

    def _quantized_weight(self):
        """Return the soft-quantised weight matrix, shape (out_features, in_features)."""
        # (out, in, 1) - (n_quants,) -> (out, in, n_quants): distance of every
        # weight to every level. NOTE: this materialises
        # out_features * in_features * n_quants values at once.
        diff = torch.unsqueeze(self.weight, -1) - self.alpha
        assignment = torch.nn.functional.softmax(-self.beta*diff*diff, dim=-1)
        return torch.sum(assignment * self.alpha, dim=-1)

    def forward(self, input):
        """Apply the layer to ``input`` of shape (..., in_features)."""
        output = input @ self._quantized_weight().t()
        if self.bias is not None:
            output = output + self.bias
        return output

class myConv2d(nn.Module):
    """2-D convolution whose kernel is softly snapped to learnable levels.

    On every forward pass each kernel weight ``w`` is replaced by
    ``sum_k softmax_k(-beta * (w - alpha_k)**2) * alpha_k`` before the
    convolution is applied. ``weight`` and ``alpha`` (and ``bias`` when
    enabled) are trainable parameters.

    Args:
        in_channels: number of input channels.
        out_channels: number of output channels.
        kernel_size: int or (height, width) pair.
        stride, padding, dilation: int or pair, forwarded to ``F.conv2d``.
        groups: number of channel groups; must divide both channel counts.
        bias: when true, a learnable per-output-channel bias is added.
        padding_mode: stored for signature compatibility only; the forward
            pass always uses the implicit zero padding of ``F.conv2d``.
        n_quants: number of quantisation levels, initialised evenly spaced
            over [-0.2, 0.2].
        beta: sharpness of the soft assignment.

    Raises:
        ValueError: if ``groups`` does not divide ``in_channels`` and
            ``out_channels``.
    """

    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1,
                 bias=False, padding_mode='zeros',n_quants=64, beta=100):
        # nn.Module must be initialised before any attribute is assigned.
        super(myConv2d, self).__init__()
        if in_channels % groups != 0 or out_channels % groups != 0:
            raise ValueError('in_channels and out_channels must be divisible by groups')
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.kernel_size = _pair(kernel_size)
        self.stride = _pair(stride)
        self.padding = _pair(padding)
        self.dilation = _pair(dilation)
        self.groups = groups
        self.padding_mode = padding_mode
        l = 0.1
        self.alpha = torch.nn.Parameter(torch.linspace(-2*l, 2*l, n_quants)) # quant params
        self.beta = beta
        # Use the normalised (kh, kw) pair so tuple kernel sizes work, and the
        # per-group input width that F.conv2d expects when groups > 1.
        self.weight = torch.nn.Parameter(l*torch.randn(out_channels, in_channels // groups,
                                                     *self.kernel_size))
        if bias:
            self.bias = torch.nn.Parameter(l*torch.randn(out_channels))
        else:
            self.register_parameter('bias', None)

    def _quantized_weight(self):
        """Return the soft-quantised kernel with the same shape as ``self.weight``."""
        # (numel, 1) - (n_quants,) -> (numel, n_quants): distance of every
        # kernel weight to every level.
        diff = self.weight.reshape(-1, 1) - self.alpha
        assignment = torch.nn.functional.softmax(-self.beta*diff*diff, dim=1)
        return torch.sum(assignment * self.alpha, dim=1).view(self.weight.shape)

    def _conv_forward(self, input):
        """Convolve ``input`` with the soft-quantised kernel."""
        return F.conv2d(input, self._quantized_weight(), self.bias, self.stride,
                        self.padding, self.dilation, self.groups)

    def forward(self,input):
        """Apply the quantised convolution to ``input``."""
        return self._conv_forward(input)

In [0]:
class VGG(nn.Module):
    """VGG-style classifier whose fully connected head is built from ``myLinear``.

    Pipeline: ``features`` -> adaptive average pool to 7x7 -> flatten per
    sample -> three ``myLinear`` layers with ReLU and dropout in between.

    Args:
        features: module applied first to the input batch; the head expects
            it to emit 512 channels.
        num_classes: output width of the last ``myLinear`` layer.
        init_weights: when true, ``_initialize_weights`` runs after construction.
    """

    def __init__(self, features, num_classes=10, init_weights=True):
        super().__init__()
        flat_width, hidden_width = 512 * 7 * 7, 4096
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        fc_in = myLinear(flat_width, hidden_width)
        fc_mid = myLinear(hidden_width, hidden_width)
        fc_out = myLinear(hidden_width, num_classes)
        self.classifier = nn.Sequential(
            fc_in, nn.ReLU(True), nn.Dropout(),
            fc_mid, nn.ReLU(True), nn.Dropout(),
            fc_out,
        )
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the classifier output for a batch ``x``."""
        feature_map = self.features(x)
        pooled = self.avgpool(feature_map)
        # One row per sample for the fully connected head.
        return self.classifier(pooled.view(pooled.shape[0], -1))

    def _initialize_weights(self):
        """Re-initialise nn.Conv2d, nn.BatchNorm2d and nn.Linear sub-modules in place.

        NOTE(review): the head above is built from ``myLinear``, which in this
        notebook derives from ``nn.Module`` rather than ``nn.Linear``, so the
        ``nn.Linear`` branch does not touch it. Confirm that keeping the
        layer's own random initialisation is intended.
        """
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out', nonlinearity='relu')
                if layer.bias is None:
                    continue
                nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.constant_(layer.weight, 1)
                nn.init.constant_(layer.bias, 0)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.constant_(layer.bias, 0)

def make_layers(cfg, batch_norm=False):
    """Build the feature extractor from a layout list using ``myConv2d`` convolutions.

    Args:
        cfg: iterable whose entries are either ``'M'`` (append a 2x2 max-pool
            with stride 2) or an int (append a 3x3, padding-1 ``myConv2d``
            with that many output channels followed by an in-place ReLU).
        batch_norm: when true, a ``BatchNorm2d`` sits between every
            convolution and its ReLU.

    Returns:
        nn.Sequential holding the layers in layout order.
    """
    stack = []
    channels_in = 3  # the first convolution takes 3 input channels
    for spec in cfg:
        if spec != 'M':
            group = [myConv2d(channels_in, spec, kernel_size=3, padding=1)]
            if batch_norm:
                group.append(nn.BatchNorm2d(spec))
            group.append(nn.ReLU(inplace=True))
            stack.extend(group)
            # Width seen by the next convolution.
            channels_in = spec
        else:
            stack.append(nn.MaxPool2d(kernel_size=2, stride=2))
    return nn.Sequential(*stack)

In [0]:
def _vgg(arch, cfg, batch_norm, pretrained, progress, device, **kwargs):
    """Build a VGG model and optionally load saved weights into it.

    Args:
        arch: architecture name; also the stem of the weight file
            ``state_dicts/<arch>.pt``.
        cfg: key into ``cfgs`` selecting the layer layout.
        batch_norm: forwarded to ``make_layers``.
        pretrained: when true, weight initialisation is skipped and the saved
            state dict is loaded instead.
        progress: accepted for signature compatibility; not used here.
        device: ``map_location`` passed to ``torch.load``.
        **kwargs: forwarded to the ``VGG`` constructor.

    Returns:
        The constructed (and possibly weight-loaded) model.
    """
    if pretrained:
        # The loaded weights overwrite everything, so skip random init.
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        try:
            # abspath guards against dirname() returning '' for a bare
            # filename, which would otherwise turn the path into
            # '/state_dicts/...'.
            script_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ is not defined when this runs inside a notebook cell.
            script_dir = os.getcwd()
        weights_path = os.path.join(script_dir, 'state_dicts', arch + '.pt')
        state_dict = torch.load(weights_path, map_location=device)
        # NOTE(review): load_state_dict is strict by default, so the saved
        # keys must match the model built above exactly.
        model.load_state_dict(state_dict)
    return model

def vgg16_bn(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build the 16-layer VGG (layout "D") with batch normalization.

    Args:
        pretrained (bool): forwarded to ``_vgg``; if True the saved
            ``vgg16_bn`` state dict is loaded
        progress (bool): forwarded to ``_vgg``
        device: forwarded to ``_vgg`` as the load location for the weights
    """
    arch, layout, use_batch_norm = 'vgg16_bn', 'D', True
    return _vgg(arch, layout, use_batch_norm, pretrained, progress, device, **kwargs)

In [0]:
# Train the vgg16_bn classifier for one epoch (batch size 256) with the training script.
# NOTE(review): `!python` starts a separate process, so classes defined in notebook cells
# are only used if the script's own module files contain them — confirm.
!python cifar10_train.py --classifier vgg16_bn --gpu '0,' --batch_size 256 --max_epochs 1

In [0]:
# Evaluate the vgg16_bn classifier (batch size 256) with the test script, in a separate process.
!python cifar10_test.py --classifier vgg16_bn --gpu '0,' --batch_size 256

In [0]:
# Move the state_dicts directory into ./cifar10_models/.
# NOTE(review): presumably so the model code there finds its weight files — confirm.
!mv state_dicts ./cifar10_models/

In [0]:
# Install pytorch-lightning into the kernel's environment (bare `pip` relies on IPython
# automagic; `%pip install ...` is the explicit form).
# NOTE(review): this cell comes after the train/test cells — if those scripts need
# pytorch-lightning, it has to run before them.
pip install pytorch-lightning

In [0]:
import torch
import torch.nn as nn
import os
import torch
import torch.nn as nn
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.nn.parallel
import torch.backends.cudnn as cudnn
import torch.optim
import torch.utils.data
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.nn.modules.conv import _ConvNd
from torch.nn.modules.utils import _single, _pair, _triple, _list_with_default

# Names exported by `from <module> import *` when this cell's code lives in a .py module.
# NOTE(review): `myReLU`, defined below, is not listed — confirm that is intended.
__all__ = [
    'VGG', 'vgg11', 'vgg11_bn', 'vgg13', 'vgg13_bn', 'vgg16', 'vgg16_bn',
    'vgg19_bn', 'vgg19',
]

class myReLU(nn.Module):
    """Clipped ReLU with a learnable ceiling, quantised to ``num_bits`` bits.

    The forward value is ``x`` clipped to ``[0, alpha]`` and rounded onto the
    ``2**num_bits`` evenly spaced levels between 0 and ``alpha``. Rounding has
    zero gradient almost everywhere, so the backward pass uses a
    straight-through estimator: gradients are those of the clipping step
    alone (1 for inputs inside ``(0, alpha)``, 0 outside; ``alpha`` receives
    gradient from inputs above the ceiling). Without it no layer in front of
    this activation could train.

    Args:
        num_bits: bit width ``k``; the output takes ``2**k`` distinct values.
    """

    def __init__(self, num_bits = 3):
        super(myReLU, self).__init__()
        self.alpha = torch.nn.Parameter(torch.tensor(10.0))
        self.k = num_bits

    def forward(self, x):
        """Return the quantised, clipped activation of ``x``."""
        steps = 2**self.k - 1
        # Equivalent to clamp(x, 0, alpha), but differentiable w.r.t. alpha.
        clipped = 0.5*(torch.abs(x) - torch.abs(x-self.alpha) + self.alpha)
        quantized = torch.round(clipped*(steps/self.alpha)) * (self.alpha/steps)
        # Straight-through estimator: the forward value is `quantized`, the
        # backward pass sees only `clipped`.
        return clipped + (quantized - clipped).detach()

class VGG(nn.Module):
    """VGG-style classifier whose head uses the quantised ``myReLU(6)`` activation.

    Pipeline: ``features`` -> adaptive average pool to 7x7 -> flatten per
    sample -> three ``nn.Linear`` layers with ``myReLU(6)`` and dropout in
    between.

    Args:
        features: module applied first to the input batch; the head expects
            it to emit 512 channels.
        num_classes: output width of the last linear layer.
        init_weights: when true, ``_initialize_weights`` runs after construction.
    """

    def __init__(self, features, num_classes=10, init_weights=True):
        super().__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        widths = (512 * 7 * 7, 4096, 4096)
        head = []
        # Two hidden stages: Linear -> quantised activation -> dropout.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            head.append(nn.Linear(fan_in, fan_out))
            head.append(myReLU(6))
            head.append(nn.Dropout())
        head.append(nn.Linear(widths[-1], num_classes))
        self.classifier = nn.Sequential(*head)
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        """Return the classifier output for a batch ``x``."""
        out = self.avgpool(self.features(x))
        out = out.view(out.size(0), -1)  # flatten everything but the batch axis
        return self.classifier(out)

    def _initialize_weights(self):
        """Re-initialise every Conv2d, BatchNorm2d and Linear sub-module in place."""
        for mod in self.modules():
            if isinstance(mod, nn.Conv2d):
                nn.init.kaiming_normal_(mod.weight, mode='fan_out', nonlinearity='relu')
                if mod.bias is not None:
                    nn.init.constant_(mod.bias, 0)
            elif isinstance(mod, (nn.BatchNorm2d, nn.Linear)):
                if isinstance(mod, nn.BatchNorm2d):
                    nn.init.constant_(mod.weight, 1)
                else:
                    nn.init.normal_(mod.weight, 0, 0.01)
                nn.init.constant_(mod.bias, 0)

def make_layers(cfg, batch_norm=False):
    """Build the feature extractor from a layout list using ``myReLU(6)`` activations.

    Args:
        cfg: iterable whose entries are either ``'M'`` (append a 2x2 max-pool
            with stride 2) or an int (append a 3x3, padding-1 ``nn.Conv2d``
            with that many output channels followed by ``myReLU(6)``).
        batch_norm: when true, a ``BatchNorm2d`` sits between every
            convolution and its activation.

    Returns:
        nn.Sequential holding the layers in layout order.
    """
    built = []
    width = 3  # the first convolution takes 3 input channels
    for item in cfg:
        if item == 'M':
            built.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        built.append(nn.Conv2d(width, item, kernel_size=3, padding=1))
        if batch_norm:
            built.append(nn.BatchNorm2d(item))
        built.append(myReLU(6))
        # Output width of this stage feeds the next convolution.
        width = item
    return nn.Sequential(*built)

# Layer layouts consumed by make_layers(): an int appends a 3x3 convolution with
# that many output channels, 'M' appends a 2x2 max-pool with stride 2.
# Keys as used by the constructors below: 'A' -> vgg11, 'B' -> vgg13, 'D' -> vgg16, 'E' -> vgg19.
cfgs = {
    'A': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'B': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'E': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


def _vgg(arch, cfg, batch_norm, pretrained, progress, device, **kwargs):
    """Build a VGG model and optionally load saved weights into it.

    Args:
        arch: architecture name; also the stem of the weight file
            ``state_dicts/<arch>.pt``.
        cfg: key into ``cfgs`` selecting the layer layout.
        batch_norm: forwarded to ``make_layers``.
        pretrained: when true, weight initialisation is skipped and the saved
            state dict is loaded instead.
        progress: accepted for signature compatibility; not used here.
        device: ``map_location`` passed to ``torch.load``.
        **kwargs: forwarded to the ``VGG`` constructor.

    Returns:
        The constructed (and possibly weight-loaded) model.
    """
    if pretrained:
        # The loaded weights overwrite everything, so skip random init.
        kwargs['init_weights'] = False
    model = VGG(make_layers(cfgs[cfg], batch_norm=batch_norm), **kwargs)
    if pretrained:
        try:
            # abspath guards against dirname() returning '' for a bare
            # filename, which would otherwise turn the path into
            # '/state_dicts/...'.
            script_dir = os.path.dirname(os.path.abspath(__file__))
        except NameError:
            # __file__ is not defined when this runs inside a notebook cell.
            script_dir = os.getcwd()
        weights_path = os.path.join(script_dir, 'state_dicts', arch + '.pt')
        state_dict = torch.load(weights_path, map_location=device)
        # NOTE(review): load_state_dict is strict by default, so the saved
        # keys must match the model built above exactly (the activations here
        # add an `alpha` parameter each) — confirm against the checkpoint.
        model.load_state_dict(state_dict)
    return model


def vgg11(pretrained=False, progress=True, device='cpu', **kwargs):
    """VGG 11-layer model (configuration "A")

    Args:
        pretrained (bool): If True, ``_vgg`` loads the saved ``vgg11`` state dict
        progress (bool): Forwarded to ``_vgg``
        device: Forwarded to ``_vgg`` as the load location for the weights.
            ``_vgg`` requires this argument; the call used to omit it, so
            ``vgg11()`` raised ``TypeError`` unless ``device=`` was passed.
    """
    return _vgg('vgg11', 'A', False, pretrained, progress, device, **kwargs)


def vgg11_bn(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build the 11-layer VGG (layout "A") with batch normalization.

    Args:
        pretrained (bool): forwarded to ``_vgg``; if True the saved
            ``vgg11_bn`` state dict is loaded
        progress (bool): forwarded to ``_vgg``
        device: forwarded to ``_vgg`` as the load location for the weights
    """
    arch, layout, use_batch_norm = 'vgg11_bn', 'A', True
    return _vgg(arch, layout, use_batch_norm, pretrained, progress, device, **kwargs)


def vgg13(pretrained=False, progress=True, device='cpu', **kwargs):
    """VGG 13-layer model (configuration "B")

    Args:
        pretrained (bool): If True, ``_vgg`` loads the saved ``vgg13`` state dict
        progress (bool): Forwarded to ``_vgg``
        device: Forwarded to ``_vgg`` as the load location for the weights.
            ``_vgg`` requires this argument; the call used to omit it, so
            ``vgg13()`` raised ``TypeError`` unless ``device=`` was passed.
    """
    return _vgg('vgg13', 'B', False, pretrained, progress, device, **kwargs)


def vgg13_bn(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build the 13-layer VGG (layout "B") with batch normalization.

    Args:
        pretrained (bool): forwarded to ``_vgg``; if True the saved
            ``vgg13_bn`` state dict is loaded
        progress (bool): forwarded to ``_vgg``
        device: forwarded to ``_vgg`` as the load location for the weights
    """
    positional = ('vgg13_bn', 'B', True, pretrained, progress, device)
    return _vgg(*positional, **kwargs)


def vgg16(pretrained=False, progress=True, device='cpu', **kwargs):
    """VGG 16-layer model (configuration "D")

    Args:
        pretrained (bool): If True, ``_vgg`` loads the saved ``vgg16`` state dict
        progress (bool): Forwarded to ``_vgg``
        device: Forwarded to ``_vgg`` as the load location for the weights.
            ``_vgg`` requires this argument; the call used to omit it, so
            ``vgg16()`` raised ``TypeError`` unless ``device=`` was passed.
    """
    return _vgg('vgg16', 'D', False, pretrained, progress, device, **kwargs)


def vgg16_bn(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build the 16-layer VGG (layout "D") with batch normalization.

    Args:
        pretrained (bool): forwarded to ``_vgg``; if True the saved
            ``vgg16_bn`` state dict is loaded
        progress (bool): forwarded to ``_vgg``
        device: forwarded to ``_vgg`` as the load location for the weights
    """
    arch_name = 'vgg16_bn'
    layout_key = 'D'
    model = _vgg(arch_name, layout_key, True, pretrained, progress, device, **kwargs)
    return model


def vgg19(pretrained=False, progress=True, device='cpu', **kwargs):
    """VGG 19-layer model (configuration "E")

    Args:
        pretrained (bool): If True, ``_vgg`` loads the saved ``vgg19`` state dict
        progress (bool): Forwarded to ``_vgg``
        device: Forwarded to ``_vgg`` as the load location for the weights.
            ``_vgg`` requires this argument; the call used to omit it, so
            ``vgg19()`` raised ``TypeError`` unless ``device=`` was passed.
    """
    return _vgg('vgg19', 'E', False, pretrained, progress, device, **kwargs)


def vgg19_bn(pretrained=False, progress=True, device='cpu', **kwargs):
    """Build the 19-layer VGG (layout 'E') with batch normalization.

    Args:
        pretrained (bool): forwarded to ``_vgg``; if True the saved
            ``vgg19_bn`` state dict is loaded
        progress (bool): forwarded to ``_vgg``
        device: forwarded to ``_vgg`` as the load location for the weights
    """
    arch, layout, use_batch_norm = 'vgg19_bn', 'E', True
    model = _vgg(arch, layout, use_batch_norm, pretrained, progress, device, **kwargs)
    return model
