In [7]:

import torch
import torch.nn as nn
import math



cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}


class VGG(nn.Module):
    def __init__(self, vgg_name):
        super(VGG, self).__init__()
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, 10)

    def forward(self, x):
        out = self.features(x)
        out = out.view(out.size(0), -1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        layers = []
        in_channels = 3
        for x in cfg:
            if x == 'M':
                layers += [nn.MaxPool2d(kernel_size=2, stride=2)]
            else:
                layers += [nn.Conv2d(in_channels, x, kernel_size=3, padding=1, bias=False),
                           nn.BatchNorm2d(x),
                           nn.ReLU(inplace=True)]
                in_channels = x
        layers += [nn.AvgPool2d(kernel_size=1, stride=1)]
        return nn.Sequential(*layers)

    def show_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.show_params()


def VGG11():
    return VGG('VGG11')


def VGG13():
    return VGG('VGG13')


#def VGG16():
#    return VGG('VGG16')

def VGG16(**kwargs):
    model = VGG(vgg_name = 'VGG16', **kwargs)
    return model



def VGG19():
    return VGG('VGG19')


In [8]:
import argparse
import os
import time
import shutil

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn



import torchvision
import torchvision.transforms as transforms



global best_prec
use_gpu = torch.cuda.is_available()
print('=> Building model...')


batch_size = 256





normalize = transforms.Normalize(mean=[0.491, 0.482, 0.447], std=[0.247, 0.243, 0.262])


train_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ]))


trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)

test_dataset = torchvision.datasets.CIFAR10(
    root='./data',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ]))

testloader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)


print_freq = 100 # every 100 batches, accuracy printed. Here, each batch includes "batch_size" data points
# CIFAR10 has 50,000 training data, and 10,000 validation data.

def train(trainloader, model, criterion, optimizer, epoch):
    batch_time = AverageMeter()   ## at the begining of each epoch, this should be reset
    data_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    model.train()

    end = time.time()  # measure current time

    for i, (input, target) in enumerate(trainloader):
        # measure data loading time
        data_time.update(time.time() - end)  # data loading time

        input, target = input.cuda(), target.cuda()

        # compute output
        output = model(input)
        loss = criterion(output, target)

        # measure accuracy and record loss
        prec = accuracy(output, target)[0]
        losses.update(loss.item(), input.size(0))
        top1.update(prec.item(), input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # measure elapsed time
        batch_time.update(time.time() - end) # time spent to process one batch
        end = time.time()


        if i % print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec {top1.val:.3f}% ({top1.avg:.3f}%)'.format(
                   epoch, i, len(trainloader), batch_time=batch_time,
                   data_time=data_time, loss=losses, top1=top1))



def validate(val_loader, model, criterion ):
    batch_time = AverageMeter()
    losses = AverageMeter()
    top1 = AverageMeter()

    # switch to evaluate mode
    model.eval()

    end = time.time()
    with torch.no_grad():
        for i, (input, target) in enumerate(val_loader):

            input, target = input.cuda(), target.cuda()

            # compute output
            output = model(input)
            loss = criterion(output, target)

            # measure accuracy and record loss
            prec = accuracy(output, target)[0]
            losses.update(loss.item(), input.size(0))
            top1.update(prec.item(), input.size(0))

            # measure elapsed time
            batch_time.update(time.time() - end)
            end = time.time()

            if i % print_freq == 0:  # This line shows how frequently print out the status. e.g., i%5 => every 5 batch, prints out
                print('Test: [{0}/{1}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec {top1.val:.3f}% ({top1.avg:.3f}%)'.format(
                   i, len(val_loader), batch_time=batch_time, loss=losses,
                   top1=top1))

    print(' * Prec {top1.avg:.3f}% '.format(top1=top1))
    return top1.avg


def accuracy(output, target, topk=(1,)):
    """Computes the precision@k for the specified values of k"""
    maxk = max(topk)
    batch_size = target.size(0)

    _, pred = output.topk(maxk, 1, True, True) # topk(k, dim=None, largest=True, sorted=True)
                                               # will output (max value, its index)
    pred = pred.t()           # transpose
    correct = pred.eq(target.view(1, -1).expand_as(pred))   # "-1": calculate automatically

    res = []
    for k in topk:
        correct_k = correct[:k].view(-1).float().sum(0)  # view(-1): make a flattened 1D tensor
        res.append(correct_k.mul_(100.0 / batch_size))   # correct: size of [maxk, batch_size]
    return res


class AverageMeter(object):
    """Computes and stores the average and current value"""
    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0
        self.avg = 0
        self.sum = 0
        self.count = 0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n    ## n is impact factor
        self.count += n
        self.avg = self.sum / self.count


def save_checkpoint(state, is_best, fdir):
    filepath = os.path.join(fdir, 'checkpoint.pth')
    torch.save(state, filepath)
    if is_best:
        shutil.copyfile(filepath, os.path.join(fdir, 'model_best.pth.tar'))


def adjust_learning_rate(optimizer, epoch):
    """Start with lr = 0.1, reduce by 10x at 15, 25, and 35 epochs"""
    adjust_list = [15, 25, 35, 50, 70]
    if epoch in adjust_list:
        for param_group in optimizer.param_groups:
            param_group['lr'] = param_group['lr'] * 0.1

#model = nn.DataParallel(model).cuda()
#all_params = checkpoint['state_dict']
#model.load_state_dict(all_params, strict=False)
#criterion = nn.CrossEntropyLoss().cuda()
#validate(testloader, model, criterion)

=> Building model...
Files already downloaded and verified
Files already downloaded and verified


# VGG16 with CrossEntropy Loss

In [9]:
model_name = "VGG16"
model = VGG16()
fdir = 'result/'+str(model_name)+'/model_best.pth.tar'

checkpoint = torch.load(fdir)
model.load_state_dict(checkpoint['state_dict'])
criterion = nn.CrossEntropyLoss().cuda()

model.eval()
model.cuda()


prec = validate(testloader, model, criterion)
print(f'first conv layer weights absolute sum: {model.features[0].weight.abs().sum()}')

  checkpoint = torch.load(fdir)


Test: [0/40]	Time 0.148 (0.148)	Loss 0.3038 (0.3038)	Prec 91.406% (91.406%)
 * Prec 90.470% 
first conv layer weights absolute sum: 225.03768920898438


# VGG16 with Even Weight CrossEntropy and Weights' Absolute Sum

In [10]:
model_name = "VGG16_loss0"


In [None]:
# number of epochs to train the model
lr = 0.01  # Start at 0.1
n_epochs = 10
best_prec = 0
weight_decay = 1e-4
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.95, weight_decay=weight_decay)
model.train()  # prep model for training

if not os.path.exists('result'):
    os.makedirs('result')

fdir = 'result/' + str(model_name)

if not os.path.exists(fdir):
    os.makedirs(fdir)

for epoch in range(n_epochs):
    train_loss = 0.0
    train_loss1 = 0.0
    train_loss2 = 0.0

    for data, target in trainloader:
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()

        # Forward pass
        output = model(data)
        loss1 = criterion(output, target)
        loss2 = model.features[0].weight.abs().sum()
        loss = loss1 + loss2

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Accumulate losses
        train_loss1 += loss1.item() * data.size(0)
        train_loss2 += loss2.item() * data.size(0)
        train_loss += loss.item() * data.size(0)

        # Calculate average losses
        train_loss1 /= len(trainloader.dataset)
        train_loss2 /= len(trainloader.dataset)
        train_loss /= len(trainloader.dataset)

    print(f"Epoch: {epoch}")
    print('Training Loss1: {:.6f}'.format(epoch+1, train_loss1))
    print('Training Loss2: {:.6f}'.format(epoch+1, train_loss2))
    print('Training Loss: {:.6f}'.format(epoch+1, train_loss))
    prec = validate(testloader, model, criterion)
    # Remember best precision and save checkpoint
    is_best = prec > best_prec
    best_prec = max(prec, best_prec)
    print('Best accuracy: {:.3f}'.format(best_prec))

    # Save model checkpoint
    save_checkpoint({
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_loss,
            'best_prec': best_prec,
            }, is_best, fdir)


In [15]:
fdir = 'result/'+str(model_name)+'/model_best.pth.tar'
checkpoint = torch.load(fdir)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
train_loss = checkpoint['loss']

model.eval()

test_loss = 0
correct = 0

with torch.no_grad():
    for data, target in testloader:
        data, target = data.cuda(), target.cuda() # loading to GPU
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(testloader.dataset),
        100. * correct / len(testloader.dataset)))
print(f'first conv layer weights absolute sum: {model.features[0].weight.abs().sum()}')

  checkpoint = torch.load(fdir)



Test set: Accuracy: 7132/10000 (71%)

first conv layer weights absolute sum: 33.18050003051758


# VGG16 with CrossEntropy Loss and weighted Weights' Absolute Sum

In [17]:
model_name = "VGG16"
model = VGG16()
fdir = 'result/'+str(model_name)+'/model_best.pth.tar'

checkpoint = torch.load(fdir)
model.load_state_dict(checkpoint['state_dict'])
criterion = nn.CrossEntropyLoss().cuda()

model.cuda();


  checkpoint = torch.load(fdir)


In [None]:
model_name = "VGG16_loss1"
# number of epochs to train the model
lr = 0.01  # Start at 0.1
n_epochs = 50
best_prec = 0
weight_decay = 1e-4
criterion = nn.CrossEntropyLoss().cuda()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.95, weight_decay=weight_decay)
model.train()  # prep model for training

if not os.path.exists('result'):
    os.makedirs('result')

fdir = 'result/' + str(model_name)

if not os.path.exists(fdir):
    os.makedirs(fdir)

for epoch in range(n_epochs):
    adjust_learning_rate(optimizer, epoch)
    train_loss = 0.0
    train_loss1 = 0.0
    train_loss2 = 0.0

    for data, target in trainloader:
        data, target = data.cuda(), target.cuda()
        optimizer.zero_grad()

        # Forward pass
        output = model(data)
        loss1 = criterion(output, target)
        loss2 = model.features[0].weight.abs().sum()
        loss = loss1 + 0.05*loss2

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        # Accumulate losses
        train_loss1 += loss1.item() * data.size(0)
        train_loss2 += loss2.item() * data.size(0)
        train_loss += loss.item() * data.size(0)

        # Calculate average losses
        train_loss1 /= len(trainloader.dataset)
        train_loss2 /= len(trainloader.dataset)
        train_loss /= len(trainloader.dataset)

    print(f"Epoch: {epoch}")
    print('Training Loss1: {:.6f}'.format(epoch+1, train_loss1))
    print('Training Loss2: {:.6f}'.format(epoch+1, train_loss2))
    print('Training Loss: {:.6f}'.format(epoch+1, train_loss))
    prec = validate(testloader, model, criterion)
    # Remember best precision and save checkpoint
    is_best = prec > best_prec
    best_prec = max(prec, best_prec)
    print('Best accuracy: {:.3f}'.format(best_prec))

    # Save model checkpoint
    save_checkpoint({
            'epoch': epoch+1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'loss': train_loss,
            'best_prec': best_prec,
            }, is_best, fdir)


In [19]:
fdir = 'result/'+str(model_name)+'/model_best.pth.tar'
checkpoint = torch.load(fdir)
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
epoch = checkpoint['epoch']
train_loss = checkpoint['loss']

model.eval()

test_loss = 0
correct = 0

with torch.no_grad():
    for data, target in testloader:
        data, target = data.cuda(), target.cuda() # loading to GPU
        output = model(data)
        pred = output.argmax(dim=1, keepdim=True)
        correct += pred.eq(target.view_as(pred)).sum().item()

print('\nTest set: Accuracy: {}/{} ({:.0f}%)\n'.format(
        correct, len(testloader.dataset),
        100. * correct / len(testloader.dataset)))
print(f'first conv layer weights absolute sum: {model.features[0].weight.abs().sum()}')

  checkpoint = torch.load(fdir)



Test set: Accuracy: 7414/10000 (74%)

first conv layer weights absolute sum: 0.655273973941803
