In [1]:
import numpy
import vgg16
import random
import time, datetime
import os, shutil
import yaml
import ast, bisect
import csv

import numpy as np
import torch
import torch.nn as nn
import torch.backends.cudnn as cudnn
from torch import optim
from torch.optim.lr_scheduler import LambdaLR
from torch.autograd import grad
import torchnet as tnt


In [2]:
import places365_v2


In [None]:
# Build train/test loaders from the local `places365_v2` module.
# NOTE(review): the recorded output for this cell stops at ~2% with no
# execution count, so it looks like the run was interrupted. The names
# train_loader/test_loader are reassigned from get_data() further down, so
# the rest of the notebook does not use these loaders — confirm whether
# this cell is still needed.
train_loader, test_loader = places365_v2.Places365()

  0%|          | 2/1803460 [00:00<28:26:24, 17.61it/s]

Retrieving Training Data


  2%|▏         | 42805/1803460 [20:59<290:24:38,  1.68it/s] 

In [2]:
import matplotlib.pyplot as plt
import os
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
import torchvision.transforms as transforms


def get_data(batch_size=32, root='./data', num_workers=2):
    """Build the CIFAR-100 training and test data loaders.

    Args:
        batch_size: number of images per batch, used for both loaders.
        root: directory the dataset is read from / downloaded into.
        num_workers: number of worker processes used by each DataLoader.

    Returns:
        (trainloader, testloader). The training loader is shuffled and applies
        random-crop (32 px, padding 4) and random horizontal-flip
        augmentation; the test loader is not shuffled and applies only tensor
        conversion and normalization.
    """
    # Per-channel mean / std used to normalize both splits.
    # NOTE(review): these values look like the statistics commonly quoted for
    # CIFAR-10 rather than CIFAR-100 — confirm before changing, since any
    # saved weights depend on the normalization used during training.
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.2023, 0.1994, 0.2010)
    normalize = transforms.Normalize(channel_mean, channel_std)

    transform_train = transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize,
    ])

    # No augmentation at test time.
    transform_test = transforms.Compose([
        transforms.ToTensor(),
        normalize,
    ])

    trainset = datasets.CIFAR100(
        root=root, train=True, download=True, transform=transform_train)
    trainloader = DataLoader(
        trainset, batch_size=batch_size, shuffle=True, num_workers=num_workers)

    testset = datasets.CIFAR100(
        root=root, train=False, download=True, transform=transform_test)
    testloader = DataLoader(
        testset, batch_size=batch_size, shuffle=False, num_workers=num_workers)

    return trainloader, testloader

In [3]:
# Build the CIFAR-100 loaders with get_data()'s default arguments; these are
# the loaders used by train() and test() below.
train_loader, test_loader = get_data()

Files already downloaded and verified
Files already downloaded and verified


In [4]:
# Instantiate the network from the local `vgg16` module.
# NOTE(review): 'VGG16' is presumably the name of a layer configuration
# defined in that module — confirm against vgg16.py.
model = vgg16.VGG('VGG16')

In [5]:
# Hyperparameters, read as globals by the cells below.

# Number of epochs run by main().
epochs = 150
# How train() normalizes the finite-difference direction: 'L2', 'L1' or 'Linf'.
norm = 'L2'
# Initial learning rate, momentum and weight decay passed to optim.SGD.
lr = 0.01
momentum = 0.9
decay = 0.0005
# Weight of the squared directional-derivative penalty added to the training
# loss; the penalty is only active when this is > 0.
penalty = 0.1
# 'O2' makes train() use a central (two-sided) finite difference; any other
# value falls back to a one-sided forward difference.
fd_order = 'O2'
# Learning-rate multiplier functions intended for LambdaLR. They are only
# referenced by the commented-out scheduler(...) call further down; the
# active schedule is CosineAnnealingLR.
l1 = lambda epoch: epoch//3
l2 = lambda epoch: 0.95 * epoch
lr_schedule = [l1, l2]

In [6]:
# Detect the GPU once; has_cuda is reused by the training and evaluation code.
has_cuda = torch.cuda.is_available()
# Turn on the cuDNN benchmark flag.
cudnn.benchmark = True

# `criterion` returns the batch-mean loss (used for evaluation), while
# `train_criterion` keeps one loss value per sample (used for training).
criterion = nn.CrossEntropyLoss()
train_criterion = nn.CrossEntropyLoss(reduction='none')

if has_cuda:
    # Move the model and both losses to GPU 0 before the optimizer is built
    # from the model's parameters.
    model = model.cuda(0)
    criterion, train_criterion = criterion.cuda(0), train_criterion.cuda(0)

# Plain (non-Nesterov) SGD with momentum and weight decay.
optimizer = optim.SGD(
    model.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=decay,
    nesterov=False,
)

In [7]:
def scheduler(optimizer,lr_schedule):
    """Return a hyperparameter scheduler for the optimizer.

    Wraps `optimizer` in a LambdaLR, passing `lr_schedule` through unchanged
    as its `lr_lambda` argument.
    """
    return LambdaLR(optimizer, lr_lambda=lr_schedule)
# The LambdaLR-based schedule is disabled; cosine annealing is used instead.
#schedule = scheduler(optimizer,lr_schedule)
# NOTE(review): T_max=200 is larger than epochs=150 and main() steps the
# schedule once per epoch, so the cosine curve will not reach its minimum
# within this run — confirm this is intended.
schedule = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max = 200)

In [9]:
# --------
# Training
# --------
# Module-level state read by train(). CSV logging of per-step statistics is
# currently disabled (see the commented-out lines below).

#trainlog = os.path.join(args.logdir,'training.csv')
#traincolumns = ['index','time','loss', 'regularizer']
# with open(trainlog,'w') as f:
#     logger = csv.DictWriter(f, traincolumns)
#     logger.writeheader()

# Running count of gradient steps; incremented by train() via `global ix`.
ix=0 #count of gradient steps

# Weight of the penalty term that train() adds to the loss.
tik = penalty

# Whether the penalty is active. NOTE: both `tik` and `regularizing` are
# snapshots taken when this cell runs — re-run this cell after changing
# `penalty`.
regularizing = tik>0

# Step size h used in x +/- h*v when train() forms the finite difference.
h = 1 # finite difference step size

def train(epoch, ttot):
    """Run one epoch of SGD with an optional input-gradient penalty.

    For every batch the per-sample cross-entropy loss is computed. When the
    penalty is active (global ``tik > 0``), the gradient of the mean loss with
    respect to the inputs is turned into a direction ``v`` (normalized
    according to the global ``norm``), and the directional derivative of the
    per-sample loss along ``v`` is approximated by finite differences with
    step ``h``: a central difference when ``fd_order == 'O2'``, otherwise a
    forward difference. Half of the mean squared directional derivative,
    weighted by ``tik``, is added to the loss before back-propagation.

    Reads the globals ``model``, ``optimizer``, ``train_loader``,
    ``train_criterion``, ``has_cuda``, ``tik``, ``norm``, ``fd_order`` and
    ``h``; increments the global step counter ``ix``.

    Args:
        epoch: index of the current epoch (not used in the computation; kept
            for the caller's interface and for logging).
        ttot: accumulated training time in seconds before this epoch.

    Returns:
        ``ttot`` plus the wall-clock seconds spent in this epoch.

    Raises:
        ValueError: if the (penalized) loss is NaN. The check runs before the
            optimizer step, so the weights are not updated with a NaN loss.
    """
    global ix

    # Put the model in train mode (unfreeze batch norm parameters)
    model.train()

    # Evaluate the penalty switch once, from the current value of `tik`, so
    # the finite-difference computation and the penalty term can never
    # disagree (a stale flag would otherwise leave `dl` undefined).
    penalize = tik > 0

    # Synchronize so the timer does not include previously queued GPU work.
    if has_cuda:
        torch.cuda.synchronize()
    tepoch = time.perf_counter()

    for x, target in train_loader:

        if has_cuda:
            x = x.cuda(0)
            target = target.cuda(0)

        optimizer.zero_grad()
        if penalize:
            # Needed to differentiate the loss with respect to the inputs.
            x.requires_grad_(True)

        prediction = model(x)
        lx = train_criterion(prediction, target)  # per-sample losses
        loss = lx.mean()

        # Compute finite difference approximation of directional derivative
        # of grad loss wrt inputs
        if penalize:

            # retain_graph: the same graph is reused by loss.backward() below.
            dx = grad(loss, x, retain_graph=True)[0]
            sh = dx.shape
            # The perturbed inputs below must not track gradients wrt x.
            x.requires_grad_(False)

            # v is the finite difference direction, one flattened row per sample.
            # For example, if norm=='L2', v is the (normalized) gradient of
            # the loss wrt inputs.
            v = dx.view(sh[0], -1)
            Nb, Nd = v.shape

            if norm == 'L2':
                # Scale each non-zero row to unit 2-norm; zero rows stay zero.
                nv = v.norm(2, dim=-1, keepdim=True)
                nz = nv.view(-1) > 0
                v[nz] = v[nz].div(nv[nz])
            elif norm == 'L1':
                # Sign of the gradient, scaled by 1/sqrt(Nd).
                v = v.sign()
                v = v/np.sqrt(Nd)
            elif norm == 'Linf':
                # One-hot direction: the sign of the largest-magnitude entry
                # of each row, zeros elsewhere.
                _, Jmax = v.abs().max(dim=-1)
                sg = v.sign()
                I = torch.arange(Nb, device=v.device)
                sg = sg[I, Jmax]

                v = torch.zeros_like(v)
                # Flat index of each row's arg-max entry.
                Ix = Jmax + I*Nd
                v.put_(Ix, sg)
            # Any other value of `norm` leaves v as the raw gradient.

            v = v.view(sh)
            xf = x + h*v

            mf = model(xf)
            lf = train_criterion(mf, target)
            if fd_order == 'O2':
                # Central difference: (l(x+hv) - l(x-hv)) / (2h)
                xb = x - h*v
                mb = model(xb)
                lb = train_criterion(mb, target)
                H = 2*h
            else:
                # Forward difference: (l(x+hv) - l(x)) / h
                H = h
                lb = lx
            dl = (lf-lb)/H  # This is the finite difference approximation
                            # of the directional derivative of the loss

            tik_penalty = dl.pow(2).mean()/2
            loss = loss + tik*tik_penalty

        # Fail before touching the weights if the loss is already NaN.
        if torch.isnan(loss).item():
            raise ValueError('model returned nan during training')

        loss.backward()
        optimizer.step()

        ix += 1

    if has_cuda:
        torch.cuda.synchronize()

    return ttot + time.perf_counter() - tepoch



In [10]:

def main():
    """Train for ``epochs`` epochs, evaluating after each one.

    Each epoch calls train(), advances the learning-rate schedule, then calls
    test(). Training is aborted once the test error has been at or above
    ``pct_max`` percent in ``fail_max`` epochs (counted cumulatively, not
    necessarily consecutively).

    Reads the globals ``epochs`` and ``schedule`` and the functions train()
    and test().

    Raises:
        ValueError: when the failure budget described above is exhausted.
    """
    # Checkpointing is currently disabled:
    #save_model_path = os.path.join(args.logdir, 'checkpoint.pth.tar')
    #best_model_path = os.path.join(args.logdir, 'best.pth.tar')

    pct_max = 90.
    fail_count = fail_max = 5
    # Accumulated training time in seconds. Named `elapsed` so it does not
    # shadow the `time` module inside this function.
    elapsed = 0.
    for e in range(epochs):

        elapsed = train(e, elapsed)

        # Update the learning rate *after* the epoch's optimizer steps.
        # Stepping the scheduler before any optimizer.step() skips the first
        # value of the schedule and triggers a PyTorch warning.
        schedule.step()

        _, pct_err = test(e, elapsed)
        if pct_err >= pct_max:
            fail_count -= 1

        if fail_count < 1:
            raise ValueError('Percent error was at least %g%% in %d epochs'
                             % (pct_max, fail_max))



In [11]:
def test(epoch, ttot):
    """Evaluate the global model on the training and test loaders.

    Puts the model in eval mode and, without tracking gradients, measures the
    average batch loss and the classification error (in percent) over
    ``train_loader`` and then over ``test_loader``, and prints both.

    Note that ``train_loader`` is the same loader used for training, so the
    reported training numbers are computed on its (randomly transformed)
    batches.

    Args:
        epoch: epoch index, used only in the printed report.
        ttot: accumulated training time; not used here (it was only consumed
            by the disabled CSV logging).

    Returns:
        (average test loss, test error in percent).
    """
    def _measure(loader):
        # One pass over `loader`; returns (mean of batch losses, error %).
        error_meter = tnt.meter.ClassErrorMeter()
        loss_meter = tnt.meter.AverageValueMeter()
        for data, target in loader:
            if has_cuda:
                target = target.cuda(0)
                data = data.cuda(0)

            output = model(data)
            error_meter.add(output, target)
            loss_meter.add(criterion(output, target).item())
        return loss_meter.value()[0], error_meter.value()[0]

    model.eval()

    with torch.no_grad():
        # Get the true training loss and error
        lt, t1t = _measure(train_loader)
        # Evaluate test data
        l, t1 = _measure(test_loader)

    # Report results (CSV logging of epoch, fval, pct_err, train_fval,
    # train_pct_err and time is disabled; only the console report remains).
    print('[Epoch %2d] Average test loss: %.3f, error: %.2f%%'
            %(epoch, l, t1))
    print('%28s: %.3f, error: %.2f%%\n'
            %('training loss',lt,t1t))

    return l, t1

In [12]:
# Run the full training loop; test() prints the per-epoch report shown below.
main()

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


[Epoch  0] Average test loss: 4.150, error: 95.96%
               training loss: 4.161, error: 95.86%

[Epoch  1] Average test loss: 3.782, error: 89.09%
               training loss: 3.789, error: 90.01%

[Epoch  2] Average test loss: 3.221, error: 82.06%
               training loss: 3.227, error: 82.05%

[Epoch  3] Average test loss: 2.870, error: 75.12%
               training loss: 2.884, error: 75.34%

[Epoch  4] Average test loss: 2.601, error: 69.76%
               training loss: 2.586, error: 69.87%

[Epoch  5] Average test loss: 2.369, error: 64.41%
               training loss: 2.334, error: 63.86%

[Epoch  6] Average test loss: 2.216, error: 60.08%
               training loss: 2.174, error: 59.21%

[Epoch  7] Average test loss: 2.147, error: 57.94%
               training loss: 2.059, error: 57.31%

[Epoch  8] Average test loss: 1.984, error: 54.45%
               training loss: 1.890, error: 53.06%

[Epoch  9] Average test loss: 1.929, error: 52.82%
               trainin

[Epoch 80] Average test loss: 1.319, error: 33.85%
               training loss: 0.419, error: 12.40%

[Epoch 81] Average test loss: 1.300, error: 33.22%
               training loss: 0.374, error: 11.10%

[Epoch 82] Average test loss: 1.346, error: 34.23%
               training loss: 0.429, error: 12.65%

[Epoch 83] Average test loss: 1.316, error: 33.60%
               training loss: 0.387, error: 11.32%

[Epoch 84] Average test loss: 1.301, error: 33.34%
               training loss: 0.375, error: 11.07%

[Epoch 85] Average test loss: 1.322, error: 34.16%
               training loss: 0.396, error: 11.80%

[Epoch 86] Average test loss: 1.349, error: 34.62%
               training loss: 0.385, error: 11.44%

[Epoch 87] Average test loss: 1.297, error: 33.29%
               training loss: 0.347, error: 10.14%

[Epoch 88] Average test loss: 1.332, error: 33.99%
               training loss: 0.363, error: 10.90%

[Epoch 89] Average test loss: 1.352, error: 34.39%
               trainin

In [13]:
# Save only the model's state_dict (not the model object itself) to a file
# in the current working directory.
filename = 'vgg16_cifar100_input_grad_reg.pth'
torch.save(model.state_dict(), filename)