# "Deep Vision" architecture

![title](../img/arch-simplified.png)

<img src="../img/side-branch.png" alt="drawing" width="500"/>

In [1]:
# Begin by importing our "Deep Vision" module (dv for short)
import dv
from dv.model import DeepVision_VGG16
from dv.ImageFolder import CarsDataset
import dv.helpers as helpers
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image, ImageOps
import os

In [2]:
# Notebook-wide configuration; later cells read these names.
batch_size = 64     # samples per DataLoader batch
num_workers = 4     # worker processes used by each DataLoader
use_gpu = False     # set to True to run on CUDA
resume = False      # truthy => try to restore model/optimizer from a checkpoint
best_top1 = 0       # best top-1 accuracy so far; stored in saved checkpoints

In [3]:
# The paper uses an input size of 448 x 448; training images are randomly cropped to this size.

def transform_train():
    """Build the training-time preprocessing pipeline.

    Steps, in order: ``helpers.rescale(x, 448)``, random horizontal flip
    with probability 0.3, random 448 x 448 crop, conversion to a tensor,
    and per-channel normalization with mean 0.5 and std 0.5.

    Returns:
        A ``torchvision.transforms.Compose`` wrapping the steps above.
    """
    tf = torchvision.transforms
    pipeline = [
        # NOTE(review): exact resizing rule lives in dv.helpers.rescale — confirm there.
        tf.Lambda(lambda img: helpers.rescale(img, 448)),
        tf.RandomHorizontalFlip(p=0.3),
        tf.RandomCrop((448, 448)),
        tf.ToTensor(),
        tf.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
    return tf.Compose(pipeline)

def transform_test():
    """Build the ten-crop evaluation preprocessing pipeline.

    Steps, in order: ``helpers.rescale(x, 560)``, ``TenCrop(448)``, then
    each crop is converted to a tensor, normalized with mean 0.5 / std 0.5
    per channel, and all crops are stacked into one tensor.

    Returns:
        A ``torchvision.transforms.Compose`` wrapping the steps above.
    """
    tf = torchvision.transforms

    def _stack_normalized_crops(crops):
        # Convert and normalize every crop, then stack along a new leading dimension.
        to_tensor = tf.ToTensor()
        normalize = tf.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
        return torch.stack([normalize(to_tensor(crop)) for crop in crops])

    pipeline = [
        # NOTE(review): exact resizing rule lives in dv.helpers.rescale — confirm there.
        tf.Lambda(lambda img: helpers.rescale(img, 560)),
        tf.TenCrop(448),
        tf.Lambda(_stack_normalized_crops),
    ]
    return tf.Compose(pipeline)

def transform_test_noTC():
    """Build the single-crop (no TenCrop) evaluation preprocessing pipeline.

    Steps, in order: ``helpers.rescale(x, 448)``, center 448 x 448 crop,
    conversion to a tensor, and per-channel normalization with mean 0.5
    and std 0.5.

    Returns:
        A ``torchvision.transforms.Compose`` wrapping the steps above.
    """
    tf = torchvision.transforms
    rescale_step = tf.Lambda(lambda img: helpers.rescale(img, 448))
    crop_step = tf.CenterCrop((448, 448))
    tensor_step = tf.ToTensor()
    normalize_step = tf.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    return tf.Compose([rescale_step, crop_step, tensor_step, normalize_step])

In [4]:
# Optimisation hyper-parameters
lr0 = 0.1 #initial learning rate
momentum = 0.9
weight_decay = 0.000005

# Model, loss and optimizer
net = DeepVision_VGG16(k = 10, M = 200)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr0, momentum=momentum, weight_decay = weight_decay)

# Optionally resume from a checkpoint.
# `resume` may be a plain flag or a path string to the checkpoint file.
# NOTE(review): the default file name below is an assumption — point it at
# wherever checkpoints are actually written.
start_epoch = 0
checkpoint_path = resume if isinstance(resume, str) else 'checkpoint.pth.tar'
if resume:
    if os.path.isfile(checkpoint_path):
        # Load onto CPU first; load_state_dict copies into the existing parameters.
        checkpoint = torch.load(checkpoint_path, map_location='cpu')
        start_epoch = checkpoint['epoch']
        # The save cell writes 'best_top1'; 'best_prec1' is accepted as a fallback key.
        best_top1 = checkpoint.get('best_top1', checkpoint.get('best_prec1', best_top1))
        net.load_state_dict(checkpoint['state_dict'])
        optimizer.load_state_dict(checkpoint['optimizer'])
        print('DFL-CNN <==> Part2 : Load Network  <==> Continue from {} epoch {}'.format(checkpoint_path, checkpoint['epoch']))
    else:
        print('DFL-CNN <==> Part2 : Load Network  <==> Failed')

The dataset comes from https://ai.stanford.edu/~jkrause/cars/car_dataset.html

In [5]:
# Load dataset

# Root folder holding `devkit/`, `cars_train/` and `cars_test/`.
# Set the DV_DATA_DIR environment variable to override the default location.
img_dir = os.environ.get('DV_DATA_DIR', '/Users/kevinsiswandi/dvfp/data')

# Annotation / metadata files shared by the datasets below.
meta_file = os.path.join(img_dir, 'devkit/cars_meta.mat')
train_annos = os.path.join(img_dir, 'devkit/cars_train_annos.mat')
test_annos = os.path.join(img_dir, 'devkit/cars_test_annos_withlabels.mat')
train_images = os.path.join(img_dir, 'cars_train')
test_images = os.path.join(img_dir, 'cars_test')

train_dataset = CarsDataset(train_annos, train_images, meta_file,
                            transform=transform_train())

# Same test images twice: once with the ten-crop pipeline, once with a single center crop.
test_dataset = CarsDataset(test_annos, test_images, meta_file,
                           transform=transform_test())

test_dataset_noTC = CarsDataset(test_annos, test_images, meta_file,
                                transform=transform_test_noTC())

train_loader = DataLoader(train_dataset, batch_size=batch_size,
                          shuffle=True, num_workers=num_workers)
print("train size:", len(train_dataset))

# Evaluation does not benefit from shuffling; a fixed order keeps per-image logs reproducible.
test_loader = DataLoader(test_dataset, batch_size=batch_size,
                         shuffle=False, num_workers=num_workers)

# Kept shuffled: the preview cell draws a random batch from this loader.
test_loader_noTC = DataLoader(test_dataset_noTC, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)

print("test size:", len(test_dataset))

train size: 8144
test size: 8041


In [6]:
# training

num_epochs = 1    # number of passes over the training set
checkpoints = 20 #for printing loss

# Honour `use_gpu`: move the model once, then send every batch to the same device.
device = torch.device('cuda' if use_gpu else 'cpu')
net.to(device)

for epoch in range(num_epochs):
    lr = lr0 * 0.9 ** (epoch//10) #decay the learning rate every 10 epochs
    for group in optimizer.param_groups:
        group['lr'] = lr
    net.train()

    # Running sums over the epoch; divided by the batch count when reported.
    running_loss = 0
    running_loss_g = 0
    running_loss_p = 0
    running_loss_s = 0
    correct_top1 = 0
    correct_top5 = 0

    for i, data in enumerate(train_loader):
        inputs, labels = data
        inputs = inputs.to(device)
        # CrossEntropyLoss needs integer (long) class indices on the model's device.
        labels = labels.to(device=device, dtype=torch.long)

        # forward pass
        out_g, out_p, out_s, _ = net(inputs)

        loss_g = criterion(out_g, labels)
        loss_p = criterion(out_p, labels)
        loss_s = criterion(out_s, labels)

        loss = 1.0 * loss_g + 1.0 * loss_p + 0.1 * loss_s # the weights for each stream as in the paper

        running_loss += loss.item()
        # Stop before the optimizer step if the loss has become NaN.
        assert not np.isnan(running_loss), "Fuck loss blows up!"

        running_loss_g += loss_g.item()
        running_loss_p += loss_p.item()
        running_loss_s += loss_s.item()

        # Accuracy is a metric only, so keep it out of the autograd graph.
        with torch.no_grad():
            out = 1.0 * out_g + 1.0 * out_p + 0.1 * out_s
            top1, top5 = helpers.get_accuracy(out, labels, topk=(1, 5))  # paper cited only top-1

        correct_top1 += top1.item()
        correct_top5 += top5.item()

        # backward pass + optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % checkpoints == 0:
            # Per-batch averages of the four losses and the two accuracies so far.
            losses = np.array([running_loss, running_loss_g, running_loss_p, running_loss_s, correct_top1, correct_top5])/(i+1)
            print('Train epoch: {} [{}/{}] || Loss: {:.4f} ||\n'
                  'Loss G-Stream: {:.4f} || Loss P-Stream: {:.4f} || Loss side branch: {:.4f} ||\n'
                  'Top-1 Acc: {:.2f} || Top-5 Acc: {:.2f}'.format(
                      epoch, i, len(train_loader), losses[0], losses[1], losses[2], losses[3], losses[4], losses[5]))

            helpers.save_train_info(epoch, i, len(train_loader), losses)
            
    

Train epoch: 0 [0/128] || Loss: 11.1782 ||
Loss G-Stream: 5.3060 || Loss P-Stream: 5.3425 || Loss side branch: 5.2964 ||
Top-1 Acc: 1.56 || Top-5 Acc: 1.56


Exception ignored in: <bound method _DataLoaderIter.__del__ of <torch.utils.data.dataloader._DataLoaderIter object at 0xb2392a518>>
Traceback (most recent call last):
  File "/anaconda3/lib/python3.6/site-packages/torch/utils/data/dataloader.py", line 675, in __del__
    def __del__(self):
KeyboardInterrupt


KeyboardInterrupt: 

In [None]:
# functions to show an image
def imshow(img):
    """Display a normalized image tensor with matplotlib.

    Reverses the mean 0.5 / std 0.5 normalization applied by this notebook's
    transforms, moves axis 0 to the end, and draws the result on the current
    matplotlib axes.

    Args:
        img: image tensor; presumably channel-first (C, H, W), as the
            transpose below moves axis 0 last.
    """
    # Imported here because the notebook's import cell does not bring in matplotlib.
    import matplotlib.pyplot as plt

    img = img / 2 + 0.5     # unnormalize
    # detach/cpu so tensors that track gradients or live on the GPU can be shown too
    npimg = img.detach().cpu().numpy()
    plt.imshow(np.transpose(npimg, (1, 2, 0)))

# get one batch of test images (single center crop, no TenCrop)
dataiter = iter(test_loader_noTC)
# Use the built-in next(); DataLoader iterators no longer expose a .next() method.
images, labels = next(dataiter)

# show images
imshow(torchvision.utils.make_grid(images))

In [None]:
# evaluate on test set
correct = 0.0   # sum of per-image top-1 scores returned by get_accuracy
total = 0       # number of images evaluated

# Honour `use_gpu`, switch to inference mode, and disable autograd for evaluation.
device = torch.device('cuda' if use_gpu else 'cpu')
net.to(device)
net.eval()

with torch.no_grad():
    for i, data in enumerate(test_loader):

        images, labels = data  # images: [batch, 10 crops, 3, 448, 448] from the TenCrop pipeline
        images = images.to(device)
        labels = labels.to(device=device, dtype=torch.long)

        for j in range(images.size(0)):
            crops = images[j]  # the ten crops of one image, fed to the net as one batch
            out_g, out_p, out_s, _ = net(crops)
            # Combine the streams as in training, then average the scores over the crops.
            out = (out_g + out_p + 0.1 * out_s).mean(dim=0, keepdim=True)

            # Score against this image's own label, not the whole batch's labels.
            top1, top5 = helpers.get_accuracy(out, labels[j:j + 1], topk=(1, 5))  # paper cited only top-1
            correct += top1.item()
            print('Test <==> Img:{} Top1 {:.3f} Top5 {:.3f}'.format(total, top1.item(), top5.item()))
            total += 1

print('Test Total <==> Top1 {:.3f}%'.format(correct/total))

In [None]:
# remember best prec@1 and save checkpoint

# float() accepts both a plain number and a one-element tensor.
top1_acc = float(correct / total)
# `>=` so the first evaluated run is always recorded as the best so far.
is_best = top1_acc >= best_top1
best_top1 = max(best_top1, top1_acc)

# NOTE(review): `save_checkpoint` is not defined or imported in the visible cells —
# confirm where it comes from before running this cell.
save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': net.state_dict(),
                'best_top1': best_top1,
                'optimizer' : optimizer.state_dict(),
                'top1'     : top1_acc,
            }, is_best=is_best)