# "Deep Vision" architecture

![title](img/arch-simplified.png)

<img src="img/side-branch.png" alt="drawing" width="500"/>

In [1]:
# begin by importing our "Deep Vision" module (or dv in short)
import dv
from dv.model import DeepVision_VGG16
from dv.ImageFolder import CarsDataset
import dv.helpers as helpers
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
import numpy as np
from PIL import Image, ImageOps
import os

In [2]:
# Paper use input size of 448 x 448, we will use random crop to this size

def transform_train():
    transform_list = []
    transform_list.append(torchvision.transforms.Lambda(lambda x:helpers.rescale(x, 448)))
    transform_list.append(torchvision.transforms.RandomHorizontalFlip(p=0.3))
    transform_list.append(torchvision.transforms.RandomCrop((448, 448)))
    transform_list.append(torchvision.transforms.ToTensor())
    transform_list.append(torchvision.transforms.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5)))
    return torchvision.transforms.Compose(transform_list)

def transform_test():
    transform_list = []
    transform_list.append(torchvision.transforms.Lambda(lambda x:helpers.rescale(x, 560)))
    transform_list.append(torchvision.transforms.TenCrop(448)) 
    transform_list.append(torchvision.transforms.Lambda(lambda crops: torch.stack([transforms.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5))((transforms.ToTensor())(crop)) for crop in crops])) )
    return torchvision.transforms.Compose(transform_list)

def transform_test_simple():
    transform_list = []
    transform_list.append(torchvision.transforms.Lambda(lambda x:helpers.rescale(x, 448)))
    transform_list.append(torchvision.transforms.CenterCrop((448, 448)))
    transform_list.append(torchvision.transforms.ToTensor())
    transform_list.append(torchvision.transforms.Normalize(mean=(0.5,0.5,0.5),std=(0.5,0.5,0.5)))
    return torchvision.transforms.Compose(transform_list)

In [3]:
lr0 = 0.1 #initial learning rate
momentum = 0.9
weight_decay = 0.000005
net = DeepVision_VGG16(k = 10, M = 200)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(net.parameters(), lr=lr0, momentum=momentum, weight_decay = weight_decay)

Dataset is from https://ai.stanford.edu/~jkrause/cars/car_dataset.html

In [4]:
# Load dataset

img_dir = '/Users/kevinsiswandi/dvfp/data'
train_dataset = CarsDataset(os.path.join(img_dir,'devkit/cars_train_annos.mat'),
                            os.path.join(img_dir,'cars_train'),
                            os.path.join(img_dir,'devkit/cars_meta.mat'),
                            transform=transform_train()
                            )

test_dataset = CarsDataset(os.path.join(img_dir,'devkit/cars_test_annos_withlabels.mat'),
                            os.path.join(img_dir,'cars_test'),
                            os.path.join(img_dir,'devkit/cars_meta.mat'),
                            transform=transform_test()
                            )

train_loader = DataLoader(train_dataset, batch_size=64,
                            shuffle=True, num_workers=1)
print("train size:", len(train_dataset))

test_loader = DataLoader(test_dataset, batch_size=64,
                            shuffle=True, num_workers=1)
print("test size:", len(test_dataset))

train size: 8144
test size: 8041


In [5]:
# training

for epoch in range(5):
    lr = lr0 * 0.9 ** (epoch//10) #decay the learning rate every 10 epochs
    for group in optimizer.param_groups:
        group['lr'] = lr
    net.train()
    
    running_loss = 0
    correct_top1 = 0
    correct_top5 = 0
    checkpoints = 2000 #for printing loss
    
    for i, data in enumerate(train_loader):
        inputs, labels = data
        labels = labels.type(torch.LongTensor)
        
        # forward pass
        out_g, out_p, out_s, _ = net(inputs)

        loss_g = criterion(out_g, labels)
        loss_p = criterion(out_p, labels)
        loss_s = criterion(out_s, labels)
        
        loss = 1.0 * loss_g + 1.0 * loss_p + 0.1 * loss_s # the weights for each stream as in the paper
        
        running_loss += loss.item()
        assert not np.isnan(running_loss), "Fuck loss blows up!"
        
        running_loss_g += loss_g.item()
        running_loss_p += loss_p.item()
        running_loss_s += loss_s.item()
        
        out = 1.0 * out_g + 1.0 * out_p + 0.1 * out_s 
        top1, top5 = dv.helpers.get_accuracy(out, labels, topk=(1, 5))  # paper cited only top-1
        
        correct_top1 += top1
        correct_top5 += top5
        
        # backward pass + optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        if i % checkpoints: # size of data is around 8000 in total
            print('Train epoch: {}, {}, {} || Loss: {:.4f} ||\n'
            'Loss G-Stream: {:.4f} || Loss P-Stream: {:.4f} || Loss side branch : {.4f} ||\n'
            'Top-1 Acc: {:.2f} || Top-5 Acc: {:.2f} %%'.format(
            epoch, i, len(train_loader), running_loss/checkpoints, running_loss_g/checkpoints, running_loss_p/checkpoints, running_loss_s/checkpoints, correct_top1, correct_top5))
            
            losses = np.array([running_loss, running_loss_g, running_loss_p, running_loss_s])/checkpoints
            
            # reset the loss and accuracy at every checkpoints
            running_loss = 0
            running_loss_g = 0
            running_loss_p = 0
            running_loss_s = 0
            
            save_train_info(epoch, i, len(train_loader), losses, correct_top1, correct_top5)
            
    

NameError: name 'running_loss_g' is not defined