# "Deep Vision" architecture

![Simplified "Deep Vision" architecture](arch-simplified.png)

<img src="side-branch.png" alt="Side branch" width="500"/>

In [9]:
import os

import torch
import torchvision
from PIL import Image, ImageOps
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms

# begin by importing our "Deep Vision" module (or dv in short)
import dv
from dv.model import DeepVision_VGG16
from dv.ImageFolder import CarsDataset

In [4]:
# The paper uses an input size of 448 x 448; training images are randomly cropped to this size.

def scale_keep_ar_min_fixed(img, fixed_min):
    """Resize ``img`` so that its shorter side is ``fixed_min`` pixels long.

    The longer side is scaled by the same factor using integer (floor)
    division, so the aspect ratio is kept up to truncation. Resampling uses
    bicubic interpolation.

    Args:
        img: Image object exposing ``size`` as ``(width, height)`` and a
            ``resize((width, height), resample)`` method.
        fixed_min: Target length, in pixels, of the shorter side.

    Returns:
        The result of ``img.resize`` for the computed target size.
    """
    width, height = img.size
    if width < height:
        # Width is the shorter side: pin it and scale the height to match.
        target_size = (fixed_min, fixed_min * height // width)
    else:
        # Height is the shorter (or equal) side: pin it and scale the width.
        target_size = (fixed_min * width // height, fixed_min)
    return img.resize(target_size, Image.BICUBIC)

def transform_train():
    """Build the training-time preprocessing pipeline.

    Steps, in order:
      1. Resize so the shorter side is 448 px (aspect ratio preserved).
      2. Random horizontal flip with probability 0.3.
      3. Random 448 x 448 crop.
      4. Convert to a tensor.
      5. Normalize each of the three channels with mean 0.5 and std 0.5.

    Returns:
        A ``transforms.Compose`` object wrapping the steps above.
    """
    # Use the `transforms` alias throughout, matching the test-time factories
    # in this notebook, instead of mixing it with `torchvision.transforms`.
    return transforms.Compose([
        transforms.Lambda(lambda x: scale_keep_ar_min_fixed(x, 448)),
        transforms.RandomHorizontalFlip(p=0.3),
        transforms.RandomCrop((448, 448)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])

def transform_test():
    """Build the ten-crop evaluation pipeline.

    Steps, in order:
      1. Resize so the shorter side is 560 px (aspect ratio preserved).
      2. ``TenCrop(448)``, which yields a sequence of crops per image.
      3. Convert every crop to a tensor, normalize it with mean 0.5 and
         std 0.5 per channel, and stack the crops into a single tensor.

    Returns:
        A ``transforms.Compose`` object wrapping the steps above.
    """
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))

    def resize_short_side(image):
        return scale_keep_ar_min_fixed(image, 560)

    def crops_to_batch(crops):
        # One tensor per crop, stacked along a new leading dimension.
        return torch.stack([normalize(to_tensor(crop)) for crop in crops])

    return transforms.Compose([
        transforms.Lambda(resize_short_side),
        transforms.TenCrop(448),
        transforms.Lambda(crops_to_batch),
    ])

def transform_test_simple():
    """Build the single-crop evaluation pipeline.

    Steps, in order:
      1. Resize so the shorter side is 448 px (aspect ratio preserved).
      2. Center crop to 448 x 448.
      3. Convert to a tensor.
      4. Normalize each of the three channels with mean 0.5 and std 0.5.

    Returns:
        A ``transforms.Compose`` object wrapping the steps above.
    """
    resize_short_side = transforms.Lambda(lambda image: scale_keep_ar_min_fixed(image, 448))
    center_crop = transforms.CenterCrop((448, 448))
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
    return transforms.Compose([resize_short_side, center_crop, to_tensor, normalize])

In [7]:
# SGD hyper-parameters.
lr = 0.1
momentum = 0.9
weight_decay = 5e-6

# k and M are forwarded unchanged to the model constructor.
# NOTE(review): presumably k is filters-per-class and M the number of classes — confirm against dv.model.
net = DeepVision_VGG16(k=10, M=200)

# Classification loss and the optimizer over every model parameter.
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(
    net.parameters(),
    lr=lr,
    momentum=momentum,
    weight_decay=weight_decay,
)

In [None]:
# Datasets

In [12]:
# Dataset root. Set the DV_DATA_DIR environment variable to point the notebook
# at another machine's copy; the default keeps the original location.
img_dir = os.environ.get('DV_DATA_DIR', '/Users/kevinsiswandi/dvfp/data')

# Training split: annotation file, image folder and class-name metadata.
# transform_train is a zero-argument factory, so it is *called* here to pass
# the composed pipeline; passing the bare function would fail with a TypeError
# as soon as the dataset applies the transform to an image.
# NOTE(review): assumes CarsDataset invokes `transform(image)` — confirm in dv.ImageFolder.
train_dataset = CarsDataset(os.path.join(img_dir,'devkit/cars_train_annos.mat'),
                            os.path.join(img_dir,'cars_train'),
                            os.path.join(img_dir,'devkit/cars_meta.mat'),
                            cleaned=None,
                            transform=transform_train()
                            )

# Test split, built the same way with the ten-crop evaluation pipeline.
test_dataset = CarsDataset(os.path.join(img_dir,'devkit/cars_test_annos_withlabels.mat'),
                            os.path.join(img_dir,'cars_test'),
                            os.path.join(img_dir,'devkit/cars_meta.mat'),
                            cleaned=None,
                            transform=transform_test()
                            )

In [16]:
# Wrap the datasets in batched loaders.

# Training batches are reshuffled every epoch.
train_loader = DataLoader(train_dataset, batch_size=64,
                            shuffle=True, num_workers=1)
print("train size:", len(train_dataset))

# Evaluation does not benefit from shuffling; a fixed order keeps per-sample
# results reproducible and comparable between runs.
test_loader = DataLoader(test_dataset, batch_size=64,
                            shuffle=False, num_workers=1)
print("test size:", len(test_dataset))

train size: 8144
test size: 8041
