Data loading and setup

In [2]:
# begin by importing our "Deep Vision" module (or dv in short)
import dv
from dv.model import DeepVision_VGG16
from dv.ImageFolder import CarsDataset
import dv.helpers as helpers
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim import lr_scheduler
from torch.autograd import Variable
import numpy as np
from PIL import Image, ImageOps
import os
from tqdm import tqdm

# Run configuration read by the setup code further down.
settings = dict(
    use_gpu=False,
    num_epochs=2,
    num_workers=4,
    batch_size=4,
    # optimisation hyper-parameters
    lr0=0.1,               # initial learning rate
    lr_updates=10,         # step size (in scheduler steps) between learning-rate decays
    lr_gamma=0.1,          # multiplicative factor applied at each decay
    momentum=0.9,
    weight_decay=0.000005,
    # checkpoint / data locations
    resume=False,          # path to a model checkpoint, or False to start fresh
    data_dir='/Users/kevinsiswandi/dvfp/data',
)

# The paper uses a 448 x 448 input; training images are randomly cropped to that size.
def transform_train():
    """Build the preprocessing pipeline applied to training images.

    Steps, in order: ``helpers.rescale(image, 448)``, a random horizontal
    flip with probability 0.3, a random 448 x 448 crop, conversion to a
    tensor, and per-channel normalisation with mean 0.5 and std 0.5.

    Returns:
        transforms.Compose: the composed pipeline.
    """
    return transforms.Compose([
        transforms.Lambda(lambda image: helpers.rescale(image, 448)),
        transforms.RandomHorizontalFlip(p=0.3),
        transforms.RandomCrop((448, 448)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ])

def transform_test():
    """Build the ten-crop preprocessing pipeline applied to test images.

    Steps, in order: ``helpers.rescale(image, 560)``, ``TenCrop(448)``, then
    each crop is converted to a tensor, normalised with mean 0.5 and std 0.5
    per channel, and all crops of one image are stacked along a new leading
    dimension with ``torch.stack``.

    Returns:
        transforms.Compose: the composed pipeline.
    """
    # Build the per-crop transforms once instead of re-creating them for
    # every crop of every image.
    to_tensor = transforms.ToTensor()
    normalize = transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))

    def stack_crops(crops):
        # One tensor per image: crops stacked along dimension 0.
        return torch.stack([normalize(to_tensor(crop)) for crop in crops])

    return transforms.Compose([
        transforms.Lambda(lambda image: helpers.rescale(image, 560)),
        transforms.TenCrop(448),
        transforms.Lambda(stack_crops),
    ])

def transform_test_noTC():
    """Build the single-crop (no ten-crop) test preprocessing pipeline.

    Steps, in order: ``helpers.rescale(image, 448)``, a 448 x 448 centre
    crop, conversion to a tensor, and per-channel normalisation with mean
    0.5 and std 0.5.

    Returns:
        transforms.Compose: the composed pipeline.
    """
    steps = (
        transforms.Lambda(lambda image: helpers.rescale(image, 448)),
        transforms.CenterCrop((448, 448)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    )
    return transforms.Compose(list(steps))

if __name__ == '__main__':
    # Entry point: build the network, loss, optimizer and learning-rate
    # schedule, optionally restore a checkpoint, then create the datasets
    # and data loaders. Every name bound here is a module-level global.

    print('Deep Vision (+:===:+) PART 1 : setup (+:===:+) (*^o^*) Begin')

    # Unpack the configuration into short local names.
    lr0 = settings['lr0']
    lr_updates = settings['lr_updates']
    lr_gamma = settings['lr_gamma']
    mom = settings['momentum']
    wdecay = settings['weight_decay']
    m = settings['batch_size']
    img_dir = settings['data_dir']
    nworkers = settings['num_workers']
    num_epochs = settings['num_epochs']

    net = DeepVision_VGG16(k = 10, M = 200)

    # Honour the 'use_gpu' switch when choosing the device, so that `device`
    # always names the place where `net` actually lives.
    use_cuda = bool(settings['use_gpu']) and torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    if settings['use_gpu']:
        print("Using ", torch.cuda.device_count(), " GPUs...")
        net = torch.nn.DataParallel(net)
        net = net.to(device)
        #cudnn.benchmark = True

    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(net.parameters(), lr=lr0, momentum=mom, weight_decay = wdecay)
    lr_policy = lr_scheduler.StepLR(optimizer, step_size=lr_updates, gamma=lr_gamma)

    # Defaults for a fresh run; they also keep both names bound when the
    # checkpoint path below turns out not to exist.
    start_epoch = 0
    best_top1 = 0

    # Optionally resume from a checkpoint
    if settings['resume']:

        if os.path.isfile(settings['resume']):
            # map_location lets a checkpoint saved on a GPU be restored on a
            # CPU-only machine (and vice versa).
            checkpoint = torch.load(settings['resume'], map_location=device)
            start_epoch = checkpoint['epoch']
            best_top1 = checkpoint['best_top1']
            net.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print('Loading Network  <==> Continue from {} at epoch #{}'.format(settings['resume'], checkpoint['epoch']))

        else:
            print('Loading Network  <==> Failed')

    # The backslash is escaped so the literal is valid; the printed text is unchanged.
    print('Deep Vision (+:===:+) PART 1 : setup (+:===:+) *\\(^o^)/* End')

    ####################################################################
    ####################################################################
    print('Deep Vision (+:===:+) PART 2 : loading dataset (+:===:+) (*^o^*) Begin')

    # All three datasets share the same class metadata file.
    meta_path = os.path.join(img_dir, 'devkit/cars_meta.mat')
    test_annos_path = os.path.join(img_dir, 'devkit/cars_test_annos_withlabels.mat')
    test_img_dir = os.path.join(img_dir, 'cars_test')

    train_dataset = CarsDataset(os.path.join(img_dir,'devkit/cars_train_annos.mat'),
                            os.path.join(img_dir,'cars_train'),
                            meta_path,
                            transform=transform_train()
                            )

    # Test set with ten-crop preprocessing.
    test_dataset = CarsDataset(test_annos_path,
                            test_img_dir,
                            meta_path,
                            transform=transform_test()
                            )

    # Same test set with a single centre crop.
    test_dataset_noTC = CarsDataset(test_annos_path,
                            test_img_dir,
                            meta_path,
                            transform=transform_test_noTC()
                            )

    # Pinned host memory only helps host-to-GPU copies; requesting it
    # without CUDA merely triggers a warning.
    train_loader = DataLoader(train_dataset, batch_size=m, shuffle=True, num_workers=nworkers, pin_memory=use_cuda)
    print("train size:", len(train_dataset))
    test_loader = DataLoader(test_dataset, batch_size=m, shuffle=True, num_workers=nworkers, pin_memory=use_cuda)
    test_loader_noTC = DataLoader(test_dataset_noTC, batch_size=m, shuffle=True, num_workers=nworkers, pin_memory=use_cuda)
    print("test size without tencrop:", len(test_dataset_noTC))
    print("test size:", len(test_dataset))


    print('Deep Vision (+:===:+) PART 2 : loading dataset (+:===:+) *\\(^o^)/* End')
    #######################################################
    #######################################################

Deep Vision (+:===:+) PART 1 : setup (+:===:+) (*^o^*) Begin
Deep Vision (+:===:+) PART 1 : setup (+:===:+) *\(^o^)/* End
Deep Vision (+:===:+) PART 2 : loading dataset (+:===:+) (*^o^*) Begin
train size: 8144
test size without tencrop: 8041
test size: 8041
Deep Vision (+:===:+) PART 2 : loading dataset (+:===:+) *\(^o^)/* End


In [6]:
# may need to get classes
#car_classes = train_dataset.map_class()

# load model

Predict image labels and draw boxes


In [5]:
# Directory that receives the visualisation output; created if missing.
# exist_ok avoids the check-then-create race of isdir() followed by mkdir().
result_dir = './vis_result'
os.makedirs(result_dir, exist_ok=True)

# Input images are read from here as 0.jpg, 1.jpg, ...
img_dir = './vis_image'
# Count only the .jpg files so stray directory entries (e.g. .DS_Store)
# do not make the loop below ask for images that do not exist.
num_imgs = sum(1 for name in os.listdir(img_dir) if name.endswith('.jpg'))

# Run the model on each image named <index>.jpg inside img_dir.
for img in range(num_imgs):
    img_path = os.path.join(img_dir, '{}.jpg'.format(img))
    
    # Preprocessing for prediction: the single-centre-crop test pipeline.
    # NOTE(review): both transforms are rebuilt on every iteration although
    # they do not depend on the image.
    transform1 = transform_test_noTC() 
    # Preprocessing for visualisation: resize, 448 x 448 centre crop, then
    # padding of 42 on each axis; the result is not converted to a tensor.
    transform2 = transforms.Compose([
                                transforms.Resize(448),
                                transforms.CenterCrop((448, 448)),
                                transforms.Pad((42, 42))
                            ])
    
    # NOTE: rebinds the loop variable `img` from the integer index to the
    # opened PIL image; the range iterator itself is unaffected.
    img = Image.open(img_path)
    
    img_pad = transform2(img)
    img_tensor = transform1(img)
    # Add a leading dimension of size 1 (presumably a batch of one image).
    img_tensor = img_tensor.unsqueeze(0)
    # NOTE(review): `model` is not bound in the cells shown here (the network
    # built during setup is named `net`) — confirm where it is loaded.
    out1, out2, out3, indices = model(img_tensor)
    # Combine the three model outputs; the third is down-weighted by 0.1.
    out = out1 + out2 + 0.1 *out3
    