In [1]:
%cd ../

/home/tadenoud/Git/simple-faster-rcnn-pytorch


In [2]:
# Seed handed to np.random.seed() at the end of this cell.
RANDOM_SEED = 42
# NOTE(review): not referenced by any of the cells shown in this part of the notebook - confirm it is still needed.
SUBSET_SAMPLE_DATA_PERCENTAGE = 0.1
# NOTE(review): also not referenced by the cells shown here.
MIN_IOU_REQUIRED = 0.7  # The minimum IOU required to be a good detection (i.e. higher is more strict)
# NOTE(review): also not referenced by the cells shown here.
NUM_PROPOSALS = 300
# Directory from which the benchmark cells below load the fitted Mahalanobis and one-class SVM models.
CHECKPOINT_DIR = './idd_cars_drop_checkpoints_with_nms'

%matplotlib inline

%load_ext autoreload
%autoreload 2

import os
import torch as t
from model import FasterRCNNVGG16
from trainer import FasterRCNNTrainer
import numpy as np
import pickle
import h5py
from tqdm.auto import tqdm, trange
import matplotlib.pyplot as plt

from data.util import  read_image
from utils.vis_tool import vis_bbox
from utils import array_tool as at
from utils.config import opt
from utils.idd_utils import get_annotations
from model.utils.bbox_tools import bbox_iou

from data.dataset import Dataset, TestDataset, KittiDataset
from torch.utils.data import DataLoader
from utils.vis_tool import vis_bbox, vis_image
from utils.idd_utils import IndiaDrivingDataset
from ood_metrics import auroc, plot_roc, calc_metrics
from sklearn import svm
import pickle
import itertools
import torch
from model.faster_rcnn import nograd
from data.dataset import preprocess
from utils.mahalanobis import Mahal_Dist


import warnings
# Report each distinct warning only the first time it is raised.
warnings.filterwarnings(action='once')

# Seed both NumPy and PyTorch: the detector and the MC-dropout benchmarks
# below draw their randomness from PyTorch, which np.random.seed does not touch.
np.random.seed(RANDOM_SEED)
t.manual_seed(RANDOM_SEED)

In [3]:
# Location of the KITTI data in VOC layout. The KITTI_VOC_DIR environment
# variable overrides the original machine-specific default, so the notebook can
# run on another machine without editing this cell.
opt.voc_data_dir = os.environ.get(
    "KITTI_VOC_DIR",
    "/media/tadenoud/DATADisk/datasets/kitti_2d/VOC2012/",
)

# Validation split used as the source of the benchmark image.
kitti_val_dataset = KittiDataset(opt, split='val')
kitti_val_samples = len(kitti_val_dataset)

In [4]:
# Build the detector and wrap it in the project's trainer; .cuda() means this cell needs a CUDA device.
# NOTE(review): visdom=False presumably disables visdom logging - confirm against FasterRCNNTrainer.
faster_rcnn = FasterRCNNVGG16()
trainer = FasterRCNNTrainer(faster_rcnn, visdom=False).cuda()

In [5]:
%ls ./checkpoints/cars_drop/

fasterrcnn_11060017_0.7454640828611436  fasterrcnn_11061638_0.7497252402944325
fasterrcnn_11061427_0.7282598515502949  fasterrcnn_11061705_0.7526206908232453
fasterrcnn_11061454_0.733123502219126   fasterrcnn_11062316_0.7550167879591324
fasterrcnn_11061507_0.7394557961451338


In [6]:
# Restore the checkpoint with the highest numeric suffix from the directory listing above.
# NOTE(review): the suffix is presumably the validation score - confirm against the training script.
save_dir = './checkpoints/cars_drop'
trainer.load(os.path.join(save_dir, 'fasterrcnn_11062316_0.7550167879591324'))
# Switch the detector to eval mode. As the last expression of the cell, its return value is echoed as the cell output.
trainer.faster_rcnn.eval()

FasterRCNNVGG16(
  (extractor): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=

In [7]:
# Short alias for the detector; the benchmark functions below reach it through this global.
model = trainer.faster_rcnn

In [8]:
# Get a single image
# With shuffle=False and batch_size=1 this is always the first validation sample;
# every benchmark function below reuses it through the global `imgs`.
dataloader = DataLoader(kitti_val_dataset,
                batch_size=1,
                shuffle=False,
                pin_memory=True,
                num_workers=1)

# The fifth field of the sample is not used in this notebook.
imgs, sizes, gt_bboxes, gt_labels, _ = next(iter(dataloader))
# Pull the two size entries of the single sample out as plain Python numbers.
# NOTE(review): presumably (height, width) - confirm against KittiDataset.
sizes = [sizes[0][0].item(), sizes[1][0].item()]
# Drop the leading batch dimension (size 1) from the ground-truth tensors.
gt_bboxes = gt_bboxes.squeeze(dim=0)
gt_labels = gt_labels.squeeze(dim=0)

# Max Softmax

In [30]:
%reset -y array

UsageError: option -y not recognized ( allowed: "sf" )


In [10]:
def max_softmax():
    """Baseline benchmark: one plain detector pass over the global ``imgs``.

    Returns:
        Whatever ``model.predict_with_features`` returns when called with
        ``visualize=True`` and no further options.
    """
    return model.predict_with_features(imgs, visualize=True)

In [11]:
%%timeit -r 10
max_softmax()



126 ms ± 660 µs per loop (mean ± std. dev. of 10 runs, 1 loop each)


# ODIN

In [12]:
def odin():
    """ODIN benchmark: detector pass with temperature and perturbation set.

    Returns:
        Whatever ``model.predict_with_features`` returns for the global
        ``imgs`` with ``visualize=True``, ``temperature=1000`` and
        ``perturbation=0.002``.
    """
    odin_settings = dict(temperature=1000, perturbation=0.002)
    return model.predict_with_features(imgs, visualize=True, **odin_settings)

In [13]:
%%timeit -r 10
odin()



342 ms ± 9.04 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


# Mahalanobis

In [14]:
# Load the saved Mahalanobis-distance model from CHECKPOINT_DIR; mahal() below calls it through the global `md`.
mahal_model_name = os.path.join(CHECKPOINT_DIR, "kitti-cars-fc-4096-4-mahalanobis.h5")
md = Mahal_Dist.load(mahal_model_name)

def mahal():
    """Mahalanobis benchmark: detect, then score one feature set with ``md``.

    Runs ``model.predict_with_features`` on the global ``imgs`` and feeds the
    entry at index 4 of the first image's features to the global ``md``.

    Returns:
        The value returned by ``md``.
    """
    _, _, _, features_per_image = model.predict_with_features(imgs, visualize=True)
    first_image_features = features_per_image[0]
    return md(first_image_features[4])

In [15]:
%%timeit -r 10
mahal()

136 ms ± 378 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)


# SVM FC

In [16]:
# Load the previously saved one-class SVM for this benchmark (nothing is trained here).
# NOTE: pickle.load can execute arbitrary code from the file - only load checkpoints you trust.
ONE_CLASS_SAVE_PATH = os.path.join(CHECKPOINT_DIR, "kitti-fc-4096-1-one-class.pickle")
with open(ONE_CLASS_SAVE_PATH, 'rb') as f:
    clf = pickle.load(f)

def svm_fc(clf=clf):
    """SVM benchmark on feature index 1: detect, then classify with ``clf``.

    Args:
        clf: Classifier to use. Defaults to the one loaded just above, bound
            when this function is defined. Later cells rebind the global
            ``clf`` to other classifiers; the default keeps this benchmark on
            its own model regardless of execution order.

    Returns:
        ``clf.predict`` applied to the entry at index 1 of the first image's
        features returned by ``model.predict_with_features``.
    """
    _, _, _, pred_features = model.predict_with_features(imgs, visualize=True)
    return clf.predict(pred_features[0][1])

In [17]:
%%timeit -r 10
svm_fc()











128 ms ± 249 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)




# SVM Conv

In [18]:
# Load the previously saved one-class SVM for this benchmark.
# NOTE: pickle.load can execute arbitrary code from the file - only load checkpoints you trust.
ONE_CLASS_SAVE_PATH = os.path.join(CHECKPOINT_DIR, "kitti-conv-512-one-class.pickle")
with open(ONE_CLASS_SAVE_PATH, 'rb') as f:
    clf = pickle.load(f)

def svm_conv(clf=clf):
    """SVM benchmark on feature index 0: detect, then classify with ``clf``.

    Args:
        clf: Classifier to use. Defaults to the one loaded just above, bound
            when this function is defined. Other cells rebind the global
            ``clf`` to different classifiers; the default keeps this benchmark
            on its own model regardless of execution order.

    Returns:
        ``clf.predict`` applied to the entry at index 0 of the first image's
        features returned by ``model.predict_with_features``.
    """
    _, _, _, pred_features = model.predict_with_features(imgs, visualize=True)
    return clf.predict(pred_features[0][0])

In [19]:
%%timeit -r 10
svm_conv()











127 ms ± 249 µs per loop (mean ± std. dev. of 10 runs, 10 loops each)




# SVM Vahdat

In [20]:
@nograd
def get_all_features(self, imgs, visualize=False):
    """Return one spatially averaged feature vector per extractor layer.

    Only the first image of ``imgs`` is processed, because the function returns
    from inside the image loop. If ``imgs`` is empty the loop body never runs
    and the result is ``None``. The model is put into eval mode as a side
    effect.

    NOTE(review): the extractor printed earlier in this notebook contains
    ``ReLU(inplace=True)`` layers, and every activation is kept as a tensor
    until the loop over layers has finished. An activation that feeds an
    in-place ReLU has therefore presumably been overwritten before it is
    converted. Do not reorder the conversion without confirming what the saved
    classifiers were fitted on.

    Args:
        imgs: Iterable of images.
        visualize: When true, each image is converted with ``at.tonumpy`` and
            run through ``preprocess`` first; otherwise images are used as given.

    Returns:
        A list with one array per layer of ``self.extractor``: the layer output
        averaged over axes 2 and 3, with the leading axis squeezed away.
    """
    self.eval()
    if visualize:
        prepared_imgs = list()
        for img in imgs:
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
    else:
         prepared_imgs = imgs 

    for img in prepared_imgs:
        # Add a leading batch axis and move the image to the GPU as float.
        img = at.totensor(img[None], cuda=True).float()
        # Start the chain with the image so each layer can consume features[-1].
        features = [img]
        
        for l in self.extractor:
            features.append(l(features[-1]))
        
        # Convert every collected activation with at.tonumpy (presumably copying it off the GPU)
        features = [at.tonumpy(f) for f in features]
        # Remove the first feature (i.e. the image)
        features = features[1:]
        # Average the features over the final two dimensions
        features = [np.mean(f, axis=(2,3)).squeeze(axis=0) for f in features]
        # Returning here means only the first image is ever processed.
        return features
    
# Bind the function to this model instance so it can be called as model.get_all_features(...).
model.get_all_features = get_all_features.__get__(model)

In [21]:
# In this cell the hyper-parameters only select which saved classifier file is loaded.
nu = 0.01
gamma = 'auto'
kernel = 'rbf'
# Index into the per-layer list returned by model.get_all_features.
oodl = 23

# NOTE: pickle.load can execute arbitrary code from the file - only load checkpoints you trust.
save_path = os.path.join(CHECKPOINT_DIR, "one_class_models/kitti-one-class-extended-feat%d-nu%.4f-gamma%s-kernel%s.pickle" % (oodl, nu, gamma, kernel))
with open(save_path, 'rb') as f:
    clf = pickle.load(f)

def svm_vahdat(clf=clf):
    """Per-box SVM benchmark: crop each detection and classify its features.

    Detects on the global ``imgs``, crops every predicted box out of ``imgs``,
    runs ``model.get_all_features`` on each crop and classifies the feature at
    index ``oodl``.

    Args:
        clf: Classifier to use. Defaults to the one loaded just above, bound
            when this function is defined. Earlier cells bind the global
            ``clf`` to other classifiers; the default keeps this benchmark on
            its own model regardless of execution order.

    Returns:
        ``clf.predict`` applied to the list of per-box feature vectors.
    """
    pred_bboxes, _, _, _ = model.predict_with_features(imgs, visualize=True)
    pred_bboxes = pred_bboxes[0]

    features = []
    for box in at.totensor(pred_bboxes):
        # Box entries are read as (y1, x1, y2, x2) and rounded to integer pixel indices.
        y1, x1, y2, x2 = box.round().type(torch.int)
        im = imgs[:, :, y1:y2, x1:x2]

        feat = model.get_all_features(im, visualize=True)
        features.append(feat[oodl])

    return clf.predict(features)

In [22]:
%%timeit -r 10
svm_vahdat()



12.3 s ± 32.9 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


# Entropy

In [23]:
import mc_uncertainty as mcu
from model.utils.bbox_tools import loc2bbox, bbox2loc, bbox_iou
from torch.nn import functional as F

def _predict_mc(self, imgs, sizes=None, visualize=False, num_passes=1):
    """Detect objects and collect class scores from repeated head passes.

    The extractor and the RPN run once per image; the RoI head then runs
    ``num_passes`` times on the same features and proposals. Boxes are decoded
    from the first head pass only, while the softmax scores of every pass are
    handed to ``self._suppress_with_features`` together with the first-pass
    scores.

    NOTE(review): the passes only differ if the head is stochastic (e.g.
    dropout while the model is in train mode, which the callers in this
    notebook switch on) - confirm against the head implementation.

    Args:
        imgs: Iterable of images. With ``visualize=True`` each image is
            converted with ``at.tonumpy`` and run through ``preprocess`` first;
            otherwise images are used as given.
        sizes: One size per image, used to compute the scale and to clip the
            boxes. Required when ``visualize`` is false; ignored and recomputed
            from ``img.shape[1:]`` when ``visualize`` is true.
        visualize: Also switches the model to the ``'visualize'`` preset, which
            is not reset afterwards.
        num_passes: Total number of head passes per image.

    Returns:
        ``(bboxes, labels, scores, mc_scores)``: four lists with one entry per
        image, as produced by ``self._suppress_with_features``.

    Raises:
        ValueError: If ``visualize`` is false and ``sizes`` is not given.
    """
    if visualize:
        self.use_preset('visualize')
        prepared_imgs = list()
        sizes = list()
        for img in imgs:
            size = img.shape[1:]
            img = preprocess(at.tonumpy(img))
            prepared_imgs.append(img)
            sizes.append(size)
    else:
        # Without this guard the zip() below fails with an opaque TypeError.
        if sizes is None:
            raise ValueError("sizes must be provided when visualize=False")
        prepared_imgs = imgs
    bboxes = list()
    labels = list()
    scores = list()
    mc_scores = list()

    for img, size in zip(prepared_imgs, sizes):
        # Ratio between the prepared image and the requested size; img has no batch axis yet.
        scale = img.shape[2] / size[1]

        img = at.totensor(img[None], cuda=True).float()

        h = self.extractor(img)
        rpn_locs, rpn_scores, rois, roi_indices, anchor = self.rpn(h, img.shape[2:], scale)
        roi_cls_locs, roi_scores = self.head(h, rois, roi_indices)

        # Keep the first pass and add num_passes - 1 further head passes on the same inputs.
        roi_scores = [roi_scores] + [self.head(h, rois, roi_indices)[1] for _ in range(num_passes-1)]

        # We are assuming that batch size is 1.
        roi_score = [s.data for s in roi_scores]
        roi_cls_loc = roi_cls_locs.data
        roi = at.totensor(rois) / scale

        # Convert predictions to bounding boxes in image coordinates.
        # Bounding boxes are scaled to the scale of the input images.
        mean = t.Tensor(self.loc_normalize_mean).cuda(). \
            repeat(self.n_class)[None]
        std = t.Tensor(self.loc_normalize_std).cuda(). \
            repeat(self.n_class)[None]

        roi_cls_loc = (roi_cls_loc * std + mean)
        roi_cls_loc = roi_cls_loc.view(-1, self.n_class, 4)
        roi = roi.view(-1, 1, 4).expand_as(roi_cls_loc)
        cls_bbox = loc2bbox(at.tonumpy(roi).reshape((-1, 4)),
                            at.tonumpy(roi_cls_loc).reshape((-1, 4)))
        cls_bbox = at.totensor(cls_bbox)
        cls_bbox = cls_bbox.view(-1, self.n_class * 4)
        # clip bounding box
        cls_bbox[:, 0::2] = (cls_bbox[:, 0::2]).clamp(min=0, max=size[0])
        cls_bbox[:, 1::2] = (cls_bbox[:, 1::2]).clamp(min=0, max=size[1])

        # One softmax probability array per head pass.
        prob = [at.tonumpy(F.softmax(at.totensor(s), dim=1)) for s in roi_score]

        raw_cls_bbox = at.tonumpy(cls_bbox)
        raw_prob = [at.tonumpy(p) for p in prob]

        # First-pass probabilities drive the suppression; all passes ride along in the last argument.
        bbox, label, score, mc_score = self._suppress_with_features(raw_cls_bbox, raw_prob[0], raw_prob)

        bboxes.append(bbox)
        labels.append(label)
        scores.append(score)
        mc_scores.append(mc_score)

    # self.use_preset('evaluate')

    return bboxes, labels, scores, mc_scores

# Bind the function to this model instance so it can be called as model.predict_mc(...).
model.predict_mc = _predict_mc.__get__(model)

In [24]:
def entropy():
    """Entropy benchmark: 10 head passes in train mode, then negated entropy.

    The model is switched to train mode for the duration of
    ``model.predict_mc`` (presumably so dropout stays active between passes)
    and is always put back into eval mode, even if the prediction raises.

    Returns:
        The negated result of ``mcu.predicted_entropy`` on the first image's
        multi-pass scores.
    """
    model.train()
    try:
        _, _, _, pred_mc_scores = model.predict_mc(imgs, visualize=True, num_passes=10)
    finally:
        # Never leave the shared model in train mode for the following benchmarks.
        model.eval()

    return -mcu.predicted_entropy(np.array(pred_mc_scores[0]))

In [25]:
%%timeit -r 10
entropy()



356 ms ± 3.41 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


# Mutual Information

In [26]:
def mutual_information():
    """Mutual-information benchmark: 10 head passes in train mode.

    The model is switched to train mode for the duration of
    ``model.predict_mc`` (presumably so dropout stays active between passes)
    and is always put back into eval mode, even if the prediction raises.

    Returns:
        The result of ``mcu.mutual_information`` on the first image's
        multi-pass scores.
    """
    model.train()
    try:
        _, _, _, pred_mc_scores = model.predict_mc(imgs, visualize=True, num_passes=10)
    finally:
        # Never leave the shared model in train mode for the following benchmarks.
        model.eval()

    return mcu.mutual_information(np.array(pred_mc_scores[0]))

In [27]:
%%timeit -r 10
mutual_information()

357 ms ± 3.34 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)


# Mahalanobis with input perturbation

In [28]:
# Load the Mahalanobis-distance model again (same file as the plain Mahalanobis benchmark),
# so this cell does not depend on that earlier cell having run. This rebinds the global `md`.
mahal_model_name = os.path.join(CHECKPOINT_DIR, "kitti-cars-fc-4096-4-mahalanobis.h5")
md = Mahal_Dist.load(mahal_model_name)

def mahal_pert():
    """Mahalanobis benchmark with temperature and perturbation enabled.

    Runs ``model.predict_with_features`` on the global ``imgs`` with
    ``temperature=1000`` and ``perturbation=0.002``, then feeds the entry at
    index 4 of the first image's features to the global ``md``.

    Returns:
        The value returned by ``md``.
    """
    perturb_settings = dict(temperature=1000, perturbation=0.002)
    _, _, _, features_per_image = model.predict_with_features(
        imgs, visualize=True, **perturb_settings)
    first_image_features = features_per_image[0]
    return md(first_image_features[4])

In [29]:
%%timeit -r 10
mahal_pert()

328 ms ± 5.19 ms per loop (mean ± std. dev. of 10 runs, 1 loop each)
