# Autonomous Perception Robustness Testing Framework (APRTF)
### Development Journal

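We show that our general framework can be applied to the [nuScenes](https://www.nuscenes.org/) dataset using the multi-stage analysis proposed in ["Perception robustness testing at different levels of generality"](https://www.journalfieldrobotics.org/FR/Papers_files/10_Pezzementi.pdf). Note: the nuScenes loading cell below is currently disabled, so the detection cells in this journal run on the PennFudanPed validation split instead.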

In [1]:
import logging
import os

import numpy as np
import torch

# augmentation
import torchvision.transforms as T
import aprtf.augmentations as A

# dataset
from nuscenes.utils.geometry_utils import view_points
from nuscenes import NuScenes
# Root directory handed to the nuScenes devkit as `dataroot`.
data_dir = './data/sets/nuScenes'
# Open the mini split of the dataset.
nusc = NuScenes(
    dataroot=data_dir,
    version='v1.0-mini',
    verbose=True,
)

# torchvision reference code
import aprtf.dataset as D
from aprtf.torchvision_detection.coco_utils import get_coco_api_from_dataset
from aprtf.pycocotools_robustness.cocoeval import COCOeval
from aprtf.torchvision_detection.coco_eval import CocoEvaluator
from aprtf.config import cfg
from aprtf.models import ModelBuilder
from aprtf.torchvision_detection import utils
from aprtf.visuals import show_predictions, show_results
#from aprtf.analysis import Analyzer

# logging
from tqdm import tqdm

print("All packages imported!")


# One shared seed for every random number generator used in this notebook.
SEED = 42
for _set_seed in (np.random.seed, torch.manual_seed):
    _set_seed(SEED)

print("Random seed set")

Loading NuScenes tables for version v1.0-mini...
23 category,
8 attribute,
4 visibility,
911 instance,
12 sensor,
120 calibrated_sensor,
31206 ego_pose,
8 log,
10 scene,
404 sample,
31206 sample_data,
18538 sample_annotation,
4 map,
Done loading in 0.445 seconds.
Reverse indexing ...
Done reverse indexing in 0.1 seconds.
All packages imported!
Random seed set




## I. Pedestrian Detection

### Data and Labels

Time-ordered iterator of images and bounding boxes.

In [2]:
# Disabled nuScenes loading path: the whole cell is wrapped in a string literal,
# so none of it executes and no `odgt` / `dataset` is defined by this cell.
# NOTE(review): if re-enabled, `get_transform` is referenced unqualified on the
# last line, while the active dataset cell calls `D.get_transform` - confirm.
"""
# nuScenes
def box2bb(box, cam_intrinsic):
    corners = torch.tensor(view_points(box.corners(), view=cam_intrinsic, normalize=True)[:2, :])
    bb = torch.cat([torch.min(corners, dim=1).values, torch.max(corners, dim=1).values]).tolist()
    return bb
    
category = 'pedestrian'
sensor = 'CAM_FRONT'
visibility_threshold = 2

odgt = []

for scene in nusc.scene:
    next_sample_token = scene['first_sample_token']
    while next_sample_token:
        sample = nusc.get('sample', next_sample_token)
        sample_data = nusc.get('sample_data', sample['data'][sensor])

        # image filepaths
        sample_data_fp = os.path.join(data_dir,sample_data['filename'])

        # bounding boxes
        sample_data_bbs = []
        for ann in sample['anns']:
            _, box, cam_intrinsic = nusc.get_sample_data(sample['data'][sensor], selected_anntokens=[ann])
            if len(box) > 1:
                raise ValueError('more than one annotation')

            visibility_token = nusc.get('sample_annotation', ann)['visibility_token']
            visibility = int(visibility_token)
            if (len(box) == 1) and (category in box[0].name) and (visibility >= visibility_threshold):
                bb = box2bb(box[0], cam_intrinsic)
                sample_data_bbs.append(bb)

        # odgt
        odgt.append(
            {
                'image': sample_data_fp,
                'annotations': sample_data_bbs
            }
        )

        # next sample
        next_sample_token = sample['next']

dataset = D.PedestrianDetectionDataset(odgt, get_transform(train=False))
"""

"\n# nuScenes\ndef box2bb(box, cam_intrinsic):\n    corners = torch.tensor(view_points(box.corners(), view=cam_intrinsic, normalize=True)[:2, :])\n    bb = torch.cat([torch.min(corners, dim=1).values, torch.max(corners, dim=1).values]).tolist()\n    return bb\n    \ncategory = 'pedestrian'\nsensor = 'CAM_FRONT'\nvisibility_threshold = 2\n\nodgt = []\n\nfor scene in nusc.scene:\n    next_sample_token = scene['first_sample_token']\n    while next_sample_token:\n        sample = nusc.get('sample', next_sample_token)\n        sample_data = nusc.get('sample_data', sample['data'][sensor])\n\n        # image filepaths\n        sample_data_fp = os.path.join(data_dir,sample_data['filename'])\n\n        # bounding boxes\n        sample_data_bbs = []\n        for ann in sample['anns']:\n            _, box, cam_intrinsic = nusc.get_sample_data(sample['data'][sensor], selected_anntokens=[ann])\n            if len(box) > 1:\n                raise ValueError('more than one annotation')\n\n           

In [3]:
# Overlay the checkpoint's YAML settings onto the shared aprtf config object.
cfg_path = os.path.join(
    'ckpt',
    'fasterrcnn_resnet50_fpn-pennfudanped',
    'config.yaml',
)
cfg.merge_from_file(cfg_path)

In [4]:
# Validation list for PennFudanPed; the list file name is read from the config.
dataset_path = os.path.join('data', 'sets', 'PennFudanPed', cfg.DATASET.LIST.val)

# Combine the eval-time transform with a Gaussian-blur augmentation.
normal_transform = D.get_transform(train=False)
aug_transform = T.GaussianBlur(kernel_size=5, sigma=3)
all_transform = A.TransformAugmentationCompose(normal_transform, aug_transform)

dataset = D.PedestrianDetectionDataset(dataset_path, transforms=all_transform)

In [5]:
# One image per batch, in dataset order, collated by the torchvision reference helper.
data_loader = torch.utils.data.DataLoader(
    dataset,
    batch_size=1,
    shuffle=False,
    num_workers=1,
    collate_fn=utils.collate_fn,
)

### Model

In [6]:
# Run on the GPU when one is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    # WARNING level so the notice passes the root logger's default threshold;
    # an INFO record would be silently dropped without extra logging setup.
    logging.warning('No GPU found! Running on CPU')
    device = torch.device('cpu')

# Build the detector described by the merged config and load its checkpoint.
weights_path = os.path.join('ckpt', 'fasterrcnn_resnet50_fpn-pennfudanped', 'weights_best.pth')
model = ModelBuilder.build_detector(args=cfg.MODEL, weights=weights_path)
model.to(device)
# Switch to inference mode; as the last expression this also displays the model.
model.eval()

Loading weights for detector


FasterRCNN(
  (transform): GeneralizedRCNNTransform(
      Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
      Resize(min_size=(800,), max_size=1333, mode='bilinear')
  )
  (backbone): BackboneWithFPN(
    (body): IntermediateLayerGetter(
      (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
      (layer1): Sequential(
        (0): Bottleneck(
          (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
       

### Metric

Recall and $FPR_A$.

In [7]:
def evaluate(model, data_loader, device, score_min=0.9):
    """Run the detector over a data loader and summarize COCO-style bbox results.

    Args:
        model: detection model; it is called on a list of image tensors and
            must return one dict of tensors per image.
        data_loader: iterable of ``(images, targets)`` batches. Its ``dataset``
            attribute is converted to a COCO API object for the ground truth,
            and each target must carry an ``"image_id"`` tensor.
        device: torch device the images are moved to before inference.
        score_min: value forwarded to ``CocoEvaluator`` as ``score_min``
            (presumably the minimum detection confidence that is evaluated -
            TODO confirm against aprtf's evaluator). Defaults to 0.9, the
            value that used to be hard-coded.

    Returns:
        The ``CocoEvaluator`` after ``accumulate()`` and ``summarize()`` ran.
    """
    cpu_device = torch.device("cpu")
    model.eval()
    coco = get_coco_api_from_dataset(data_loader.dataset)
    coco_evaluator = CocoEvaluator(COCOeval, coco, ['bbox'], score_min=score_min)

    for images, targets in tqdm(data_loader):
        images = [img.to(device) for img in images]
        with torch.no_grad():
            outputs = model(images)
            # Move predictions to the CPU before handing them to the evaluator.
            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
        # Drop the reference to the device-side batch before updating the evaluator.
        del images
        # .item() is required: an int is not treated the same as a tensor int
        # when used as a dictionary key.
        res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
        coco_evaluator.update(res)

    coco_evaluator.accumulate()
    coco_evaluator.summarize()
    return coco_evaluator

In [8]:
# Evaluate the detector on the (augmented) validation loader and keep the evaluator.
evaluate_log = evaluate(model=model, data_loader=data_loader, device=device)

creating index...
index created!


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 34/34 [00:17<00:00,  1.98it/s]

Accumulating evaluation results...
DONE (t=0.00s).
IoU metric: bbox
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.900
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.900
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.900
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.900
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.900
 Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = -1.000
 Average Recall     (AR) @[ IoU=0.50:0.95 | area




In [9]:
# Display the evaluator returned by evaluate(); only its default repr is shown.
evaluate_log

<aprtf.torchvision_detection.coco_eval.CocoEvaluator at 0x7fbf1a7ab790>

In [10]:
# Sensitivity, i.e. the predicted FPR on the validation set, tabulated per
# confidence-score threshold.
scores = np.arange(0, 1.01, 0.01)
# -1.0 marks thresholds whose FPR has not been filled in yet.
sens = np.full_like(scores, -1.0)

for i, score in enumerate(scores):
    # TODO: placeholder - every threshold currently gets an FPR of 0.
    fpr = 0
    sens[i] = fpr

# Standard sensitivities (powers of ten), each matched to the score whose
# predicted FPR is closest.
std_sens = [10 ** exponent for exponent in range(-4, 3)]
std_score = [scores[np.abs(sens - target).argmin()] for target in std_sens]
print(std_sens, std_score)


# TODO: loop through all scores

[0.0001, 0.001, 0.01, 0.1, 1, 10, 100] [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]


In [11]:
def sROCa():
    """Unimplemented stub (presumably an ROC-style metric - TODO confirm); returns None."""
    return None

## Evaluation

In [12]:
# Sketch of the planned multi-stage Analyzer loop, kept inside a string literal
# so it does not execute (the Analyzer import in the first cell is commented out
# and dt1..dt4, tf1 and the stage models are not defined in this notebook).
'''
data = [dt1, dt2, dt3, dt4]
augs = [tf1, None, None, None] 
model = [detector, robotizer, pathifier, controller]
metric = [roc, dist_acc, safety_violation, speed_efficiency]

analyzer = Analyzer(data, augs, model, metric)
for i in range(len(analyzer)):
    result = analyzer.step()
    print(result)
'''

'\ndata = [dt1, dt2, dt3, dt4]\naugs = [tf1, None, None, None] \nmodel = [detector, robotizer, pathifier, controller]\nmetric = [roc, dist_acc, safety_violation, speed_efficiency]\n\nanalyzer = Analyzer(data, augs, model, metric)\nfor i in range(len(analyzer)):\n    result = analyzer.step()\n    print(result)\n'