## YOLO V2

In [None]:
import time
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import torch
import cv2
import numpy as np
import json
import glob
import os

from darknet_v2 import Darknet
from util_model_img import load_classes, letterbox_image, image_to_tensor, predict_transform_v2, write_results


def detection_defense(model, val_anno, img_dir, results_json, class_id, inp_dim, fps=False, device="cuda"):

    '''
    Run `model` on every .jpg/.png image in `img_dir`, write the detections
    to `results_json` in COCO results format and print the COCO bbox metrics
    restricted to `class_id`.

    model: callable applied to the tensor produced by `image_to_tensor`.
        It is expected to return an int when nothing is detected, otherwise a
        2-D tensor with one row per detection: columns 0-3 are the box corners
        (x1, y1, x2, y2) in letterboxed input coordinates, column 4 is the
        confidence and the last column is the class index.
        NOTE(review): layout inferred from how the rows are indexed below --
        confirm against the model implementation.
    val_anno: annotations of validation set images
    img_dir: dir of validation set images
    results_json: detection result
    class_id: HA target class - person (0) and AA target class - stop sign (11)
    inp_dim: side length of the square letterboxed network input
    fps: when True, print the number of images, elapsed time and images/second
    device: torch device the input tensor is moved to

    Returns None. Raises KeyError if an image file is not listed in
    `val_anno`, and ValueError if an image cannot be read or the model output
    has an unexpected shape.
    '''

    device = torch.device(device)

    # Map annotation file names to COCO image ids.
    with open(val_anno) as f:
        file2id = {image['file_name']: image['id'] for image in json.load(f)['images']}

    imgs = [name for name in os.listdir(img_dir) if name.endswith(('.jpg', '.png'))]

    results = []

    if fps:
        start = time.time()

    for img in imgs:

        image_id = file2id[img]

        img_path = os.path.join(img_dir, img)
        img_cv = cv2.imread(img_path)
        if img_cv is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("could not read image: %s" % img_path)
        img_h, img_w = img_cv.shape[:2]
        img_cv = letterbox_image(img_cv, [inp_dim, inp_dim])
        img_ts = image_to_tensor(img_cv).to(device)

        output = model(img_ts)

        if isinstance(output, int):
            # No detections for this image.
            continue
        if output.dim() != 2 or output.size(1) < 5:
            raise ValueError("unexpected model output shape: %s" % (tuple(output.shape),))

        # Post-processing only; keep it out of any autograd graph.
        output = output.detach()

        # One (w, h, w, h) row per detection, on the same device as the
        # detections so `device` is honoured (no hard-coded .cuda()).
        im_dim = torch.tensor((img_w, img_h), dtype=torch.float32,
                              device=output.device).repeat(output.size(0), 2)

        # Undo the letterbox: remove the padding, then rescale to the
        # original image resolution.
        scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
        output[:, [0, 2]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
        output[:, 0:4] /= scaling_factor

        # Clip the boxes to the image.
        output[:, [0, 2]] = torch.clamp(output[:, [0, 2]], 0.0, float(img_w))
        output[:, [1, 3]] = torch.clamp(output[:, [1, 3]], 0.0, float(img_h))

        # Pull everything off the device in a few bulk transfers instead of
        # several .item() calls per detection.
        scores = output[:, 4].float().tolist()
        classes = output[:, -1].int().tolist()
        # NOTE: the top-left corner is truncated to integers while width and
        # height stay fractional; kept as-is so results remain comparable.
        corners = output[:, 0:2].int().tolist()
        sizes = (output[:, 2:4] - output[:, 0:2]).float().tolist()

        for conf, cls, (x, y), (w, h) in zip(scores, classes, corners, sizes):
            results.append({'image_id': image_id,
                            'category_id': cls,
                            'bbox': [x, y, w, h],
                            'score': conf})

    if fps:
        end = time.time()
        total_time = end-start
        print("%d imgs, %f seconds, average: %f fps" % (len(imgs), total_time, len(imgs)/total_time))

    with open(results_json, 'w') as f:
        f.write(json.dumps(results, indent=4))

    if not results:
        # COCO.loadRes inspects the first entry of the results list, so an
        # empty results file cannot be loaded; there is nothing to score.
        print("no detections were produced; skipping COCO evaluation")
        return

    cocoGt = COCO(val_anno)
    cocoDt = cocoGt.loadRes(results_json)
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.params.catIds = [class_id]
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

In [2]:
# Build the model evaluated by the YOLO V2 cells below.
import patchFilter as pf

# NOTE: `device` is not passed to the detection_defense calls below; they rely
# on the function's own default of "cuda".
device = "cuda"
model_pf = pf.defense_model_v2()


### INRIA dataset


In [3]:
# Score the v2 model on the INRIA "pos" images for class 0 (person) and
# report throughput.
detection_defense(
    model=model_pf,
    val_anno="Dataset/HA/test_annotations.json",
    img_dir="Dataset/HA/INRIA/pos/",
    results_json="test_map/yolov2_inria_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 10.638337 seconds, average: 27.071900 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.11s).
Accumulating evaluation results...
DONE (t=0.01s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.499
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.867
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.509
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.228
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.549
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.329
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.572
 Average Rec


#### Adv Patch

In [4]:
# Score the v2 model on the INRIA "patch_v2" images for class 0 (person),
# using the adversarial-set annotations.
detection_defense(
    model=model_pf,
    val_anno="Dataset/HA/test_adv_annotations.json",
    img_dir="Dataset/HA/INRIA/patch_v2/",
    results_json="test_map/yolov2_inria_patchv2_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 7.115052 seconds, average: 40.477569 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.08s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.430
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.784
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.438
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.025
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.305
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.582
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.305
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.492
 Average Recal

#### Adv Cloak

In [None]:

# Score the v2 model on the INRIA "cloak_v2" images for class 0 (person),
# using the adversarial-set annotations.
detection_defense(
    model=model_pf,
    val_anno="Dataset/HA/test_adv_annotations.json",
    img_dir="Dataset/HA/INRIA/cloak_v2/",
    results_json="test_map/yolov2_inria_cloakv2_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 7.285420 seconds, average: 39.531008 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.08s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.410
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.749
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.410
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.004
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.302
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.555
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.298
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.476
 Average Recal

#### Adv Tshirt

In [None]:
# Score the v2 model on the INRIA "tshirt_v2" images for class 0 (person),
# using the adversarial-set annotations.
detection_defense(
    model=model_pf,
    val_anno="Dataset/HA/test_adv_annotations.json",
    img_dir="Dataset/HA/INRIA/tshirt_v2/",
    results_json="test_map/yolov2_inria_tshirtv2_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 7.228490 seconds, average: 39.842347 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.18s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.373
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.716
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.342
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.257
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.520
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.286
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.445
 Average Recal

#### Natural Patch

In [7]:
# Score the v2 model on the INRIA "natural_v2" images for class 0 (person),
# using the adversarial-set annotations.
detection_defense(
    model=model_pf,
    val_anno="Dataset/HA/test_adv_annotations.json",
    img_dir="Dataset/HA/INRIA/natural_v2/",
    results_json="test_map/yolov2_inria_naturalv2_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 7.317182 seconds, average: 39.359416 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.08s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.293
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.630
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.241
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.006
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.248
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.360
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.219
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.364
 Average Recal

### AA

In [8]:
# Score the v2 model on the AA "imgs_s" images for class 11 (stop sign).
detection_defense(
    model=model_pf,
    val_anno="Dataset/AA/stop_test_annotations.json",
    img_dir="Dataset/AA/imgs_s/",
    results_json="test_map/yolov2_aa_stopsign_pf.json",
    class_id=11,
    inp_dim=416,
    fps=True
)

1000 imgs, 23.854434 seconds, average: 41.920927 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.12s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.079
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.114
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.100
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.037
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.129
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.085
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.085
 Average Re

In [9]:

# Score the v2 model on the AA "imgs_p" images for class 0 (person).
detection_defense(
    model=model_pf,
    val_anno="Dataset/AA/person_test_annotations.json",
    img_dir="Dataset/AA/imgs_p/",
    results_json="test_map/yolov2_aa_person_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

1000 imgs, 23.788232 seconds, average: 42.037592 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.21s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.010
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.017
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.012
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.047
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.034
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.041
 Average Re

## YOLO V3

In [None]:
import torch
import cv2
import numpy as np
import time
import json
import glob
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

from util_model_img import write_result, letterbox_image, image_to_tensor


def detection_defense_v3(model, val_anno, img_dir, results_json, class_id, inp_dim, fps=False, device="cuda"):

    '''
    Run `model` on every .jpg/.png image in `img_dir`, write the detections
    to `results_json` in COCO results format and print the COCO bbox metrics
    restricted to `class_id`.

    model: callable applied to the tensor produced by `image_to_tensor`.
        It is expected to return an int when nothing is detected, otherwise a
        2-D tensor with one row per detection: columns 0-3 are the box corners
        (x1, y1, x2, y2) in letterboxed input coordinates, column 4 is the
        confidence and the last column is the class index.
        NOTE(review): layout inferred from how the rows are indexed below --
        confirm against the model implementation.
    val_anno: annotations of validation set images
    img_dir: dir of validation set images
    results_json: detection result
    class_id: HA target class - person (0) and AA target class - stop sign (11)
    inp_dim: side length of the square letterboxed network input
    fps: when True, print the number of images, elapsed time and images/second
    device: torch device the input tensor is moved to

    Returns None. Raises KeyError if an image file is not listed in
    `val_anno`, and ValueError if an image cannot be read or the model output
    has an unexpected shape.
    '''

    # `os` is used below but is not among this section's imports; import it
    # locally so the function works on its own.
    import os

    device = torch.device(device)

    # Map annotation file names to COCO image ids.
    with open(val_anno) as f:
        file2id = {image['file_name']: image['id'] for image in json.load(f)['images']}

    imgs = [name for name in os.listdir(img_dir) if name.endswith(('.jpg', '.png'))]

    results = []

    if fps:
        start = time.time()

    for img in imgs:

        image_id = file2id[img]

        img_path = os.path.join(img_dir, img)
        img_cv = cv2.imread(img_path)
        if img_cv is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("could not read image: %s" % img_path)
        img_h, img_w = img_cv.shape[:2]
        img_cv = letterbox_image(img_cv, [inp_dim, inp_dim])
        img_ts = image_to_tensor(img_cv).to(device)

        output = model(img_ts)

        if isinstance(output, int):
            # No detections for this image.
            continue
        if output.dim() != 2 or output.size(1) < 5:
            raise ValueError("unexpected model output shape: %s" % (tuple(output.shape),))

        # Post-processing only; keep it out of any autograd graph.
        output = output.detach()

        # One (w, h, w, h) row per detection, on the same device as the
        # detections so `device` is honoured (no hard-coded .cuda()).
        im_dim = torch.tensor((img_w, img_h), dtype=torch.float32,
                              device=output.device).repeat(output.size(0), 2)

        # Undo the letterbox: remove the padding, then rescale to the
        # original image resolution.
        scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1)
        output[:, [0, 2]] -= (inp_dim - scaling_factor * im_dim[:, 0].view(-1, 1)) / 2
        output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 1].view(-1, 1)) / 2
        output[:, 0:4] /= scaling_factor

        # Clip the boxes to the image.
        output[:, [0, 2]] = torch.clamp(output[:, [0, 2]], 0.0, float(img_w))
        output[:, [1, 3]] = torch.clamp(output[:, [1, 3]], 0.0, float(img_h))

        # Pull everything off the device in a few bulk transfers instead of
        # several .item() calls per detection.
        scores = output[:, 4].float().tolist()
        classes = output[:, -1].int().tolist()
        # NOTE: the top-left corner is truncated to integers while width and
        # height stay fractional; kept as-is so results remain comparable.
        corners = output[:, 0:2].int().tolist()
        sizes = (output[:, 2:4] - output[:, 0:2]).float().tolist()

        for conf, cls, (x, y), (w, h) in zip(scores, classes, corners, sizes):
            results.append({'image_id': image_id,
                            'category_id': cls,
                            'bbox': [x, y, w, h],
                            'score': conf})

    if fps:
        end = time.time()
        total_time = end-start
        print("%d imgs, %f seconds, average: %f fps" % (len(imgs), total_time, len(imgs)/total_time))

    with open(results_json, 'w') as f:
        f.write(json.dumps(results, indent=4))

    if not results:
        # COCO.loadRes inspects the first entry of the results list, so an
        # empty results file cannot be loaded; there is nothing to score.
        print("no detections were produced; skipping COCO evaluation")
        return

    cocoGt = COCO(val_anno)
    cocoDt = cocoGt.loadRes(results_json)
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.params.catIds = [class_id]
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()



In [2]:

# Build the model evaluated by the YOLO V3 cells below.
import patchFilter as pf

# NOTE: `device` is not passed to the detection_defense_v3 calls below; they
# rely on the function's own default of "cuda".
device = "cuda"
model_pf = pf.defense_model_v3()


### INRIA

In [3]:
# Score the v3 model on the INRIA "pos" images for class 0 (person) and
# report throughput.
detection_defense_v3(
    model=model_pf,
    val_anno="Dataset/HA/test_annotations.json",
    img_dir="Dataset/HA/INRIA/pos/",
    results_json="test_map/yolov3_inria_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 16.150583 seconds, average: 17.832173 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.19s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.581
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.951
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.644
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.448
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.604
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.347
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.663
 Average Rec

In [4]:
# Score the v3 model on the INRIA "patch_v3" images for class 0 (person),
# using the adversarial-set annotations.
detection_defense_v3(
    model=model_pf,
    val_anno="Dataset/HA/test_adv_annotations.json",
    img_dir="Dataset/HA/INRIA/patch_v3/",
    results_json="test_map/yolov3_inria_patchv3_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 12.990185 seconds, average: 22.170584 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.12s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.13s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.462
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.824
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.506
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.040
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.384
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.577
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.318
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.549
 Average Reca

In [5]:
# Score the v3 model on the INRIA "natural_v3" images for class 0 (person),
# using the adversarial-set annotations.
# The results file uses the "yolov3_" prefix like the other cells in this
# section (it was previously written under a "yolov2_" name).
detection_defense_v3(
    model=model_pf,
    val_anno="Dataset/HA/test_adv_annotations.json",
    img_dir="Dataset/HA/INRIA/natural_v3/",
    results_json="test_map/yolov3_inria_naturalv3_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 13.177919 seconds, average: 21.854741 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.14s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.392
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.738
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.363
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.067
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.386
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.461
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.267
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.515
 Average Reca

### AA

In [6]:
# Score the v3 model on the AA "imgs_s" images for class 11 (stop sign).
detection_defense_v3(
    model=model_pf,
    val_anno="Dataset/AA/stop_test_annotations.json",
    img_dir="Dataset/AA/imgs_s/",
    results_json="test_map/yolov3_aa_stopsign_pf.json",
    class_id=11,
    inp_dim=416,
    fps=True
)

1000 imgs, 41.352319 seconds, average: 24.182441 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.12s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.053
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.081
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.064
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.074
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.038
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.060
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.060
 Average Re

In [7]:
# Score the v3 model on the AA "imgs_p" images for class 0 (person).
detection_defense_v3(
    model=model_pf,
    val_anno="Dataset/AA/person_test_annotations.json",
    img_dir="Dataset/AA/imgs_p/",
    results_json="test_map/yolov3_aa_person_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

1000 imgs, 42.420693 seconds, average: 23.573401 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.15s).
Accumulating evaluation results...
DONE (t=0.04s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.002
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.001
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.013
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.015
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.016
 Average Re

## YOLO V4

In [None]:
import time
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
import torch
import cv2
import numpy as np
import json
import glob

from util_model_img import load_classes, letterbox_image, image_to_tensor


def detection_defense_v4(model, val_anno, img_dir, results_json, class_id, inp_dim, fps=False, device="cuda"):

    '''
    Run `model` on every .jpg/.png image in `img_dir`, write the detections
    to `results_json` in COCO results format and print the COCO bbox metrics
    restricted to `class_id`.

    model: callable applied to the tensor produced by `image_to_tensor`.
        It is expected to return None when nothing is detected, otherwise a
        2-D array with one row per detection: columns 0-3 are the box corners
        (x1, y1, x2, y2) in letterboxed input coordinates, column 4 is the
        confidence and the last column is the class index.
        NOTE(review): presumably a NumPy array, since it is combined in place
        with NumPy operands below -- confirm against the model implementation.
    val_anno: annotations of validation set images
    img_dir: dir of validation set images
    results_json: detection result
    class_id: HA target class - person (0) and AA target class - stop sign (11)
    inp_dim: side length of the square letterboxed network input
    fps: when True, print the number of images, elapsed time and images/second
    device: torch device the input tensor is moved to

    Unlike the v2/v3 variants, boxes are not clipped to the image and the
    top-left corner is not truncated to integers.

    Returns None. Raises KeyError if an image file is not listed in
    `val_anno`, and ValueError if an image cannot be read.
    '''

    # `os` is used below but is not among this section's imports; import it
    # locally so the function works on its own.
    import os

    device = torch.device(device)

    # Map annotation file names to COCO image ids.
    with open(val_anno) as f:
        file2id = {image['file_name']: image['id'] for image in json.load(f)['images']}

    imgs = [name for name in os.listdir(img_dir) if name.endswith(('.jpg', '.png'))]

    results = []

    if fps:
        start = time.time()

    for img in imgs:

        image_id = file2id[img]

        img_path = os.path.join(img_dir, img)
        img_cv = cv2.imread(img_path)
        if img_cv is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError("could not read image: %s" % img_path)
        img_h, img_w = img_cv.shape[:2]
        img_cv = letterbox_image(img_cv, [inp_dim, inp_dim])
        img_ts = image_to_tensor(img_cv).to(device)

        output = model(img_ts)

        if output is None:
            # No detections for this image.
            continue

        # One (w, h, w, h) row per detection. The arithmetic stays in
        # float32, as before, so the rescaled boxes are numerically unchanged.
        im_dim = torch.FloatTensor((img_w, img_h)).repeat(output.shape[0], 2)
        scaling_factor = torch.min(inp_dim / im_dim, 1)[0].view(-1, 1).numpy()
        im_dim = im_dim.numpy()

        # Undo the letterbox: remove the padding, then rescale to the
        # original image resolution.
        output[:, [0, 2]] -= (inp_dim - scaling_factor * im_dim[:, 0:1]) / 2
        output[:, [1, 3]] -= (inp_dim - scaling_factor * im_dim[:, 1:2]) / 2
        output[:, 0:4] /= scaling_factor

        # .tolist() yields built-in Python numbers, so the payload is JSON
        # serialisable whatever the array's float dtype is.
        scores = output[:, 4].tolist()
        classes = output[:, -1].tolist()
        corners = output[:, 0:2].tolist()
        sizes = (output[:, 2:4] - output[:, 0:2]).tolist()

        for conf, cls, (x, y), (w, h) in zip(scores, classes, corners, sizes):
            results.append({'image_id': image_id,
                            # COCO category ids are integers.
                            'category_id': int(cls),
                            'bbox': [x, y, w, h],
                            'score': conf})

    if fps:
        end = time.time()
        total_time = end-start
        print("%d imgs, %f seconds, average: %f fps" % (len(imgs), total_time, len(imgs)/total_time))

    with open(results_json, 'w') as f:
        f.write(json.dumps(results, indent=4))

    if not results:
        # COCO.loadRes inspects the first entry of the results list, so an
        # empty results file cannot be loaded; there is nothing to score.
        print("no detections were produced; skipping COCO evaluation")
        return

    cocoGt = COCO(val_anno)
    cocoDt = cocoGt.loadRes(results_json)
    cocoEval = COCOeval(cocoGt, cocoDt, 'bbox')
    cocoEval.params.catIds = [class_id]
    cocoEval.evaluate()
    cocoEval.accumulate()
    cocoEval.summarize()

In [2]:
# Build the model evaluated by the YOLO V4 cells below.
import patchFilter as pf

# NOTE: `device` is not passed to the detection_defense_v4 calls below; they
# rely on the function's own default of "cuda".
device = "cuda"
model_pf = pf.defense_model_v4()

### INRIA 

In [3]:
# Score the v4 model on the INRIA "pos" images for class 0 (person) and
# report throughput.
detection_defense_v4(
    model=model_pf,
    val_anno="Dataset/HA/test_annotations.json",
    img_dir="Dataset/HA/INRIA/pos/",
    results_json="test_map/yolov4_inria_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 19.261607 seconds, average: 14.952023 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.12s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.627
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.947
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.733
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.454
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.654
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.366
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.698
 Average Rec

In [4]:
# Score the v4 model on the INRIA "patch_v4" images for class 0 (person),
# using the adversarial-set annotations.
detection_defense_v4(
    model=model_pf,
    val_anno="Dataset/HA/test_adv_annotations.json",
    img_dir="Dataset/HA/INRIA/patch_v4/",
    results_json="test_map/yolov4_inria_patchv4_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 16.697686 seconds, average: 17.247898 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.12s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.524
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.838
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.562
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.085
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.448
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.643
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.343
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.614
 Average Reca

In [5]:
# Score the v4 model on the INRIA "natural_v4" images for class 0 (person),
# using the adversarial-set annotations.
detection_defense_v4(
    model=model_pf,
    val_anno="Dataset/HA/test_adv_annotations.json",
    img_dir="Dataset/HA/INRIA/natural_v4/",
    results_json="test_map/yolov4_inria_naturalv4_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

288 imgs, 16.469796 seconds, average: 17.486555 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.00s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.12s).
Accumulating evaluation results...
DONE (t=0.02s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.507
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.815
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.569
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.102
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.418
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.637
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.336
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.601
 Average Reca

### AA

In [6]:
# Score the v4 model on the AA "imgs_s" images for class 11 (stop sign).
detection_defense_v4(
    model=model_pf,
    val_anno="Dataset/AA/stop_test_annotations.json",
    img_dir="Dataset/AA/imgs_s/",
    results_json="test_map/yolov4_aa_stopsign_pf.json",
    class_id=11,
    inp_dim=416,
    fps=True
)

  return 1 / (1 + np.exp(-x))


1000 imgs, 57.390890 seconds, average: 17.424368 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.01s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.12s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.009
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.013
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.006
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.011
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.009
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.008
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.008
 Average Re

In [7]:
# Score the v4 model on the AA "imgs_p" images for class 0 (person).
detection_defense_v4(
    model=model_pf,
    val_anno="Dataset/AA/person_test_annotations.json",
    img_dir="Dataset/AA/imgs_p/",
    results_json="test_map/yolov4_aa_person_pf.json",
    class_id=0,
    inp_dim=416,
    fps=True
)

1000 imgs, 57.579350 seconds, average: 17.367338 fps
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!
Loading and preparing results...
DONE (t=0.13s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=0.15s).
Accumulating evaluation results...
DONE (t=0.03s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = -1.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.000
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.001
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.002
 Average Re