In [1]:
import os
from datetime import datetime
import traceback

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, models, transforms
import torchvision
import numpy as np
import pandas as pd

from tqdm import tqdm
from tensorboardX import SummaryWriter

from dataset import voc
from retinanet import model, val
from retinanet import transforms as aug
%matplotlib inline

In [2]:
# consts
tag = 'debug'
split_name = 'voc-1'
root_path = '/home/voyager/data/root/voc/'

device_name = 'cpu'
batch_size = 24
epochs = 200
depth = 50
lr = 1e-5
patience = 3
image_size = 512
score_thresholds = [0.01, 0.02, 0.03, 0.04, 0.045, 0.05, 0.055, 0.06, 0.065, 0.07, 0.075, 0.08, 0.085, 0.09, 0.095, 0.1, 0.11, 0.12, 0.13, 0.14, 0.15]
max_detections = 4
num_classes = 2

# info and deps
now = datetime.now()

if not os.path.exists('./result'):
    os.mkdir('./result')

result_path = './result/{}_{}_{}_{}'.format(
    tag,
    depth,
    split_name,
#     now.strftime('%Y%m%d_%H%M%S')
    "20190516_175626"
)

model_name = "151_0.3333"
iou_threshold = 0.5

In [3]:
val_trans = aug.Compose([
    aug.Pad(),
    aug.Resize(image_size, image_size),
    aug.AutoLevel(min_level_rate=1, max_level_rate=1),
    aug.AutoContrast(),
    aug.Contrast(1.25),
    aug.ToTensor()
])

val_set = voc.VOCDetection(
    root_path,
    image_set="{}_val".format(split_name),
#     image_set="{}_train".format(split_name),
    transforms=val_trans
)

val_loader = DataLoader(
    val_set,
    batch_size=batch_size,
    shuffle=False,
    collate_fn=voc.collate
)
val_set

Dataset VOCDetection
    Number of datapoints: 317
    Root location: /home/voyager/data/root/voc/
    <retinanet.transforms.Compose object at 0x7f9b991bb0f0>

In [4]:
net = torch.load(os.path.join(result_path, model_name +".pth"))

In [5]:
def _get_detections(dataset, retinanet, num_classes=2, score_threshold=0.05, max_detections=100, save_path=None, model_name=None):
    
    all_detections = [[None for i in range(num_classes)] for j in range(len(dataset))]
    retinanet.eval()
    
    with torch.no_grad():

        for index in range(len(dataset)):
            data = dataset[index]
            # data is a tuple: (img, ([labels], [gts], {scale, pad_loc...}))
            scale = data[1][2]["scale"]
            # get pad loc, up, down, left, right
            pad_loc = data[1][2]["pad_loc"]

            # run network
            scores, labels, boxes = retinanet(data[0].permute(0, 1, 2).cuda().float().unsqueeze(dim=0))
            scores = scores.cpu().numpy()
            labels = labels.cpu().numpy()
            boxes  = boxes.cpu().numpy()
            
            # correct boxes for image scale
            boxes /= scale
#             print(boxes.shape)
            for bbx in boxes:
                # height fix
                bbx[1] -= pad_loc[0]
                bbx[3] -= pad_loc[0]
                # width fix
                bbx[0] -= pad_loc[2]
                bbx[2] -= pad_loc[2]
            

            # select indices which have a score above the threshold
            indices = np.where(scores > score_threshold)[0]
            if indices.shape[0] > 0:
                # select those scores
                scores = scores[indices]

                # find the order with which to sort the scores
                scores_sort = np.argsort(-scores)[:max_detections]

                # select detections
                image_boxes      = boxes[indices[scores_sort], :]
                image_scores     = scores[scores_sort]
                image_labels     = labels[indices[scores_sort]]
                image_detections = np.concatenate([image_boxes, np.expand_dims(image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1)
                
                # copy detections to all_detections
                for label in range(num_classes):
                    all_detections[index][label] = image_detections[image_detections[:, -1] == label, :-1]
            else:
                # copy detections to all_detections
                for label in range(num_classes):
                    all_detections[index][label] = np.zeros((0, 5))
            
#             print("{}/{}".format(index + 1, len(dataset)), end='\r')
            print("\r {}/{}".format(index + 1, len(dataset)), end='')
    
    # save all_detections to a csv under save_path
    if save_path:
        csv_data = pd.DataFrame(columns=['id', 'serial', 'class', 'score', 'x1', 'y1', 'x2', 'y2', 'prob'])
    
        for index in range(len(all_detections)):
            image_name = dataset.images[index].split('/')[-1].split('.')[0]
            for label in range(len(all_detections[index])):
                for box in range(len(all_detections[index][label])):
                    bbx = all_detections[index][label][box]
                    csv_data.loc[index] = [image_name, box, label, 0, bbx[0], bbx[1], bbx[2], bbx[3], bbx[4]]
        file_name = "{}_{}_{}_{}".format(
            model_name,
            num_classes,
            score_threshold,
            max_detections
        )
        csv_data.to_csv(os.path.join(save_path, file_name)+".csv")
    
    return all_detections

In [12]:
score_threshold = score_thresholds[0]
score_threshold = 0.9
all_detections = _get_detections(val_set, net, num_classes=num_classes, score_threshold=score_threshold, max_detections=max_detections, save_path=result_path, model_name=model_name)



 317/317

In [6]:
def _get_annotations(generator, num_classes):
    all_annotations = [[None for i in range(num_classes)] for j in range(len(generator))]
    
    for index in range(len(generator)):
        # load the annotations, annotations is a tuple: (img, ([labels], [gts], {scale, pad_loc...}))
        annotations = generator[index][1]
        labels = annotations[0].numpy()
        boxes = annotations[1].numpy()
        
        for label in range(num_classes):
            if label in labels:
                id_list = np.where(labels == label)[0]
                all_annotations[index][label] = boxes[id_list]
            else:
                all_annotations[index][label] = np.zeros((0, 5))

        # print('{}/{}'.format(i + 1, len(generator)), end='\r')

    return all_annotations

# all_annotations = _get_annotations(val_set, num_classes=num_classes)

In [10]:
def _compute_ap(recall, precision):
    """ Compute the average precision, given the recall and precision curves.
    Code originally from https://github.com/rbgirshick/py-faster-rcnn.
    # Arguments
        recall:    The recall curve (list).
        precision: The precision curve (list).
    # Returns
        The average precision as computed in py-faster-rcnn.
    """
    # correct AP calculation
    # first append sentinel values at the end
    mrec = np.concatenate(([0.], recall, [1.]))
    mpre = np.concatenate(([0.], precision, [0.]))

    # compute the precision envelope
    for i in range(mpre.size - 1, 0, -1):
        mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

    # to calculate area under PR curve, look for points
    # where X axis (recall) changes value
    i = np.where(mrec[1:] != mrec[:-1])[0]

    # and sum (\Delta recall) * prec
    ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap

def compute_overlap(a, b):
    """
    Parameters
    ----------
    a: (N, 4) ndarray of float
    b: (K, 4) ndarray of float
    Returns
    -------
    overlaps: (N, K) ndarray of overlap between boxes and query_boxes
    """
    area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1])

    iw = np.minimum(np.expand_dims(a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0])
    ih = np.minimum(np.expand_dims(a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1])

    iw = np.maximum(iw, 0)
    ih = np.maximum(ih, 0)

    ua = np.expand_dims((a[:, 2] - a[:, 0]) * (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih

    ua = np.maximum(ua, np.finfo(float).eps)

    intersection = iw * ih

    return intersection / ua



In [103]:
generator = val_set
retinanet = net
score_threshold = 0.8

all_detections     = _get_detections(generator, retinanet, num_classes=num_classes, score_threshold=score_threshold, max_detections=max_detections)
all_annotations    = _get_annotations(generator, num_classes=num_classes)
all_annotations[0], all_detections[0]



 317/317

([array([[255.16884 ,  81.454544, 331.63635 , 358.23376 ]], dtype=float32),
  array([[239.37662,  82.28571, 360.72726, 437.1948 ]], dtype=float32)],
 [array([], shape=(0, 5), dtype=float64),
  array([[217.24816895, 145.56228638, 363.01742554, 531.32775879,
            0.93271393]])])

In [24]:
all_detections

[[array([], shape=(0, 5), dtype=float64),
  array([[217.24816895, 145.56228638, 363.01742554, 531.32775879,
            0.93271393]])],
 [array([], shape=(0, 5), dtype=float64),
  array([[ 55.40867615,  89.84758759, 355.47259521, 299.86749268,
            0.9589262 ]])],
 [array([], shape=(0, 5), dtype=float64),
  array([[130.63682556,  48.68155289, 354.71682739, 423.53625488,
            0.97539943]])],
 [array([], shape=(0, 5), dtype=float64),
  array([[ 40.92389297,  -1.12163925, 217.42948914, 330.97583008,
            0.9305132 ]])],
 [array([], shape=(0, 5), dtype=float64),
  array([], shape=(0, 5), dtype=float64)],
 [array([], shape=(0, 5), dtype=float64),
  array([], shape=(0, 5), dtype=float64)],
 [array([], shape=(0, 5), dtype=float64),
  array([[176.25926208,  49.93022156, 378.74789429, 298.76980591,
            0.93834645]])],
 [array([], shape=(0, 5), dtype=float64),
  array([[ 70.07771301, 111.01226807, 291.19366455, 391.29067993,
            0.94031918]])],
 [array([], sh

In [109]:
def evaluate(generator, retinanet, iou_threshold=0.5, score_threshold=0.9, max_detections=4, num_classes=2, all_annotations=None, all_detections=None):
    # gather all detections and annotations
#     all_detections     = _get_detections(generator, retinanet, num_classes=num_classes, score_threshold=score_threshold, max_detections=max_detections)
#     all_annotations    = _get_annotations(generator, num_classes=num_classes)
#     print(all_annotations[0], all_detections[0])
    
    average_precisions = {}

    for label in range(num_classes):
#         if label == 1:
#             continue
        
        false_positives = np.zeros((0,))
        true_positives  = np.zeros((0,))
        scores          = np.zeros((0,))
        num_annotations = 0.0
        num_detections = 0.0
        
        # len(generator) == 317
        for i in range(len(generator)):
            detections           = all_detections[i][label]
            annotations          = all_annotations[i][label]
            num_annotations     += annotations.shape[0]
            detected_annotations = []
            
#             print(detections)
            # 很多label 0 并没有预测出来，所以导致recall = true_positives / num_annotations分母过大
            if detections.shape[0] > 0:
                num_detections += detections.shape[0]
                # 因为总是取覆盖率最好的框，所以只 + 1
            else:
                continue
            
            for d in detections:
                # detection[4] are scores 
                scores = np.append(scores, d[4])
                # 如果原图都没有框，则 fp + 1, tp + 0
                if annotations.shape[0] == 0:
                    false_positives = np.append(false_positives, 1)
                    true_positives  = np.append(true_positives, 0)
                    continue
                
                # d 是预测的每一个box, overlaps是覆盖率的list
                overlaps            = compute_overlap(np.expand_dims(d, axis=0), annotations)
                # 覆盖率最大的坐标
                assigned_annotation = np.argmax(overlaps, axis=1)
                # 最大覆盖率
                max_overlap         = overlaps[0, assigned_annotation]
#                 print(max_overlap)
                
                # 如果覆盖率大于iou_threshold，则 tp + 1， fp + 0，否则反之
                # assigned_annotation not in detected_annotations 应该总是true
                if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations:
                    false_positives = np.append(false_positives, 0)
                    true_positives  = np.append(true_positives, 1)
                    detected_annotations.append(assigned_annotation)
                else:
                    false_positives = np.append(false_positives, 1)
                    true_positives  = np.append(true_positives, 0)
            
        # no annotations -> AP for this class is 0 (is this correct?)
        # 原数据，两个类别分别是 0：384， 1：358个框
        if num_annotations == 0:
            average_precisions[label] = 0, 0
            continue
            
        print(label, "  true_positives：", len(true_positives))
        print(label, "  预测框数         ", num_detections)
        print(label, "  预测比例         ", num_detections / num_annotations)
        print(label, "  总标注框数       ", num_annotations)
        num_annotations = num_detections
        print(label, "  可对比标注框数    ", num_annotations)
        
        # sort by score
        indices         = np.argsort(-scores)
        false_positives = false_positives[indices]
        true_positives  = true_positives[indices]
        # compute false positives and true positives
        false_positives = np.cumsum(false_positives)
        true_positives  = np.cumsum(true_positives)
        
        # compute recall and precision
        recall    = true_positives / num_annotations
#         print(recall)
        precision = true_positives / np.maximum(true_positives + false_positives, np.finfo(np.float64).eps)
#         print(precision)
        
        # compute average precision
#         print(recall, precision, "!!!!")
        average_precision  = _compute_ap(recall, precision)
        average_precisions[label] = average_precision, num_annotations
        
        print("在有预测框的情况下，对比预测的平均精确度")
        print(average_precision, '\n')
    
    return average_precisions

score_threshold = 0.8
iou_threshold = 0.4
ap = evaluate(val_set, net, iou_threshold=iou_threshold, score_threshold=score_threshold, max_detections=max_detections, num_classes=num_classes, all_annotations=all_annotations, all_detections=all_detections)

0   true_positives： 113
0   预测框数          113.0
0   预测比例          0.2942708333333333
0   总标注框数        384.0
0   可对比标注框数     113.0
在有预测框的情况下，对比预测的平均精确度
0.20148640714013896 

1   true_positives： 273
1   预测框数          273.0
1   预测比例          0.7625698324022346
1   总标注框数        358.0
1   可对比标注框数     273.0
在有预测框的情况下，对比预测的平均精确度
0.4118044519847159 



In [91]:
ap

{0: (0.049957658095703864, 349.0), 1: (0.9677883975102972, 144.0)}