In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

from tqdm.notebook import tqdm
from chainercv.evaluations import eval_detection_voc
from models import MIDN_Alexnet, MIDN_VGG16, OICR
from config import cfg
from datasets import VOCDectectionDataset
from torchvision.ops import roi_pool, nms
from sklearn.metrics import average_precision_score
from utils import *

In [20]:
year='2007'
pretrained='vgg16'
oicr = None
if pretrained == 'alexnet':
    midn = MIDN_Alexnet()
elif pretrained == 'vgg16':
    midn = MIDN_VGG16()
midn.to(cfg.DEVICE)

oicr = OICR(cfg.K)
oicr.to(cfg.DEVICE)

midn_checkpoints = torch.load(cfg.PATH.PT_PATH + "Model_2007_vgg16_20_old.pt")
midn.load_state_dict(midn_checkpoints['midn_model_state_dict'])
oicr.load_state_dict(midn_checkpoints['oicr_model_state_dict'])


testdata = VOCDectectionDataset("~/data/", year, 'test')
test_loader = data.DataLoader(testdata, 1, shuffle=False)

log_file = cfg.PATH.LOG_PATH + f"Validate_{pretrained}" + ".txt"
write_log(log_file, f"model_name: OICR_{pretrained}")

y_pred = []
y_true = []

total_pred_boxes = []
total_pred_labels = []
total_pred_scores = []
total_true_boxes = []
total_true_labels = []


In [21]:
with torch.no_grad():
    midn.eval()
    oicr.eval()
    for n_imgs, gt, n_regions, region in tqdm(test_loader, "Evaluation"):
#         print(region.shape)
        region = region.to(cfg.DEVICE)
        avg_scores = torch.zeros((len(region[0]), 20), dtype=torch.float32)
        for i in range(3):
            per_img = n_imgs[i].to(cfg.DEVICE)
            per_region = n_regions[i].to(cfg.DEVICE)
            fc7, combined_scores = midn(per_img, per_region)
            refine_scores = oicr(fc7)
            avg_scores = (sum(refine_scores).cpu())/ cfg.K
        avg_scores /= 3
        
#         avg_scores = torch.zeros((len(region[0]), 20), dtype=torch.float32)
#         for i in range(3):
#             per_img = n_imgs[i].to(cfg.DEVICE)
#             per_region = n_regions[i].to(cfg.DEVICE)
#             fc7, combined_scores = midn(per_img, per_region)
#             avg_scores += avg_scores.cpu()
#         avg_scores /= (3 * 1)
        
    
        gt = gt.numpy()[0]
        gt_boxex = gt[:, :4]
        gt_labels = gt[:, -1]
    
        gt_labels_onehot = np.zeros(20)
        for label in gt_labels:
            gt_labels_onehot[int(label)] = 1
        y_pred.append(avg_scores.sum(0).detach().cpu().numpy().tolist())
        y_true.append(gt_labels_onehot.tolist())

        per_pred_boxes = []
        per_pred_scores = []
        per_pred_labels = []
        
        region = region[0].cpu()
        
        for i in range(20):
            cls_scores = avg_scores[:, i]
            cls_region = region

            nms_filter = nms(cls_region, cls_scores, 0.3)
            per_pred_boxes.append(cls_region[nms_filter].numpy())
            per_pred_scores.append(cls_scores[nms_filter].numpy())
            per_pred_labels.append(np.full(len(nms_filter), i, dtype=np.int32))
            
        total_pred_boxes.append(np.concatenate(per_pred_boxes, axis=0))
        total_pred_scores.append(np.concatenate(per_pred_scores, axis=0))
        total_pred_labels.append(np.concatenate(per_pred_labels, axis=0))
        total_true_boxes.append(gt_boxex)
        total_true_labels.append(gt_labels)
        
    result = eval_detection_voc(
        total_pred_boxes,
        total_pred_labels,
        total_pred_scores,
        total_true_boxes,
        total_true_labels,
        iou_thresh=0.5,
        use_07_metric=True,
    )
    print(f"Avg AP: {result['ap']}")
    print(f"Avg mAP: {result['map']}")
#         write_log(log_file, f"Avg AP: {result['ap']}")
#         write_log(log_file, f"Avg mAP: {result['map']}")
    cls_ap = []
    y_pred = np.array(y_pred)
    y_true = np.array(y_true)
    for i in range(20):
        cls_ap.append(average_precision_score(y_true[:,i], y_pred[:,i]))
    print(f"Testset classify AP is {str(cls_ap)}")
    print(f"Testset classify mAP is {str(sum(cls_ap)/20)}")

HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=4952.0, style=ProgressStyle(description_…


Avg AP: [0.06543496 0.13772103 0.11171478 0.01045832 0.09219723 0.16593716
 0.17469437 0.12095688 0.03295824 0.03254094 0.05913931 0.08997836
 0.03253466 0.11140212 0.04204161 0.02496726 0.04479538 0.1121503
 0.18538617 0.02084201]
Avg mAP: 0.08339255502995957
Testset classify AP is [0.7686124651884377, 0.6377192036212179, 0.6931169515273672, 0.7769481020981058, 0.45303283092926716, 0.707234311634705, 0.8481437394589839, 0.7803962350291742, 0.5253592185531923, 0.6637031229904103, 0.5576079120049549, 0.7550650626487099, 0.5030783657096044, 0.5952222735886301, 0.9176201364654784, 0.5581422241756182, 0.6086998789970127, 0.5823820529295567, 0.7571790186201262, 0.7552319592696703]
Testset classify mAP is 0.6722247532720114


vgg16 结果

Avg AP: [0.07594125 0.15202542 0.11302452 0.01443283 0.09229122 0.18310884
 0.17307478 0.19584398 0.04892519 0.03835734 0.10568615 0.10578527
 0.03219307 0.11462246 0.04233103 0.02550928 0.04346568 0.12203952
 0.18385794 0.02253245]
 
Avg mAP: 0.0942524098604717


Avg AP: [0.0380435  0.09586847 0.02967028 0.01421967 0.02488741 0.12029387
 0.12345616 0.14256291 0.00230058 0.09248682 0.09352449 0.11741401
 0.10741478 0.08556007 0.0268022  0.09133743 0.01528107 0.05298027
 0.13549627 0.03642921]
 
Avg mAP: 0.07230147397022946

Avg AP: [0.06553017 0.11218434 0.04370746 0.01982238 0.0250097  0.1920574
 0.184809   0.21933456 0.00361981 0.09866133 0.1002228  0.16470673
 0.13001323 0.14395161 0.04144958 0.09434894 0.02223967 0.06508374
 0.20686167 0.04100427]

Avg mAP: 0.09873091926663828

In [8]:
region.shape

torch.Size([1, 693, 4])