In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data

from tqdm.notebook import tqdm
from chainercv.evaluations import eval_detection_voc
from models import *
from config import cfg
from datasets import VOCDectectionDataset
from torchvision.ops import roi_pool, nms
from sklearn.metrics import average_precision_score
from utils import *

In [4]:
# --- Evaluation setup ---------------------------------------------------
# `year` is forwarded to the dataset; `pretrained` selects the backbone.
year = '2007'
pretrained = 'alexnet'

# Build the combined model for the selected backbone.
if pretrained == 'alexnet':
    model = Combined_Alexnet()
elif pretrained == 'vgg16':
    model = Combined_VGG16()
else:
    # Fail loudly here instead of with a NameError on `model` further down.
    raise ValueError(
        f"Unsupported backbone {pretrained!r}; expected 'alexnet' or 'vgg16'"
    )
model.to(cfg.DEVICE)

# map_location remaps the stored tensors onto the evaluation device, so a
# checkpoint that was saved on a GPU still loads when that GPU is absent.
# NOTE(review): the checkpoint file name is hard-coded (alexnet / 2007) and
# does not follow `pretrained` / `year` -- update it when switching backbone.
checkpoints = torch.load(
    cfg.PATH.PT_PATH + "OK_WholeModel_2007_alexnet_40_.pt",
    map_location=cfg.DEVICE,
)
model.load_state_dict(checkpoints['whole_model_state_dict'])

# Test split, one image per batch, in dataset order.
testdata = VOCDectectionDataset("~/data/", year, 'test')
test_loader = data.DataLoader(testdata, 1, shuffle=False)

log_file = cfg.PATH.LOG_PATH + f"Validate_{pretrained}" + ".txt"
write_log(log_file, f"model_name: OICR_{pretrained}")

# Image-level accumulators (classification metric).
y_pred = []
y_true = []

# Per-image accumulators (detection metric).
total_pred_boxes = []
total_pred_labels = []
total_pred_scores = []
total_true_boxes = []
total_true_labels = []


In [5]:
# Evaluate the loaded model on the test split.
#
# Two metrics are reported:
#   * detection AP / mAP via chainercv's `eval_detection_voc`
#     (IoU 0.5, VOC07 11-point metric), and
#   * image-level classification AP / mAP via sklearn's
#     `average_precision_score`, using the per-class sum of proposal scores.
#
# The cell builds every accumulator itself, so re-running it neither
# duplicates detections nor fails on `.append` after `y_pred` / `y_true`
# have been turned into arrays by a previous run.
NUM_CLASSES = 20       # class columns kept from the refinement scores
NMS_IOU_THRESH = 0.3   # IoU threshold for the per-class NMS

image_scores = []      # one row of summed class scores per image
image_targets = []     # one multi-hot ground-truth row per image

total_pred_boxes = []
total_pred_labels = []
total_pred_scores = []
total_true_boxes = []
total_true_labels = []

model.eval()
with torch.no_grad():
    # Loop invariants: number of test-time scales and the divisor cfg.K
    # applied to the summed refinement outputs.
    scales = len(cfg.DATA.SCALES)
    k = cfg.K

    for n_imgs, gt, n_regions, region in tqdm(test_loader, "Evaluation"):
        # Batch size is 1: drop the batch axis and keep the boxes on the CPU,
        # where NMS and the numpy conversion below take place.
        region = region[0].cpu()
        avg_scores = torch.zeros((len(region), NUM_CLASSES), dtype=torch.float32)

        # Average the refinement scores over all scales.
        for i in range(scales):
            per_img = n_imgs[i].to(cfg.DEVICE)
            per_region = n_regions[i].to(cfg.DEVICE)
            refine_scores, _ = model(per_img, per_region)
            avg_scores += sum(refine_scores)[:, :NUM_CLASSES].detach().cpu() / k
        avg_scores /= scales

        # Ground truth rows: first four columns are the box, last is the label.
        gt = gt.numpy()[0]
        gt_boxes = gt[:, :4]
        gt_labels = gt[:, -1]

        # Multi-hot image-level target.
        gt_labels_onehot = np.zeros(NUM_CLASSES)
        gt_labels_onehot[gt_labels.astype(int)] = 1
        image_scores.append(avg_scores.sum(0).numpy().tolist())
        image_targets.append(gt_labels_onehot.tolist())

        per_pred_boxes = []
        per_pred_scores = []
        per_pred_labels = []

        # Per-class NMS over all proposals (no score pre-filtering).
        for i in range(NUM_CLASSES):
            cls_scores = avg_scores[:, i]
            cls_region = region

            nms_filter = nms(cls_region, cls_scores, NMS_IOU_THRESH)
            per_pred_boxes.append(cls_region[nms_filter].numpy())
            per_pred_scores.append(cls_scores[nms_filter].numpy())
            per_pred_labels.append(np.full(len(nms_filter), i, dtype=np.int32))

        total_pred_boxes.append(np.concatenate(per_pred_boxes, axis=0))
        total_pred_scores.append(np.concatenate(per_pred_scores, axis=0))
        total_pred_labels.append(np.concatenate(per_pred_labels, axis=0))
        total_true_boxes.append(gt_boxes)
        total_true_labels.append(gt_labels)

# Detection metric.
result = eval_detection_voc(
    total_pred_boxes,
    total_pred_labels,
    total_pred_scores,
    total_true_boxes,
    total_true_labels,
    iou_thresh=0.5,
    use_07_metric=True,
)
print(f"Avg AP: {result['ap']}")
print(f"Avg mAP: {result['map']}")

# Image-level classification metric, one AP per class column.
y_pred = np.array(image_scores)
y_true = np.array(image_targets)
cls_ap = [
    average_precision_score(y_true[:, i], y_pred[:, i])
    for i in range(NUM_CLASSES)
]
print(f"Testset classify AP is {str(cls_ap)}")
print(f"Testset classify mAP is {str(sum(cls_ap)/NUM_CLASSES)}")

HBox(children=(FloatProgress(value=0.0, description='Evaluation', max=4952.0, style=ProgressStyle(description_…


Avg AP: [0.04996746 0.12549802 0.04110544 0.03914947 0.00034629 0.17533075
 0.10073885 0.15085075 0.00435763 0.03197182 0.002767   0.0649913
 0.00803389 0.14943469 0.03472256 0.04305286 0.01599128 0.05259232
 0.1898722  0.05069405]
Avg mAP: 0.06657343075574837
Testset classify AP is [0.5439304002101464, 0.3523727417489, 0.3281571936374155, 0.22592611044707314, 0.05656811429318637, 0.37828380290931113, 0.7216212807123739, 0.43806435132036814, 0.158380143505157, 0.05977517274869668, 0.08961779067309253, 0.37888591590240067, 0.05119689906636412, 0.2197437526478686, 0.7932922500104936, 0.30585290069314974, 0.20082542240744827, 0.19125449687520113, 0.584040426302915, 0.48333703837941167]
Testset classify mAP is 0.3280563102245487


Avg AP: [0.04996746 0.12549802 0.04110544 0.03914947 0.00034629 0.17533075
 0.10073885 0.15085075 0.00435763 0.03197182 0.002767   0.0649913
 0.00803389 0.14943469 0.03472256 0.04305286 0.01599128 0.05259232
 0.1898722  0.05069405]
 
Avg mAP: 0.06657343075574837

Testset classify AP is [0.5439304002101464, 0.3523727417489, 0.3281571936374155, 0.22592611044707314, 0.05656811429318637, 0.37828380290931113, 0.7216212807123739, 0.43806435132036814, 0.158380143505157, 0.05977517274869668, 0.08961779067309253, 0.37888591590240067, 0.05119689906636412, 0.2197437526478686, 0.7932922500104936, 0.30585290069314974, 0.20082542240744827, 0.19125449687520113, 0.584040426302915, 0.48333703837941167]

Testset classify mAP is 0.3280563102245487


alexnet 

* 40 epoch
    * 表现为 欠训练
    * 0 ~ 20 -> 1e-4
    * 20 ~ 40 -> 1e-5
    * 40 ~ 50 -> 1e-4
    * 无 gn
    * 前期使用 wrk * 10，后 15 个 epoch 恢复
    * 使用 WSDDN 预训练模型
    * 尝试继续训练
* b_loss = 0.3925406465735535
* r_loss = 0.1087707682915589
    
Avg AP: [0.14108608 0.13945433 0.07550967 0.04488512 0.0155112  0.21452098
 0.18949988 0.277173   0.01626784 0.12129407 0.04030327 0.17166375
 0.0340261  0.14652872 0.11831426 0.0943084  0.02809882 0.0808251
 0.22344707 0.10310592]
 
Avg mAP: 0.11379117919071788

Testset classify AP is [0.7446839481485761, 0.46312593526715523, 0.7229443084640751, 0.6720686580982431, 0.2711831636207078, 0.6316798087749204, 0.7808170323118933, 0.7192427670917786, 0.34050279268210454, 0.5513226437714495, 0.4250681860497294, 0.6522903973695652, 0.37552704290830424, 0.4955287776599825, 0.8895805759044773, 0.5292153332109615, 0.5930822630599464, 0.5015778652077435, 0.7728477087300736, 0.6390370031489183]

Testset classify mAP is 0.5885663105740303


* 28 epoch
    * 0 ~ 12 -> 1e-3
    * 12 ~ 28 ->1e-4
    * 无 gn
   
Avg AP: [0.00250069 0.00268959 0.00593918 0.01238983 0.00037103 0.00520011
 0.05094934 0.0025402  0.00145823 0.00060337 0.00230691 0.00568992
 0.00343817 0.00329781 0.02234189 0.00066166 0.00028807 0.00268314
 0.00682418 0.0011487 ]
 
Avg mAP: 0.006666101292653241

Testset classify AP is [0.031194349732388986, 0.0572540864953559, 0.04718262608471971, 0.027607681272222413, 0.05280227671969535, 0.05056773772898126, 0.2652140489262636, 0.04465063139115444, 0.08254710888368796, 0.020101784423735305, 0.04062878114999654, 0.07325480353317904, 0.05545111365999122, 0.0861678516354079, 0.5328506722903639, 0.05101622583523328, 0.014253184515077868, 0.03858852637022686, 0.05871458627724349, 0.04824319210879111]

Testset classify mAP is 0.08391456345168583

-----


vgg16 联合训练 oicr + GroupNorm

Avg AP: [0.09051399 0.18203187 0.08157918 0.03588963 0.00938754 0.19129429
 0.1791767  0.26907684 0.00336021 0.10906965 0.08120123 0.18342123
 0.06307971 0.15827458 0.03537233 0.0963341  0.02026587 0.13565538
 0.23144397 0.01841274]
 
Avg mAP: 0.10874205257675588

Testset classify AP is [0.861379509457694, 0.5464344555413259, 0.8003924640583021, 0.7809129075735384, 0.17323967359198622, 0.6694490297538691, 0.8282779424696932, 0.8666258007735645, 0.3152865776140701, 0.5462951295709837, 0.3261202627186228, 0.7544566528512925, 0.37742541489619386, 0.4449411781005752, 0.8810713641118815, 0.5723464026070882, 0.6089818297132764, 0.6139789231030357, 0.8293171620131501, 0.7585100285661969]

Testset classify mAP is 0.6277721354543171


vgg16 结果

Avg AP: [0.07594125 0.15202542 0.11302452 0.01443283 0.09229122 0.18310884
 0.17307478 0.19584398 0.04892519 0.03835734 0.10568615 0.10578527
 0.03219307 0.11462246 0.04233103 0.02550928 0.04346568 0.12203952
 0.18385794 0.02253245]
 
Avg mAP: 0.0942524098604717


Avg AP: [0.0380435  0.09586847 0.02967028 0.01421967 0.02488741 0.12029387
 0.12345616 0.14256291 0.00230058 0.09248682 0.09352449 0.11741401
 0.10741478 0.08556007 0.0268022  0.09133743 0.01528107 0.05298027
 0.13549627 0.03642921]
 
Avg mAP: 0.07230147397022946

Avg AP: [0.06553017 0.11218434 0.04370746 0.01982238 0.0250097  0.1920574
 0.184809   0.21933456 0.00361981 0.09866133 0.1002228  0.16470673
 0.13001323 0.14395161 0.04144958 0.09434894 0.02223967 0.06508374
 0.20686167 0.04100427]

Avg mAP: 0.09873091926663828

In [21]:
# Debug inspection: show the proposal boxes currently bound to `cls_region`
# in the kernel (the name is assigned inside the per-class NMS loop).
cls_region

tensor([], size=(0, 4))

In [22]:
# Debug inspection: show the class score tensor currently bound to
# `cls_scores` in the kernel (assigned inside the per-class NMS loop).
cls_scores

tensor([])

In [23]:
# Debug check: True when `cls_scores` holds no elements at all.
cls_scores.numel() == 0

True