In [1]:
import argparse
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import datetime

from tqdm.notebook import tqdm
from torch import optim
from torchvision.ops import roi_pool, nms
from sklearn.metrics import average_precision_score
from config import cfg
from utils import *
from models import *
from refine_loss import WeightedRefineLoss
from datasets import VOCDectectionDataset
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

In [2]:
year='2007'
pretrained='vgg16'
oicr = None
midn = None
check_points = True


eva_th = 1
lr = 1e-5
lr_step = 5
epochs = 5
# start_epoch = 29
start_epoch = 0


if pretrained == 'alexnet':
    model = Combined_Alexnet(cfg.K, cfg.Groups)
if pretrained == 'vgg16':
    model = Combined_VGG16(cfg.K)

model.init_model()
model.to(cfg.DEVICE)

checkpoints = torch.load(cfg.PATH.PT_PATH + "BestModel_2007_vgg16_14.pt")
model.load_state_dict(checkpoints['whole_model_state_dict'])

trainval = VOCDectectionDataset("~/data/", year, 'trainval')
train_loader = data.DataLoader(trainval, cfg.TRAIN.BATCH_SIZE, shuffle=False)
testdata = VOCDectectionDataset("~/data/", year, 'test')
test_loader = data.DataLoader(testdata, 1, shuffle=False)


bias_params = []
bias_param_names = []
nonbias_params = []
nonbias_param_names = []
nograd_param_names = []
for key, value in model.named_parameters():
    if value.requires_grad:
        if 'bias' in key:
            bias_params.append(value)
            bias_param_names.append(key)
        else:
            nonbias_params.append(value)
            nonbias_param_names.append(key)
            
params = [
    {'params': nonbias_params,
     'lr': lr,
     'weight_decay': cfg.TRAIN.WD},
    {'params': bias_params,
     'lr': lr * (cfg.TRAIN.BIAS_DOUBLE_LR + 1),
     'weight_decay':  0},
]

optimizer = optim.SGD(params,
                      momentum=cfg.TRAIN.MOMENTUM)

#   scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
#                                              milestones=[lr_step,
#                                                          epochs],
#                                              gamma=cfg.TRAIN.LR_MUL)
iter_id = 0
mAP = 0
best_mAP = 0
best_model = None
best_epoch = 0

N = len(train_loader)
bceloss = nn.BCELoss(reduction="sum")
refineloss = WeightedRefineLoss()

In [3]:
def oicr_algorithm(xr0, gt_label, regions, K=3):
    R = regions.size()[1] # R
    # then do the online instance classifier refinement
    wrk_list = torch.zeros((K, R)).to(cfg.DEVICE)
    # R x 21 x k
    yrk_list = torch.zeros((K, R, (1 + len(VOC_CLASSES))))
    yrk_list[:, :, -1] = 1.0
    yrk_list = yrk_list.to(cfg.DEVICE)
#     # here is just to calculate the supervised information 
#     # do not need grad any more
    with torch.no_grad():
        for k in range(K):
            wrk = wrk_list[k, :]
            yrk = yrk_list[k, :, :]
            IoUs = torch.full((R, ), - np.inf).to(cfg.DEVICE)
            for c in range(len(VOC_CLASSES)):
                if gt_label[0][c] == 1.0:
                    top_id = torch.argmax(xr0[k][:, c])
                    top_score = xr0[k][top_id][c]
#                     writer.add_scalar("top_score", top_score, 0)
#                     print(top_score)
                    top_box = regions[0][top_id:top_id+1]
                    IoUs_temp = one2allbox_iou(top_box, regions[0])
                    IoU_mask = torch.where(IoUs_temp > IoUs)
                    IoUs[IoU_mask] = IoUs_temp[IoU_mask]
                    wrk[IoU_mask] = top_score
                    y_mask = torch.where(IoUs[IoU_mask] > cfg.TRAIN.It)
                    yrk[y_mask] = 0.0
                    yrk[y_mask] += torch.eye(1 + len(VOC_CLASSES))[c].to(cfg.DEVICE)
    return wrk_list, yrk_list

In [None]:
oicr_algorithm()

In [9]:
import pyximport
import numpy as np
pyximport.install(setup_args={"include_dirs":np.get_include()},
                  reload_support=True)
import torch
import pdb
from bbox import bbox_overlaps

ref1 =  ref1.view(1, -1, R)

In [6]:
yrk_list[0]

tensor([[0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 1.]], device='cuda:0')

In [11]:
wrk_list[1]

tensor([0.0039, 0.0039, 0.0039,  ..., 0.0039, 0.0039, 0.0039], device='cuda:0')

In [13]:
# proposal_scores =  proposal_scores.view(1, -1, R)
y, w = OICRLayer(regions, ref1.view(1, -1, R), gt_label)

In [16]:
torch.from_numpy(y).detach().to(cfg.DEVICE)

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        ...,
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.],
        [1., 0., 0.,  ..., 0., 0., 0.]], device='cuda:0', dtype=torch.float64)

In [24]:
ref1 =  ref1.view(1, -1, R)

In [4]:
def OICRLayer(boxes, cls_prob, im_labels, cfg_TRAIN_FG_THRESH = 0.5):
    # boxes = boxes[...,1:]
    # boxes:[1, 3189, 4]
    # 上一层传来的cls_prob:[1, 3819, 20/21]
    # image level label -> im_labels:[1, 20]
    proposals = _get_highest_score_proposals(boxes, cls_prob, im_labels)
    labels, cls_loss_weights = _sample_rois(boxes, proposals, 21)
    return labels, cls_loss_weights

def _get_highest_score_proposals(boxes, cls_prob, im_labels):
    """Get proposals with highest score."""

    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size shoud be equal to 1'
    # 图片labels
    im_labels_tmp = im_labels[0, :]
    gt_boxes = np.zeros((0, 4), dtype=np.float32)
    gt_classes = np.zeros((0, 1), dtype=np.int32)
    gt_scores = np.zeros((0, 1), dtype=np.float32)

        # 裁剪掉背景分类
    if 21 == cls_prob.shape[2] : # added 1016
        cls_prob = cls_prob[:, :, 1:]

    # 统计GT类得分最高的box
    for i in range(num_classes):
        if im_labels_tmp[i] == 1:
            cls_prob_tmp = cls_prob[:,:, i].data.cpu()
            max_index = np.argmax(cls_prob_tmp)
            # m = boxes[:,max_index, :].reshape(1, -1).cpu()
            gt_boxes = np.vstack((gt_boxes, boxes[:,max_index, :].reshape(1, -1).cpu()))
            gt_classes = np.vstack((gt_classes, (i + 1) * np.ones((1, 1), dtype=np.int32))) # for pushing ground
            gt_scores = np.vstack((gt_scores,
                cls_prob_tmp[:, max_index] ))  # * np.ones((1, 1), dtype=np.float32)))
            cls_prob[:, max_index, :] = 0 #in-place operation <- OICR code but I do not agree
    # proposals {[1, 4], [1, 1], [1, 1]}
    proposals = {'gt_boxes' : gt_boxes,
                 'gt_classes': gt_classes,
                 'gt_scores': gt_scores}
    return proposals


def _sample_rois(all_rois, proposals, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[0].cpu(), dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))
    try :
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
    except :
        pdb.set_trace()

    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]
    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= 0.5)[0]

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where(max_overlaps < 0.5)[0]

    labels[bg_inds] = 0
    real_labels = np.zeros((labels.shape[0], 21))
    for i in range(labels.shape[0]) :
        real_labels[i, labels[i]] = 1
    rois = all_rois.cpu()
    return real_labels, cls_loss_weights


In [5]:
for epoch in tqdm(range(start_epoch, epochs+1), "Total"):
    epoch_b_loss = 0.0
    epoch_r_loss = 0.0
    for img, gt_box, gt_label, regions in tqdm(train_loader, f"Epoch {epoch}"):
        img = img.to(cfg.DEVICE)  # 1, 3, h ,w 
        regions = regions.to(cfg.DEVICE) # 1, R, 4
        R = regions.size()[1] # R
        gt_label = gt_label.to(cfg.DEVICE) # 1, C
        
        ref1, ref2, ref3, proposal_scores = model(img, regions)
        cls_scores = torch.sum(proposal_scores, dim=0)
        cls_scores = torch.clamp(cls_scores, min=0, max=1)
        
        b_loss = bceloss(cls_scores, gt_label[0])
        epoch_b_loss += b_loss.item()


        ref_scores1, ref_scores2, ref_scores3, proposal_scores = model(img, regions)
        cls_scores = torch.sum(proposal_scores, dim=0)
        cls_scores = torch.clamp(cls_scores, min=0, max=1)
        
        b_loss = bceloss(cls_scores, gt_label[0])
        epoch_b_loss += b_loss.item()


        xr0 = torch.zeros((R, 21)).to(cfg.DEVICE) # xj0
        xr0[:, :20] = proposal_scores.detach()
            # R+1 x 21
        xrk_list = []
        xrk_list.append(xr0)
        xrk_list.append(ref_scores1.detach())
        xrk_list.append(ref_scores2.detach())
        
        wrk_list, yrk_list = oicr_algorithm(xrk_list, gt_label, regions, cfg.K)


        r_loss_1 = refineloss(ref_scores1, 
                              yrk_list[0],
                              wrk_list[0])
        r_loss_2 = refineloss(ref_scores2, 
                              yrk_list[1],
                              wrk_list[1])
        r_loss_3 = refineloss(ref_scores3, 
                              yrk_list[2],
                              wrk_list[2])

        loss = b_loss + r_loss_1 + r_loss_2 + r_loss_3
        loss.backward()
        epoch_r_loss += (r_loss_1 + r_loss_2 + r_loss_3).item()

        iter_id += 1
        if iter_id % cfg.TRAIN.ITER_SIZE == 0:
            optimizer.step()
            optimizer.zero_grad()
        if iter_id == 2:
            break
    break

Total:   0%|          | 0/6 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/5011 [00:00<?, ?it/s][A
Epoch 0:   0%|          | 1/5011 [00:02<2:49:11,  2.03s/it][A
Total:   0%|          | 0/6 [00:02<?, ?it/s]


In [8]:
# def oicr_algorithm(xr0, gt_label, regions, K=3):
K = 3
R = regions.size()[1] # R
# then do the online instance classifier refinement
wrk_list = torch.zeros((K, R)).to(cfg.DEVICE)
# R x 21 x k
yrk_list = torch.zeros((K, R, (1 + len(VOC_CLASSES))))
yrk_list[:, :, -1] = 1.0
yrk_list = yrk_list.to(cfg.DEVICE)
# here is just to calculate the supervi

In [9]:
with torch.no_grad():
    k = 1
    xr0 = xrk_list
    wrk = wrk_list[k, :]
    yrk = yrk_list[k, :, :]
    IoUs = torch.full((R, ), - np.inf).to(cfg.DEVICE)
    for c in range(len(VOC_CLASSES)):
        # 8 17
        if gt_label[0][c] == 1.0:
            print("True Label is : " + str(c))
            top_id = torch.argmax(xr0[k][:, c])
            print("top id : " + str(top_id.item()))
            top_score = xr0[k][top_id][c]

            top_box = regions[0][top_id:top_id+1]
            IoUs_temp = one2allbox_iou(top_box, regions[0])
            IoU_mask = torch.where(IoUs_temp > IoUs)
            IoUs[IoU_mask] = IoUs_temp[IoU_mask]
            wrk[IoU_mask] = top_score
            y_mask = torch.where(IoUs[IoU_mask] > cfg.TRAIN.It)
            yrk[y_mask] = 0.0
            yrk[y_mask] += torch.eye(1 + len(VOC_CLASSES))[c].to(cfg.DEVICE)
            print(y_mask)
            print(yrk.sum(dim=0))
            print('-' * 30)
#             break

True Label is : 12
top id : 826
(tensor([596, 818, 826, 847, 874, 919, 920, 928], device='cuda:0'),)
tensor([  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          8.,   0.,   0.,   0.,   0.,   0.,   0.,   0., 951.], device='cuda:0')
------------------------------
True Label is : 14
top id : 518
(tensor([115, 127, 178, 210], device='cuda:0'),)
tensor([  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          8.,   0.,   4.,   0.,   0.,   0.,   0.,   0., 947.], device='cuda:0')
------------------------------


In [79]:
True Label is : 6
top id : 452
(tensor([ 54,  55,  58, 350, 352, 353, 386, 387, 388, 397, 446, 447, 448, 449,
        450, 451, 452, 453, 454, 471, 473, 485, 486, 487, 525, 571, 651, 652,
        705, 706, 812, 814, 822, 836, 837, 843], device='cuda:0'),)
tensor([   0.,    0.,    0.,    0.,    0.,    0.,   36.,    0.,    0.,    0.,
           0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
        1228.], device='cuda:0')
------------------------------

tensor([0.0016, 0.0439, 0.1009,  ..., 0.0006, 0.0003, 0.0003], device='cuda:0')

In [82]:
IoUs[torch.where(IoUs > 0)]

tensor([0.0016, 0.0439, 0.1009,  ..., 0.0017, 0.0012, 0.0005], device='cuda:0')

In [33]:
for i in wrk_list[0][:10]:
    print(i)

tensor(0.9992, device='cuda:0')
tensor(0.9992, device='cuda:0')
tensor(0.9992, device='cuda:0')
tensor(0.9992, device='cuda:0')
tensor(0.4169, device='cuda:0')
tensor(0.9992, device='cuda:0')
tensor(0.9992, device='cuda:0')
tensor(0.4169, device='cuda:0')
tensor(0.4169, device='cuda:0')
tensor(0.4169, device='cuda:0')


In [36]:
gt_label

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0.]], device='cuda:0')

In [35]:
for i in yrk_list[0][mask][:, :]:
    print(torch.where(i == 1.))

(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([17], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([17], device='cuda:0'),)
(tensor([17], device='cuda:0'),)
(tensor([17], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([20], device='cuda:0'),)
(tensor([2

In [23]:
yrk_list[0].shape

torch.Size([2074, 21])