In [1]:
import argparse
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import datetime

from tqdm.notebook import tqdm
from torch import optim
from torchvision.ops import roi_pool, nms
from sklearn.metrics import average_precision_score
from config import cfg
from utils import *
from models import *
from refine_loss import WeightedRefineLoss
from datasets import VOCDectectionDataset
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

import pyximport
import numpy as np
pyximport.install(setup_args={"include_dirs":np.get_include()},
                  reload_support=True)
import torch
import pdb
from bbox import bbox_overlaps

In [2]:
year='2007'
pretrained='vgg16'
oicr = None
midn = None
check_points = True

eva_th = 1
lr = 1e-4
lr_step = 5
epochs = 5
# start_epoch = 29
start_epoch = 0

if pretrained == 'alexnet':
    model = Combined_Alexnet(cfg.K, cfg.Groups)
if pretrained == 'vgg16':
    model = Combined_VGG16(cfg.K)

model.init_model()
model.to(cfg.DEVICE)

checkpoints = torch.load(cfg.PATH.PT_PATH + "BestModel_2007_vgg16_23.pt")
model.load_state_dict(checkpoints['whole_model_state_dict'])

trainval = VOCDectectionDataset("~/data/", year, 'trainval')
train_loader = data.DataLoader(trainval, cfg.TRAIN.BATCH_SIZE, shuffle=False)
testdata = VOCDectectionDataset("~/data/", year, 'test')
test_loader = data.DataLoader(testdata, 1, shuffle=False)


bias_params = []
bias_param_names = []
nonbias_params = []
nonbias_param_names = []
nograd_param_names = []
for key, value in model.named_parameters():
    if value.requires_grad:
        if 'bias' in key:
            bias_params.append(value)
            bias_param_names.append(key)
        else:
            nonbias_params.append(value)
            nonbias_param_names.append(key)
            
params = [
    {'params': nonbias_params,
     'lr': lr,
     'weight_decay': cfg.TRAIN.WD},
    {'params': bias_params,
     'lr': lr * (cfg.TRAIN.BIAS_DOUBLE_LR + 1),
     'weight_decay':  0},
]

optimizer = optim.SGD(params,
                      momentum=cfg.TRAIN.MOMENTUM)

#   scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
#                                              milestones=[lr_step,
#                                                          epochs],
#                                              gamma=cfg.TRAIN.LR_MUL)
mAP = 0
best_mAP = 0
best_model = None
best_epoch = 0

N = len(train_loader)
bceloss = nn.BCELoss(reduction="sum")
refineloss = WeightedRefineLoss()

In [2]:
def oicr_algorithm(xrk_list, gt_label, regions, K=3):
    R = regions.size()[1] # R
    # then do the online instance classifier refinement
    wrk_list = torch.zeros((K, R)).to(cfg.DEVICE)
    # R x 21 x k
    yrk_list = torch.zeros((K, R, (1 + len(VOC_CLASSES))))
    yrk_list[:, :, -1] = 1.0
    yrk_list = yrk_list.to(cfg.DEVICE)
#     # here is just to calculate the supervised information 
#     # do not need grad any more
    with torch.no_grad():
        for k in range(K):
            wrk = wrk_list[k, :]
            yrk = yrk_list[k, :, :]
            IoUs = torch.full((R, ), - np.inf).to(cfg.DEVICE)
            for c in range(len(VOC_CLASSES)):
                if gt_label[0][c] == 1.0:
                    top_id = torch.argmax(xrk_list[k][:, c])
                    top_score = xrk_list[k][top_id][c]
#                     writer.add_scalar("top_score", top_score, 0)
#                     print(top_score)
                    top_box = regions[0][top_id:top_id+1]
                    IoUs_temp = one2allbox_iou(top_box, regions[0])
                    IoU_mask = torch.where(IoUs_temp > IoUs)
                    IoUs[IoU_mask] = IoUs_temp[IoU_mask]
                    wrk[IoU_mask] = top_score
                    y_mask = torch.where(IoUs[IoU_mask] > cfg.TRAIN.It)
                    yrk[y_mask] = 0.0
                    yrk[y_mask] += torch.eye(1 + len(VOC_CLASSES))[c].to(cfg.DEVICE)
    return wrk_list, yrk_list

In [33]:
def OICRLayer(boxes, cls_prob, im_labels, cfg_TRAIN_FG_THRESH = 0.5):
    # boxes = boxes[...,1:]
    # boxes:[1, 3189, 4]
    # 上一层传来的cls_prob:[1, 3819, 20/21]
    # image level label -> im_labels:[1, 20]
    proposals = _get_highest_score_proposals(boxes, cls_prob, im_labels)
    labels, cls_loss_weights = _sample_rois(boxes, proposals, 21)
    return labels, cls_loss_weights

def _get_highest_score_proposals(boxes, cls_prob, im_labels):
    """Get proposals with highest score."""

    num_images, num_classes = im_labels.shape
    assert num_images == 1, 'batch size shoud be equal to 1'
    # 图片labels
    im_labels_tmp = im_labels[0, :]
    gt_boxes = np.zeros((0, 4), dtype=np.float32)
    gt_classes = np.zeros((0, 1), dtype=np.int32)
    gt_scores = np.zeros((0, 1), dtype=np.float32)

        # 裁剪掉背景分类
    if 21 == cls_prob.shape[2] : # added 1016
        cls_prob = cls_prob[:, :, :-1]

    # 统计GT类得分最高的box
    for i in range(num_classes):
        if im_labels_tmp[i] == 1:
            print(i)
            cls_prob_tmp = cls_prob[:,:, i].data.cpu()
            
            max_index = np.argmax(cls_prob_tmp)
            print(max_index)
            # m = boxes[:,max_index, :].reshape(1, -1).cpu()
            gt_boxes = np.vstack((gt_boxes, boxes[:,max_index, :].reshape(1, -1).cpu()))
            gt_classes = np.vstack((gt_classes, (i + 1) * np.ones((1, 1), dtype=np.int32))) # for pushing ground
            gt_scores = np.vstack((gt_scores,
                cls_prob_tmp[:, max_index] ))  # * np.ones((1, 1), dtype=np.float32)))
            cls_prob[:, max_index, :] = 0 #in-place operation <- OICR code but I do not agree
    # proposals {[1, 4], [1, 1], [1, 1]}
    proposals = {'gt_boxes' : gt_boxes,
                 'gt_classes': gt_classes,
                 'gt_scores': gt_scores}
    return proposals


def _sample_rois(all_rois, proposals, num_classes):
    """Generate a random sample of RoIs comprising foreground and background
    examples.
    """
    # overlaps: (rois x gt_boxes)
    gt_boxes = proposals['gt_boxes']
    gt_labels = proposals['gt_classes']
    gt_scores = proposals['gt_scores']
    overlaps = bbox_overlaps(
        np.ascontiguousarray(all_rois[0].cpu(), dtype=np.float),
        np.ascontiguousarray(gt_boxes, dtype=np.float))
    try :
        gt_assignment = overlaps.argmax(axis=1)
        max_overlaps = overlaps.max(axis=1)
    except :
        pdb.set_trace()

    labels = gt_labels[gt_assignment, 0]
    cls_loss_weights = gt_scores[gt_assignment, 0]
    # Select foreground RoIs as those with >= FG_THRESH overlap
    fg_inds = np.where(max_overlaps >= 0.5)[0]

    # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
    bg_inds = np.where(max_overlaps < 0.5)[0]

    labels[bg_inds] = 0
    real_labels = np.zeros((labels.shape[0], 21))
    for i in range(labels.shape[0]) :
        real_labels[i, labels[i]] = 1
    rois = all_rois.cpu()
    return real_labels, cls_loss_weights


In [34]:
iter_id = 0

for epoch in tqdm(range(start_epoch, epochs+1), "Total"):
    epoch_b_loss = 0.0
    epoch_r_loss = 0.0
    for img, gt_box, gt_label, regions in tqdm(train_loader, f"Epoch {epoch}"):
        img = img.to(cfg.DEVICE)  # 1, 3, h ,w 
        regions = regions.to(cfg.DEVICE) # 1, R, 4
        R = regions.size()[1] # R
        gt_label = gt_label.to(cfg.DEVICE) # 1, C

        ref_scores1, ref_scores2, ref_scores3, proposal_scores = model(img, regions)
        cls_scores = torch.sum(proposal_scores, dim=0)
        cls_scores = torch.clamp(cls_scores, min=0, max=1)
        
        b_loss = bceloss(cls_scores, gt_label[0])
        epoch_b_loss += b_loss.item()


        xr0 = torch.zeros((R, 21)).to(cfg.DEVICE) # xj0
        xr0[:, :20] = proposal_scores.clone()
            # R+1 x 21
        xrk_list = []
        xrk_list.append(xr0)
        xrk_list.append(ref_scores1.clone())
        xrk_list.append(ref_scores2.clone())
        
        wrk_list, yrk_list = oicr_algorithm(xrk_list, gt_label, regions, cfg.K)


        r_loss_1 = refineloss(ref_scores1, 
                              yrk_list[0],
                              wrk_list[0])
        r_loss_2 = refineloss(ref_scores2, 
                              yrk_list[1],
                              wrk_list[1])
        r_loss_3 = refineloss(ref_scores3, 
                              yrk_list[2],
                              wrk_list[2])

        loss = b_loss + r_loss_1 + r_loss_2 + r_loss_3
        loss.backward()
        epoch_r_loss += (r_loss_1 + r_loss_2 + r_loss_3).item()

        iter_id += 1
        if iter_id % cfg.TRAIN.ITER_SIZE == 0:
            optimizer.step()
#             if iter_id == 2:
#                 break
            optimizer.zero_grad()
        if iter_id == 5:
            break
    break

Total:   0%|          | 0/6 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/5011 [00:00<?, ?it/s][A
Epoch 0:   0%|          | 1/5011 [00:00<37:42,  2.21it/s][A
Epoch 0:   0%|          | 2/5011 [00:01<42:16,  1.98it/s][A
Epoch 0:   0%|          | 3/5011 [00:01<48:38,  1.72it/s][A
Epoch 0:   0%|          | 4/5011 [00:03<1:03:06,  1.32it/s]A
Total:   0%|          | 0/6 [00:03<?, ?it/s]


In [8]:
model = Combined_VGG16(cfg.K)
year=2007

model.init_model()
model.to(cfg.DEVICE)

checkpoints = torch.load(cfg.PATH.PT_PATH + "BestModel_2007_vgg16_23.pt")
model.load_state_dict(checkpoints['whole_model_state_dict'])

trainval = VOCDectectionDataset("~/data/", year, 'trainval', over_box=False, small_box=False)
trainval_over = VOCDectectionDataset("~/data/", year, 'trainval', over_box=True, small_box=False)
trainval_all = VOCDectectionDataset("~/data/", year, 'trainval', over_box=True, small_box=True)

In [12]:
print_item(gt_label.squeeze(0).cpu())

Index 1: Item 1.0


In [13]:
gt_label

tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.]], device='cuda:0')

In [29]:
with torch.no_grad():
    for i in range(1, 40, 4):
        print('.' * 20)
        print(f"img No.{i}")
        for data in [trainval, trainval_over, trainval_all]:
            img, gt_box, gt_label, regions = data[i]
            img = torch.Tensor(img).to(cfg.DEVICE)  # 1, 3, h ,w 
            img = img.resize(1, *img.size())
            regions = torch.Tensor(regions).to(cfg.DEVICE) # 1, R, 4
            regions = regions.resize(1, *regions.size())
            R = regions.size()[1] # R
            gt_label = torch.Tensor(gt_label).to(cfg.DEVICE) # 1, C
            gt_label = gt_label.resize(1, *gt_label.size())

            ref_scores1, ref_scores2, ref_scores3, proposal_scores = model(img, regions)

            xr0 = torch.zeros((R, 21)).to(cfg.DEVICE) # xj0
            xr0[:, :20] = proposal_scores.clone()
                # R+1 x 21
            xrk_list = []
            xrk_list.append(xr0)
            xrk_list.append(ref_scores1.clone())
            xrk_list.append(ref_scores2.clone())

            wrk_list, yrk_list = oicr_algorithm(xrk_list, gt_label, regions, cfg.K)
            y1, y2, y3 = yrk_list


#             print_item(gt_label.squeeze(0).cpu())
            s1 = print_item(y1.sum(dim=0))
            s2 = print_item(y2.sum(dim=0))
            s3 = print_item(y3.sum(dim=0))
            print(f'All box is {s1 + s2 + s3}')
            print('\n')



....................
img No.1
Index 6: Item 11
Index 6: Item 11
Index 6: Item 11
All box is 33


Index 6: Item 18
Index 6: Item 14
Index 6: Item 18
All box is 50


Index 6: Item 19
Index 6: Item 29
Index 6: Item 18
All box is 66


....................
img No.5
Index 12: Item 32
Index 14: Item 9
Index 12: Item 22
Index 14: Item 4
Index 12: Item 23
Index 14: Item 10
All box is 100


Index 12: Item 50
Index 14: Item 18
Index 12: Item 41
Index 14: Item 4
Index 12: Item 41
Index 14: Item 11
All box is 165


Index 12: Item 49
Index 14: Item 16
Index 12: Item 47
Index 14: Item 1
Index 12: Item 51
Index 14: Item 12
All box is 176


....................
img No.9
Index 1: Item 6
Index 14: Item 21
Index 1: Item 13
Index 14: Item 22
Index 1: Item 3
Index 14: Item 18
All box is 83


Index 1: Item 34
Index 14: Item 23
Index 1: Item 29
Index 14: Item 30
Index 1: Item 36
Index 14: Item 26
All box is 178


Index 1: Item 17
Index 14: Item 29
Index 1: Item 19
Index 14: Item 25
Index 1: Item 25
Index 14: 

In [27]:
def print_item(l):
    s = 0
    for i in range(len(l)):
        if l[i] == 0:
            continue
        elif i != 20:
            print(f"Index {i}: Item {int(l[i])}")
            s += int(l[i])
    return s

In [None]:
y2

In [39]:
wrk_list, yrk_list = oicr_algorithm(xrk_list, gt_label, regions)

In [57]:
# proposal_scores =  proposal_scores.view(1, -1, R)
y1, w1 = OICRLayer(regions, proposal_scores.clone().view(1, R, -1), gt_label)
y2, w2 = OICRLayer(regions, ref_scores1.clone().view(1, R, -1), gt_label)
y3, w3 = OICRLayer(regions, ref_scores2.clone().view(1, R, -1), gt_label)

1
tensor(477)
1
tensor(313)
1
tensor(313)


In [69]:
np.where(y1[:, 1:] == 1.0)

(array([ 30,  34, 336, 441, 475, 476, 477, 485]),
 array([1, 1, 1, 1, 1, 1, 1, 1]))

In [64]:
gt

tensor([[0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
         0., 0.]], device='cuda:0')

In [74]:
proposal_scores[:, 1][30]

tensor(0.0005, device='cuda:0', grad_fn=<SelectBackward>)

In [81]:
regions[0][30, :]

tensor([203.,   5., 572., 608.], device='cuda:0')

In [56]:
wrk_list[1].numel()

1089

In [45]:
y

array([[1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       ...,
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.],
       [1., 0., 0., ..., 0., 0., 0.]])

Total:   0%|          | 0/6 [00:00<?, ?it/s]
Epoch 0:   0%|          | 0/5011 [00:00<?, ?it/s][A
Epoch 0:   0%|          | 1/5011 [00:00<1:05:20,  1.28it/s][A
Epoch 0:   0%|          | 2/5011 [00:01<1:01:41,  1.35it/s][A
Epoch 0:   0%|          | 3/5011 [00:01<53:48,  1.55it/s]  [A
Epoch 0:   0%|          | 4/5011 [00:02<58:36,  1.42it/s][A
Epoch 0:   0%|          | 5/5011 [00:03<56:13,  1.48it/s][A
Epoch 0:   0%|          | 6/5011 [00:03<50:32,  1.65it/s][A
Epoch 0:   0%|          | 7/5011 [00:04<47:20,  1.76it/s][A
Epoch 0:   0%|          | 8/5011 [00:04<44:41,  1.87it/s][A
Epoch 0:   0%|          | 9/5011 [00:05<53:32,  1.56it/s][A
Total:   0%|          | 0/6 [00:05<?, ?it/s]


In [30]:
model.ic_score1

Linear(in_features=4096, out_features=21, bias=True)

In [8]:
# def oicr_algorithm(xr0, gt_label, regions, K=3):
K = 3
R = regions.size()[1] # R
# then do the online instance classifier refinement
wrk_list = torch.zeros((K, R)).to(cfg.DEVICE)
# R x 21 x k
yrk_list = torch.zeros((K, R, (1 + len(VOC_CLASSES))))
yrk_list[:, :, -1] = 1.0
yrk_list = yrk_list.to(cfg.DEVICE)
# here is just to calculate the supervi

In [9]:
with torch.no_grad():
    k = 1
    xr0 = xrk_list
    wrk = wrk_list[k, :]
    yrk = yrk_list[k, :, :]
    IoUs = torch.full((R, ), - np.inf).to(cfg.DEVICE)
    for c in range(len(VOC_CLASSES)):
        # 8 17
        if gt_label[0][c] == 1.0:
            print("True Label is : " + str(c))
            top_id = torch.argmax(xr0[k][:, c])
            print("top id : " + str(top_id.item()))
            top_score = xr0[k][top_id][c]

            top_box = regions[0][top_id:top_id+1]
            IoUs_temp = one2allbox_iou(top_box, regions[0])
            IoU_mask = torch.where(IoUs_temp > IoUs)
            IoUs[IoU_mask] = IoUs_temp[IoU_mask]
            wrk[IoU_mask] = top_score
            y_mask = torch.where(IoUs[IoU_mask] > cfg.TRAIN.It)
            yrk[y_mask] = 0.0
            yrk[y_mask] += torch.eye(1 + len(VOC_CLASSES))[c].to(cfg.DEVICE)
            print(y_mask)
            print(yrk.sum(dim=0))
            print('-' * 30)
#             break

True Label is : 12
top id : 826
(tensor([596, 818, 826, 847, 874, 919, 920, 928], device='cuda:0'),)
tensor([  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          8.,   0.,   0.,   0.,   0.,   0.,   0.,   0., 951.], device='cuda:0')
------------------------------
True Label is : 14
top id : 518
(tensor([115, 127, 178, 210], device='cuda:0'),)
tensor([  0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,   0.,
          8.,   0.,   4.,   0.,   0.,   0.,   0.,   0., 947.], device='cuda:0')
------------------------------


In [79]:
True Label is : 6
top id : 452
(tensor([ 54,  55,  58, 350, 352, 353, 386, 387, 388, 397, 446, 447, 448, 449,
        450, 451, 452, 453, 454, 471, 473, 485, 486, 487, 525, 571, 651, 652,
        705, 706, 812, 814, 822, 836, 837, 843], device='cuda:0'),)
tensor([   0.,    0.,    0.,    0.,    0.,    0.,   36.,    0.,    0.,    0.,
           0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,    0.,
        1228.], device='cuda:0')
------------------------------

tensor([0.0016, 0.0439, 0.1009,  ..., 0.0006, 0.0003, 0.0003], device='cuda:0')

In [82]:
IoUs[torch.where(IoUs > 0)]

tensor([0.0016, 0.0439, 0.1009,  ..., 0.0017, 0.0012, 0.0005], device='cuda:0')

In [36]:
gt_label

tensor([[0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
         0., 0.]], device='cuda:0')

In [23]:
yrk_list[0].shape

torch.Size([2074, 21])