In [56]:
import torch 
import torch.nn as nn
import torch.nn.functional as F
import torch.utils.data as data
import datetime
import numpy as np


from sklearn.metrics import average_precision_score
from model import WSDDN_S, WSDDN_M
from tqdm.notebook import tqdm
from utils import *
from torch import optim
from torchvision.ops import roi_pool, nms
from pretrained import VGG_CNN_F, VGG_CNN_M_1024, VGG_VD_1024
from VOCdatasets import VOCDectectionDataset


In [57]:
def one2allbox_iou(target_box, others):
    """
     calculate the iou of box A to list of boxes
     target_box : Tensor()  1 * 4 
     others : Tensor()      N * 4 
     return  N * 1  ...iou
    
    """

    # get the min of xmax and ymax which organize the Intersection
    max_xy = torch.min(target_box[:, 2:], others[:, 2:]) 
    min_xy = torch.max(target_box[:, :2], others[:, :2])
    # get the xdistance and y distance
    # add 1 because distance = point2 - point1 + 1
    inter_wh = torch.clamp((max_xy - min_xy + 1), min=0)
    I = inter_wh[:, 0] * inter_wh[:, 1]
    A = (target_box[:, 2] - target_box[:, 0] + 1) * (target_box[:, 3] - target_box[:, 1] + 1)
    B = (others[:, 2] - others[:, 0] + 1) * (others[:, 3] - others[:, 1] + 1)
    return I / (A + B - I)

In [62]:
regions = regions[0]
combine_scores = combined
labels = gt_target[0]

In [63]:
regions

tensor([[408.,   0., 479.,  33.],
        [408.,   0., 479.,  41.],
        [405.,   0., 479.,  48.],
        ...,
        [456., 298., 479., 319.],
        [189., 298., 226., 319.],
        [157., 298., 204., 319.]], device='cuda:0')

In [54]:
topk_boxes

tensor([221.,  74.,  23.,  26., 220.,  98., 323., 101.,  37.,  77.],
       device='cuda:0')

In [51]:
topk_scores

tensor([0.0011, 0.0011, 0.0011, 0.0011, 0.0010, 0.0010, 0.0009, 0.0009, 0.0009,
        0.0009], device='cuda:0', grad_fn=<TopkBackward>)

In [71]:
# iou_th = 0.6
# K = 10 #  top 10 scores
# reg = 0
# positives = 0
# for c in range(20):
#     # extract positive ones
#     if labels[c].item() == 0:
#         continue
#     positives += 1
#     topk_scores, topk_filter = combine_scores[:, c].topk(K, dim=0)
#     topk_boxes = regions[topk_filter]
#     topk_fc7 = fc7[topk_filter]
    
#     # get box with the best box | iou > 0.6
#     iou_mask = one2allbox_iou(topk_boxes[0:1, :], topk_boxes).view(K)
#     iou_mask = (iou_mask > iou_th).float()
    
#     fc7_diff = topk_fc7 - topk_fc7[0]
#     score_diff = topk_scores.detach().view(K, 1)
    
#     diff = fc7_diff * score_diff
    
#     reg += 0.5 * (torch.pow(diff, 2).sum(1) * iou_mask).sum()
    
#     reg /= positives
#     break

In [58]:
def spatial_regulariser(regions, fc7, combine_scores, labels):
    iou_th = 0.6
    K = 10 #  top 10 scores
    reg = 0
    positives = 0
    for c in range(20):
        # extract positive ones
        if labels[c].item() == 0:
            continue
        positives += 1
        topk_scores, topk_filter = combine_scores[:, c].topk(K, dim=0)
        topk_boxes = regions[topk_filter]
        topk_fc7 = fc7[topk_filter]
        
        # get box with the best box | iou > 0.6
        iou_mask = one2allbox_iou(topk_boxes[0:1, :], topk_boxes).view(K)
        iou_mask = (iou_mask > iou_th).float()
        
        fc7_diff = topk_fc7 - topk_fc7[0]
        score_diff = topk_scores.detach().view(K, 1)
        
        diff = fc7_diff * score_diff
        
        reg += 0.5 * (torch.pow(diff, 2).sum(1) * iou_mask).sum()
        
        reg /= positives
            
    return reg

In [59]:
class WSDDN_S(nn.Module):
    def __init__(self):
        super(WSDDN_S, self).__init__()
        self.pretrain_net = VGG_CNN_F()
        self.pretrain_net.load_mat()

        self.roi_output_size = (6, 6)
        
        self.fc6 = nn.Linear(6*6*256, 4096)
        self.fc7 = nn.Linear(4096, 4096)
        self.fc8c = nn.Linear(4096, 20)
        self.fc8d = nn.Linear(4096, 20)
        
    def forward(self, x, regions, scores=None):
        #   x    : bs, c ,h, w
        # regions: bs, R, 4
        #  scores: bs, R
        regions = [regions[0]] # roi_pool require [Tensor(K, 4)]
        R = len(regions[0])
        out = self.pretrain_net(x) # bs, 256， h/16, w/16
        out = roi_pool(out, regions, self.roi_output_size, 1.0/16)  # R, 256, 6, 6
        out = out.view(R, -1)

        if scores is not None:
            out = out * scores[0] * 10

        out = F.relu(self.fc6(out))
        out = F.relu(self.fc7(out))
        
        fc7 = out 
        # fc8x(out)   R, 20
        cls_score = F.softmax(self.fc8c(out), dim = 1)
        det_score = F.softmax(self.fc8d(out), dim = 0)
        combined = cls_score * det_score

        return combined, fc7


In [90]:
voc_07_trainval = VOCDectectionDataset("~/data/", 2007, 'trainval')
train_loader = data.DataLoader(voc_07_trainval, 1, shuffle=True)

wsddn = WSDDN_S().to(DEVICE)

wsddn.load_state_dict(torch.load(SAVE_PATH + "ssw_2007_wsddn_s.pt"))

wsddn.train()
optimizer = optim.SGD(wsddn.parameters(), lr=LR, momentum=0.9)
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[10, 20], gamma=0.1)
bce_loss = nn.BCELoss(reduction="mean")
N = len(train_loader)
propose_way = 'eb'

In [127]:
alpha = 1e-1
for epoch in tqdm(range(EPOCHS), "Total"):
    epoch_loss = 0
    y_pred = []
    y_true = []
    
    for img, gt_box, gt_target, regions, scores in tqdm(train_loader, f"Epoch {epoch}"):
        optimizer.zero_grad()
        # img   : Tensor(1, 3, h, w)
        # gt_tar: Tensor(1, R_gt)
        # region: Tensor(1, R, 4)
        img = img.to(DEVICE)
        regions = regions.to(DEVICE)
        gt_target = gt_target.to(DEVICE)
        if propose_way != "edge_box":
            scores = None
        else:
            scores = scores.to(DEVICE)
        combined, fc7 = wsddn(img, regions, scores=scores)
        
        image_level_cls_score = torch.sum(combined, dim=0) # y
        
        reg = alpha * spatial_regulariser(regions[0], fc7, combined, gt_target[0])
        loss = bce_loss(image_level_cls_score, gt_target[0])
        
        out = loss + reg
        
        y_pred.append(image_level_cls_score.detach().cpu().numpy().tolist())
        y_true.append(gt_target[0].detach().cpu().numpy().tolist())
       
        epoch_loss += out.item()
        out.backward()
        optimizer.step()
        break
    cls_ap = []
    y_pred = np.array(y_pred)
    y_true = np.array(y_true)
    for i in range(20):
        cls_ap.append(average_precision_score(y_true[:,i], y_pred[:,i]))
    
    print(f"Epoch {epoch} classify AP is {str(cls_ap)}")
    print(f"Epoch {epoch} classify mAP is {str(sum(cls_ap)/20)}")
    print(f"Epoch {epoch} Loss is {epoch_loss/N}")
    print("-" * 10)
    scheduler.step()
    break


HBox(children=(FloatProgress(value=0.0, description='Total', max=20.0, style=ProgressStyle(description_width='…

HBox(children=(FloatProgress(value=0.0, description='Epoch 0', max=5011.0, style=ProgressStyle(description_wid…


Epoch 0 classify AP is [nan, nan, nan, nan, nan, nan, 1.0, nan, nan, nan, nan, nan, nan, nan, 1.0, nan, nan, nan, nan, nan]
Epoch 0 classify mAP is nan
Epoch 0 Loss is 5.61206337459262e-05
----------



In [128]:
print(loss)
print(reg)
print(gt_target[0])

tensor(0.1025, device='cuda:0', grad_fn=<BinaryCrossEntropyBackward>)
tensor(0.1787, device='cuda:0', grad_fn=<MulBackward0>)
tensor([0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0.,
        0., 0.], device='cuda:0')


In [129]:
reg

tensor(0.1787, device='cuda:0', grad_fn=<MulBackward0>)

In [28]:
pred_y.append(image_level_cls_score.detach().cpu().numpy().tolist())

In [40]:
np.array(pred_y)[:,0]

array([0.00014353, 0.00014353])

In [36]:
np.concatenate(np.array(pred_y), axis=1)

AxisError: axis 1 is out of bounds for array of dimension 1

In [138]:
ap_list = [0 for _ in range(20)]
for c in range(20):
    ap_list[c] = cls_ap(pred_y[:, c], true_y[:, c])
ap_list

[0.15438907993924983,
 0.1399516771780243,
 0.15653251484717573,
 0.10210185269866995,
 0.0923963152251606,
 0.11109624669835773,
 0.35321136563529093,
 0.17532447611271065,
 0.2134705714456663,
 0.06527645240015797,
 0.09321122799057938,
 0.1800990635107902,
 0.14303616964333313,
 0.1510239025495608,
 0.6474392721886746,
 0.0968885852131021,
 0.05644009620750633,
 0.11363372686090151,
 0.1525522964509208,
 0.13825218855989238]

In [135]:
def cls_ap(pred, gt):
    idx = np.argsort(-pred)
    pred_sort = pred[idx]
    gt_sort = gt[idx]
    N = len(pred_sort)
    ap_list = [0 for _ in range(N)]
    for i in range(N): # every time to add one example to positive
        all_positive = i + 1
        true_positive = sum(gt_sort[:i+1])
        if true_positive == 0.0:
            ap_list[i] = 0
        else:
            ap_list[i] = true_positive/all_positive    
    return sum(ap_list) / N
        
    

In [137]:
cls_ap(pred_y[:, 1], true_y[:, 1])

0.1399516771780243

In [123]:
x[:0]

array([], dtype=float32)

In [112]:
idx = np.argsort(-x)
idx

array([1868,  109, 4376, ..., 4461,  343,  164])

In [15]:
np.vstack([true_y, pred_y])

array([[0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.],
       [1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0.]])

In [27]:
pred_y.shape

(61, 20)

In [28]:
yp = torch.Tensor(np.array([True, True, True, False]))

yt = torch.Tensor(np.array([1, 1, 0, 0]))

In [31]:
np.zeros(20)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0.])

In [15]:
yp == yt

array([[ True,  True,  True,  True],
       [ True, False, False,  True]])

tensor([ True,  True, False,  True])