本文档是基于 eriklindernoren 的 PyTorch-YOLOv3 开源代码所做的学习记录。

In [2]:
import torch
import torch.nn as nn
import numpy as np

## models.py

### compute_grid_offsets

In [16]:
# grid_x: column index of every cell in a g x g grid, reshaped to [1, 1, g, g].
g = 13  # grid_size
cell_columns = torch.arange(g)
print(cell_columns)
# Stack the 1-D index vector g times so that every row reads 0..g-1.
tiled_columns = cell_columns.repeat(g, 1)
print(tiled_columns, tiled_columns.shape)
# Prepend two singleton dimensions.
grid_x = tiled_columns.view(1, 1, g, g)
print(grid_x.shape)

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])
tensor([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12],
        [ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12]]) torch.Size([13, 13])
torch.Size([1, 1, 13, 13])


In [10]:
# scaled_anchors: each anchor (width, height) pair divided by the stride.
anchors = [(10, 13), (16, 30), (33, 23)]
num_anchors = len(anchors)
stride = 32
scaled_anchors = torch.tensor(
    [[width / stride, height / stride] for width, height in anchors],
    dtype=torch.float32,
)
print(scaled_anchors)

tensor([[0.3125, 0.4062],
        [0.5000, 0.9375],
        [1.0312, 0.7188]])


In [14]:
# anchor_w: the first column of scaled_anchors (the widths), reshaped to
# [1, num_anchors, 1, 1]. anchor_h is built the same way in the next cell.
width_column = scaled_anchors[:, :1]
anchor_w = width_column.view(1, num_anchors, 1, 1)
print(anchor_w, anchor_w.shape)

tensor([[[[0.3125]],

         [[0.5000]],

         [[1.0312]]]]) torch.Size([1, 3, 1, 1])


In [15]:
# anchor_h: the second column of scaled_anchors (the heights), reshaped to
# [1, num_anchors, 1, 1].
height_column = scaled_anchors[:, 1:2]
anchor_h = height_column.view(1, num_anchors, 1, 1)
print(anchor_h, anchor_h.shape)

tensor([[[[0.4062]],

         [[0.9375]],

         [[0.7188]]]]) torch.Size([1, 3, 1, 1])


## utils.py

### bbox_wh_iou

In [17]:
def bbox_wh_iou(wh1, wh2):
    """
    IoU computed from widths and heights only.

    Args:
        wh1: indexable pair (width, height) of a single box.
        wh2: tensor that is transposed before use, so that row 0 holds the
            widths and row 1 the heights after the transpose.

    Returns:
        Tensor of IoU values between wh1 and every box described by wh2.
    """
    boxes = wh2.t()
    ref_w, ref_h = wh1[0], wh1[1]
    box_w, box_h = boxes[0], boxes[1]

    # Overlap is the product of the smaller width and the smaller height.
    overlap = torch.min(ref_w, box_w) * torch.min(ref_h, box_h)

    # The 1e-16 term keeps the denominator non-zero for degenerate boxes.
    ref_area = ref_w * ref_h + 1e-16
    total = ref_area + box_w * box_h - overlap
    return overlap / total

In [29]:
# Anchor (width, height) pairs, divided by the stride.
anchors = [(116, 90), (156, 198), (373, 326)]
stride = 32
scaled_anchors = torch.tensor(
    [[width / stride, height / stride] for width, height in anchors],
    dtype=torch.float32,
)

In [30]:
num_grid = 13
# Four sample target rows with six values each
# (presumably sample index, class, cx, cy, w, h — TODO confirm against the dataset code).
targets = torch.tensor(
    [
        [0.0, 0.0, 0.5100, 0.5000, 0.1454, 0.1829],
        [1.0, 0.0, 0.7122, 0.6250, 0.2630, 0.4297],
        [1.0, 1.0, 0.5540, 0.4870, 0.5951, 0.3932],
        [1.0, 2.0, 0.2454, 0.2780, 0.2982, 0.1471],
    ]
)
# gwh: the last two columns of every target row.
# NOTE(review): num_grid is defined but never used, so gwh is not rescaled here
# while scaled_anchors are divided by stride — confirm whether
# `targets[:, -2:] * num_grid` was intended.
gwh = targets[:, 4:6]

In [31]:
# IoU of the first scaled anchor against every row of gwh.
first_anchor = scaled_anchors[0]
iou = bbox_wh_iou(first_anchor, gwh)
iou

tensor([0.0026, 0.0111, 0.0230, 0.0043])

In [32]:
# One row of IoUs per anchor: row k compares scaled_anchors[k] with every row of gwh.
per_anchor_ious = [bbox_wh_iou(one_anchor, gwh) for one_anchor in scaled_anchors]
ious = torch.stack(per_anchor_ious)
ious

tensor([[0.0026, 0.0111, 0.0230, 0.0043],
        [0.0009, 0.0037, 0.0078, 0.0015],
        [0.0002, 0.0010, 0.0020, 0.0004]])

### bbox_iou

In [None]:
def bbox_iou(box1, box2, x1y1x2y2=True):
    """
    Return the IoU of two sets of bounding boxes, computed element-wise.

    Args:
        box1, box2: 2-D tensors whose first four columns describe a box.
        x1y1x2y2: if True the columns are read as corner coordinates
            (x1, y1, x2, y2); if False as (center_x, center_y, width, height)
            and converted to corners first.

    Returns:
        Tensor with the IoU of each box1/box2 pair.
    """
    if x1y1x2y2:
        # Columns already hold the corner coordinates.
        left1, top1, right1, bottom1 = box1[:, 0], box1[:, 1], box1[:, 2], box1[:, 3]
        left2, top2, right2, bottom2 = box2[:, 0], box2[:, 1], box2[:, 2], box2[:, 3]
    else:
        # Convert (center, size) to corners: center -/+ half the extent.
        half_w1, half_h1 = box1[:, 2] / 2, box1[:, 3] / 2
        half_w2, half_h2 = box2[:, 2] / 2, box2[:, 3] / 2
        left1, right1 = box1[:, 0] - half_w1, box1[:, 0] + half_w1
        top1, bottom1 = box1[:, 1] - half_h1, box1[:, 1] + half_h1
        left2, right2 = box2[:, 0] - half_w2, box2[:, 0] + half_w2
        top2, bottom2 = box2[:, 1] - half_h2, box2[:, 1] + half_h2

    # Corners of the intersection rectangle.
    overlap_left = torch.max(left1, left2)
    overlap_top = torch.max(top1, top2)
    overlap_right = torch.min(right1, right2)
    overlap_bottom = torch.min(bottom1, bottom2)

    # Extents are clamped at zero so disjoint boxes yield an empty intersection.
    # The `+ 1` is part of the original formula (presumably an inclusive
    # coordinate convention — confirm before using with normalized boxes).
    overlap_w = (overlap_right - overlap_left + 1).clamp(min=0)
    overlap_h = (overlap_bottom - overlap_top + 1).clamp(min=0)
    inter_area = overlap_w * overlap_h

    # Areas of the individual boxes, using the same `+ 1` convention.
    area1 = (right1 - left1 + 1) * (bottom1 - top1 + 1)
    area2 = (right2 - left2 + 1) * (bottom2 - top2 + 1)

    # 1e-16 guards against division by zero.
    return inter_area / (area1 + area2 - inter_area + 1e-16)

In [None]:
# Scratch cell: a 1-D tensor of two random normal samples
# (the bbox_iou demo looks unfinished here — TODO confirm the intended shape).
pred_boxes = torch.randn(2)

### build_targets

In [2]:
# Get anchors with best iou
# Sample IoU table with three rows and four columns.
ious = torch.tensor(
    [
        [0.2095, 0.8026, 0.5161, 0.3455],
        [0.0567, 0.2411, 0.3884, 0.0936],
        [0.0359, 0.1525, 0.3157, 0.0592],
    ]
)

In [3]:
# Reduce over dim 0: for every column keep the largest IoU and the row index holding it.
column_best = ious.max(dim=0)
best_ious, best_n = column_best.values, column_best.indices
print(best_ious)
print(best_n)

tensor([0.2095, 0.8026, 0.5161, 0.3455])
tensor([0, 0, 0, 0])


In [4]:
'''
coordinates
'''


'\ncoordinates\n'

In [8]:
# Compute label correctness
b = torch.tensor([0, 1, 1, 1])
best_n = torch.tensor([0, 1, 1, 2])
gj = torch.tensor([1, 5, 6, 10])
gi = torch.tensor([2, 8, 3, 1])
# torch.FloatTensor(2, 3, 13, 13) only allocates memory without initializing it,
# so every entry not written below would hold arbitrary values. Start from zeros
# so the mask is 0 everywhere except where a correct prediction is recorded.
class_mask = torch.zeros(2, 3, 13, 13, dtype=torch.float32)
pred_cls = torch.randn(2, 3, 13, 13, 3)
target_labels = torch.tensor([0, 1, 1, 0])
# Index of the highest class score at each indexed (b, best_n, gj, gi) location.
p_max = pred_cls[b, best_n, gj, gi].argmax(-1)
print(p_max)
# 1.0 where the arg-max class equals the target label, 0.0 otherwise.
msk = (p_max == target_labels).float()
print(msk)
# For the grid cells that contain a target, set the position to 1 where the
# predicted class is correct.
class_mask[b, best_n, gj, gi] = msk

tensor([1, 1, 1, 1])
tensor([0., 1., 1., 0.])
