# Practice

In [175]:
import numpy as np
import torch
import torch.nn as nn

In [37]:
def calc_iou(box1, box2):
    """
    Param: box1, box2
    Return: Intersection over Union of two boxes
    
    Each boxes should be like [x1, y1, x2, y2],
    and x1 <= x2, y1 <= y2
    """
    
    (ax1, ay1, ax2, ay2) = box1
    (bx1, by1, bx2, by2) = box2
    
    assert (ax1 <= ax2) & (ay1 <= ay2)
    assert (bx1 <= bx2) & (by1 <= by2)
    
    cx1 = max(ax1, bx1)
    cy1 = max(ay1, by1)
    cx2 = min(ax2, bx2)
    cy2 = min(ay2, by2)
    
    assert (cx1 <= cx2) & (cy1 <= cy2)
        
    a_area = (ax2 - ax1) * (ay2 - ay1)
    b_area = (bx2 - bx1) * (by2 - by1)
    c_area = (cx2 - cx1) * (cy2 - cy1)
        
    union_area = a_area + b_area - c_area
    intersecion_area = c_area
    
    smooth = 1e-6
#     print(intersecion_area)
    
    return (intersecion_area + smooth) / (union_area + smooth)

In [99]:
def calc_iou_many_to_one(boxes, ground_truth):
    """
    Param: boxes: shape([N, 4]), ground_truth: shape([4])
    Return: IoU of boxes over on ground truth box
    
    Each boxes should be like [x1, y1, x2, y2],
    and x1 <= x2, y1 <= y2
    """
    
    (gt_x1, gt_y1, gt_x2, gt_y2) = ground_truth
    boxes_x1s = boxes[:, 0]
    boxes_y1s = boxes[:, 1]
    boxes_x2s = boxes[:, 2]
    boxes_y2s = boxes[:, 3]
    
    assert (gt_x1 <= gt_x2) & (gt_y1 <= gt_y2)
    assert (boxes_x1s <= boxes_x2s).all() & (boxes_y1s <= boxes_y2s).all()
    
    inter_x1s = torch.max(boxes_x1s, gt_x1)
    inter_y1s = torch.max(boxes_y1s, gt_y1)
    inter_x2s = torch.min(boxes_x2s, gt_x2)
    inter_y2s = torch.min(boxes_y2s, gt_y2)
    
    assert (inter_x1s <= inter_x2s).all() & (inter_y1s <= inter_y2s).all()
        
    gt_area = (gt_x2 - gt_x1) * (gt_y2 - gt_y1)
    box_areas = (boxes_x2s - boxes_x1s) * (boxes_y2s - boxes_y1s)
    intersect_areas = (inter_x2s - inter_x1s) * (inter_y2s - inter_y1s)
    
    union_area = gt_area + box_areas - intersect_areas
    intersecion_area = intersect_areas

    smooth = 1e-6    
    return (intersecion_area + smooth) / (union_area + smooth)


In [234]:
def determine_anchor_label(anchors, ground_truth, pos_threshold=0.7, neg_threshold=0.3):
    """
    Determine a label of anchors.
    
    Params:
        Anchors: array of [x1, y1, x2, y2]. shape([N, 4])
        ground_truth: ground truth bbox. shape([4])
        pos_threshold: IoU Threshold used to determine positive anchor
        neg_threshold: IoU Threshold used to determine negative anchor
    
    Return:
        Tensor of integer values denoting the label of anchors. shape([N])
        
        Positive: 1
        Negative: 0
        Neither positive or negative: -1
    """
    
    num_of_anchors = anchors.shape[0]
    labels = -torch.ones(num_of_anchors)
    
    ious = calc_iou_many_to_one(anchors, ground_truth)
    print(ious)
    
    # First positive condition: Highest IoU with ground truth
    max_index = torch.argmax(ious).item()
    labels[max_index] = 1
    
    # Second positive condition: Higher than pos_threshold or equal wihh pos_threshold IoU with ground truth
    positive_flags = torch.ge(ious, pos_threshold)
    labels[positive_flags] = 1
    
    # Negative condition: Among non-positive anchors, less than neg_threshold IoU
    negative_flags = torch.eq(labels, -1) & torch.lt(ious, neg_threshold)
    labels[negative_flags] = 0
    
    return labels


In [235]:
def rpn_loss_cls(preds, labels):
    """
    Classification loss of RPN Layer.
    Log loss between probability that anchor is object and binary ground truth label
    
    Params:
        preds: Probabilities that anchors are objects
        labels: Labels that anchors are objects
    """
    
    assert torch.all(torch.ge(preds, 0.0))
    assert torch.all(torch.le(preds, 1.0))
    
    binary_cross_entropy = nn.BCELoss(reduction='none')
    output = binary_cross_entropy(preds, labels)
    return output

In [236]:
def smooth_L1(ti, ti_star):
    """
    smooth L1 function:
        0.5 * (x^2) if abs(x) < 1
        abs(x) - 0.5 otherwise

    Params:
        ti: shape([N])
        ti_star: shape([N])
    
    Return: score: shape([N])
    """
    abs_sub = torch.abs(ti - ti_star)
    
    smaller_than_1 = torch.where(abs_sub < 1)
    greater_than_1 = torch.where(abs_sub >= 1)
    
    abs_sub[smaller_than_1] = torch.pow(abs_sub[smaller_than_1], 2) / 2
    abs_sub[greater_than_1] = abs_sub[greater_than_1] - 0.5
        
    return abs_sub

In [237]:
def rpn_loss_reg(pred_boxes, anchor_boxes, gt_box):
    # TODO: gt_box? or gt_boxes?
    """
    Regression loss of RPN Layer.
    
    Params:
        pred_boxes: Predicted boxes by RPN layer. shape([N, 4])
        anchor_boxes: Anchor boxes used by the predictions. shape([N, 4])
        gt_box: Ground truth box of image. shape([4])
    """
    
    x = pred_boxes[:, 0]
    y = pred_boxes[:, 1]
    w = pred_boxes[:, 2] - pred_boxes[:, 0]
    h = pred_boxes[:, 3] - pred_boxes[:, 1]

    x_a = anchor_boxes[:, 0]
    y_a = anchor_boxes[:, 1]
    w_a = anchor_boxes[:, 2] - anchor_boxes[:, 0]
    h_a = anchor_boxes[:, 3] - anchor_boxes[:, 1]

    x_star = gt_box[0]
    y_star = gt_box[1]
    w_star = gt_box[2] - gt_box[0]
    h_star = gt_box[3] - gt_box[1]
    
    t_x = (x - x_a) / w_a
    t_y = (y - y_a) / h_a
    t_w = torch.log(w/w_a)
    t_h = torch.log(h/h_a)
    
    t_x_star = (x_star - x_a) / w_a
    t_y_star = (y_star - y_a) / h_a
    t_w_star = torch.log(w_star/w_a)
    t_h_star = torch.log(h_star/h_a)
    
    losses = torch.zeros(anchor_boxes.shape[0])
    losses += smooth_L1(t_x, t_x_star)
    losses += smooth_L1(t_y, t_y_star)
    losses += smooth_L1(t_w, t_w_star)
    losses += smooth_L1(t_h, t_h_star)
    
    return losses

In [238]:
def multitask_loss(pred_probs,
                   pred_boxes, anchor_boxes, gt_box,
                   anchor_num=9, balance=10):
    """
    
    L(p, t) = (1/N_cls) * sigma{L_cls(pi, pi_star)} + lambda * (1/N_reg) * sigma{pi_star * L_reg(ti, ti_star)}
    """
    
    # Positive: 1 Negative: 0 Neither positive or negative: -1
    labels = determine_anchor_label(anchor_boxes, gt_box)
    
    # Only get positive and negative anchors
    valid_indices = torch.where(labels > -0.5)
    valid_labels = labels[valid_indices] # pi_star
    valid_pred_probs = pred_probs[valid_indices]
    valid_pred_boxes = pred_boxes[valid_indices]
    valid_anchor_boxes = anchor_boxes[valid_indices]
    
    cls_loss = rpn_loss_cls(valid_pred_probs, valid_labels)
    reg_loss = rpn_loss_reg(valid_pred_boxes, valid_anchor_boxes, gt_box)
    positive_reg_loss = reg_loss * valid_labels
    
    n_cls = anchor_boxes.shape[0] / anchor_num
    n_reg = anchor_boxes.shape[0]
    
    cls_term = torch.sum(cls_loss) / n_cls
    reg_term = torch.sum(positive_reg_loss) / n_reg * balance
    
    return cls_term + reg_term

In [224]:
len(pred_boxes)

5

In [254]:
gt_box = torch.tensor([2.0, 2.0, 5.0, 5.0])

pred_boxes = torch.tensor([
    [2.0, 2.0, 5.0, 4.5],
    [1.0, 4.0, 3.0, 6.0],
    [2.0, 2.0, 5.0, 6.0],
    [2.0, 2.0, 4.0, 4.0],
    [3.0, 3.0, 4.0, 4.0]
])

anchor_boxes = torch.tensor([
    [2.0, 2.0, 5.0, 4.3],
    [1.0, 4.0, 3.0, 6.0],
    [4.0, 4.0, 6.0, 6.0],
    [2.0, 2.0, 5.0, 4.5],
    [3.0, 3.0, 4.0, 4.0]
])

In [255]:
pred_probs = torch.tensor([0.1, 0.2, 0.3, 0.4, 0.5])

In [256]:
multitask_loss(pred_probs, pred_boxes, anchor_boxes, gt_box, anchor_num=2, balance=3)

tensor([0.7667, 0.0833, 0.0833, 0.8333, 0.1111])
reg_loss: tensor([0.0166, 0.7894, 0.0414, 0.1644, 2.1972])
labels: tensor([1., 0., 0., 1., 0.])
valid_indices: tensor([1., 0., 0., 1., 0.])
positive_only: tensor([0.0166, 0.0000, 0.0000, 0.1644, 0.0000])


tensor(3.7222)

In [196]:
rpn_loss_reg(pred_boxes, anchor_boxes, gt_box)

tensor([0.0000, 0.7894, 0.0414, 0.1644, 2.1972])

In [186]:
x = pred_boxes[:, 0]
y = pred_boxes[:, 1]
w = pred_boxes[:, 2] - pred_boxes[:, 0]
h = pred_boxes[:, 3] - pred_boxes[:, 1]

x_a = anchor_boxes[:, 0]
y_a = anchor_boxes[:, 1]
w_a = anchor_boxes[:, 2] - anchor_boxes[:, 0]
h_a = anchor_boxes[:, 3] - anchor_boxes[:, 1]

x_star = gt_box[0]
y_star = gt_box[1]
w_star = gt_box[2] - gt_box[0]
h_star = gt_box[3] - gt_box[1]

t_x = (x - x_a) / w_a
t_y = (y - y_a) / h_a
t_w = torch.log(w/w_a)
t_h = torch.log(h/h_a)

t_x_star = (x_star - x_a) / w_a
t_y_star = (y_star - y_a) / h_a
t_w_star = torch.log(w_star/w_a)
t_h_star = torch.log(h_star/h_a)

In [185]:
(x - x_a) / w_a

tensor([ 0.5000,  0.0000, -1.0000,  0.0000,  0.0000])

In [183]:
x_a

tensor([1., 1., 4., 2., 3.])

In [164]:
labels = determine_anchor_label(many_boxes, ground_truth)
labels

tensor([ 1., -1., -1., -1., -1.])
tensor([ 1., -1.,  1., -1., -1.])
tensor([ 1.,  0.,  1., -1.,  0.])


tensor([ 1.,  0.,  1., -1.,  0.])

In [165]:
ret = calc_iou_many_to_one(many_boxes, ground_truth)
ret

tensor([1.0000, 0.0833, 0.7500, 0.4444, 0.1111])

In [124]:
labels = -torch.ones(many_boxes.shape[0])

In [125]:
labels

tensor([-1., -1., -1., -1., -1.])

In [130]:
a = torch.randn(5)
print(a)
max_index = torch.argmax(a).item()

tensor([-0.3040,  2.2147, -0.3318,  0.5717,  1.1755])


In [131]:
max_index

1

In [187]:
labels

tensor([ 1.,  0.,  1., -1.,  0.])

In [188]:
torch.where(labels <= 0)

(tensor([1, 3, 4]),)

In [189]:
labels[torch.where(labels <= 0)]

tensor([ 0., -1.,  0.])

In [167]:
torch.abs(labels)

tensor([1., 0., 1., 1., 0.])

In [168]:
import torch.nn as nn

In [169]:
loss = nn.BCELoss()

In [170]:
in_tensor = torch.randn(3)
target = torch.empty(3).random_(2)

In [171]:
in_tensor

tensor([0.7203, 0.3384, 0.5297])

In [172]:
target

tensor([1., 0., 0.])

In [173]:
output = loss(in_tensor, target)

In [174]:
output

tensor(0.4985)