In [1]:
import cv2
import matplotlib.pyplot as plt
import os
import numpy as np

%run Config.ipynb

In [2]:
# gen_anchor generates the anchor boxes (fixed width, several heights) placed at every feature-map
# position; these anchors define where the model looks for text in the given image

def gen_anchor(featuresize, scale):
    """Generate every anchor box for a feature map.

    Ten anchors (fixed width 16, heights 11..280) are centred on the base cell
    ``[0, 0, 15, 15]`` and replicated at each feature-map position, shifted by
    ``scale`` per grid step.

    Parameters
    ----------
    featuresize : tuple
        ``(h, w)`` - number of rows and columns of the feature map.
    scale : number
        Factor that converts a feature-map index into a coordinate shift.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(h * w * 10, 4)`` with rows ``[x1, y1, x2, y2]``.
        Positions are ordered row by row (x varies fastest); within a position
        the ten heights appear in the order listed below.
    """
    # k=10 anchor shapes per location: one shared width, ten heights.
    heights = np.array([11, 16, 23, 33, 47, 67, 96, 137, 196, 280]).reshape(-1, 1)
    widths = np.array([16] * len(heights)).reshape(-1, 1)

    # Centre of the base cell.
    base_cell = np.array([0, 0, 15, 15])
    xt = (base_cell[0] + base_cell[2]) * 0.5
    yt = (base_cell[1] + base_cell[3]) * 0.5

    # Anchor templates as (x1, y1, x2, y2), one row per height -> shape (10, 4).
    base_anchor = np.hstack((
        xt - widths * 0.5,
        yt - heights * 0.5,
        xt + widths * 0.5,
        yt + heights * 0.5,
    ))

    h, w = featuresize
    shift_x = np.arange(0, w) * scale
    shift_y = np.arange(0, h) * scale

    # Enumerate grid positions in the same order as "for y: for x:" without a
    # Python loop: x cycles fastest, y is held for a whole row.
    xs = np.tile(shift_x, len(shift_y))
    ys = np.repeat(shift_y, len(shift_x))
    shifts = np.array([xs, ys, xs, ys]).T  # (h*w, 4)

    # Broadcast (h*w, 1, 4) + (1, 10, 4) -> (h*w, 10, 4), then flatten to rows.
    return (shifts[:, np.newaxis, :] + base_anchor[np.newaxis, :, :]).reshape((-1, 4))

# Build the anchors for a 10x10 feature map with scale 1.
# NOTE(review): `a` is not referenced in the visible cells - looks like a leftover sanity check; confirm before removing.
a=gen_anchor((10,10),1)

In [3]:
# calculates the intersection-over-union (IoU) ratio of one rectangle against each rectangle in an array

def cal_iou(box1, box1_area, boxes2, boxes2_area):
    """Intersection-over-union of a single box against a batch of boxes.

    Parameters
    ----------
    box1 : sequence indexed 0..3 as ``x1, y1, x2, y2``.
    box1_area : area of ``box1`` (supplied by the caller, not recomputed here).
    boxes2 : 2-D array whose columns 0..3 are ``x1, y1, x2, y2``.
    boxes2_area : per-row areas of ``boxes2``.

    Returns
    -------
    1-D array with one IoU value per row of ``boxes2``.
    """
    # Corners of the overlap rectangle (may be "inverted" when boxes are disjoint).
    left = np.maximum(box1[0], boxes2[:, 0])
    top = np.maximum(box1[1], boxes2[:, 1])
    right = np.minimum(box1[2], boxes2[:, 2])
    bottom = np.minimum(box1[3], boxes2[:, 3])

    # Clamp negative extents to zero so disjoint boxes contribute no overlap.
    overlap_w = np.maximum(right - left, 0)
    overlap_h = np.maximum(bottom - top, 0)
    inter_area = overlap_w * overlap_h

    union_area = box1_area + boxes2_area - inter_area
    return inter_area / union_area

In [4]:
def cal_overlaps(boxes1, boxes2):
    """Build the pairwise IoU matrix between two sets of boxes.

    Parameters
    ----------
    boxes1 : 2-D array, columns 0..3 read as ``x1, y1, x2, y2`` (the anchors).
    boxes2 : 2-D array with the same column layout (the ground-truth boxes).

    Returns
    -------
    Array of shape ``(len(boxes1), len(boxes2))`` where entry ``[i, j]`` is the
    value ``cal_iou`` returns for ``boxes1[i]`` against ``boxes2[j]``.
    """
    # Areas as plain width * height.
    areas1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
    areas2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])

    iou_matrix = np.zeros((boxes1.shape[0], boxes2.shape[0]))

    # One row per box in boxes1, each compared against all of boxes2 at once.
    for row, (box, box_area) in enumerate(zip(boxes1, areas1)):
        iou_matrix[row, :] = cal_iou(box, box_area, boxes2, areas2)

    return iou_matrix

In [5]:
def bbox_transfrom(anchors, gtboxes):
    """Compute regression targets that map each anchor onto its paired box.

    Row ``i`` of ``anchors`` is paired with row ``i`` of ``gtboxes``; both are
    2-D arrays whose columns 0..3 are read as ``x1, y1, x2, y2``.

    Returns
    -------
    tuple
        ``(vertical, dx)`` where ``vertical`` has shape ``(N, 2)`` holding
        ``[(cy - cy_a) / h_a, log(h / h_a)]`` and ``dx`` has shape ``(N,)``
        holding ``(cx - cx_a) / w_a``.
    """
    # Sizes use the inclusive-pixel convention (+1).
    anchor_h = anchors[:, 3] - anchors[:, 1] + 1.0
    anchor_w = anchors[:, 2] - anchors[:, 0] + 1.0
    gt_h = gtboxes[:, 3] - gtboxes[:, 1] + 1.0

    # Box centres.
    anchor_cy = (anchors[:, 1] + anchors[:, 3]) * 0.5
    anchor_cx = (anchors[:, 0] + anchors[:, 2]) * 0.5
    gt_cy = (gtboxes[:, 1] + gtboxes[:, 3]) * 0.5
    gt_cx = (gtboxes[:, 0] + gtboxes[:, 2]) * 0.5

    # Centre offsets are normalised by anchor size; height is a log ratio.
    dy = (gt_cy - anchor_cy) / anchor_h
    dh = np.log(gt_h / anchor_h)
    dx = (gt_cx - anchor_cx) / anchor_w

    vertical = np.vstack((dy, dh)).T
    return vertical, dx

In [6]:
# compute the region-proposal-network (RPN) training targets: per-anchor labels and regression offsets
def cal_rpn(imgsize, featuresize, scale, gtboxes):
    """Produce per-anchor RPN training targets for one image.

    Parameters
    ----------
    imgsize : tuple
        ``(height, width)`` of the image; used to discard anchors crossing the border.
    featuresize : tuple
        ``(h, w)`` of the feature map, forwarded to ``gen_anchor``.
    scale : number
        Grid-step multiplier, forwarded to ``gen_anchor``.
    gtboxes : 2-D array
        Ground-truth boxes, columns 0..3 read as ``x1, y1, x2, y2``.

    Returns
    -------
    tuple
        ``([labels, bbox_targets, refinements], anchors)`` where ``labels`` holds
        -1 (ignore), 0 (negative) or 1 (positive) for every anchor, and
        ``bbox_targets`` / ``refinements`` are the two outputs of ``bbox_transfrom``
        for each anchor against its best-overlapping ground-truth box.

    Notes
    -----
    Reads the module-level settings ``IOU_POSITIVE``, ``IOU_NEGATIVE``, ``tar_pos``
    and ``total_anch``, which are not defined in this cell (presumably supplied by
    ``%run Config.ipynb`` - confirm). Subsampling draws from ``np.random``.
    """
    img_height, img_width = imgsize

    anchors = gen_anchor(featuresize, scale)
    iou = cal_overlaps(anchors, gtboxes)

    # Start with every anchor marked "ignore" (-1); 0 = negative, 1 = positive.
    labels = np.full(anchors.shape[0], -1.0)

    # Best anchor for each GT box, and best GT box (with its IoU) for each anchor.
    best_anchor_per_gt = iou.argmax(axis=0)
    best_gt_per_anchor = iou.argmax(axis=1)
    best_iou_per_anchor = iou[np.arange(iou.shape[0]), best_gt_per_anchor]

    # Order matters: thresholds first, then force one positive per GT box.
    labels[best_iou_per_anchor > IOU_POSITIVE] = 1
    labels[best_iou_per_anchor < IOU_NEGATIVE] = 0
    labels[best_anchor_per_gt] = 1

    # Anchors that stick out of the image are ignored regardless of overlap.
    crosses_border = (
        (anchors[:, 0] < 0)
        | (anchors[:, 1] < 0)
        | (anchors[:, 2] >= img_width)
        | (anchors[:, 3] >= img_height)
    )
    labels[crosses_border] = -1

    # Randomly demote surplus positives to "ignore" so at most tar_pos remain.
    positive_idx = np.where(labels == 1)[0]
    num_pos = len(positive_idx)
    if num_pos > tar_pos:
        dropped = np.random.choice(positive_idx, num_pos - tar_pos, replace=False)
        labels[dropped] = -1

    # Likewise cap negatives at total_anch - tar_pos.
    negative_idx = np.where(labels == 0)[0]
    num_neg = len(negative_idx)
    if num_neg > total_anch - tar_pos:
        dropped = np.random.choice(negative_idx, num_neg - total_anch + tar_pos, replace=False)
        labels[dropped] = -1

    # Regression targets of every anchor towards its best-matching GT box.
    matched_gt = gtboxes[best_gt_per_anchor, :]
    bbox_targets, refinements = bbox_transfrom(anchors, matched_gt)

    return [labels, bbox_targets, refinements], anchors