In [None]:
import cv2
import keras
import keras.backend as K
from keras.backend.tensorflow_backend import set_session
from keras.callbacks import Callback
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import random
import os
import time
import glob

from sl_model import SL512, DSODSL512
from ssd_data import InputGenerator
from ssd_data import preprocess
from sl_utils import PriorUtil
from sl_training import SegLinkLoss

from sl_utils import plot_rbox, rbox_to_polygon

from utils.model import load_weights, count_parameters, calc_memory_usage
from utils.training import Logger, LearningRateDecay

def mean(x):
    """Return the sum over all elements of `x` divided by `len(x)`."""
    total = np.sum(x)
    count = len(x)
    return total / count

In [None]:
import pickle
# presumably imported so the pickled object's class is importable when unpickling — confirm
from data_synthtext import GTUtility

# Load the pickled ground truth utility object and split it into a training
# and a validation part (the meaning of the 0.9 argument is defined by
# `split`; presumably the training fraction — confirm).
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so only load pickle files from a trusted source.
file_name = 'gt_util_synthtext_seglink.pkl'
with open(file_name, 'rb') as f:
    gt_util = pickle.load(f)
gt_util_train, gt_util_val = gt_util.split(0.9)

In [None]:
# Build the network to inspect; the SL512 alternative is kept commented out.
#model = SL512()
model = DSODSL512()
# Input size is taken from the model and reused for sampling and the generator.
image_size = model.image_size
# Batch size handed to the InputGenerator in the next code cell.
batch_size = 24

### Ground Truth

In [None]:
# Draw a random validation batch. `inputs` is later fed to model.predict,
# `images` is displayed with imshow and `data` is the ground truth passed to
# plot_gt / encode. Note that the sample size is the literal 16, not `batch_size`.
_, inputs, images, data = gt_util_val.sample_random_batch(batch_size=16, input_size=image_size)

# Prior utility bound to the model and a training data generator without augmentation.
prior_util = PriorUtil(model)
gen = InputGenerator(gt_util_train, prior_util, batch_size, image_size, augmentation=False)

# Pick one sample of the batch that the following cells inspect in detail.
test_idx = 0
test_input = inputs[test_idx]
test_img = images[test_idx]
test_gt = data[test_idx]

# Show the selected image together with its ground truth.
plt.figure(figsize=[8]*2)
plt.imshow(test_img)
gt_util.plot_gt(test_gt, show_labels=False)
plt.show()

In [None]:
# Checkpoint to evaluate; earlier checkpoints are kept commented out for reference.
#weights_path = './checkpoints/201711071436_sl512_synthtext/weights.001.h5'
#weights_path = './checkpoints/201711132011_dsodsl512_synthtext/weights.001.h5'
weights_path = './checkpoints/201806021007_dsodsl512_synthtext/weights.012.h5'

# Confidence thresholds passed to prior_util.decode for the test sample below.
segment_threshold = 0.55; link_threshold = 0.45

load_weights(model, weights_path)

In [None]:
# Run the network on the sampled batch (one image per forward pass) and keep
# the prediction that belongs to the selected test sample.
preds = model.predict(inputs, batch_size=1, verbose=1)
test_pred = preds[test_idx]

In [None]:
# Encode the ground truth of the test sample; the result is compared with the
# prediction and reused as a dummy network output in later cells.
test_encoded_gt = prior_util.encode(test_gt)
#prior_util.print_gt_stats()

In [None]:
# Decode the prediction of the test sample with the thresholds defined above.
# The return value is discarded; plot_results is called without results and
# presumably falls back to what decode stored on prior_util — confirm.
prior_util.decode(test_pred, segment_threshold, link_threshold)

plt.figure(figsize=[8]*2)
plt.imshow(test_img)
#gt_util.plot_gt(test_gt, show_labels=False)
prior_util.plot_results(show_labels=False)
plt.show()

In [None]:
# Print floats with one decimal place while comparing encoded ground truth and prediction.
#np.set_printoptions(precision=0, suppress=True, formatter={'all': None})
np.set_printoptions(precision=0, suppress=True, formatter={'float': '{:.1f}'.format})

# Rows whose first segment label column is 0 in the encoded ground truth.
mask = test_encoded_gt[:,0] == 0

# NOTE(review): 7:23:2 selects columns 7, 9, ..., 21, i.e. the first entry of
# each inter layer link label pair, whereas decode treats the second entry
# (1::2) as the positive score — confirm this is the intended channel.
egt = test_encoded_gt[mask][:,7:23:2]
mr = test_pred[mask][:,7:23:2]
# All prints are commented out, so this loop currently does nothing except
# leaving the loop variable `i` bound.
for i in range(len(mr)):
    #print(egt[i])
    #print(mr[i])
    #print(np.abs(egt[i]-mr[i]))
    #print(np.mean(np.abs(egt[i]-mr[i])))
    pass
#print(np.mean(np.abs(egt-mr)))

### Encoding

In [None]:
# Visualize locations, a few boxes, neighbors and the ground truth assignment
# of a single prior map (index 2) on top of the test image. Relies on the
# state prior_util holds from the encode call in an earlier cell.
#for m_idx in [0,1,2,3]:
for m_idx in [2]:
    plt.figure(figsize=[16]*2)
    plt.axis('off')
    plt.imshow(test_img)
    #prior_util.prior_maps[m_idx-1].plot_locations()
    prior_util.prior_maps[m_idx].plot_locations()
    prior_util.prior_maps[m_idx].plot_boxes(range(0,200,40))
    prior_util.plot_neighbors(m_idx, [0,20], cross_layer=False)
    prior_util.plot_neighbors(m_idx, range(0,200,20), inter_layer=False)
    prior_util.plot_assignment(m_idx)
    plt.show()

In [None]:
# Draw the ground truth assignment of every prior map into one figure.
plt.figure(figsize=[16]*2)
plt.axis('off')
plt.imshow(test_img)

#prior_util.encode(test_gt, debug=False)

#for m_idx in [0,1,2,3]:
for m_idx in range(len(prior_util.prior_maps)):
#for m_idx in [2]:
    #prior_util.prior_maps[m_idx-1].plot_locations()
    #prior_util.prior_maps[m_idx].plot_locations()
    #prior_util.prior_maps[m_idx].plot_boxes(range(0,200,40))
    #prior_util.plot_neighbors(m_idx, [0,20], cross_layer=False)
    #prior_util.plot_neighbors(m_idx, range(0,200,20), inter_layer=False)
    prior_util.plot_assignment(m_idx)

plt.show()

### Decoding

In [None]:
# Use the encoded ground truth as a fake network output and perturb columns
# 2:4 (the first two segment offset values) with Gaussian noise of std 0.5.
# The noise is not seeded, so the figure differs between runs.
dummy_output = np.copy(test_encoded_gt)
dummy_output[:,2:4] += np.random.randn(*dummy_output[:,2:4].shape)*0.5

plt.figure(figsize=[16]*2)
ax = plt.gca()
plt.imshow(test_img)
# Decode with the default thresholds and draw the geometric construction of the combining step.
res = prior_util.decode(dummy_output, debug=False, debug_combining=True)
#res = decode(prior_util, dummy_output, debug=False)
#prior_util.plot_gt()
prior_util.plot_results(res)
# Clamp the view to the 512x512 image with the y axis pointing down.
plt.axis('off'); plt.xlim(0, 512); plt.ylim(512,0)
plt.show()

In [None]:
# Draw a new random validation batch and predict on it. This rebinds `inputs`,
# `images`, `data` and `preds` from the cells above; `test_img`, `test_gt` and
# `test_pred` still refer to the previous batch.
_, inputs, images, data = gt_util_val.sample_random_batch(batch_size=16, input_size=image_size)

preds = model.predict(inputs, batch_size=1, verbose=1)

In [None]:
# Experiment name used to build the result file paths below.
experiment = 'sl512_synthtext'

# Clean-up of previously saved result images; the removal itself is commented
# out, so this loop currently has no effect.
for fl in glob.glob('checkpoints/%s/result_*' % (experiment,)):
    #os.remove(fl)
    pass

# Decode and show every prediction of the batch, one figure per image.
# NOTE(review): the thresholds are literals here (link_threshold 0.35) and
# differ from the `link_threshold = 0.45` defined with the checkpoint above.
for i in range(len(preds)):
    plt.figure(figsize=[8]*2)
    plt.axis('off')
    plt.imshow(images[i])
    res = prior_util.decode(preds[i], segment_threshold=0.55, link_threshold=0.35)
    # The encode result is discarded; presumably called to refresh the ground
    # truth state used by plot_gt (commented out) — confirm.
    prior_util.encode(data[i])
    #prior_util.plot_gt()
    prior_util.plot_results(res)
    #plt.savefig('checkpoints/%s/result_%03d.jpg' % (experiment, i))
    plt.show()

In [None]:
def decode(self, model_output,
            segment_threshold=0.55, link_threshold=0.35, debug=False, debug_combining=False):
    """Decode local classification and regression results to combined bounding boxes.

    Positive segments are decoded to rotated boxes, grouped into connected
    components over their positive links and every component is merged into
    one rotated box (the 'algorithm 1' steps referenced in the comments).
    The result is also stored in self.results.

    # Arguments
        model_output: Array with SegLink model output of shape
            (segments, 2 x segment_label + 5 x segment_offset + 16 x inter_layer_links_label
            + 8 x cross_layer_links_label)
        segment_threshold: Threshold for filtering segment confidence, float between 0 and 1.
        link_threshold: Threshold for filtering link confidence, float between 0 and 1.
        debug: If True, plot links, decoded segments and priors in the current figure.
        debug_combining: If True, plot the geometric construction of each combined box.

    # Return
        Array with rboxes of shape (results, x + y + w + h + theta + confidence).
    """
    segment_labels = model_output[:,0:2]
    segment_offsets = model_output[:,2:7]
    inter_layer_links_labels = model_output[:,7:23]
    cross_layer_links_labels = model_output[:,23:31]

    priors_xy = self.priors_xy
    priors_wh = self.priors_wh
    priors_variances = self.priors_variances
    inter_layer_neighbors_idxs = self.inter_layer_neighbors_idxs
    cross_layer_neighbors_idxs = self.cross_layer_neighbors_idxs
    # cross layer neighbors only exist for segments behind the first map
    first_map_offset = self.map_offsets[1]

    # filter segments, only pos segments
    segment_mask = segment_labels[:,1] > segment_threshold

    # filter links, pos links connected with pos segments (mask broadcast over the link axis)
    inter_layer_link_mask = (inter_layer_links_labels[:,1::2] > link_threshold) & segment_mask[:, np.newaxis]
    cross_layer_link_mask = (cross_layer_links_labels[:,1::2] > link_threshold) & segment_mask[:, np.newaxis]

    # all pos segments
    segment_idxs = np.flatnonzero(segment_mask)

    # decode segments
    offsets = np.copy(segment_offsets[segment_idxs]) # delta(x,y,w,h,theta)_s
    offsets[:,:4] *= priors_variances[segment_idxs] # variances

    rboxes_s = np.empty([len(offsets), 5]) # (x,y,w,h,theta)_s
    rboxes_s[:,0:2] = priors_wh[segment_idxs] * offsets[:,0:2] + priors_xy[segment_idxs]
    rboxes_s[:,2:4] = priors_wh[segment_idxs] * np.exp(offsets[:,2:4]) # priors_wh is filled with a_l by default
    rboxes_s[:,4] = offsets[:,4]
    rboxes_s_dict = {idx: rbox for idx, rbox in zip(segment_idxs, rboxes_s)}

    # build the undirected link graph; the dict doubles as O(1) test for 'is pos segment'
    adjacency = {n: set() for n in segment_idxs}
    for s_idx in segment_idxs:
        # collect inter layer links
        for n in np.flatnonzero(inter_layer_link_mask[s_idx]):
            n_idx = inter_layer_neighbors_idxs[s_idx, n]
            if n_idx in adjacency:
                # since we add only links to pos segments, they are also valid
                adjacency[s_idx].add(n_idx)
                adjacency[n_idx].add(s_idx)
        # collect cross layer links
        if s_idx >= first_map_offset:
            for n in np.flatnonzero(cross_layer_link_mask[s_idx]):
                n_idx = cross_layer_neighbors_idxs[s_idx-first_map_offset, n]
                if n_idx in adjacency:
                    adjacency[s_idx].add(n_idx)
                    adjacency[n_idx].add(s_idx)

    # find connected components with an explicit stack, a recursive search
    # can exceed the interpreter recursion limit for large components
    component_of = {}
    num_groups = 0
    for root in segment_idxs:
        if root in component_of:
            continue
        component_of[root] = num_groups
        stack = [root]
        while stack:
            node = stack.pop()
            for neighbor in adjacency[node]:
                if neighbor not in component_of:
                    component_of[neighbor] = num_groups
                    stack.append(neighbor)
        num_groups += 1
    # members are collected in ascending segment order
    groups = [[] for _ in range(num_groups)]
    for idx in segment_idxs:
        groups[component_of[idx]].append(idx)

    # combine segments
    results = []
    for f, group in enumerate(groups):
        # decoded segment rboxes in group
        idxs = np.array(group)
        rboxes_g = np.array([rboxes_s_dict[i] for i in idxs]) # (x,y,w,h,theta)_s

        # step 2, algorithm 1
        theta_b = np.mean(rboxes_g[:,4])

        # step 3, algorithm 1, least squares line with slope tan(-theta_b)
        # minimizing sum (a*x_i + b - y_i)^2 over b makes the line pass through
        # the centroid of the segment centers, so the line is represented by
        # the centroid and a unit direction vector, which stays well defined
        # for horizontal (tan == 0) and vertical (tan == inf) lines
        phi = -theta_b
        u_x, u_y = np.cos(phi), np.sin(phi)
        c_x, c_y = np.mean(rboxes_g[:,0]), np.mean(rboxes_g[:,1])

        # step 4, algorithm 1, project centers on the line
        t = (rboxes_g[:,0] - c_x) * u_x + (rboxes_g[:,1] - c_y) * u_y
        x_proj = c_x + t * u_x
        y_proj = c_y + t * u_y

        # find the extreme points along the line
        max_idx = np.argmax(t)
        min_idx = np.argmin(t)
        x_p, y_p = x_proj[min_idx], y_proj[min_idx]
        x_q, y_q = x_proj[max_idx], y_proj[max_idx]

        # step 5 to 10, algorithm 1, compute the rbox values
        w_p = rboxes_g[min_idx,2]
        w_q = rboxes_g[max_idx,2]

        x_b = (x_p + x_q) / 2
        y_b = (y_p + y_q) / 2
        w_b = ((x_q - x_p)**2 + (y_q - y_p)**2)**0.5 + (w_p + w_q) / 2
        h_b = np.mean(rboxes_g[:,3])

        rbox_b = [x_b, y_b, w_b, h_b, theta_b]

        # confidence, weighted by area of segments
        confs_s = segment_labels[idxs,1]
        boxes_s_area = rboxes_g[:, 2]*rboxes_g[:, 3]
        conf_b = np.sum(confs_s * boxes_s_area) / np.sum(boxes_s_area)

        results.append(rbox_b + [conf_b])

        # for debugging geometric construction
        if debug_combining:
            palette = 'mkgcyb'
            c = palette[f%len(palette)]
            for rbox in rboxes_g:
                plot_rbox(rbox, color=c, linewidth=1)
                # segment centers
                plt.plot(rbox[0], rbox[1], 'o'+c, markersize=4)
            # projected segment centers
            plt.plot(x_proj, y_proj, 'oy', markersize=4)
            # lines
            if abs(u_x) > 1e-6:
                x_l = np.array([0,512])
                y_l = c_y + (u_y / u_x) * (x_l - c_x)
            else:
                # vertical line
                x_l = np.array([c_x, c_x])
                y_l = np.array([0,512])
            plt.plot(x_l, y_l, 'r')
            # endpoints
            plt.plot(x_p, y_p, 'or', markersize=6)
            plt.plot(x_q, y_q, 'or', markersize=6)
            # combined box
            plot_rbox(rbox_b, color='r', linewidth=2)

    if len(results) > 0:
        results = np.asarray(results)
    else:
        results = np.empty((0,6))
    self.results = results

    # debug
    if debug:
        # plot positive links, here without the restriction to pos segments
        inter_layer_link_mask = inter_layer_links_labels[:,1::2] > link_threshold
        for idx in range(len(inter_layer_link_mask)):
            p1 = priors_xy[idx]
            for n_idx in inter_layer_neighbors_idxs[idx][inter_layer_link_mask[idx]]:
                p2 = priors_xy[n_idx]
                plt.plot([p1[0], p2[0]], [p1[1], p2[1]], 'y-', linewidth=2)

        cross_layer_link_mask = cross_layer_links_labels[:,1::2] > link_threshold
        for idx in range(len(cross_layer_neighbors_idxs)):
            p1 = priors_xy[idx+first_map_offset]
            for n_idx in cross_layer_neighbors_idxs[idx][cross_layer_link_mask[idx+first_map_offset]]:
                p2 = priors_xy[n_idx]
                plt.plot([p1[0], p2[0]], [p1[1], p2[1]], '-', color='orange', linewidth=2)

        # plot segments
        keys = list(rboxes_s_dict.keys())
        for k in keys:
            plot_rbox(rboxes_s_dict[k], color='k', linewidth=2)

        # plot links
        for k in keys:
            p1 = rboxes_s_dict[k][:2]
            for m in adjacency[k]:
                p2 = rboxes_s_dict[m][:2]
                plt.plot([p1[0], p2[0]], [p1[1], p2[1]], 'm-', linewidth=1)

        # plot priors
        for k in keys:
            p1 = rboxes_s_dict[k][:2]
            p2 = priors_xy[k]
            plt.plot([p1[0]], [p1[1]], 'mo', markersize=4)
            plt.plot([p2[0]], [p2[1]], 'go', markersize=4)
            plt.plot([p1[0], p2[0]], [p1[1], p2[1]], 'g-', linewidth=1)

    return results


# Run the notebook-level decode function defined above (instead of the
# prior_util.decode method) on the dummy output and plot the combined boxes.
plt.figure(figsize=[14]*2)
ax = plt.gca()
plt.imshow(test_img)
#res = prior_util.decode(dummy_output, debug=False)
res = decode(prior_util, dummy_output, debug=False)
#res = decode(prior_util, test_pred, debug=False)
#prior_util.plot_gt()
prior_util.plot_results(res)
# Clamp the view to the 512x512 image with the y axis pointing down.
plt.axis('off'); plt.xlim(0, 512); plt.ylim(512,0)
plt.show()

#print(res)

In [None]:
# Print floats with three decimal places for the decoded boxes below.
np.set_printoptions(precision=0, suppress=True, formatter={'float': '{:7.3f}'.format})

# Index of the batch sample to inspect. The following %%timeit cell reads `i`
# as well, so it must not be rebound by a loop in this cell.
i = 6
plt.figure(figsize=[14]*2)
plt.axis('off')
plt.imshow(images[i])
res = decode(prior_util, preds[i], segment_threshold=0.55, link_threshold=0.35, debug=False, debug_combining=False)
# Encode the matching ground truth so that plot_gt has something to draw.
prior_util.encode(data[i])
prior_util.plot_gt()
prior_util.plot_results(res)
#plt.savefig('checkpoints/%s/result_%03d.jpg' % (experiment, i))
# A dedicated loop variable is used here; the loop previously reused `i` and
# thereby silently replaced the sample index with the index of the last prior map.
for map_idx in range(len(prior_util.prior_maps)):
    #prior_util.plot_neighbors(map_idx, list(range(10))*5)
    pass
print(res)
plt.show()

In [None]:
%%timeit
# Time the notebook-level decode on one prediction; `i` is whatever value an earlier cell left bound.
res = decode(prior_util, preds[i], segment_threshold=0.55, link_threshold=0.35, debug=False, debug_combining=False)

In [None]:
# Draw the geometric construction of the combining step for the test
# prediction; the image is added to the same figure afterwards.
plt.figure(figsize=[16]*2)

decode(prior_util, test_pred, segment_threshold=0.55, link_threshold=0.35, debug=False, debug_combining=True)

#plt.show()

#plt.figure(figsize=[24,24])
plt.imshow(test_img)
#gt_util.plot_gt(test_gt, show_labels=False)
#prior_util.plot_results(show_labels=False)
plt.show()

In [None]:
def encode(self, gt_data, debug=False):
    """Encode ground truth polygons to segments and links for local classification and regression.

    For every prior map the priors are matched to ground truth words, then
    segment labels, segment offsets and link labels are stored on the map
    object (match_indices, segment_labels, segment_offsets,
    inter_layer_links_labels, cross_layer_links_labels). The ground truth is
    kept in self.gt_rboxes and self.gt_polygons.

    NOTE(review): rot_matrix and polygon_to_rbox are used here but are not
    imported in the visible part of this notebook; they have to be available
    in the enclosing namespace.

    # Arguments
        gt_data: shape (boxes, 4 xy + classes); the first 8 values of each
            row are read as four (x, y) corners and scaled by self.image_size.
        debug: If True, plot the local geometry of the first positive prior
            of each map. The offset computation of a map stops after that
            prior, so the returned segment offsets are incomplete in debug mode.

    # Return
        shape (priors, 2 segment_labels + 5 segment_offsets + 2*8 inter_layer_links_labels + 2*4 cross_layer_links_labels)
    """

    rboxes = []
    polygons = []
    for word in gt_data:
        xy = np.reshape(word[:8], (-1, 2))
        xy = np.copy(xy) * self.image_size
        polygons.append(xy)
        rboxes.append(polygon_to_rbox(xy))
    rboxes = self.gt_rboxes = np.array(rboxes)
    self.gt_polygons = np.array(polygons)

    # compute segments
    for i, m in enumerate(self.prior_maps):
        num_priors = len(m.priors)
        map_priors_xy = m.priors_xy[:num_priors]

        # assign gt to priors
        a_l = m.minmax_size[0]
        match_indices = np.full(num_priors, -1, dtype=np.int32)
        min_lhs_eq_11 = np.full(num_priors, 1e6, dtype=np.float32)
        for j in range(len(rboxes)):
            _, _, w, h, theta = rboxes[j]
            c = rboxes[j,:2]
            # constraint on ratio between box size and word height, equation (11)
            lhs_eq_11 = max(a_l/h, h/a_l)
            if lhs_eq_11 <= 1.5:
                R = rot_matrix(theta)
                # all priors of the map at once instead of a python loop per prior
                d = np.abs(np.dot(map_priors_xy - c, R.T))
                # is center of prior in gt rbox
                inside = (d[:,0] < w/2.) & (d[:,1] < h/2.)
                # is lhs of equation (11) minimal for prior, compared in
                # float64 like the former scalar comparison
                better = inside & (lhs_eq_11 < min_lhs_eq_11.astype(np.float64))
                min_lhs_eq_11[better] = lhs_eq_11
                match_indices[better] = j
        m.match_indices = match_indices

        segment_mask = match_indices != -1

        # segment labels
        m.segment_labels = np.empty((num_priors, 2), dtype=np.int8)
        m.segment_labels[:, 0] = np.logical_not(segment_mask)
        m.segment_labels[:, 1] = segment_mask

        # compute offsets only for assigned boxes
        m.segment_offsets = np.zeros((num_priors, 5))
        pos_segment_idxs = np.nonzero(segment_mask)[0]
        for j in pos_segment_idxs: # box_idx
            gt_idx = match_indices[j]
            rbox = rboxes[gt_idx]
            _, _, w, h, theta = rbox
            R = rot_matrix(theta)
            prior_w, prior_h = m.priors_wh[j]

            # step 2 figure 5, rotate word anticlockwise around the center of prior
            d = rbox[:2] - m.priors_xy[j]
            poly_loc = rbox_to_polygon([*d, w, h, theta])
            poly_loc_rot = np.dot(poly_loc, R.T)

            # step 3 figure 5, crop word to left and right of prior
            poly_loc_cropped = np.copy(poly_loc_rot)
            poly_loc_cropped[:,0] = np.clip(poly_loc_cropped[:,0], -prior_w/2., prior_w/2.)

            # step 4 figure 5, rotate cropped word box clockwise
            poly_loc_rot_back = np.dot(poly_loc_cropped, R)
            rbox_loc_rot_back = polygon_to_rbox(poly_loc_rot_back)

            # encode, solve (3) to (7) to get local offsets
            offset = np.array([*(rbox_loc_rot_back[:2]/a_l),
                               *(np.log(rbox_loc_rot_back[2:4]/a_l)),
                               rbox_loc_rot_back[4]])
            offset[:4] /= m.priors[j,-4:] # variances
            m.segment_offsets[j] = offset

            # for debugging local geometry
            if debug:
                prior_poly_loc = np.array([[-prior_w, +prior_h],
                                           [+prior_w, +prior_h],
                                           [+prior_w, -prior_h],
                                           [-prior_w, -prior_h]])/2.
                plt.figure(figsize=[10]*2)
                ax = plt.gca()
                ax.add_patch(plt.Polygon(prior_poly_loc, fill=False, edgecolor='r', linewidth=1))
                ax.add_patch(plt.Polygon(poly_loc, fill=False, edgecolor='b', linewidth=1))
                ax.add_patch(plt.Polygon(poly_loc_rot, fill=False, edgecolor='k', linewidth=1))
                ax.add_patch(plt.Polygon(poly_loc_cropped, fill=False, edgecolor='c', linewidth=1))
                ax.add_patch(plt.Polygon(poly_loc_rot_back, fill=False, edgecolor='y', linewidth=1))
                lim = 50; plt.xlim(-lim,lim); plt.ylim(-lim,lim); plt.grid()
                plt.show()
                # only one debug figure per map
                break

        # compute link labels
        m.inter_layer_links_labels = np.zeros((num_priors,16), dtype=np.int8)
        m.cross_layer_links_labels = np.zeros((num_priors,8), dtype=np.int8)
        if i > 0:
            previous_map = self.prior_maps[i-1]
        # we only have to check neighbors if we are positive
        for idx in pos_segment_idxs:
            neighbor_idxs = m.inter_layer_neighbors_idxs[idx]
            for n, neighbor_idx in enumerate(neighbor_idxs):
                # valid neighbors
                if m.inter_layer_neighbors_valide[idx,n]:
                    # neighbor matched to the same word
                    if match_indices[idx] == match_indices[neighbor_idx]:
                        # since we are positive and match to the same word, neighbor has to be positive
                        m.inter_layer_links_labels[idx, n*2+1] = 1

            if i > 0:
                neighbor_idxs = m.cross_layer_neighbors_idxs[idx]
                for n, neighbor_idx in enumerate(neighbor_idxs):
                    # cross layer neighbors are always valid
                    if match_indices[idx] == previous_map.match_indices[neighbor_idx]:
                        m.cross_layer_links_labels[idx, n*2+1] = 1

        # the even columns hold the complement of the positive labels
        m.inter_layer_links_labels[:,::2] = np.logical_not(m.inter_layer_links_labels[:,1::2])
        m.cross_layer_links_labels[:,::2] = np.logical_not(m.cross_layer_links_labels[:,1::2])

    # collect encoded ground truth
    maps = self.prior_maps
    segment_labels = np.concatenate([m.segment_labels for m in maps])
    segment_offsets = np.concatenate([m.segment_offsets for m in maps])
    inter_layer_links_labels = np.concatenate([m.inter_layer_links_labels for m in maps])
    cross_layer_links_labels = np.concatenate([m.cross_layer_links_labels for m in maps])
    return np.concatenate([segment_labels, segment_offsets, inter_layer_links_labels, cross_layer_links_labels], axis=1)

In [None]:
def plot_assignment(self, map_idx=None):
    """Draw ground truth, prior assignments and positive links in the current figure.

    Uses the state written by the last encode call (self.gt_polygons,
    self.gt_rboxes and the per map match_indices and label arrays).

    # Arguments
        map_idx: The index of the considered prior map.
            If None, all maps are considered.
    """
    ax = plt.gca()
    # ground truth polygons
    for p in self.gt_polygons:
        ax.add_patch(plt.Polygon(p, fill=False, edgecolor='y', linewidth=4))
    # ground truth rboxes
    rboxes = self.gt_rboxes
    for rbox in rboxes:
        box = rbox_to_polygon(rbox)
        ax.add_patch(plt.Polygon(box, fill=False, edgecolor='b', linewidth=2))
    # an empty ground truth array is one-dimensional and cannot be sliced by column
    if len(rboxes) > 0:
        plt.plot(rboxes[:,0], rboxes[:,1], 'go',  markersize=4)

    # None selects every map, as stated in the docstring
    if map_idx is None:
        map_idxs = range(len(self.prior_maps))
    else:
        map_idxs = [map_idx]

    for i in map_idxs:
        m = self.prior_maps[i]
        # assigned boxes, line from prior center to the center of the matched word
        for idx in np.nonzero(m.segment_labels[:, 1])[0]:
            p_prior = m.priors_xy[idx]
            p_word = rboxes[m.match_indices[idx]][:2]
            plt.plot([p_prior[0], p_word[0]], [p_prior[1], p_word[1]], 'm-', linewidth=1)
        # inter layer links
        labels = m.inter_layer_links_labels[:,1::2]
        idxs = np.nonzero(np.any(labels, axis=1))[0]
        for idx in idxs:
            x, y = m.priors_xy[idx]
            for n_idx in m.inter_layer_neighbors_idxs[idx, np.nonzero(labels[idx])[0]]:
                n_x, n_y = m.priors_xy[n_idx]
                plt.plot([x, n_x], [y, n_y], '-c', linewidth=1)
        # cross layer links point to priors of the previous map
        if i > 0:
            n_m = self.prior_maps[i-1]
            labels = m.cross_layer_links_labels[:,1::2]
            idxs = np.nonzero(np.any(labels, axis=1))[0]
            for idx in idxs:
                x, y = m.priors_xy[idx]
                for n_idx in m.cross_layer_neighbors_idxs[idx, np.nonzero(labels[idx])[0]]:
                    n_x, n_y = n_m.priors_xy[n_idx]
                    plt.plot([x, n_x], [y, n_y], '-c', linewidth=1)

# Re-encode the test ground truth with the prior_util.encode method (not the
# notebook-level encode function defined in the previous cell) in debug mode,
# then draw the assignment of prior map 1 with the notebook-level
# plot_assignment function defined above.
prior_util.encode(test_gt, debug=True)
plt.figure(figsize=[8]*2)

plt.imshow(test_img)

plot_assignment(prior_util, 1)
plt.show()

In [None]:
# backup

def decode(self, model_output,
            segment_threshold=0.55, link_threshold=0.35, debug=False):
    """Decode local classification and regression results to combined bounding boxes.

    Backup variant that decodes the segments map by map from self.prior_maps.
    Running this cell rebinds the notebook-level name `decode`; unlike the
    definition earlier in the notebook, this one has no `debug_combining`
    parameter and returns boxes without confidence. The result is also stored
    in self.rboxes_b. A permanently disabled (`if False:`) plotting block of
    the former version was removed; the earlier definition provides the same
    plots via its `debug` flag.

    # Arguments
        model_output: Array of shape (segments, 31) holding 2 segment labels,
            5 segment offsets, 16 inter layer and 8 cross layer link labels.
        segment_threshold: Threshold for filtering segment confidence.
        link_threshold: Threshold for filtering link confidence.
        debug: If True, plot the geometric construction of each combined box.

    # Return
        Array with rboxes of shape (results, x + y + w + h + theta).
    """
    segment_labels = model_output[:,0:2]
    segment_offsets = model_output[:,2:7]
    inter_layer_links_labels = model_output[:,7:23]
    cross_layer_links_labels = model_output[:,23:31]

    map_offsets = self.map_offsets

    # filter segments, only pos segments
    segment_mask = segment_labels[:,1] > segment_threshold

    # filter links, pos links connected with pos segments (mask broadcast over the link axis)
    inter_layer_link_mask = (inter_layer_links_labels[:,1::2] > link_threshold) & segment_mask[:, np.newaxis]
    cross_layer_link_mask = (cross_layer_links_labels[:,1::2] > link_threshold) & segment_mask[:, np.newaxis]

    # all pos segments
    segment_idxs = np.flatnonzero(segment_mask)

    # the adjacency dict doubles as O(1) test for 'is pos segment'
    adjacency = {n: set() for n in segment_idxs}
    rboxes_s_dict = {}

    for i, m in enumerate(self.prior_maps):
        sl = slice(map_offsets[i], map_offsets[i+1])

        # decode segments
        mask = segment_mask[sl]
        in_map_idxs = np.where(mask)[0]
        offsets = np.copy(segment_offsets[sl][mask]) # delta(x,y,w,h,theta)_s
        offsets[:,:4] *= m.priors_variances[mask] # variances

        rboxes_s = np.empty([len(offsets), 5]) # (x,y,w,h,theta)_s
        rboxes_s[:,0:2] = m.priors_wh[mask] * offsets[:,0:2] + m.priors_xy[mask]
        rboxes_s[:,2:4] = m.priors_wh[mask] * np.exp(offsets[:,2:4]) # priors_wh is filled with a_l by default
        rboxes_s[:,4] = offsets[:,4]
        for idx, rbox in zip(in_map_idxs+map_offsets[i], rboxes_s):
            rboxes_s_dict[idx] = rbox

        # collect inter layer links, neighbor indices are local to the map
        for s_in_map_idx, n in np.array(np.where(inter_layer_link_mask[sl])).T:
            n_in_map_idx = m.inter_layer_neighbors_idxs[s_in_map_idx, n]
            s_idx = s_in_map_idx + map_offsets[i]
            n_idx = n_in_map_idx + map_offsets[i]
            if n_idx in adjacency:
                # since we add only links to pos segments, they are also valid
                adjacency[s_idx].add(n_idx)
                adjacency[n_idx].add(s_idx)

        # collect cross layer links, neighbor indices are local to the previous map
        if i > 0:
            for s_in_map_idx, n in np.array(np.where(cross_layer_link_mask[sl])).T:
                n_in_map_idx = m.cross_layer_neighbors_idxs[s_in_map_idx, n]
                s_idx = s_in_map_idx + map_offsets[i]
                n_idx = n_in_map_idx + map_offsets[i-1]
                if n_idx in adjacency:
                    adjacency[s_idx].add(n_idx)
                    adjacency[n_idx].add(s_idx)

    # find connected components with an explicit stack, a recursive search
    # can exceed the interpreter recursion limit for large components
    component_of = {}
    num_groups = 0
    for root in segment_idxs:
        if root in component_of:
            continue
        component_of[root] = num_groups
        stack = [root]
        while stack:
            node = stack.pop()
            for neighbor in adjacency[node]:
                if neighbor not in component_of:
                    component_of[neighbor] = num_groups
                    stack.append(neighbor)
        num_groups += 1
    # members are collected in ascending segment order
    groups = [[] for _ in range(num_groups)]
    for idx in segment_idxs:
        groups[component_of[idx]].append(idx)

    # combine segments
    rboxes_b = []
    for f, group in enumerate(groups):
        # decoded segment rboxes in group
        rboxes_g = np.array([rboxes_s_dict[i] for i in group]) # (x,y,w,h,theta)_s

        # step 2, algorithm 1
        theta_b = np.mean(rboxes_g[:,4])

        # step 3, algorithm 1, least squares line with slope tan(-theta_b)
        # minimizing sum (a*x_i + b - y_i)^2 over b makes the line pass through
        # the centroid of the segment centers, so the line is represented by
        # the centroid and a unit direction vector, which stays well defined
        # for horizontal (tan == 0) and vertical (tan == inf) lines
        phi = -theta_b
        u_x, u_y = np.cos(phi), np.sin(phi)
        c_x, c_y = np.mean(rboxes_g[:,0]), np.mean(rboxes_g[:,1])

        # step 4, algorithm 1, project centers on the line
        t = (rboxes_g[:,0] - c_x) * u_x + (rboxes_g[:,1] - c_y) * u_y
        x_proj = c_x + t * u_x
        y_proj = c_y + t * u_y

        # find the extreme points along the line
        max_idx = np.argmax(t)
        min_idx = np.argmin(t)
        x_p, y_p = x_proj[min_idx], y_proj[min_idx]
        x_q, y_q = x_proj[max_idx], y_proj[max_idx]

        # step 5 to 10, algorithm 1, compute the rbox values
        w_p = rboxes_g[min_idx,2]
        w_q = rboxes_g[max_idx,2]

        x_b = (x_p + x_q) / 2
        y_b = (y_p + y_q) / 2
        w_b = ((x_q - x_p)**2 + (y_q - y_p)**2)**0.5 + (w_p + w_q) / 2
        h_b = np.mean(rboxes_g[:,3])

        rbox_b = np.array([x_b, y_b, w_b, h_b, theta_b])
        rboxes_b.append(rbox_b)

        # for debugging geometric construction
        if debug:
            ax = plt.gca()
            palette = 'mkgcyb'
            c = palette[f%len(palette)]
            for rbox in rboxes_g:
                box = rbox_to_polygon(rbox)
                ax.add_patch(plt.Polygon(box, fill=False, edgecolor=c, linewidth=1))
                # segment centers
                plt.plot(rbox[0], rbox[1], 'o'+c,  markersize=4)
            # projected segment centers
            plt.plot(x_proj, y_proj, 'oy',  markersize=4)
            # lines
            if abs(u_x) > 1e-6:
                x_l = np.array([0,512])
                y_l = c_y + (u_y / u_x) * (x_l - c_x)
            else:
                # vertical line
                x_l = np.array([c_x, c_x])
                y_l = np.array([0,512])
            plt.plot(x_l, y_l, 'r')
            # endpoints
            plt.plot(x_p, y_p, 'or', markersize=8)
            plt.plot(x_q, y_q, 'or', markersize=8)
            # combined box
            box = rbox_to_polygon(rbox_b)
            ax.add_patch(plt.Polygon(box, fill=False, edgecolor='r', linewidth=2))

    # keep a two-dimensional shape even if nothing was detected
    if len(rboxes_b) > 0:
        rboxes_b = np.array(rboxes_b)
    else:
        rboxes_b = np.empty((0,5))
    self.rboxes_b = rboxes_b

    return rboxes_b

def plot_results(self, results=None):
    """Draw rotated boxes as red polygon outlines on the current matplotlib axes.

    # Arguments
        self: object providing `rboxes_b`, used when no results are passed.
        results: iterable of rotated boxes accepted by `rbox_to_polygon`;
            falls back to `self.rboxes_b` when None.
    """
    rboxes = self.rboxes_b if results is None else results
    axes = plt.gca()
    for rbox in rboxes:
        outline = plt.Polygon(rbox_to_polygon(rbox), fill=False, edgecolor='r', linewidth=2)
        axes.add_patch(outline)

        
def plot_linking(self):
    """Unfinished placeholder: walks over `self.prior_maps` but draws nothing."""
    
    for i in range(len(self.prior_maps)):
        # TODO: per-map link plotting was never implemented; `m` is currently unused.
        m = self.prior_maps[i]
        
#i = 6
#plt.figure(figsize=[14]*2)
#plt.imshow(images[i])
#res = decode(prior_util, preds[i], segment_threshold=0.55, link_threshold=0.35, debug=False)
#prior_util.encode(data[i])
##prior_util.plot_gt()
#plot_results(prior_util, res)
#plot_linking(prior_util)
##plt.savefig('checkpoints/%s/result_%03d.jpg' % (experiment, i))
#for i in range(len(prior_util.prior_maps)):
#    #prior_util.plot_neighbors(i, list(range(10))*5)
#    pass
#plt.show()

# Show the test image and overlay the boxes decoded from its prediction.
plt.figure(figsize=[14]*2)
ax = plt.gca()
plt.imshow(test_img)
#res = prior_util.decode(dummy_output, debug=False)
#res = decode(prior_util, dummy_output, debug=False)
# Uses the decode() function defined in this notebook; no thresholds are passed,
# so decode's own defaults apply.
res = decode(prior_util, test_pred, debug=False)
#prior_util.plot_gt()
# NOTE(review): this calls the plot_results method of prior_util, not the
# plot_results function defined above in this cell - confirm which is intended.
prior_util.plot_results(res)
# Reversed y-limits put y=0 at the top; 512 presumably matches the input image size.
plt.axis('off'); plt.xlim(0, 512); plt.ylim(512,0)
plt.show()

#print(res)

In [None]:
%%timeit
res = decode(prior_util, preds[i], segment_threshold=0.55, link_threshold=0.35, debug=False)

In [None]:
# Display the dummy model output (presumably defined in an earlier cell - verify).
dummy_output

In [None]:
# Display the raw model prediction for the selected test sample.
test_pred

In [None]:
# Write a diagram of the model graph to model.png, with layer names but without shapes.
from keras.utils.vis_utils import plot_model
plot_model(model, to_file='model.png', show_shapes=False, show_layer_names=True)

In [None]:
# Stack four copies of the dummy output into a dummy batch.
dummy = np.array([dummy_output] * 4)

dummy_batch_size = dummy.shape[0]

# Number of locations summed over square maps of side 64 down to 1 (segments and
# the links weighted by 8), and over sides 32 down to 1 (the links weighted by 4).
n_seg = sum(side*side for side in (64, 32, 16, 8, 4, 2, 1))
n_inter = n_seg
n_cross = sum(side*side for side in (32, 16, 8, 4, 2, 1))
# The trailing literals are reference totals printed next to the computed ones.
print('seg ', n_seg * dummy_batch_size, 21844)
print('link', (n_inter*8 + n_cross*4 ) * dummy_batch_size, 196592)

In [None]:
# For every source layer, print its name and elements 1:3 of its output shape
# (presumably the spatial size of the feature map - verify against the model's data format).
for name in prior_util.source_layers_names:
    print('%-10s %s' % (name, model.get_layer(name).output_shape[1:3]))

In [None]:
%%timeit
# Time fetching one batch; a fresh generator is created on every run.
x, y = next(gen.generate())

In [None]:
%%timeit
# Time PriorUtil.decode on the dummy output with fixed thresholds and debug plotting off.
result = prior_util.decode(dummy_output, segment_threshold=0.9, link_threshold=0.7, debug=False);

In [None]:
# Profile the generation (and ground-truth encoding) of one batch.
from cProfile import Profile
pr = Profile()
pr.enable()
try:
    x, y = next(gen.generate())
finally:
    # Always stop the profiler, even if batch generation raises; otherwise it
    # would stay enabled and keep recording everything run in later cells.
    pr.disable()
pr.print_stats(sort='cumulative')

In [None]:
# Profile 1000 repeated decodes of the dummy output.
from cProfile import Profile
pr = Profile()
pr.enable()
try:
    for i in range(1000):
        #results = prior_util.decode(test_encoded_gt)
        result = prior_util.decode(dummy_output, segment_threshold=0.55, link_threshold=0.35, debug=False)
finally:
    # Always stop the profiler, even if decoding raises; otherwise it would
    # stay enabled and keep recording everything run in later cells.
    pr.disable()
pr.print_stats(sort='cumulative')

In [None]:
# Sanity-check the first 11 generated batches: report NaNs in the last-axis
# slice 2:7 of the targets and values above 1.0 in last-axis index 1.
j = 0
for j, (img_batch, gt_batch) in enumerate(gen.generate(), start=1):
    #print(np.max(gt_batch[:,:,2:7], axis=(0,1)))
    if np.isnan(gt_batch[:,:,2:7]).any():
        print(gt_batch[:,:,2:7])

    if np.max(gt_batch[:,:,1]) > 1.0:
        print(gt_batch.shape, np.max(gt_batch[:,:,1]), np.sum(gt_batch[:,:,1]))

    # j counts the batches processed so far; stop once more than 10 were checked.
    if j > 10:
        break

In [None]:
# Round-trip check: polygon -> rotated box -> polygon, plotted on top of each other.
from sl_utils import polygon_to_rbox, rbox_to_polygon

# First 8 values of the second ground-truth entry, reshaped to (x, y) pairs.
# NOTE: this rebinds the notebook-level name `xy`.
xy = test_gt[1][:8]
#print(xy)
xy = np.reshape(xy, (-1, 2))
print(xy)
rbox = polygon_to_rbox(xy)
print(rbox)
box = rbox_to_polygon(rbox)
print(box)

# Original polygon in red, polygon reconstructed from the rbox in blue.
plt.figure()
ax = plt.gca()
ax.add_patch(plt.Polygon(xy, fill=False, edgecolor='r'))
ax.add_patch(plt.Polygon(box, fill=False, edgecolor='b'))
#plt.xlim(-0,1.2); plt.ylim(-0,1.2)
#plt.xlim(0.4,0.6); plt.ylim(0.5,0.8)
plt.show()

In [None]:
# plot line segments
# Draws all links from one point p1 to its selected neighbours as a single
# LineCollection instead of one plt.plot call per link.
# NOTE(review): inter_layer_neighbors_idxs, inter_layer_link_mask, idx, priors_xy and p1
# are not defined at notebook level in the visible cells; this looks like a scratch
# snippet meant for the debug plotting inside decode - confirm before running.
from matplotlib.collections import LineCollection
n_idxs = inter_layer_neighbors_idxs[idx][inter_layer_link_mask[idx]]
p2s = priors_xy[n_idxs]
# Stack (p1 repeated, p2s) and transpose so the first axis indexes the line segments.
lines = np.array([np.tile(p1,(len(p2s),1)),p2s]).transpose((1,0,2))
ax.add_collection(LineCollection(lines, colors='y', linewidths=2))

In [None]:
# Report the size in KiB (labelled 'kb') of the main arrays, then the model's memory usage.
fs = '%-20s %5d kb\n'
s = ''.join(
    fs % (label, nbytes/1024)
    for label, nbytes in (
        ('priors', prior_util.priors.nbytes),
        ('input image', test_img.nbytes),
        ('encoded gt / output', test_encoded_gt.nbytes),
    )
)
print(s)
calc_memory_usage(model)

### Generate some samples

In [None]:
class PriorUtilDummy(object):
    """Stand-in for a prior utility: its encode() ignores the ground truth entirely."""

    def encode(self, gt_data, overlap_threshold=0.5, debug=False):
        """Return a fresh all-zero array of shape (10, 10), whatever the arguments are."""
        placeholder_shape = (10, 10)
        return np.zeros(placeholder_shape)

# Build a generator with augmentation options set explicitly and the dummy encoder
# in place of the real prior utility, so the encoded targets are just placeholders.
# NOTE: this rebinds the notebook-level `gen` (and `inputs` below) used by earlier cells.
gen = InputGenerator(gt_util_train, PriorUtilDummy(), batch_size=8, input_size=(512, 512), 
        saturation_var=0.5,
        brightness_var=0.5,
        contrast_var=0.5,
        lighting_std=0.5,
        hflip_prob=0.0,
        vflip_prob=0.0,
        do_crop=True,
        add_noise=True,
        crop_area_range=[0.9, 1.0],
        aspect_ratio_range=[3.9/3, 4.1/3])

# Pull a single batch from a fresh generator.
g = gen.generate()
inputs, targets = next(g)

In [None]:
# NOTE: this rebinds the notebook-level `batch_size`; cells re-run afterwards will see 16.
# NOTE(review): the generator above was built with batch_size=8 - confirm 16 is intended here.
batch_size = 16
# Bytes of the first input sample times batch_size, expressed in GiB (labelled 'gb').
print('queue size %0.3f gb' % (inputs[0].nbytes*batch_size/(1024**3),))
print('num_batches', gen.num_batches)