# Object Detection for Handwriting Using CVTK (AML Package for Computer Vision)

In [1]:
from cvtk.utils import detection_utils
from cvtk.core import ObjectDetectionDataset

import os
import json
import pandas as pd
import numpy as np

  from ._conv import register_converters as _register_converters


Instructions for updating:
Use the retry module or similar alternatives.


In [2]:
# Locations of the exported detector artifacts (replace the placeholders).
# To export them from a trained detector instead of typing the paths by hand:
#     frozen_model_path, label_map_path = my_detector.save(model_dir)
frozen_model_path, label_map_path = (
    '<path to frozen_inference_graph.pb>',
    '<path to label_map.pbtxt>',
)

print("Frozen model written to path: " + frozen_model_path)
print("Labels written to path: " + label_map_path)

Frozen model written to path: D:/Projects/EY/models/frozen_inference_graph.pb
Labels written to path: D:/Projects/EY/models/label_map.pbtxt


In [12]:
# Evaluation inputs and outputs (replace the placeholders before running).
out_json_filename = '<vott output json file>'  # VoTT annotation export (JSON)
jpg_folder = '<path to test images>'           # folder holding the test images
output_path = '<results path>'                 # folder receiving one CSV per label

# Label dictionary: label name -> class id that detections are compared against.
label_dict = {
    'signature': 1,
    'others': 2,
}

In [4]:
## Collect the name and the full path of every file directly inside jpg_folder.
## Both lists are index-aligned: image_paths[k] is the path of image_names[k].
image_names = list(next(os.walk(jpg_folder))[2])
image_paths = [os.path.join(jpg_folder, file_name) for file_name in image_names]

In [8]:
## Score every test image with the frozen model; this step can take a while.
## The result is consumed per image further down: detections[k] is paired with
## image_paths[k] and read like a dict ('num_detections', 'detection_classes',
## 'detection_scores', 'detection_boxes').
detections = detection_utils.score_multiple(frozen_model_path, image_paths)

In [6]:
## Load the VoTT JSON export.
## JSON text is UTF-8, so decode it explicitly instead of relying on the
## platform default encoding (cp1252 on Windows), which garbles or rejects
## non-ASCII file names and tags. 'utf-8-sig' also tolerates a leading BOM.
with open(out_json_filename, encoding='utf-8-sig') as f:
    d = json.load(f)

## 'frames' holds the per-image annotations; the following cells look an image
## up by its position rendered as a string, e.g. metadata['0'].
metadata = d['frames']

In [7]:
## Test images differ in size, so remember the height and width of each one as
## stored on its annotation frames (when an image has several frames, the last
## one wins). Images without any frame get no entry.
height_dic = {}
width_dic = {}

for index, img_name in enumerate(image_names):
    for det in metadata[str(index)]:
        height_dic[img_name] = det['height']
        width_dic[img_name] = det['width']


#### Helper functions for calculating precision and recall

In [15]:
def prepare_iou(values):
    """Convert one bounding-box record into named corner coordinates.

    Args:
        values: Mapping-like record (e.g. a DataFrame row) with the keys
            'bbox_0' (ymin), 'bbox_1' (xmin), 'bbox_2' (ymax), 'bbox_3' (xmax).

    Returns:
        dict with the keys 'x1', 'x2', 'y1', 'y2'.
    """
    return {
        'x1': values['bbox_1'],
        'x2': values['bbox_3'],
        'y1': values['bbox_0'],
        'y2': values['bbox_2'],
    }


def _boxes_to_mask(boxes, height, width):
    """Rasterize every box in `boxes` into one binary (height, width) mask."""
    mask = np.zeros((height, width))
    for row_idx in range(len(boxes)):
        box = prepare_iou(boxes.iloc[row_idx])
        # Max coordinates are inclusive, hence the +1 on the slice stops.
        # Clamp at 0 so a negative coordinate is not read as a from-the-end index.
        top, left = max(int(box['y1']), 0), max(int(box['x1']), 0)
        bottom, right = max(int(box['y2']) + 1, 0), max(int(box['x2']) + 1, 0)
        mask[top:bottom, left:right] = 1
    return mask


def cal_precision_recall_matrix(img, groundtruth=None, predicted=None,
                                im_height=None, im_width=None):
    """Compute pixel-level precision and recall for one image.

    All detected boxes of the image are painted into one binary mask and all
    groundtruth boxes into another. Precision is the overlapping area divided
    by the detected area; recall is the overlapping area divided by the
    groundtruth area. Overlapping boxes of the same kind are counted once.

    Args:
        img: Image name, matched against the 'image' column of both frames.
        groundtruth: DataFrame with columns 'image', 'bbox_0'..'bbox_3'.
            Defaults to the module-level `groundtruth_values`.
        predicted: DataFrame with columns 'image', 'bbox_0'..'bbox_3'.
            Defaults to the module-level `predicted_values`.
        im_height: Mask height in pixels. Defaults to `height_dic[img]`.
        im_width: Mask width in pixels. Defaults to `width_dic[img]`.

    Returns:
        Tuple (precision, recall):
        ('NA', 'NA') when the image has neither groundtruth nor detections,
        (0, 0) when it has only one of the two, otherwise two floats.

    Raises:
        KeyError: if the size is needed, not passed in, and `img` is missing
            from `height_dic` / `width_dic`.
    """
    if groundtruth is None:
        groundtruth = groundtruth_values
    if predicted is None:
        predicted = predicted_values

    val_values = groundtruth[groundtruth['image'] == img]
    model_values = predicted[predicted['image'] == img]

    if len(val_values) == 0 and len(model_values) == 0:
        return 'NA', 'NA'
    if len(val_values) == 0 or len(model_values) == 0:
        return 0, 0

    # Use the size of THIS image. Images differ in size, and a mask that is too
    # small silently clips boxes (numpy slicing never raises), which skews the
    # areas below.
    if im_height is None:
        im_height = height_dic[img]
    if im_width is None:
        im_width = width_dic[img]
    im_height, im_width = int(im_height), int(im_width)

    d_matrix = _boxes_to_mask(model_values, im_height, im_width)
    g_matrix = _boxes_to_mask(val_values, im_height, im_width)

    detect_area = int(d_matrix.sum())
    g_area = int(g_matrix.sum())
    inter_area = (d_matrix * g_matrix).sum()

    # Boxes lying entirely outside the image leave an empty mask; report 0
    # instead of dividing by zero.
    precision = inter_area / detect_area if detect_area > 0 else 0
    recall = inter_area / g_area if g_area > 0 else 0
    return precision, recall

#### calculate precision and recall per image

In [17]:
# Minimum detection score for a predicted box to be counted.
SCORE_THRESHOLD = 0.5
# Box columns are ordered bbox_0=ymin, bbox_1=xmin, bbox_2=ymax, bbox_3=xmax.
GROUNDTRUTH_COLUMNS = ['image', 'bbox_0', 'bbox_1', 'bbox_2', 'bbox_3']
PREDICTED_COLUMNS = ['image', 'score', 'bbox_0', 'bbox_1', 'bbox_2', 'bbox_3']

for label in list(label_dict.keys()):

    print ('calculate precision and recall for label: {}'.format(label))

    ## load groundtruth handwriting boxes
    logfile = []
    for index, img_name in enumerate(image_names):
        for det in metadata[str(index)]:
            # Only frames tagged with exactly this one label are counted.
            if det['tags'] == [label]:
                logfile.append([img_name, det['y1'], det['x1'], det['y2'], det['x2']])
                height_dic[img_name] = det['height']
                width_dic[img_name] = det['width']

    # Passing `columns` to the constructor keeps the frame well-formed even
    # when no box carries this label (assigning .columns to an empty frame
    # raises a length-mismatch ValueError).
    groundtruth_values = pd.DataFrame(logfile, columns=GROUNDTRUTH_COLUMNS)

    ## load predicted handwriting boxes
    line = []
    for index in range(len(detections)):
        detect_dict = detections[index]
        # basename() copes with both '/' and '\\' on Windows and with '/' on
        # POSIX, unlike splitting on a hard-coded backslash.
        image_name = os.path.basename(image_paths[index])

        # The image size scales the box coordinates to pixels
        # (presumably they are normalized to [0, 1] -- confirm against the scorer).
        im_height = height_dic[image_name]
        im_width = width_dic[image_name]
        for i in range(detect_dict['num_detections']):
            score = detect_dict['detection_scores'][i]
            if detect_dict['detection_classes'][i] == label_dict[label] and score > SCORE_THRESHOLD:
                box = detect_dict['detection_boxes'][i]  # read as (ymin, xmin, ymax, xmax)
                ymin, xmin = box[0] * im_height, box[1] * im_width
                ymax, xmax = box[2] * im_height, box[3] * im_width
                line.append([image_name, score, ymin, xmin, ymax, xmax])

    predicted_values = pd.DataFrame(line, columns=PREDICTED_COLUMNS)

    ## calculate precision and recall
    precision = []
    recall = []

    for img in image_names:
        precision_per_img, recall_per_img = cal_precision_recall_matrix(img)
        precision.append(precision_per_img)
        recall.append(recall_per_img)

    ## save output (one row per image, in image_names order)
    output = pd.DataFrame({'precision': precision, 'recall': recall},
                          columns=['precision', 'recall'])

    csv_path = os.path.join(output_path, label+'.csv')
    print ('write results to {}'.format(csv_path))
    output.to_csv(csv_path, index = False)

calculate precision and recall for label: signature
write results to D:/Projects/EY/results\signature.csv
calculate precision and recall for label: others
write results to D:/Projects/EY/results\others.csv
