# Object Detection for handwriting using CVTK (AML Package for Computer Vision)

## Scoring

In [69]:
from cvtk.utils import detection_utils
from cvtk.core import ObjectDetectionDataset

import os
import json
import pandas as pd
import numpy as np

In [2]:
# Paths to the frozen inference graph (handed to the scorer further down)
# and to the label map that accompanies it.
frozen_model_path = 'D:/Projects/EY/models/frozen_inference_graph.pb'
label_map_path = 'D:/Projects/EY/models/label_map.pbtxt'

# The paths above are hard-coded; the save call that would normally produce
# them is kept here for reference only.
# frozen_model_path, label_map_path = my_detector.save(model_dir)
print("Frozen model written to path: ", frozen_model_path, sep="")
print("Labels written to path: ", label_map_path, sep="")

Frozen model written to path: D:/Projects/EY/models/frozen_inference_graph.pb
Labels written to path: D:/Projects/EY/models/label_map.pbtxt


In [70]:
# Annotation JSON for the test set; it is read further down with json.load,
# and its 'frames' entry supplies the per-image annotations.
out_json_filename = 'D:/Projects/EY/data/test.json' 
# Class id -> tag name.
# NOTE(review): not referenced anywhere in the visible part of this notebook;
# the scoring cell filters on class id 2 directly.
look_up = {1: 'signature', 2: 'others'}
# Folder whose files are listed (via os.walk) as the test images.
jpg_folder = 'D:/Projects/EY/data/test_output/JPEGImages'
# Folder the per-image precision/recall CSV is written into.
output_path = 'D:/Projects/EY/results'

  if not xml:


In [72]:
# List the files sitting directly in jpg_folder (only the first os.walk entry
# is used, so sub-folders are not descended into). image_names holds the bare
# file names and image_paths the joined paths, both in the same order.
# NOTE(review): later cells pair position i in these lists with frame str(i)
# of the annotation JSON -- confirm the listing order matches the JSON.
image_names = list(next(os.walk(jpg_folder))[2])
image_paths = [os.path.join(jpg_folder, file_name) for file_name in image_names]

In [74]:
## As the height and width per image are different, record them per image
## name so that boxes can later be scaled with the right dimensions.

# Load the annotation frames here so this cell does not rely on a later cell
# having been executed first.
with open(out_json_filename) as f:
    metadata = json.load(f)['frames']

height_dic = dict()
width_dic = dict()

# Frame str(i) of the annotation file is treated as belonging to
# image_names[i]. Every annotation of a frame carries 'height' and 'width';
# the last one seen wins. Images whose frame has no annotations get no entry.
for index, img_name in enumerate(image_names):
    for det in metadata[str(index)]:
        height_dic[img_name] = det['height']
        width_dic[img_name] = det['width']


#### load ground-truth handwriting boxes

In [118]:
# Read the annotation file; 'frames' maps the stringified image position to
# the list of annotations for that image.
with open(out_json_filename) as f:
    d = json.load(f)
metadata = d['frames']

# Collect one row per ground-truth box tagged exactly ['others'].
# Row layout: [image, ymin, xmin, ymax, xmax] -> columns bbox_0 .. bbox_3.
logfile = []
for index, img_name in enumerate(image_names):
    for det in metadata[str(index)]:
        if det['tags'] == ['others']:
            logfile.append([img_name, det['y1'], det['x1'], det['y2'], det['x2']])
            # Remember the image size alongside the box; it is needed later
            # to scale the predicted boxes.
            height_dic[img_name] = det['height']
            width_dic[img_name] = det['width']

# Naming the columns in the constructor (instead of assigning .columns
# afterwards) keeps this working when no 'others' box exists at all.
groundtruth_values = pd.DataFrame(
    logfile, columns=['image', 'bbox_0', 'bbox_1', 'bbox_2', 'bbox_3'])
groundtruth_values.head()

Unnamed: 0,image,bbox_0,bbox_1,bbox_2,bbox_3
0,00cbe986-5497-430f-a707-ca7a217f3525-1.jpg,521,325.5,553,495.5
1,00cbe986-5497-430f-a707-ca7a217f3525-1.jpg,554,321.0,582,488.0
2,00cbe986-5497-430f-a707-ca7a217f3525-1.jpg,80,406.0,107,495.0
3,08d77cf7-618e-4ce3-9889-4b35512f0caf-1.jpg,359,133.0,384,278.0
4,08d77cf7-618e-4ce3-9889-4b35512f0caf-1.jpg,410,94.0,429,219.0


#### score test images and predict handwriting boxes

In [75]:
# Hand the frozen graph path and all test image paths to CVTK's
# score_multiple. The result is indexed by image position below, and each
# entry is read as a mapping with the keys 'num_detections',
# 'detection_classes', 'detection_scores' and 'detection_boxes'.
detections = detection_utils.score_multiple(frozen_model_path, image_paths)

In [117]:
# Build one row per confident 'others' detection:
# [image, score, ymin, xmin, ymax, xmax], boxes in pixel coordinates.
line = []

# Index-based access is kept on purpose: detections comes from CVTK and is
# only known here to support len() and integer indexing.
for index in range(len(detections)):
    detect_dict = detections[index]
    # basename copes with '/' as well as '\\' separators, whereas splitting on
    # '\\' alone returns the whole path whenever no backslash is present.
    image_name = os.path.basename(image_paths[index])

    # NOTE(review): n_obj and detect_index are filled but not consumed in the
    # visible part of this notebook.
    n_obj = 0
    detect_index = [] # record the detection index in the detections_dict
    im_height = height_dic[image_name]
    im_width = width_dic[image_name]

    boxes = detect_dict['detection_boxes']
    scores = detect_dict['detection_scores']
    classes = detect_dict['detection_classes']
    for i in range(detect_dict['num_detections']):
        # Class id 2 is 'others'; keep only detections scoring above 0.5.
        if classes[i] == 2 and scores[i] > 0.5:
            detect_index.append(i)
            n_obj += 1
            # Box entries 0 and 2 are scaled by the image height, 1 and 3 by
            # the image width, giving ymin, xmin, ymax, xmax in pixels.
            ymin = boxes[i][0] * im_height
            xmin = boxes[i][1] * im_width
            ymax = boxes[i][2] * im_height
            xmax = boxes[i][3] * im_width
            line.append([image_name, scores[i], ymin, xmin, ymax, xmax])

# Naming the columns in the constructor keeps this working when nothing was
# detected (assigning .columns to an empty frame raises).
predicted_values = pd.DataFrame(
    line, columns=['image', 'score', 'bbox_0', 'bbox_1', 'bbox_2', 'bbox_3'])
predicted_values.head()
 

Unnamed: 0,image,score,bbox_0,bbox_1,bbox_2,bbox_3
0,00cbe986-5497-430f-a707-ca7a217f3525-1.jpg,0.998655,80.368577,394.758232,108.948167,494.458601
1,00cbe986-5497-430f-a707-ca7a217f3525-1.jpg,0.998602,519.811079,318.068292,549.495041,486.219076
2,00cbe986-5497-430f-a707-ca7a217f3525-1.jpg,0.954931,568.136975,436.48916,589.440733,493.237782
3,00cbe986-5497-430f-a707-ca7a217f3525-1.jpg,0.951688,548.152462,313.517184,570.633486,492.46954
4,08d77cf7-618e-4ce3-9889-4b35512f0caf-1.jpg,0.999062,435.882792,326.964991,463.440344,402.930254


#### helper functions for calculating precision and recall

In [78]:
def prepare_iou(values):
    """Rename the bbox_* fields of one box record to corner coordinates.

    ``values`` is anything indexable by the keys 'bbox_0' .. 'bbox_3'
    (for example a DataFrame row). The returned dict maps
    x1 <- bbox_1, x2 <- bbox_3, y1 <- bbox_0 and y2 <- bbox_2.
    """
    return {
        'x1': values['bbox_1'],
        'x2': values['bbox_3'],
        'y1': values['bbox_0'],
        'y2': values['bbox_2'],
    }

In [108]:
def _boxes_to_mask(boxes, height, width):
    """Return a boolean (height, width) mask that is True inside any box.

    ``boxes`` provides the columns bbox_0..bbox_3 holding ymin, xmin, ymax,
    xmax. Coordinates are truncated to int and both corners are inclusive.
    """
    mask = np.zeros((height, width), dtype=bool)
    corners = zip(boxes['bbox_0'], boxes['bbox_1'], boxes['bbox_2'], boxes['bbox_3'])
    for ymin, xmin, ymax, xmax in corners:
        mask[int(ymin):int(ymax) + 1, int(xmin):int(xmax) + 1] = True
    return mask


def cal_precision_recall_matrix(img, groundtruth=None, predicted=None,
                                heights=None, widths=None):
    """Compute pixel-level precision and recall for one image.

    All boxes of ``img`` are painted onto two masks (detections and ground
    truth). Precision is the share of detected pixels that are also
    ground-truth pixels; recall is the share of ground-truth pixels that are
    also detected.

    Parameters
    ----------
    img : image name as stored in the 'image' column.
    groundtruth, predicted : frames with columns 'image', 'bbox_0'..'bbox_3';
        default to the module-level ``groundtruth_values`` and
        ``predicted_values``.
    heights, widths : mappings image name -> size in pixels; default to the
        module-level ``height_dic`` and ``width_dic``.

    Returns
    -------
    (precision, recall). Both are the string 'NA' when the image has neither
    ground-truth nor detected boxes, and 0 when only one of the two is
    missing or the corresponding painted area is empty.
    """
    groundtruth = groundtruth_values if groundtruth is None else groundtruth
    predicted = predicted_values if predicted is None else predicted
    heights = height_dic if heights is None else heights
    widths = width_dic if widths is None else widths

    val_values = groundtruth[groundtruth['image'] == img]
    model_values = predicted[predicted['image'] == img]
    print ('num of true boxes {}; num of detected boxes {}'.format(len(val_values), len(model_values)))

    if len(val_values) == 0 and len(model_values) == 0:
        return 'NA', 'NA'
    if len(val_values) == 0 or len(model_values) == 0:
        return 0, 0

    # Size the masks from this image's own dimensions. Relying on whatever
    # im_height/im_width were left over from the scoring loop would silently
    # clip boxes on any image larger than the last one scored.
    height = int(heights[img])
    width = int(widths[img])

    d_matrix = _boxes_to_mask(model_values, height, width)
    g_matrix = _boxes_to_mask(val_values, height, width)

    # Overlapping boxes are counted once because areas come from the masks.
    detect_area = d_matrix.sum()
    g_area = g_matrix.sum()
    inter_area = np.logical_and(d_matrix, g_matrix).sum()

    precision = inter_area / detect_area if detect_area else 0
    recall = inter_area / g_area if g_area else 0
    return precision, recall

In [115]:
# Evaluate every test image once, in image_names order, then split the
# (precision, recall) pairs into two parallel lists.
per_image_scores = [cal_precision_recall_matrix(img) for img in image_names]
precision = [scores[0] for scores in per_image_scores]
recall = [scores[1] for scores in per_image_scores]

num of true boxes 3; num of detected boxes 3
3 3
num of true boxes 4; num of detected boxes 4
4 4
num of true boxes 2; num of detected boxes 2
2 2
num of true boxes 2; num of detected boxes 2
2 2
num of true boxes 2; num of detected boxes 2
2 2
num of true boxes 2; num of detected boxes 2
2 2
num of true boxes 2; num of detected boxes 2
2 2
num of true boxes 3; num of detected boxes 4
3 4
num of true boxes 4; num of detected boxes 5
4 5
num of true boxes 1; num of detected boxes 1
1 1
num of true boxes 1; num of detected boxes 2
1 2
num of true boxes 1; num of detected boxes 2
1 2
num of true boxes 1; num of detected boxes 2
1 2
num of true boxes 1; num of detected boxes 2
1 2
num of true boxes 4; num of detected boxes 5
4 5
num of true boxes 3; num of detected boxes 3
3 3
num of true boxes 3; num of detected boxes 4
3 4
num of true boxes 2; num of detected boxes 1
2 1
num of true boxes 6; num of detected boxes 8
6 8
num of true boxes 6; num of detected boxes 3
6 3
num of true boxes 3;

In [116]:
# One row per evaluated image, in the order the scores were collected.
# Building the frame from named columns (rather than concatenating two
# unnamed frames and renaming) also works when both lists are empty.
output = pd.DataFrame(
    {'precision': precision, 'recall': recall},
    columns=['precision', 'recall'])
output.to_csv(os.path.join(output_path, 'signature_v3.csv'), index = False)
# Kept as the last expression so the notebook actually displays the preview.
output.head()

In [52]:
# Images with neither ground-truth nor detected boxes are scored as the
# string 'NA'; leave those out so np.mean only sees numeric scores (a single
# string in the list makes np.mean raise).
numeric_precision = [p for p in precision if not isinstance(p, str)]
numeric_recall = [r for r in recall if not isinstance(r, str)]
np.mean(numeric_precision), np.mean(numeric_recall)

(0.6941276085662448, 0.8415512763390542)