This notebook contains scripts to:
1. Resize the images to 640 x 640.
2. Use labelImg to create the bounding boxes.
3. Convert the xml files produced by 'labelImg' into two NumPy files: the bounding boxes and the class indexes of the corresponding bounding boxes.
4. Define the category index to be used during training and inference. This has to be updated manually.

In [18]:
import os
import glob
import cv2
import numpy as np
import xml.etree.ElementTree as ET
import pickle

In [3]:
# Target size, in pixels, used when resizing the images (height first, then width).
resized_height, resized_width = 640, 640

In [9]:
#Step 1 : Resize train and test images

def resize_images(input_dir, output_dir, width, height):
    """Resize every '*.jpg' directly inside input_dir and write it to output_dir.

    The output file keeps the name of the input file, whatever its length.

    Args:
        input_dir: directory that is searched (non-recursively) for '*.jpg' files.
        output_dir: directory the resized images are written to; created if missing.
        width: width of the resized images in pixels.
        height: height of the resized images in pixels.

    Returns:
        The list of input image paths that were processed, sorted by path.

    Raises:
        OSError: if an image cannot be read or a resized image cannot be written.
    """
    # cv2.imwrite does not create missing directories, it just reports failure.
    os.makedirs(output_dir, exist_ok=True)

    source_paths = sorted(glob.glob(os.path.join(input_dir, "*.jpg")))
    for source_path in source_paths:
        image = cv2.imread(source_path)
        # cv2.imread signals an unreadable file by returning None instead of raising.
        if image is None:
            raise OSError("Could not read image: " + source_path)

        resized_image = cv2.resize(image, (width, height))

        # basename keeps the full file name; slicing a fixed number of
        # characters only works for names such as '100.jpg'.
        target_path = os.path.join(output_dir, os.path.basename(source_path))
        if not cv2.imwrite(target_path, resized_image):
            raise OSError("Could not write image: " + target_path)

    return source_paths


#resize train image

input_file_location = '../data/training_data/'
output_file_location = '../data/training_data/processed_images/'
image_file_names = resize_images(input_file_location, output_file_location,
                                 resized_width, resized_height)

#resize test image

input_file_location = '../data/test_data/'
output_file_location = '../data/test_data/processed_images/'
image_file_names = resize_images(input_file_location, output_file_location,
                                 resized_width, resized_height)


In [10]:
#Step 2: create the bounding boxes with 'labelImg' and save the xml files

# !important: while labelling, make sure the labels are numerical, starting
# from 100 (100, 101, 102, 103, ...), because the code below is designed to
# handle numerals only.
labelimg_command = "labelimg"
os.system(labelimg_command)

Image:/Users/ml/Desktop/ILS/ILSv2/object_location/data/test_data/100.jpg -> Annotation:/Users/ml/Desktop/ILS/ILSv2/object_location/data/test_data/100.xml
Image:/Users/ml/Desktop/ILS/ILSv2/object_location/data/training_data/processed_images/100.jpg -> Annotation:/Users/ml/Desktop/ILS/ILSv2/object_location/data/training_data/processed_images/100.xml
Image:/Users/ml/Desktop/ILS/ILSv2/object_location/data/training_data/processed_images/101.jpg -> Annotation:/Users/ml/Desktop/ILS/ILSv2/object_location/data/training_data/processed_images/101.xml
Image:/Users/ml/Desktop/ILS/ILSv2/object_location/data/training_data/processed_images/102.jpg -> Annotation:/Users/ml/Desktop/ILS/ILSv2/object_location/data/training_data/processed_images/102.xml
Image:/Users/ml/Desktop/ILS/ILSv2/object_location/data/training_data/processed_images/103.jpg -> Annotation:/Users/ml/Desktop/ILS/ILSv2/object_location/data/training_data/processed_images/103.xml
Image:/Users/ml/Desktop/ILS/ILSv2/object_location/data/trainin

0

In [24]:
#Step 3: Convert xml to 2 '.npy' files.

training_file_dir = '../data/training_data/processed_images/'


def numeric_stem(path):
    """Return the integer encoded in a file name, e.g. '../x/100.xml' -> 100.

    The directory and the extension are ignored, so the number may have any
    number of digits.

    Raises:
        ValueError: if the file name without its extension is not an integer.
    """
    return int(os.path.splitext(os.path.basename(path))[0])


#get all the xml file names generated after labelling from previous step and the corresponding image file names
file_names_xml = glob.glob(training_file_dir+"*.xml")
file_names_images = glob.glob(training_file_dir+"*.jpg")


#ensure number of images equals number of xml and then organize file name in ascending order

if len(file_names_xml) != len(file_names_images):
    print("Error: Inconsistent number of xml and image files")

# Equal counts do not guarantee matching pairs, so also compare the names.
xml_stems = {os.path.splitext(os.path.basename(path))[0] for path in file_names_xml}
image_stems = {os.path.splitext(os.path.basename(path))[0] for path in file_names_images}
for stem in sorted(image_stems - xml_stems):
    print("Error: no xml file found for image " + stem + ".jpg")
for stem in sorted(xml_stems - image_stems):
    print("Error: no image file found for annotation " + stem + ".xml")

#sort xml files by the number in their name; the real file names are kept
#(rebuilding them from the integer would drop leading zeros)
file_names = sorted((os.path.basename(path) for path in file_names_xml), key=numeric_stem)
file_names_sorted = [numeric_stem(name) for name in file_names]



In [25]:
# Print the annotation file names in the order in which they will be processed.
print(file_names)

['100.xml', '101.xml', '102.xml', '103.xml', '104.xml', '105.xml', '106.xml', '107.xml', '108.xml', '109.xml']


In [26]:
#convert xml file to format to be consumed by retina net algorithm

def parse_annotation(xml_path, image_height, image_width):
    """Read the objects stored in one labelImg xml file.

    Args:
        xml_path: path of the xml file.
        image_height: height in pixels used to normalise the y coordinates.
        image_width: width in pixels used to normalise the x coordinates.

    Returns:
        A tuple (boxes, class_ids):
        boxes is a float32 array of shape (number of objects, 4) holding
        [ymin, xmin, ymax, xmax] divided by the image height / width;
        class_ids is an integer array with the <name> of every object.

    Raises:
        ValueError: if an object name is not an integer or a coordinate is
            not a number.
    """
    root = ET.parse(xml_path).getroot()

    boxes = []
    labels = []
    for obj in root.findall('./object'):
        labels.append(obj.find('name').text)
        bndbox = obj.find('./bndbox')
        boxes.append([bndbox.find(tag).text for tag in ("ymin", "xmin", "ymax", "xmax")])

    # y values are divided by the height and x values by the width, in the
    # same order as the coordinates above.
    scale = np.array([image_height, image_width, image_height, image_width], dtype=np.float32)
    # reshape keeps a (0, 4) shape for files that contain no object.
    boxes = np.array(boxes, dtype=np.float32).reshape(-1, 4) / scale
    class_ids = np.array(labels).astype(int)
    return boxes, class_ids


bbox_formatted_all = []
name_obj_all = []
for file_name in file_names:
    # The images were resized to resized_height x resized_width before labelling.
    bbox_formatted, name_obj = parse_annotation(training_file_dir+file_name,
                                                resized_height, resized_width)
    bbox_formatted_all.append(bbox_formatted)
    name_obj_all.append(name_obj)

In [28]:
#save bbox as .npy file
bbox_fn = training_file_dir+'bbox.npy'
with open(bbox_fn, 'wb') as f:
    np.save(f, bbox_formatted_all)

  arr = np.asanyarray(arr)


In [29]:
#save index of the class list
indices_class_fn = training_file_dir+'indices_class_list.npy'

name_obj_all = np.array(name_obj_all).astype(object)
with open(indices_class_fn, 'wb') as f:
    np.save(f, name_obj_all)

  name_obj_all = np.array(name_obj_all).astype(object)


In [30]:
#Step 4: Define category index

# Single class setup: the category index maps a class id to its id and name.
input_field_id = 1
num_classes = 1

category_index = {}
category_index[input_field_id] = dict(id=input_field_id, name='Input Field')

# Store the category index next to the training data.
file_path = training_file_dir+'category_index.pickle'
with open(file_path, 'wb') as pickle_file:
    pickle.dump(category_index, pickle_file)


""" Example of multi class

    num_classes = 3
    input_field_id = 1
    dropdown_id = 2
    text_id = 3

    category_index = {
        input_field_id: {'id': input_field_id, 'name': 'Input Field'},
        dropdown_id:{'id':dropdown_id, 'name':'Drop Down'},
        text_id:{'id':text_id, 'name':'Text'}
        }

"""


" Example of multi class\n\n    num_classes = 3\n    input_field_id = 1\n    dropdown_id = 2\n    text_id = 3\n\n    category_index = {\n        input_field_id: {'id': input_field_id, 'name': 'Input Field'},\n        dropdown_id:{'id':dropdown_id, 'name':'Drop Down'},\n        text_id:{'id':text_id, 'name':'Text'}\n        }\n\n"