In [17]:
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
import random
from matplotlib import pyplot as plt
%matplotlib inline

In [18]:
root_dir = "20170822/video_exp"
test_ratio = 0.1
train_file_name = "head_hand_sku_train.txt"
test_file_name = "head_hand_sku_test.txt"

In [19]:
sub_dirs = os.listdir(root_dir)

In [21]:
# three classes: head and hand, and sku

In [22]:
def parse_xml(image_dir, xml_file):
    if os.path.isfile(xml_file) == False:
        return [], []
    tree = ET.parse(xml_file)
    root = tree.getroot()

    image_paths = []
    labels = []

    images = root.find("images")
    images = images.findall("image")

    for image in images:
        image_name = image.get('file').split('\\')[1]
        image_path = os.path.join(image_dir, image_name)
        cur_img = cv2.imread(image_path)
        if cur_img is None:
            print("wrong img name: " + xml_file)
            continue
        img_height, img_width, _ = cur_img.shape

        image_labels = []
        boxes = image.findall('box')
        if len(boxes) == 0:
            continue
        for box in boxes:
            obj_label = box.find('label')
            if obj_label.text.startswith("sku"):
                klass = 2
            elif obj_label.text.endswith("head"):
                klass = 0
            elif obj_label.text.endswith("left") or obj_label.text.endswith("right"):
                klass = 1
            else:
                continue
            top = int(box.get('top'))
            left = int(box.get('left'))
            width = int(box.get('width'))
            height = int(box.get('height'))

            xmin = np.max([left, 0])
            xmax = np.min([left + width, img_width])
            ymin = np.max([top, 0])
            ymax = np.min([top + height, img_height])

            # all the objects are class 0
            image_labels.append([xmin, ymin, xmax, ymax, klass])
        if len(image_labels) == 0:
            continue
        image_paths.append(os.path.join(image_dir, image_name))
        labels.append(image_labels)
    return image_paths, labels

In [23]:
image_paths = []
labels = []
for sub_dir in sub_dirs:
    sub_dir_path = os.path.join(root_dir, sub_dir)
    segment_dirs = os.listdir(sub_dir_path)
    for segment_dir in segment_dirs:
        segment_dir_path = os.path.join(sub_dir_path, segment_dir)
        print(segment_dir_path)
        if os.path.isdir(segment_dir_path) == False:
            continue
        files = os.listdir(segment_dir_path)
        for file in files:
            if file.endswith("xml") != True:
                continue
            xml_path = os.path.join(segment_dir_path, file)
            img_dir_path = os.path.join(segment_dir_path, file.split('.')[0])
            
            cur_paths, cur_labels = parse_xml(img_dir_path, xml_path)
            image_paths.extend(cur_paths)
            labels.extend(cur_labels)
#         break
#     break

20170822/video_exp/1/0567032a-8edc-4505-b520-d9de34c4c3d7
20170822/video_exp/1/97d25dcc-4476-4040-8258-eaaf21bc8fa1
20170822/video_exp/1/6d742b6b-ee9f-430e-a4c0-aca593142cb7
20170822/video_exp/1/b5a5b8b6-8f8d-4976-821b-d21251a21589
20170822/video_exp/1/182adf5e-ff64-4ed0-8f4b-e342d3a099fd
20170822/video_exp/1/520a2ecc-8a8d-4586-937e-97de378420b6
20170822/video_exp/1/0b08eef1-399a-442d-965f-c3e559c3f1f3
20170822/video_exp/1/34623841-12e1-4b92-9a18-fcc0145046c3
20170822/video_exp/1/49521cc2-3818-4ef6-837a-5079364dddd8
20170822/video_exp/1/982fd493-78f5-4305-a8d2-63329ed770aa
20170822/video_exp/1/e26d3afd-332f-4809-84e9-e5098e4ce6c8
20170822/video_exp/1/3fe359d6-d3ce-40fa-a98b-bfc288bcfde9
20170822/video_exp/1/cd548357-19be-43f3-b834-073f5230f354
20170822/video_exp/1/5936e9ce-e00f-4775-8009-497659439e0d
20170822/video_exp/1/eeed142e-0434-4f3b-aa67-cc7171ab9ee3
20170822/video_exp/1/7421052c-edce-40d8-85be-535b1b73e459
20170822/video_exp/1/1dc5b46d-57a3-4024-b83f-5e56da24d8d0
20170822/video

20170822/video_exp/2/aee7887e-a5f8-4793-92db-dd8017007253
20170822/video_exp/2/eba0dec3-8f77-490e-8e5b-2905faf226b9
20170822/video_exp/2/63398e90-7235-4427-b72d-64a59597b143
20170822/video_exp/2/cd400b4e-1a28-486f-a909-38576718a31b
20170822/video_exp/2/0fa34f82-8f10-487b-b0ac-7e217b684d7c
20170822/video_exp/2/b2ac1bac-ca7e-437a-a3a8-1a9c4e0d75f2
20170822/video_exp/2/8c64abfb-43b7-42be-9cc9-c014eb6a1c97
20170822/video_exp/2/a43dca56-9e00-48c1-a470-4552ad03d703
20170822/video_exp/2/56780640-e44c-4178-a4e2-e6e02fe21449
20170822/video_exp/2/b1b74a88-dff7-49bf-86de-69b00ec9a4d6
20170822/video_exp/2/fb1776a9-9758-4bf7-8ee7-60269407b27b
20170822/video_exp/2/b5259370-b8d1-41fd-85d8-17439c9fd2c1
20170822/video_exp/2/9b597d5a-1d7f-44af-88a8-86405200b7a1
20170822/video_exp/2/38fccd17-f582-4e3f-b647-e9b9a9704426
20170822/video_exp/2/3482f51a-0805-4a3f-a440-36adfee9a7b1
20170822/video_exp/2/29b3a5c2-3e2e-4d75-8a2b-59d3d897e03a
20170822/video_exp/2/915cd669-031b-4f9e-a670-787083ffaf63
20170822/video

20170822/video_exp/0/c7d06999-1ff7-4734-9343-18a66284c075
20170822/video_exp/0/428c1fd7-6f86-4b86-af0f-34749dcf8c39
20170822/video_exp/0/f2a11a75-7e31-4e19-b06b-270b24beb70c
20170822/video_exp/0/4af6a946-f4de-4932-9cdc-d9b6b14a7dd6
20170822/video_exp/0/66c251a2-f9cc-485a-b5e4-2a23913e3524
20170822/video_exp/0/collec_data.py
20170822/video_exp/0/d4b14b3e-b510-410b-8d1e-490c9b567904
20170822/video_exp/0/734e7908-0bed-47d0-af1c-ea27906d9184
20170822/video_exp/0/6cf58128-6238-4016-b582-7447d3afa066
20170822/video_exp/0/52ef9c3e-65c1-4d7e-9ced-cb330fa62719
20170822/video_exp/0/ab06905f-5dcd-4a68-a7d6-baa12a96ff5d
20170822/video_exp/0/1717a486-0836-4490-a4ab-52071c644103
20170822/video_exp/0/bbb7a964-20a1-411c-9d7e-ee9fd906dd0e
20170822/video_exp/0/6cfddcad-28a4-4253-9797-6d24de13487d
20170822/video_exp/0/37c1992a-c140-49ef-9dc4-b9596ce1e35f


In [24]:
# convert records to strings
def convert_to_string(image_path, labels):
    """convert image_path, lables to string 
    Returns:
    string 
    """
    out_string = ''
    out_string += image_path
    for label in labels:
        for i in label:
            out_string += ' ' + str(i)
    out_string += '\n'
    return out_string

records = []
for idx, image_path in enumerate(image_paths):
    label = labels[idx]
    records.append(convert_to_string(image_path, label))

random.shuffle(records)
print(len(records))

5741


In [25]:
# split into training set and test set
total_num = len(records)
test_num = int(test_ratio * total_num)
train_num = total_num - test_num
train_records = records[0:train_num]
test_records = records[train_num:]

In [26]:
# save to text file
train_out_file = open(train_file_name, "w")
for record in train_records:
    train_out_file.write(record)
train_out_file.close()
test_out_file = open(test_file_name, "w")
for record in test_records:
    test_out_file.write(record)
test_out_file.close()