In [1]:
import hashlib
import io
import json
import os, sys
import contextlib2
import numpy as np
import PIL.Image
import cv2

import tensorflow as tf

sys.path.append("../")

from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util



In [2]:
def _list_sorted(directory):
    """Return the full paths of the entries in `directory`, sorted by name."""
    return [os.path.join(directory, name) for name in sorted(os.listdir(directory))]


# os.listdir() returns entries in arbitrary order, while the conversion step
# pairs annotation i with image i. Sorting every listing makes that pairing
# deterministic.
# NOTE(review): assumes an annotation file and its image share the same base
# name, so both listings sort into the same order -- confirm against the data.
path = _list_sorted("Sport_QA/Annotations")
im_path = _list_sorted("Sport_QA/Images")

test_path = _list_sorted("Sport_QA/test_Annotations")
test_im_path = _list_sorted("Sport_QA/test_Images")


In [3]:
def create_tf_example(full_path, annotations_list, include_masks=False):
    """Build a `tf.train.Example` for one image and its box annotations.

    Args:
        full_path: Path of the image file. Its raw bytes are stored in the
            example as-is and always tagged with the 'jpeg' format.
        annotations_list: Mapping with the keys 'x_left', 'y_left', 'x_right'
            and 'y_right' (box corners in pixels, one entry per box) and
            'text' (one class name per box), as returned by
            `read_anno_sport_qa`.
        include_masks: Unused; kept so existing callers keep working.

    Returns:
        A `(key, example)` tuple: the SHA-256 hex digest of the image bytes
        and the populated `tf.train.Example`.
    """
    with tf.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()

    # Decode only to learn the image size; the stored payload stays encoded.
    image_width, image_height = PIL.Image.open(io.BytesIO(encoded_jpg)).size
    filename = os.path.basename(full_path)
    key = hashlib.sha256(encoded_jpg).hexdigest()

    def _normalized(field, extent):
        # Pixel coordinates -> fractions of the image size. np.asarray lets
        # plain lists work as well as the arrays read_anno_sport_qa returns.
        values = np.asarray(annotations_list[field], dtype=float) / float(extent)
        return values.tolist()

    xmin = _normalized('x_left', image_width)
    ymin = _normalized('y_left', image_height)
    xmax = _normalized('x_right', image_width)
    ymax = _normalized('y_right', image_height)

    # A bytes feature only accepts byte strings; on Python 3 the class names
    # arrive as text and must be encoded first.
    category_names = [
        name if isinstance(name, bytes) else name.encode('utf8')
        for name in annotations_list['text']
    ]

    # NOTE(review): no 'image/object/class/label' ids are written here; check
    # whether the consumer of these records needs them.
    feature_dict = {
        'image/height':
        dataset_util.int64_feature(image_height),
        'image/width':
        dataset_util.int64_feature(image_width),
        'image/filename':
        dataset_util.bytes_feature(filename.encode('utf8')),
        'image/key/sha256':
        dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
        dataset_util.bytes_feature(encoded_jpg),
        'image/format':
        dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
        dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax':
        dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin':
        dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax':
        dataset_util.float_list_feature(ymax),
        'image/object/class/text':
        dataset_util.bytes_list_feature(category_names)
        }

    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return key, example


In [4]:
def read_anno_sport_qa(filename, h, w):
    """Parse a Sport_QA annotation file into arrays of clipped boxes.

    Every non-blank line loses its first character and its last three
    characters, and the remainder is split on commas. Field 0 is the label
    (its first and last characters are dropped and it is lower-cased);
    fields 1-4 are read as integers.

    NOTE(review): fields 1-4 are assumed to be x_left, y_left, x_right,
    y_right. The previous code read the right corner from fields 1 and 2
    again, which made every box zero-area -- confirm the column order
    against the annotation files.

    Args:
        filename: Path of the annotation text file.
        h: Image height in pixels; y_right is clipped to it.
        w: Image width in pixels; x_right is clipped to it.

    Returns:
        dict with the keys 'text', 'x_left', 'y_left', 'x_right' and
        'y_right', each a numpy array with one entry per kept box. Boxes
        whose right/bottom edge ends up before the left/top edge are dropped.
    """
    with open(filename) as f:
        lines = f.readlines()

    x_text = []
    x_left = []
    y_left = []
    x_right = []
    y_right = []

    for line in lines:
        # A blank (e.g. trailing) line has no fields to parse.
        if not line.strip():
            continue

        fields = line[1:-3].strip().split(",")

        label = fields[0][1:-1].lower()
        # Clip the box to the image: left/top at 0, right/bottom at w/h.
        left = max(int(fields[1].strip()), 0)
        top = max(int(fields[2].strip()), 0)
        right = min(int(fields[3].strip()), w)
        bottom = min(int(fields[4].strip()), h)

        # Skip boxes that are inverted or lie entirely outside the image.
        if right < left or bottom < top:
            continue

        x_text.append(label)
        x_left.append(left)
        y_left.append(top)
        x_right.append(right)
        y_right.append(bottom)

    anno = {}
    anno['text'] = np.array(x_text)
    anno['x_left'] = np.array(x_left)
    anno['y_left'] = np.array(y_left)
    anno['x_right'] = np.array(x_right)
    anno['y_right'] = np.array(y_right)

    return anno
        

In [5]:
def _write_sport_qa_split(writer, annotation_files, image_files):
    """Write one example per (annotation file, image file) pair to `writer`."""
    for index, annotation_file in enumerate(annotation_files):
        image_file = image_files[index]

        image = cv2.imread(image_file)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError('could not read image: %s' % image_file)
        height, width = image.shape[:2]

        labels = read_anno_sport_qa(annotation_file, height, width)
        _, example = create_tf_example(image_file, labels)
        writer.write(example.SerializeToString())


def convert_sport_qa_to_tfrecords(output_path):
    """Convert the Sport_QA train and test sets to two TFRecord files.

    Reads the module-level listings `path`/`im_path` (training) and
    `test_path`/`test_im_path` (validation), pairing annotation i with
    image i, and writes '<output_path>_train.tfrecord' and
    '<output_path>_val.tfrecord'.

    Args:
        output_path: Path prefix shared by the two output files.

    Raises:
        ValueError: If an image cannot be read.
    """
    train_writer = tf.python_io.TFRecordWriter('%s_train.tfrecord' % output_path)
    try:
        val_writer = tf.python_io.TFRecordWriter('%s_val.tfrecord' % output_path)
        try:
            _write_sport_qa_split(train_writer, path, im_path)
            # Validation annotations go with the validation images, not the
            # training images.
            _write_sport_qa_split(val_writer, test_path, test_im_path)
        finally:
            val_writer.close()
    finally:
        # Close the writers even when a conversion step fails.
        train_writer.close()


In [6]:
# Writes Sport_data/Sport_QA_train.tfrecord and Sport_data/Sport_QA_val.tfrecord.
convert_sport_qa_to_tfrecords("Sport_data/Sport_QA")