In [1]:
import os
import json
import cv2
import pandas as pd
import pickle
from sklearn.model_selection import train_test_split
import shutil

In [6]:
# Divide the annotation/image pairs into train and test sub-folders.
path_to_json = 'data/annotations/'
path_to_jpg = 'data/images/'

# os.listdir returns entries in arbitrary order and, once this cell has run,
# also returns the 'train'/'test' folders. Keep only annotation files and sort
# them so the seeded split is reproducible and the cell can be re-run safely.
lista_json = sorted(
    name for name in os.listdir(path_to_json)
    if name.endswith('.json') and os.path.isfile(os.path.join(path_to_json, name))
)

# Pair every annotation with the image named in its 'FileName' field (the same
# field json_to_csv uses to open the image) instead of relying on two
# independent directory listings happening to line up.
lista_jpg = []
for json_name in lista_json:
    with open(os.path.join(path_to_json, json_name)) as annotation_file:
        lista_jpg.append(json.load(annotation_file)['FileName'])

# Fail before anything is moved rather than half-way through the move loop.
missing_jpg = [name for name in lista_jpg
               if not os.path.isfile(os.path.join(path_to_jpg, name))]
if missing_jpg:
    raise FileNotFoundError(
        'images referenced by annotations not found in {}: {}'.format(path_to_jpg, missing_jpg))

if lista_json:
    # Splitting both lists in one call keeps each image in the same split as its annotation.
    lista_jpg_train, lista_jpg_test, lista_json_train, lista_json_test = train_test_split(
        lista_jpg, lista_json, test_size=0.2, random_state=42)
else:
    # Nothing left at the top level (e.g. the files were already moved by a previous run).
    lista_jpg_train, lista_jpg_test, lista_json_train, lista_json_test = [], [], [], []

# destination folder -> files that belong in it
dict_folder_list = {path_to_json + 'train':lista_json_train, path_to_json + 'test':lista_json_test, path_to_jpg + 'train':lista_jpg_train, path_to_jpg + 'test':lista_jpg_test}
# destination folder -> folder the files currently live in
dict_folder = {path_to_json + 'train':path_to_json, path_to_json + 'test':path_to_json, path_to_jpg + 'train':path_to_jpg, path_to_jpg + 'test':path_to_jpg}

for dir_path in dict_folder_list:
    os.makedirs(dir_path, exist_ok=True)

    for file_name in dict_folder_list[dir_path]:
        file_path = dict_folder[dir_path] + file_name

        # move files into created directory
        shutil.move(file_path, dir_path)

In [8]:
def json_to_csv(folder):
    """Collect the bounding-box annotations of one dataset split into a DataFrame.

    Every ``*.json`` file in ``data/annotations/<folder>/`` is loaded, the image
    named by its ``FileName`` entry is read from ``data/images/<folder>/`` to
    obtain its size, and one row is produced per entry of ``Annotations``
    (only when ``NumOfAnno`` is greater than zero).

    Side effect: the distinct class names seen are pickled to
    ``train_labels.txt`` in the current working directory.

    Args:
        folder: name of the split sub-folder, e.g. ``'train'`` or ``'test'``.

    Returns:
        pandas.DataFrame with the columns ``filename, width, height, class,
        xmin, ymin, xmax, ymax``.

    Raises:
        FileNotFoundError: if the image referenced by an annotation cannot be read.
    """
    path_to_json = 'data/annotations/{}/'.format(folder)
    path_to_jpg = 'data/images/{}/'.format(folder)
    # Sorted so the row order does not depend on the arbitrary os.listdir order.
    json_files = sorted(pos_json for pos_json in os.listdir(path_to_json) if pos_json.endswith('.json'))

    csv_list = []
    labels = []
    for json_name in json_files:
        with open(path_to_json + json_name) as data_file:
            data = json.load(data_file)

        # The annotation itself names its image; use that name both to read the
        # image and as the CSV 'filename' so rows can never point at another file.
        filename = data['FileName']
        img = cv2.imread(path_to_jpg + filename)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None.
            raise FileNotFoundError('cannot read image {}{}'.format(path_to_jpg, filename))
        # Image arrays are indexed (rows, columns, ...): height comes first.
        height, width = img.shape[0], img.shape[1]

        annotations = data['Annotations']
        if annotations and data['NumOfAnno'] > 0:
            for item in annotations:
                name = item['classname']
                labels.append(name)
                # assumes BoundingBox is [xmin, ymin, xmax, ymax] -- TODO confirm against the annotation tool
                xmin, ymin, xmax, ymax = item['BoundingBox'][:4]
                csv_list.append((filename, width, height, name, xmin, ymin, xmax, ymax))

    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    csv_df = pd.DataFrame(csv_list, columns=column_name)

    # Sorted for a deterministic file; set iteration order varies between runs.
    labels_train = sorted(set(labels))
    # NOTE(review): this file is rewritten on every call regardless of `folder`,
    # so after converting 'train' then 'test' it holds the test labels -- confirm intent.
    with open("train_labels.txt", "wb") as fp:   #Pickling
        pickle.dump(labels_train, fp)
    return csv_df

def main():
    """Export the annotations of the train and test splits to CSV files under data/."""
    split_names = ('train', 'test')
    for split_name in split_names:
        annotations_df = json_to_csv(split_name)
        output_csv = 'data/{}_labels.csv'.format(split_name)
        # index=None keeps the DataFrame index out of the written file
        annotations_df.to_csv(output_csv, index=None)
        print('Successfully converted json to csv.')

main()

Successfully converted json to csv.
Successfully converted json to csv.


In [1]:
import os
import io
import pandas as pd
import tensorflow as tf
from PIL import Image

from object_detection.utils import dataset_util
from object_detection.utils import label_map_util
from collections import namedtuple

In [2]:
def split(df, group):
	"""Group the rows of `df` by the `group` column(s).

	Returns a list of ``data(filename, object)`` namedtuples, one per group:
	``filename`` is the group key and ``object`` is the DataFrame holding the
	rows of that group.
	"""
	record = namedtuple('data', ['filename', 'object'])
	grouped = df.groupby(group)
	records = []
	for key in grouped.groups:
		records.append(record(key, grouped.get_group(key)))
	return records

In [3]:
def create_tf_example(group, path, category_idx):
	"""Build a tf.train.Example for one image and every box annotated on it.

	Args:
		group: record with a ``filename`` attribute (image file name) and an
			``object`` attribute (DataFrame whose rows provide the ``xmin``,
			``xmax``, ``ymin``, ``ymax`` and ``class`` columns).
		path: directory that contains the image file.
		category_idx: mapping from class name to its integer id.

	Returns:
		A tf.train.Example holding the encoded image bytes, the image size and
		the boxes, with coordinates divided by the image width/height.
	"""
	image_path = os.path.join(path, '{}'.format(group.filename))
	with tf.io.gfile.GFile(image_path, 'rb') as fid:
		encoded_jpg = fid.read()

	# The image is opened only to read its size; the stored payload is the raw bytes.
	width, height = Image.open(io.BytesIO(encoded_jpg)).size

	filename = group.filename.encode('utf8')
	image_format = b'jpg'

	xmins, xmaxs, ymins, ymaxs = [], [], [], []
	classes_text, classes = [], []
	for _, box in group.object.iterrows():
		# x coordinates are scaled by the width, y coordinates by the height
		xmins.append(float(box['xmin']) / width)
		xmaxs.append(float(box['xmax']) / width)
		ymins.append(float(box['ymin']) / height)
		ymaxs.append(float(box['ymax']) / height)
		class_name = box['class']
		classes_text.append(class_name.encode('utf8'))
		classes.append(category_idx[class_name])

	feature = {
		'image/height': dataset_util.int64_feature(height),
		'image/width': dataset_util.int64_feature(width),
		'image/filename': dataset_util.bytes_feature(filename),
		'image/source_id': dataset_util.bytes_feature(filename),
		'image/encoded': dataset_util.bytes_feature(encoded_jpg),
		'image/format': dataset_util.bytes_feature(image_format),
		'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
		'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
		'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
		'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
		'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
		'image/object/class/label': dataset_util.int64_list_feature(classes),
	}
	return tf.train.Example(features=tf.train.Features(feature=feature))

In [None]:
if __name__ == '__main__':
	# Convert the train split (CSV annotations + images) into a TFRecord file.

	csv_path = "data/train_labels.csv"
	images_path = "data/images/train"
	tfrecord_path = "data/myrecord_train.record"
	print("images path : ", images_path)
	print("csv path : ", csv_path)
	print("path to output tfrecords : ", tfrecord_path)
	label_map_dict = label_map_util.get_label_map_dict("label_map.pbtxt")

	examples = pd.read_csv(csv_path)
	print("Generating tfrecord .... ")
	# one group per image, carrying all of its annotation rows
	grouped = split(examples, 'filename')

	# The context manager closes the writer even when building an example fails,
	# so a failure does not leave the record file open.
	with tf.io.TFRecordWriter(tfrecord_path) as writer:
		for group in grouped:
			tf_example = create_tf_example(group, images_path, label_map_dict)
			writer.write(tf_example.SerializeToString())

	print('Successfully created the TFRecords: {}'.format(tfrecord_path))

images path :  data/images/test
csv path :  data/test_labels.csv
path to output tfrecords :  data/myrecord_test.record
Generating tfrecord .... 
Successfully created the TFRecords: data/myrecord_test.record


In [7]:
if __name__ == '__main__':
	# Convert the test split (CSV annotations + images) into a TFRecord file.

	csv_path = "data/test_labels.csv"
	images_path = "data/images/test"
	tfrecord_path = "data/myrecord_test.record"
	print("images path : ", images_path)
	print("csv path : ", csv_path)
	print("path to output tfrecords : ", tfrecord_path)
	label_map_dict = label_map_util.get_label_map_dict("label_map.pbtxt")

	examples = pd.read_csv(csv_path)
	print("Generating tfrecord .... ")
	# one group per image, carrying all of its annotation rows
	grouped = split(examples, 'filename')

	# The context manager closes the writer even when building an example fails,
	# so a failure does not leave the record file open.
	with tf.io.TFRecordWriter(tfrecord_path) as writer:
		for group in grouped:
			tf_example = create_tf_example(group, images_path, label_map_dict)
			writer.write(tf_example.SerializeToString())

	print('Successfully created the TFRecords: {}'.format(tfrecord_path))

images path :  data/images/test
csv path :  data/test_labels.csv
path to output tfrecords :  data/myrecord_test.record
Generating tfrecord .... 
Successfully created the TFRecords: data/myrecord_test.record
