In [None]:
import os
import io
import pandas as pd
import tensorflow as tf
from tensorflow import train
from PIL import Image
from object_detection.utils import dataset_util
from collections import namedtuple, OrderedDict

# Generowanie pliku kategorii .pbtxt na podstawie katalogów

In [None]:
# Root directory whose entries name the categories. A raw string keeps the
# Windows backslashes literal instead of relying on unrecognised escape
# sequences (\m, \d, \c), which newer Python versions warn about.
TRAIN_DIR = r'C:\messier_detection\data_110\categories_110'

# os.listdir() returns entries in arbitrary order, so sort them: the same
# category must receive the same numeric id on every run and every machine.
# Only directories are kept so stray files (e.g. Thumbs.db) do not become
# categories -- NOTE(review): assumes one sub-directory per category; confirm.
LABELS_DIR = sorted(
    entry for entry in os.listdir(TRAIN_DIR)
    if os.path.isdir(os.path.join(TRAIN_DIR, entry))
)
# Ids are 1-based, one per category name.
LABELS = list(range(1, len(LABELS_DIR) + 1))

# category name -> numeric id
labels_to_num_dict = dict(zip(LABELS_DIR, LABELS))

In [None]:
# Sanity check: display how many categories were mapped to numeric ids.
len(labels_to_num_dict)

In [None]:
def generate_pbtxt_mapping_file(path_to_file, mapping_dict):
    """Write a label-map file in protobuf text format (.pbtxt).

    One ``item { id: ... name: '...' }`` entry is written per mapping entry,
    in the mapping's iteration order.

    Args:
        path_to_file: Destination path; an existing file is overwritten.
        mapping_dict: Mapping of category name (str) to category id.
    """
    entries = []
    for category_name, category_id in mapping_dict.items():
        # The name is emitted inside single quotes, so backslashes and single
        # quotes must be escaped (backslashes first) to keep the file parsable.
        safe_name = category_name.replace("\\", "\\\\").replace("'", "\\'")
        entries.append(
            f"item {{\n\tid: {category_id!s}\n\tname: '{safe_name}'\n}}\n"
        )

    # Explicit UTF-8 so non-ASCII names are not written in the platform's
    # default code page. Plain "w" is enough: the file is never read back.
    with open(path_to_file, "w", encoding="utf-8") as file:
        file.writelines(entries)


In [None]:
# Destination of the generated label map (placeholder path, adjust before running).
path_to_pbtxt = 'PATH_TO_SAVE_FILE/categories.pbtxt'

# Report the number of categories, then write them out as a .pbtxt label map.
print(len(labels_to_num_dict))
generate_pbtxt_mapping_file(
    path_to_file=path_to_pbtxt,
    mapping_dict=labels_to_num_dict,
)

# Definicja funkcji generujących pliki .tfrecord

In [None]:
def split(df, group):
    """Group the rows of ``df`` by ``group`` and return one record per group.

    Args:
        df: DataFrame to partition.
        group: Column (or grouping key) passed to ``DataFrame.groupby``.

    Returns:
        A list of ``data`` namedtuples with fields ``img_file`` (the group
        key) and ``object`` (the sub-frame holding that group's rows).
    """
    Record = namedtuple('data', ['img_file', 'object'])
    grouped = df.groupby(group)

    records = []
    for key in grouped.groups:
        records.append(Record(key, grouped.get_group(key)))
    return records

def create_tf_example(category_to_id_dict, group, path):
    """Build a ``tf.train.Example`` for one image and its annotated boxes.

    Args:
        category_to_id_dict: Mapping from category name to the class id stored
            under ``image/object/class/label``.
        group: Record exposing ``img_file`` (image file name) and ``object``
            (DataFrame with ``xmin``, ``xmax``, ``ymin``, ``ymax`` and
            ``category`` columns, one row per box).
        path: Directory that contains the image file.

    Returns:
        The populated ``tf.train.Example``.
    """
    image_path = os.path.join(path, '{}'.format(group.img_file))
    with tf.io.gfile.GFile(image_path, 'rb') as image_file:
        image_bytes = image_file.read()

    # The image is decoded only to learn its pixel dimensions; the raw bytes
    # are what gets stored in the example.
    width, height = Image.open(io.BytesIO(image_bytes)).size

    encoded_name = group.img_file.encode('utf8')

    # Per-box values, accumulated in row order.
    x_lo, x_hi, y_lo, y_hi = [], [], [], []
    label_names, label_ids = [], []
    for _, box in group.object.iterrows():
        category = box['category']
        # Coordinates are divided by the image size before being stored.
        x_lo.append(box['xmin'] / width)
        x_hi.append(box['xmax'] / width)
        y_lo.append(box['ymin'] / height)
        y_hi.append(box['ymax'] / height)
        label_names.append(category.encode('utf8'))
        label_ids.append(category_to_id_dict[category])

    feature_map = {
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(encoded_name),
        'image/source_id': dataset_util.bytes_feature(encoded_name),
        'image/encoded': dataset_util.bytes_feature(image_bytes),
        # The format tag is fixed; it is not derived from the file contents.
        'image/format': dataset_util.bytes_feature(b'jpg'),
        'image/object/bbox/xmin': dataset_util.float_list_feature(x_lo),
        'image/object/bbox/xmax': dataset_util.float_list_feature(x_hi),
        'image/object/bbox/ymin': dataset_util.float_list_feature(y_lo),
        'image/object/bbox/ymax': dataset_util.float_list_feature(y_hi),
        'image/object/class/text': dataset_util.bytes_list_feature(label_names),
        'image/object/class/label': dataset_util.int64_list_feature(label_ids),
    }
    return tf.train.Example(features=tf.train.Features(feature=feature_map))

# Generacja rekordów .tfrecord na podstawie dostarczonych plików

In [None]:
# Output .tfrecord file, and the input directory holding train.csv; image
# files are resolved relative to the same directory (placeholder paths).
data_set_path = 'PATH_TO_SAVE_TF_RECORDS/train_records.tfrecord'
path = 'PATH_TO_INPUT_DATA'
csv_path = os.path.join(path, 'train.csv')

# One group per image file, each carrying all annotation rows of that image.
examples = pd.read_csv(csv_path)
grouped = split(examples, 'img_file')

# The context manager closes (and flushes) the writer even when building or
# writing an example raises, so the output file handle is never leaked.
with tf.io.TFRecordWriter(data_set_path) as tf_record_writer:
    for group in grouped:
        tf_example = create_tf_example(labels_to_num_dict, group, path)
        tf_record_writer.write(tf_example.SerializeToString())