In [1]:
import os

# Must be set before TensorFlow is imported: the native logging level is
# picked up while the library loads, so assigning it after the import does
# not silence the messages emitted during import.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'    # Suppress TensorFlow logging (1)

import argparse
import glob
import io
import json
import xml.etree.ElementTree as ET
from collections import namedtuple

import pandas as pd
import tensorflow as tf
from PIL import Image
from object_detection.utils import dataset_util, label_map_util

In [2]:
# Root directory of the dataset.
dataset_dir = 'dataset'

# Label CSV for each split.
train_csv_path = 'dataset/final_train_labels.csv'
val_csv_path = 'dataset/final_validate_labels.csv'
test_csv_path = 'dataset/final_test_labels.csv'

# Destination TFRecord file for each split.
train_tfrecord_filename = './data/train.tfrecord'
validate_tfrecord_filename = './data/validate.tfrecord'
test_tfrecord_filename = './data/test.tfrecord'

In [3]:
# Load the label CSV of each split into its own DataFrame
# (read order: train, test, validation).
train_data, test_data, val_data = (
    pd.read_csv(csv_path)
    for csv_path in (train_csv_path, test_csv_path, val_csv_path)
)

In [4]:
# Check the label tables loaded in the previous cell: every split must
# provide the columns that the TFRecord conversion reads ('fname' for
# grouping, 'class' for the label, and the four box corners).
required_columns = {'fname', 'class', 'x1', 'y1', 'x2', 'y2'}
for split_name, labels in (('train', train_data),
                           ('test', test_data),
                           ('validate', val_data)):
    missing = required_columns - set(labels.columns)
    if missing:
        raise ValueError(
            '{} labels are missing columns: {}'.format(split_name, sorted(missing))
        )

In [5]:
# Preview the first five rows of the training labels.
train_data.head(n=5)

Unnamed: 0,width,height,crop,class,fname,img_path,x1,y1,x2,y2,id,integer_label
0,640,640,Tomato,Tomato Late Blight,e701b2d9-d9ef-49dd-a9b2-067ac10dff12.jpg,dataset/train\e701b2d9-d9ef-49dd-a9b2-067ac10d...,191.42496,306.8542,256.16083,391.2765,0,16
1,640,640,Corn,Corn Healthy,ee11488a-d7c4-4426-898b-0153fe55d82b.jpg,dataset/train\ee11488a-d7c4-4426-898b-0153fe55...,370.76172,459.11493,457.5262,595.264,1,3
2,640,640,Corn,Corn Common Rust,8c38b101-19ed-4353-b518-63e1128fc5b7.jpg,dataset/train\8c38b101-19ed-4353-b518-63e1128f...,167.16748,134.74731,214.3625,330.7657,2,2
3,640,640,Corn,Corn Streak,627ce1e3-0354-4713-9f12-3d2db183e3e0.jpg,dataset/train\627ce1e3-0354-4713-9f12-3d2db183...,517.21704,73.61801,535.5021,302.75104,3,4
4,640,640,Pepper,Pepper Leaf Curl,fe203180-5056-479c-abe4-e9d4c613dc01.jpg,dataset/train\fe203180-5056-479c-abe4-e9d4c613...,149.65358,178.65503,225.32716,296.8891,4,6


In [7]:
def class_text_to_int(row_label, label_map):
    """Look up the integer class id for a class name.

    Args:
        row_label: Class name used as the lookup key.
        label_map: Mapping from class name to integer id.

    Returns:
        The value stored in ``label_map`` under ``row_label``.

    Raises:
        KeyError: If ``label_map`` is a dict and ``row_label`` is not one
            of its keys.
    """
    class_id = label_map[row_label]
    return class_id

# JSON file holding the class-name -> integer-id mapping.
label_map_path = 'data/label_map.json'

# Parse the file into a dict; it is used later when building TF examples.
with open(label_map_path, 'r') as json_file:
    label_map = json.loads(json_file.read())

# Display the parsed mapping.
label_map

{'Corn Cercospora Leaf Spot': 1,
 'Corn Common Rust': 2,
 'Corn Healthy': 3,
 'Corn Streak': 4,
 'Corn Northern Leaf Blight': 5,
 'Pepper Leaf Curl': 6,
 'Pepper Cercospora': 7,
 'Pepper Leaf Blight': 8,
 'Pepper Bacterial Spot': 9,
 'Pepper Leaf Mosaic': 10,
 'Pepper Healthy': 11,
 'Pepper Fusarium': 12,
 'Pepper Septoria': 13,
 'Pepper Late Blight': 14,
 'Pepper Early Blight': 15,
 'Tomato Late Blight': 16,
 'Tomato Early Blight': 17,
 'Tomato Bacterial Spot': 18,
 'Tomato Septoria': 19,
 'Tomato Fusarium': 20,
 'Tomato Leaf Curl': 21,
 'Tomato Healthy': 22,
 'Tomato Mosaic': 23}

In [10]:
def split(df, group):
    """Partition a DataFrame into one record per distinct ``group`` value.

    Args:
        df: DataFrame to partition.
        group: Column name (or other ``groupby`` key) to group rows by.

    Returns:
        A list of ``data(filename, object)`` namedtuples, one per group,
        where ``filename`` is the group key and ``object`` is the
        sub-DataFrame containing that group's rows.
    """
    record_type = namedtuple('data', ['filename', 'object'])
    grouped_frame = df.groupby(group)

    records = []
    for key in grouped_frame.groups:
        records.append(record_type(key, grouped_frame.get_group(key)))
    return records


def create_tf_example(group, path):
    """Build a ``tf.train.Example`` for one image and all of its boxes.

    Args:
        group: A record with a ``filename`` attribute (image file name) and
            an ``object`` attribute (DataFrame of that image's annotation
            rows), as returned by ``split``. The columns ``x1``, ``y1``,
            ``x2``, ``y2`` and ``class`` are read from each row.
        path: Directory that contains the image file.

    Returns:
        A ``tf.train.Example`` with the encoded image bytes, the image
        size, and per-box normalised coordinates, class names and class
        ids.
    """
    with tf.io.gfile.GFile(os.path.join(path, '{}'.format(group.filename)), 'rb') as fid:
        encoded_jpg = fid.read()
    # The image is opened only to obtain its size; the bytes are stored as read.
    image = Image.open(io.BytesIO(encoded_jpg))
    width, height = image.size

    filename = group.filename.encode('utf8')
    image_format = b'jpg'
    xmins = []
    xmaxs = []
    ymins = []
    ymaxs = []
    classes_text = []
    classes = []

    for _, row in group.object.iterrows():
        # The label CSV stores box corners in pixels, while the TensorFlow
        # Object Detection API expects them normalised to [0, 1] by the
        # image size.
        # NOTE(review): assumes the CSV coordinates refer to the image as
        # stored on disk (same width/height) - confirm.
        xmins.append(row['x1'] / width)
        xmaxs.append(row['x2'] / width)
        ymins.append(row['y1'] / height)
        ymaxs.append(row['y2'] / height)
        classes_text.append(row['class'].encode('utf8'))
        classes.append(class_text_to_int(row['class'], label_map=label_map))

    # Id of the record: the index label of the image's last annotation row.
    # Read explicitly instead of relying on the loop variable still being
    # bound after the loop.
    record_id = str(group.object.index[-1]).encode('utf-8')

    tf_example = tf.train.Example(features=tf.train.Features(feature={
        'image/height': dataset_util.int64_feature(height),
        'image/width': dataset_util.int64_feature(width),
        'image/filename': dataset_util.bytes_feature(filename),
        'image/source_id': dataset_util.bytes_feature(record_id),
        'image/image_id': dataset_util.bytes_feature(record_id),
        'image/encoded': dataset_util.bytes_feature(encoded_jpg),
        'image/format': dataset_util.bytes_feature(image_format),
        'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
        'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
        'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
        'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
        'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
        'image/object/class/label': dataset_util.int64_list_feature(classes),
    }))
    return tf_example

In [11]:
# Convert the test split: one serialized tf.train.Example per image.
path = 'dataset/test'
grouped = split(test_data, 'fname')
# The context manager closes the writer even if building an example fails
# part-way through, so the file handle is never left open.
with tf.io.TFRecordWriter(test_tfrecord_filename) as writer:
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())
print('Successfully created the TFRecord file: {}'.format(test_tfrecord_filename))

Successfully created the TFRecord file: ./data/test.tfrecord


In [12]:
# Convert the validation split: one serialized tf.train.Example per image.
path = 'dataset/validate'
grouped = split(val_data, 'fname')
# The context manager closes the writer even if building an example fails
# part-way through, so the file handle is never left open.
with tf.io.TFRecordWriter(validate_tfrecord_filename) as writer:
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())
print('Successfully created the TFRecord file: {}'.format(validate_tfrecord_filename))

Successfully created the TFRecord file: ./data/validate.tfrecord


In [13]:
# Convert the training split: one serialized tf.train.Example per image.
path = 'dataset/train'
grouped = split(train_data, 'fname')
# The context manager closes the writer even if building an example fails
# part-way through, so the file handle is never left open.
with tf.io.TFRecordWriter(train_tfrecord_filename) as writer:
    for group in grouped:
        tf_example = create_tf_example(group, path)
        writer.write(tf_example.SerializeToString())
print('Successfully created the TFRecord file: {}'.format(train_tfrecord_filename))

Successfully created the TFRecord file: ./data/train.tfrecord


In [None]:
def create_label_map_pbtxt(json_obj, output_file):
    """Write a ``label_map.pbtxt`` file from a dict of label entries.

    One ``item { id: ... name: '...' }`` stanza is written per entry, in
    the dict's iteration order. The ``id`` line uses the dict key; the
    ``name`` line uses the entry's ``"name"`` value.

    Args:
        json_obj: Dict mapping an id to an entry dict that has a
            ``"name"`` key.
        output_file: Path of the pbtxt file to create or overwrite.
    """
    with open(output_file, "w") as pbtxt_file:
        emit = pbtxt_file.write
        for item_id, entry in json_obj.items():
            emit("item {\n")
            emit("  id: {}\n".format(item_id))
            emit("  name: '{}'\n".format(entry["name"]))
            emit("}\n")

if __name__ == "__main__":
    # Derive the pbtxt entries from `label_map` (loaded from
    # data/label_map.json) instead of a hand-copied table, so the ids
    # written here always match the ids stored in the TFRecords.
    # Entries are ordered by id.
    json_obj = {
        str(class_id): {"id": class_id, "name": class_name}
        for class_name, class_id in sorted(label_map.items(), key=lambda item: item[1])
    }

    create_label_map_pbtxt(json_obj, "dataset/label_map.pbtxt")
