Convert Dataset to tfrecord

In [8]:
import tensorflow as tf
import os

# Step 1: Define a function that takes an image file name and a mask file name as inputs, and returns a serialized example object
def create_example(image_file, mask_file):
  # Read the image and mask files as bytes
  image_data = tf.io.read_file(image_file)
  mask_data = tf.io.read_file(mask_file)

  # Decode the image and mask data as tensors
  image = tf.image.decode_jpeg(image_data)
  mask = tf.image.decode_png(mask_data)

  # Get the height, width and channels of the image and mask
  height = image.shape[0]
  width = image.shape[1]
  channels = image.shape[2]

  # Create a feature dictionary that contains the encoded image data, the encoded mask data, and some other features
  feature = {
    'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_data.numpy()])),
    'mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[mask_data.numpy()])),
    'height': tf.train.Feature(int64_list=tf.train.Int64List(value=[height])),
    'width': tf.train.Feature(int64_list=tf.train.Int64List(value=[width])),
    'channels': tf.train.Feature(int64_list=tf.train.Int64List(value=[channels]))
  }

  # Create an example object from the feature dictionary
  example = tf.train.Example(features=tf.train.Features(feature=feature))

  # Serialize the example object to a string
  return example.SerializeToString()

# Step 2: Create a TFRecordWriter object that can write examples to a TFRecord file
tfrecord_file = 'dataset.tfrecord'
writer = tf.io.TFRecordWriter(tfrecord_file)

# Step 3: Loop over your images and masks directories, and for each pair of files, call the function from step 1 and write the result to the TFRecord file using the writer from step 
images_dir = 'LabeledTomatoLeaves/images'
masks_dir = 'LabeledTomatoLeaves/labels'

for image_file in os.listdir(images_dir):
  
   # Get the corresponding mask file name by replacing .jpg with .png
    mask_file = ""
    print(image_file)
    if image_file.endswith('.jpg'):
        mask_file = image_file.replace('.jpg', '.png')
    elif image_file.endswith('.JPG'):
        mask_file = image_file.replace('.JPG', '.png')
    elif image_file.endswith('.jpeg'):
        mask_file = image_file.replace('.jpeg', '.png')
    else:
        continue

    print(mask_file)
      
    #    mask_file = image_file.replace('.JPG', '.png')

    # Get the full paths of the image and mask files
    image_path = os.path.join(images_dir, image_file)
    mask_path = os.path.join(masks_dir, mask_file)

    # Call the function from step 1 to create a serialized example object
    example = create_example(image_path, mask_path)

    # Write the example object to the TFRecord file using the writer from step 
    writer.write(example)

# Step : Close the writer when done
writer.close()

image (222).JPG
image (222).png
image (26).JPG
image (26).png
tomatoes_002_full_width.jpg
tomatoes_002_full_width.png
image (5).JPG
image (5).png
Tomato-diseases-prevention-treatment--Early-blight-_jpg.rf.218d855be5666cc2019881969d402d66.jpg
Tomato-diseases-prevention-treatment--Early-blight-_jpg.rf.218d855be5666cc2019881969d402d66.png
image (218).JPG
image (218).png
image (10).JPG
image (10).png
h80_hight.jpg
h80_hight.png
bacterialspot1.jpg
bacterialspot1.png
tomato-early-blight-1_jpg.rf.8b480156e5d50b37f7de7031d6663c32.jpg
tomato-early-blight-1_jpg.rf.8b480156e5d50b37f7de7031d6663c32.png
image (9).JPG
image (9).png
tomato-early-blightH_jpg.rf.e30fcdfe9db62ad8e8c774e36002455d.jpg
tomato-early-blightH_jpg.rf.e30fcdfe9db62ad8e8c774e36002455d.png
image (8).JPG
image (8).png
.DS_Store
IMG_4153.jpeg
IMG_4153.png
9AF14E8B-4CD0-4E06-9D10-EB406F9E5927.JPG
9AF14E8B-4CD0-4E06-9D10-EB406F9E5927.png
h38_hight.jpg
h38_hight.png
image (11).JPG
image (11).png
earlyblight-trinidad.jpg
earlyblight-tr

Open tfrecord to verify it was saved

In [12]:
# Step 1: Create a TFRecordDataset object that can read examples from a TFRecord file
tfrecord_file = 'dataset.tfrecord'
dataset = tf.data.TFRecordDataset(tfrecord_file)

# Step 2: Define a function that takes a serialized example object as input, and parses it using tf.io.parse_single_example with the same feature dictionary that you used to create the example object
def parse_example(example):
  # Define the feature dictionary
  feature = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'mask': tf.io.FixedLenFeature([], tf.string),
    'height': tf.io.FixedLenFeature([], tf.int64),
    'width': tf.io.FixedLenFeature([], tf.int64),
    'channels': tf.io.FixedLenFeature([], tf.int64)
  }

  # Parse the example using the feature dictionary
  parsed_example = tf.io.parse_single_example(example, feature)

  # Decode the image and mask data as tensors
  image = tf.image.decode_jpeg(parsed_example['image'])
  mask = tf.image.decode_png(parsed_example['mask'])

  # Return the image and mask tensors
  return image, mask

# Step : Map the function from step to the dataset from step using tf.data.Dataset.map method
dataset = dataset.map(parse_example)

# Step : Iterate over the dataset using tf.data.Dataset.make_one_shot_iterator or tf.data.Dataset.as_numpy_iterator methods
# iterator = dataset.make_one_shot_iterator()
# for image, mask in iterator:
#    # Do something with image and mask tensors
#    print(image)

# Alternatively, you can use as_numpy_iterator method to get numpy arrays instead of tensors
for image, mask in dataset.as_numpy_iterator():
  print(image[:5], mask[:5])

[[[103  92  98]
  [114 103 109]
  [112 101 107]
  ...
  [119 108 112]
  [128 117 121]
  [154 143 147]]

 [[137 126 132]
  [128 117 123]
  [110  99 105]
  ...
  [143 132 136]
  [152 141 145]
  [154 143 147]]

 [[130 119 125]
  [126 115 121]
  [112 101 107]
  ...
  [148 137 141]
  [155 144 148]
  [148 137 141]]

 [[113 102 108]
  [113 102 108]
  [107  96 102]
  ...
  [138 127 131]
  [139 128 132]
  [142 131 135]]

 [[120 109 113]
  [108  97 101]
  [ 98  87  91]
  ...
  [143 132 136]
  [140 129 133]
  [141 130 134]]] [[[  0   0   0]
  [106  61 154]
  [106  61 154]
  ...
  [106  61 154]
  [106  61 154]
  [106  61 154]]

 [[106  61 154]
  [106  61 154]
  [106  61 154]
  ...
  [106  61 154]
  [106  61 154]
  [106  61 154]]

 [[106  61 154]
  [106  61 154]
  [106  61 154]
  ...
  [106  61 154]
  [106  61 154]
  [106  61 154]]

 [[106  61 154]
  [106  61 154]
  [106  61 154]
  ...
  [106  61 154]
  [106  61 154]
  [106  61 154]]

 [[106  61 154]
  [106  61 154]
  [106  61 154]
  ...
  [106  61