In [1]:
import os
# os.environ["KERAS_BACKEND"] = "TensorFlow"

from tqdm.auto import tqdm # show progress bars

In [2]:
import tensorflow as tf
from tensorflow import keras
import keras_cv

from keras_cv import bounding_box
from keras_cv import visualization

Using TensorFlow backend


In [3]:
# Tunable settings, gathered in one place so they are easy to adjust
SPLIT_RATIO = 0.2  # fraction of the annotation files held out for validation
BATCH_SIZE = 4
LEARNING_RATE = 1e-3
EPOCH = 5
GLOBAL_CLIPNORM = 10.0

In [4]:
# Ordered class names; a name's position in this list is its integer id
class_ids = ["car", "pedestrian", "trafficLight", "biker", "truck"]

# Lookup table from integer id to class name.
# NOTE(review): the recorded output of `classes[:4]` further down shows label
# id 10, which has no entry here -- confirm this list matches the dataset.
class_mapping = dict(enumerate(class_ids))
class_mapping

{0: 'car', 1: 'pedestrian', 2: 'trafficLight', 3: 'biker', 4: 'truck'}

In [11]:
# Dataset locations (relative paths, resolved against the working directory)
path_annot = "../data/example_data/labels"  # one .txt label file per image
path_images = "../data/example_data/images"

In [49]:
# Collect the path of every .txt annotation file in path_annot
txt_files = [
    os.path.join(path_annot, entry)
    for entry in os.listdir(path_annot)
    if entry.endswith(".txt")
]
# Sort so the file order (and therefore the later split) is deterministic
txt_files.sort()
# Work with at most the first 2000 annotation files
txt_files = txt_files[:2000]

In [50]:
# Sanity check: show the first annotation path found by the listing above
txt_files[0]

'../data/example_data/labels/1478019952686311006_jpg.rf.54e2d12dbabc46be3c78995b6eaf3fee.txt'

In [51]:
def parse_annotation(txt_file, images_dir=None, image_ext=".jpg"):
    """Parse one plain-text label file into an image path, boxes and class ids.

    Each non-blank line is expected to hold at least five whitespace-separated
    fields: an integer class id followed by four floats describing one box.
    The four floats are returned in file order, without any conversion.

    NOTE(review): the values recorded in this notebook's outputs look like
    normalised YOLO boxes (x_center, y_center, width, height), not corner
    coordinates -- confirm before choosing a KerasCV bounding_box_format.

    Args:
        txt_file: Path to the label file.
        images_dir: Directory containing the images. Defaults to the
            notebook-level ``path_images``.
        image_ext: Extension of the image that belongs to the label file.
            Defaults to ".jpg" -- an assumption based on the Roboflow-style
            file names ("..._jpg.rf.<hash>.txt"); TODO confirm.

    Returns:
        Tuple ``(image_path, boxes, class_ids)`` where ``boxes`` is a list of
        four-float lists and ``class_ids`` is the parallel list of int ids.

    Raises:
        IndexError: If a non-blank line has fewer than five fields.
        ValueError: If a field cannot be converted to int/float.
    """
    if images_dir is None:
        images_dir = path_images

    with open(txt_file) as file:
        lines = file.readlines()

    # The image shares the label file's base name; only directory and
    # extension differ. (Joining the full label path onto the image directory,
    # as before, produced a path that pointed at the .txt file instead.)
    stem = os.path.splitext(os.path.basename(txt_file))[0]
    image_path = os.path.join(images_dir, stem + image_ext)

    boxes = []
    class_ids = []
    for line in lines:
        fields = line.split()
        if not fields:
            # Tolerate blank lines (e.g. a trailing newline at end of file)
            continue

        class_ids.append(int(fields[0]))
        boxes.append(
            [
                float(fields[1]),
                float(fields[2]),
                float(fields[3]),
                float(fields[4]),
            ]
        )
    return image_path, boxes, class_ids

In [52]:
# Parse every annotation file into three parallel lists (one entry per image)
image_paths = []
bbox = []
classes = []
for txt_file in txt_files:
    # Unpack into `image_class_ids`, not `class_ids`: at notebook top level the
    # latter would overwrite the global list of class names defined earlier.
    image_path, boxes, image_class_ids = parse_annotation(txt_file)
    image_paths.append(image_path)
    bbox.append(boxes)
    classes.append(image_class_ids)

In [53]:
# Peek at the class ids parsed for the first four images
classes[:4]

[[10, 1, 1, 2], [10, 1, 1, 2], [10, 1, 10, 1], [10, 1, 10, 1]]

In [54]:
# Peek at the boxes parsed for the first four images (four floats per box).
# NOTE(review): the recorded values all lie in [0, 1] -- presumably normalised
# coordinates; confirm the exact box format against the dataset export.
bbox[:4]

[[[0.43359375, 0.48828125, 0.0166015625, 0.0283203125],
  [0.458984375, 0.494140625, 0.0244140625, 0.0263671875],
  [0.5087890625, 0.4970703125, 0.0283203125, 0.0380859375],
  [0.9287109375, 0.5107421875, 0.0361328125, 0.21875]],
 [[0.43359375, 0.48833333333333334, 0.01614583333333333, 0.028333333333333335],
  [0.45859374999999997,
   0.4941666666666667,
   0.024479166666666666,
   0.02666666666666667],
  [0.5088541666666666, 0.49750000000000005, 0.028125, 0.03833333333333334],
  [0.9286458333333333,
   0.5108333333333334,
   0.036458333333333336,
   0.21833333333333335]],
 [[0.39453125, 0.49609375, 0.017578125, 0.0302734375],
  [0.4130859375, 0.5029296875, 0.0224609375, 0.025390625],
  [0.8125, 0.46875, 0.1123046875, 0.0849609375],
  [0.4677734375, 0.5078125, 0.0283203125, 0.0380859375]],
 [[0.39479166666666665,
   0.49583333333333335,
   0.017708333333333333,
   0.030000000000000002],
  [0.41328125, 0.5029166666666667, 0.022395833333333334, 0.025833333333333337],
  [0.812239583333333

In [55]:
# The number of objects varies from image to image, so the per-image lists
# have different lengths; ragged tensors can hold them without padding.
bbox = tf.ragged.constant(bbox)
classes = tf.ragged.constant(classes)
image_paths = tf.ragged.constant(image_paths)

# One dataset element per image: (image path, class ids, boxes)
data = tf.data.Dataset.from_tensor_slices(
    (image_paths, classes, bbox)
)

In [56]:
# Train/validation split.
# Number of samples reserved for validation (truncated to an int)
num_val = int(SPLIT_RATIO * len(txt_files))

# The split is positional: the first `num_val` elements become the validation
# set and everything after them the training set.
# TODO: replace with a random split (e.g. via train_test_split)
train_data = data.skip(num_val)
val_data = data.take(num_val)