In [11]:
import glob
import os

In [None]:
# !pip install --upgrade -q git+https://github.com/keras-team/keras-cv

## Reference List

https://developer.nvidia.com/blog/fast-track-your-production-ai-with-pre-trained-models-and-tao-toolkit-3-0/
https://keras.io/examples/timeseries/timeseries_traffic_forecasting/
https://keras.io/guides/keras_cv/object_detection_keras_cv/

## GPU Checks
https://stackoverflow.com/questions/44544766/how-do-i-check-if-keras-is-using-gpu-version-of-tensorflow#44547144

In [1]:
import tensorflow as tf
# Show which TensorFlow build this kernel is using.
print(tf.__version__)
# Last expression of the cell: displays the GPU devices TensorFlow can see
# (presumably an empty list when no GPU is visible — confirm on a CPU-only machine).
tf.config.list_physical_devices('GPU')

2023-11-13 18:37:31.342365: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.13.0


2023-11-13 18:37:33.207474: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-13 18:37:33.331122: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-13 18:37:33.331614: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

## Previous TensorFlow Install Guide
https://www.tensorflow.org/install/pip

https://keras.io/examples/vision/yolov8/

In [4]:
import os
from tqdm.auto import tqdm
import xml.etree.ElementTree as ET

import tensorflow as tf
from tensorflow import keras

import keras_cv
from keras_cv import bounding_box
from keras_cv import visualization

Using TensorFlow backend


## Hyper Parameters

In [36]:
# Fraction of the samples taken for the validation set (see the train/validation split cell).
SPLIT_RATIO = 0.2
# The constants below are not referenced in the visible part of the notebook;
# presumably they configure the data pipeline and optimizer further down — TODO confirm.
BATCH_SIZE = 4
LEARNING_RATE = 0.001
EPOCH = 5
GLOBAL_CLIPNORM = 10.0

In [None]:
# !pip install roboflow

In [None]:
# from roboflow import Roboflow
# rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])  # read the key from the environment; never commit it
# project = rf.workspace("roboflow-gw7yv").project("self-driving-car")
# dataset = project.version(3).download("yolov8")

## Get Input Images and Annotations for the YOLOv8 Model

In [13]:
# Class names; class_mapping gives each name the integer id equal to its
# position in this list.
# NOTE(review): the label-file preview later in the notebook prints a class id
# of 10, which is outside this 5-entry list — confirm that the names and their
# order match the exported dataset.
class_ids = ["car", "pedestrian", "trafficLight", "biker", "truck"]
class_mapping = dict(enumerate(class_ids))

# Directories holding the images and their label files
path_images = "export/images/" #"/kaggle/input/dataset/data/images/"
path_annot = "export/labels/" #"/kaggle/input/dataset/data/annotations/"

# Sorted paths of every .txt label file found in path_annot
txt_files = sorted(
    os.path.join(path_annot, file_name)
    for file_name in os.listdir(path_annot)
    if file_name.endswith(".txt")
)

# Sorted paths of every .jpg image found in path_images
jpg_files = sorted(
    os.path.join(path_images, file_name)
    for file_name in os.listdir(path_images)
    if file_name.endswith(".jpg")
)


In [14]:
# Cross-check the label directory with glob: this matches every entry under
# path_annot (no extension filter) and returns them in no particular order.
label_files = glob.glob(f"{path_annot}*")
print(len(label_files))
# Peek at a few entries only; the full list is far too long to display.
label_files[:5]

29800


['export/labels/1478896928938416423_jpg.rf.kYj91j6BMt3KIYihOFLT.txt',
 'export/labels/1478900095024715037_jpg.rf.bf962ff644f95d0c82417d01eceecb86.txt',
 'export/labels/1478899896786707985_jpg.rf.RkD49wNL5ucdyWvu3aqY.txt',
 'export/labels/1478020604203617302_jpg.rf.ed3d14d0eff881495e9d584862382013.txt',
 'export/labels/1478896849527318206_jpg.rf.3e946a060fbf6c6b9bc5d1b288026436.txt']

In [16]:
# The annotations for this dataset are .txt label files (txt_files is filtered
# on the ".txt" extension), so the message must not call them XML.
print(f"Number of Annotation (txt) files: {len(txt_files):,}")
print(f"Number of Image (jpg) files: {len(jpg_files):,}")

Number Annotated (xml) files: 29,800
Number of Image (jpg) files: 29,800


In [21]:
# Peek at the first few (sorted) image paths to see the file naming scheme.
jpg_files[:5]

['export/images/1478019952686311006_jpg.rf.54e2d12dbabc46be3c78995b6eaf3fee.jpg',
 'export/images/1478019952686311006_jpg.rf.JLSB3LP2Q4RuGHYKqfF6.jpg',
 'export/images/1478019953180167674_jpg.rf.8a816c9d7e9b423a63ed6ecd4a663e47.jpg',
 'export/images/1478019953180167674_jpg.rf.azslsZnM8FLQPu3QWLTl.jpg',
 'export/images/1478019953689774621_jpg.rf.2e4b7ae29c3379da1282e85cff4c1745.jpg']

In [24]:
import numpy as np
import re

In [28]:
#Finding a Files Pair....
txt_file = np.random.choice(txt_files)
jpg_pair = txt_file.replace(".txt",".jpg").replace('/labels/','/images/')
[f for f in jpg_files if f == jpg_pair]

['export/images/1478899691122801774_jpg.rf.721a2baae483c8999be1c2303d3bfc55.jpg']

## Preview an Annotation File

> My Download from RoboFlow: https://public.roboflow.com/object-detection/self-driving-car/3#
produced txt files instead of XML files. Adapting code accordingly.

In [31]:
# Preview the first label file, one object per row.
# The five numbers are named after the YOLO label layout
#   class x_center y_center width height
# rather than xmin/xmax/ymin/ymax: in the data the second value can be larger
# than the third (e.g. 0.93 vs 0.51), which rules out a min/max pair.
# NOTE(review): layout inferred from the values and the "yolov8" export name —
# confirm against the dataset documentation.
txt_file = txt_files[0]
with open(txt_file, "r") as f:
    for line in f:  # iterate lazily instead of materialising readlines()
        class_, x_center, y_center, width, height = [float(x) for x in line.split()]
        print(class_, x_center, y_center, width, height)

10.0 0.43359375 0.48828125 0.0166015625 0.0283203125
1.0 0.458984375 0.494140625 0.0244140625 0.0263671875
1.0 0.5087890625 0.4970703125 0.0283203125 0.0380859375
2.0 0.9287109375 0.5107421875 0.0361328125 0.21875


**Comment:**
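> The rows match the normalized YOLO label layout:  
category, x_center, y_center, width, height (box values are fractions of the image size). They cannot be x-min / x-max / y-min / y-max: in the last row the second value (0.93) is larger than the third (0.51).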

In [32]:
def parse_annotation(txt_file):
    """Parse one YOLO-format label file into corner-format boxes and class ids.

    Each non-blank row of the file is read as
    ``class x_center y_center width height``. The center/size values are
    converted to ``[xmin, ymin, xmax, ymax]`` corners in the same units as the
    file (fractions of the image size for a normalized YOLO export, i.e.
    KerasCV's ``rel_xyxy`` layout — NOTE(review): make sure downstream code
    uses the matching ``bounding_box_format`` or rescales by the image size).

    Args:
        txt_file: Path to a ``.txt`` label file located under a ``/labels/``
            directory.

    Returns:
        A tuple ``(jpg_pair, boxes, classes)`` where ``jpg_pair`` is the path
        of the matching image (``.txt`` -> ``.jpg`` and ``/labels/`` ->
        ``/images/``), ``boxes`` is a list of ``[xmin, ymin, xmax, ymax]``
        float lists, and ``classes`` is the list of integer class ids, in file
        order.

    Raises:
        ValueError: If a non-blank row does not hold exactly five numbers.
    """
    jpg_pair = txt_file.replace(".txt",".jpg").replace('/labels/','/images/')
    classes = []
    boxes = []
    with open(txt_file, "r") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank / whitespace-only rows (e.g. a trailing newline) carry no object.
                continue
            cls, x_center, y_center, width, height = [float(x) for x in fields]
            classes.append(int(cls))
            # Center + size -> corners; the previous code put the raw
            # center/size values straight into the corner slots.
            half_w = width / 2
            half_h = height / 2
            boxes.append(
                [
                    x_center - half_w,
                    y_center - half_h,
                    x_center + half_w,
                    y_center + half_h,
                ]
            )
    return jpg_pair, boxes, classes

In [33]:
# def parse_annotation(xml_file):
#     tree = ET.parse(xml_file)
#     root = tree.getroot()

#     image_name = root.find("filename").text
#     image_path = os.path.join(path_images, image_name)

#     boxes = []
#     classes = []
#     for obj in root.iter("object"):
#         cls = obj.find("name").text
#         classes.append(cls)

#         bbox = obj.find("bndbox")
#         xmin = float(bbox.find("xmin").text)
#         ymin = float(bbox.find("ymin").text)
#         xmax = float(bbox.find("xmax").text)
#         ymax = float(bbox.find("ymax").text)
#         boxes.append([xmin, ymin, xmax, ymax])

#     class_ids = [
#         list(class_mapping.keys())[list(class_mapping.values()).index(cls)]
#         for cls in classes
#     ]
#     return image_path, boxes, class_ids


# Parse every label file into three parallel lists: entry i of image_paths,
# bbox and classes all describe the same image.
image_paths = []
bbox = []
classes = []
for txt_file in tqdm(txt_files):
    # Use a dedicated name for the per-image ids: unpacking into `class_ids`
    # would overwrite the module-level list of class names.
    image_path, boxes, image_class_ids = parse_annotation(txt_file)
    image_paths.append(image_path)
    bbox.append(boxes)
    classes.append(image_class_ids)
print(f"Number of image paths: {len(image_paths):,}")

  0%|          | 0/29800 [00:00<?, ?it/s]

Number of image paths: 29,800


Here we are using tf.ragged.constant to create ragged tensors from the bbox and classes lists. A ragged tensor is a type of tensor that can handle varying lengths of data along one or more dimensions. This is useful when dealing with data that has variable-length sequences, such as text or time series data.

```python
classes = [
    [8, 8, 8, 8, 8],      # 5 classes
    [12, 14, 14, 14],     # 4 classes
    [1],                  # 1 class
    [7, 7],               # 2 classes
 ...]

bbox = [
    [[199.0, 19.0, 390.0, 401.0],
    [217.0, 15.0, 270.0, 157.0],
    [393.0, 18.0, 432.0, 162.0],
    [1.0, 15.0, 226.0, 276.0],
    [19.0, 95.0, 458.0, 443.0]],     #image 1 has 5 objects
    [[52.0, 117.0, 109.0, 177.0]],   #image 2 has 1 object
    [[88.0, 87.0, 235.0, 322.0],
    [113.0, 117.0, 218.0, 471.0]],   #image 3 has 2 objects
 ...]
```

In this case, the bbox and classes lists have different lengths for each image, depending on the number of objects in the image and the corresponding bounding boxes and classes. To handle this variability, ragged tensors are used instead of regular tensors.

Later, these ragged tensors are used to create a tf.data.Dataset using the from_tensor_slices method. This method creates a dataset from the input tensors by slicing them along the first dimension. By using ragged tensors, the dataset can handle varying lengths of data for each image and provide a flexible input pipeline for further processing.

In [34]:
# Convert the per-image Python lists into tensors; bbox and classes hold a
# different number of entries per image, hence tf.ragged.constant.
# NOTE(review): the three names are rebound from lists to tensors here, so
# re-running this cell without first re-running the parsing cell feeds tensors
# (not lists) back into tf.ragged.constant — re-run the parsing cell first.
bbox = tf.ragged.constant(bbox)
classes = tf.ragged.constant(classes)
image_paths = tf.ragged.constant(image_paths)

# One dataset element per image: (image path, class ids, boxes).
data = tf.data.Dataset.from_tensor_slices((image_paths, classes, bbox))


2023-11-20 20:47:19.429457: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-20 20:47:19.430048: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-11-20 20:47:19.430370: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

### Split Into Training and Validation Sets

In [37]:
# Determine the number of validation samples.
# `data` holds one element per entry of `txt_files` (the label files are .txt
# here; the `xml_files` name this cell used before is never defined in this
# notebook and raised a NameError).
num_val = int(len(txt_files) * SPLIT_RATIO)

# Split the dataset into train and validation sets: the first num_val elements
# go to validation, the rest to training.
# NOTE(review): `data` is not shuffled before this point, so the split follows
# the sorted file order — confirm that is intended.
val_data = data.take(num_val)
train_data = data.skip(num_val)