# Import Libraries

In [31]:
import os
import numpy as np
import tensorflow as tf
import xml.etree.ElementTree as ET
import matplotlib.pyplot as plt
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.layers import Input, Dense, Reshape, GlobalAveragePooling2D, Conv2D, UpSampling2D, Concatenate
from tensorflow.keras.models import Model
from tensorflow.keras.losses import binary_crossentropy

# Configuration

In [15]:
# Side length, in pixels, that every image is resized to; also the model's input size.
IMG_SIZE = 320
# Number of samples per batch in the tf.data pipelines.
BATCH_SIZE = 8
# Number of epochs passed to model.fit.
EPOCHS = 50
# Fixed number of box/label slots per image: longer lists are truncated and
# shorter ones are zero-padded, and the detector heads predict this many slots.
MAX_BOXES = 10

# Directory paths

In [4]:
# Dataset root, relative to the notebook's working directory; each split
# lives in its own sub-folder.
base_dir = "Object-detection-dataset"
train_dir, valid_dir = (os.path.join(base_dir, split) for split in ("train", "valid"))

# Extract Class Labels

In [32]:
def extract_labels(directory):
    """Return the sorted, de-duplicated class names found in a directory.

    Every file in ``directory`` whose name ends with ``.xml`` is parsed, and
    the text of the ``<name>`` child of each ``<object>`` element directly
    under the document root is collected.

    Args:
        directory: Path of the folder holding the annotation files.

    Returns:
        A list of distinct class names in ascending order.
    """
    xml_files = (entry for entry in os.listdir(directory) if entry.endswith(".xml"))
    found = {
        node.find("name").text
        for entry in xml_files
        for node in ET.parse(os.path.join(directory, entry)).findall("object")
    }
    return sorted(found)

# Class vocabulary comes from the training annotations only; each name's
# integer id is its position in the sorted list.
label_list = extract_labels(train_dir)
NUM_CLASSES = len(label_list)
label_map = dict(zip(label_list, range(NUM_CLASSES)))
print("Detected Classes:", label_map)

Detected Classes: {'aeroplane': 0, 'apple': 1, 'backpack': 2, 'banana': 3, 'baseball bat': 4, 'baseball glove': 5, 'bear': 6, 'bed': 7, 'bench': 8, 'bicycle': 9, 'bird': 10, 'boat': 11, 'book': 12, 'bottle': 13, 'bowl': 14, 'broccoli': 15, 'bus': 16, 'cake': 17, 'car': 18, 'carrot': 19, 'cat': 20, 'cell phone': 21, 'chair': 22, 'clock': 23, 'cow': 24, 'cup': 25, 'diningtable': 26, 'dog': 27, 'donut': 28, 'elephant': 29, 'fire hydrant': 30, 'fork': 31, 'frisbee': 32, 'giraffe': 33, 'hair drier': 34, 'handbag': 35, 'horse': 36, 'hot dog': 37, 'keyboard': 38, 'kite': 39, 'knife': 40, 'laptop': 41, 'microwave': 42, 'motorbike': 43, 'mouse': 44, 'orange': 45, 'oven': 46, 'parking meter': 47, 'person': 48, 'pizza': 49, 'pottedplant': 50, 'refrigerator': 51, 'remote': 52, 'sandwich': 53, 'scissors': 54, 'sheep': 55, 'sink': 56, 'skateboard': 57, 'skis': 58, 'snowboard': 59, 'sofa': 60, 'spoon': 61, 'sports ball': 62, 'stop sign': 63, 'suitcase': 64, 'surfboard': 65, 'teddy bear': 66, 'tennis 

# Parse Pascal VOC Annotations

In [33]:
def parse_annotation(xml_path):
    """Read the boxes and class ids from one annotation XML file.

    For each ``<object>`` under the root, the ``xmin``/``ymin``/``xmax``/``ymax``
    children of its ``<bndbox>`` are converted to floats and its ``<name>`` is
    translated to an integer id through the module-level ``label_map``.

    Args:
        xml_path: Path of the annotation file.

    Returns:
        A ``(boxes, classes)`` pair of parallel lists: ``boxes`` holds
        ``[xmin, ymin, xmax, ymax]`` float lists and ``classes`` the matching
        integer ids.

    Raises:
        KeyError: If an object's name is not a key of ``label_map``.
    """
    annotation = ET.parse(xml_path).getroot()
    boxes = []
    classes = []
    for node in annotation.findall('object'):
        class_name = node.find('name').text
        bndbox = node.find('bndbox')
        corners = [float(bndbox.find(tag).text) for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        boxes.append(corners)
        classes.append(label_map[class_name])
    return boxes, classes

# Load Dataset

In [34]:
def load_dataset(image_dir):
    """Collect image paths with their parsed boxes and class ids.

    An image is included only when a same-stem ``.xml`` annotation file sits
    next to it; images without one are skipped.

    Args:
        image_dir: Folder containing the images and their annotation files.

    Returns:
        A ``(image_paths, box_list, class_list)`` triple of parallel lists,
        where ``box_list[i]`` and ``class_list[i]`` are what
        ``parse_annotation`` returned for ``image_paths[i]``.
    """
    image_exts = (".jpg", ".jpeg", ".png")
    image_paths, box_list, class_list = [], [], []
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order reproducible across runs and machines.
    for fname in sorted(os.listdir(image_dir)):
        stem, ext = os.path.splitext(fname)
        # Case-insensitive match so files such as "IMG_01.JPG" are not dropped.
        if ext.lower() not in image_exts:
            continue
        xml = os.path.join(image_dir, stem + ".xml")
        if not os.path.exists(xml):
            continue
        boxes, labels = parse_annotation(xml)
        image_paths.append(os.path.join(image_dir, fname))
        box_list.append(boxes)
        class_list.append(labels)
    return image_paths, box_list, class_list

# Load each split once; every call returns three parallel lists
# (image paths, per-image boxes, per-image class ids).
train_imgs, train_boxes, train_labels = load_dataset(train_dir)
val_imgs, val_boxes, val_labels = load_dataset(valid_dir)

# Pad Boxes and Labels

In [35]:
def pad_boxes_list(boxes_list, max_boxes=None):
    """Turn per-image box lists into one fixed-size float32 array.

    Each image's boxes are truncated to the first ``max_boxes`` entries or
    padded with all-zero rows so that every image has exactly ``max_boxes``
    rows. Entries that are empty or not two-dimensional become all zeros.

    Args:
        boxes_list: Sequence with one list of ``[xmin, ymin, xmax, ymax]``
            boxes per image.
        max_boxes: Number of box slots per image. Defaults to the
            module-level ``MAX_BOXES``.

    Returns:
        A float32 array of shape ``(len(boxes_list), max_boxes, 4)``; an empty
        ``boxes_list`` yields shape ``(0, max_boxes, 4)``.
    """
    if max_boxes is None:
        max_boxes = MAX_BOXES

    padded = []
    for boxes in boxes_list:
        arr = np.asarray(boxes, dtype=np.float32)
        if arr.ndim != 2 or arr.shape[0] == 0:
            # No usable boxes for this image: fill every slot with zeros.
            arr = np.zeros((max_boxes, 4), dtype=np.float32)
        else:
            arr = arr[:max_boxes]
            arr = np.pad(arr, [(0, max_boxes - len(arr)), (0, 0)], mode='constant')
        padded.append(arr)

    if not padded:
        # Keep the rank stable so downstream code can rely on a 3-D array.
        return np.zeros((0, max_boxes, 4), dtype=np.float32)
    return np.stack(padded)

def pad_labels_list(labels_list, num_classes, max_boxes=None):
    """Turn per-image class-id lists into one fixed-size one-hot array.

    Each image's ids are truncated to the first ``max_boxes`` entries; slot
    ``j`` of image ``i`` receives a one-hot row for its class id, and slots
    beyond the image's labels stay all zero.

    Args:
        labels_list: Sequence with one list of integer class ids per image.
        num_classes: Width of each one-hot row.
        max_boxes: Number of label slots per image. Defaults to the
            module-level ``MAX_BOXES``.

    Returns:
        A float32 array of shape ``(len(labels_list), max_boxes, num_classes)``.
        The dtype is float32 for every input, including images with no labels
        and an empty ``labels_list``.

    Raises:
        IndexError: If a class id is outside ``[-num_classes, num_classes)``.
    """
    if max_boxes is None:
        max_boxes = MAX_BOXES

    # Pre-allocating fixes both shape and dtype up front, so padding slots and
    # label-free images need no special handling.
    one_hots = np.zeros((len(labels_list), max_boxes, num_classes), dtype=np.float32)
    for i, labels in enumerate(labels_list):
        kept = np.asarray(labels[:max_boxes], dtype=np.int64)
        one_hots[i, np.arange(len(kept)), kept] = 1.0
    return one_hots

# Fixed-size targets for the training split: boxes padded/truncated to
# MAX_BOXES rows, class ids expanded to one-hot rows of width NUM_CLASSES.
padded_train_boxes = pad_boxes_list(train_boxes)
padded_train_labels = pad_labels_list(train_labels, NUM_CLASSES)

# Same treatment for the validation split.
padded_val_boxes = pad_boxes_list(val_boxes)
padded_val_labels = pad_labels_list(val_labels, NUM_CLASSES)

# Create tf.data.Dataset

In [36]:
def preprocess(img_path, boxes, labels):
    """Load one image and scale its boxes to the [0, 1] range.

    Args:
        img_path: Scalar string tensor with the path of the image file.
        boxes: Box tensor whose last axis is ``[xmin, ymin, xmax, ymax]`` in
            pixels of the source image.
        labels: Label tensor for the image; passed through unchanged.

    Returns:
        ``(image, (boxes, labels))`` where ``image`` is a float32 tensor of
        shape ``(IMG_SIZE, IMG_SIZE, 3)`` with values in ``[0, 255]`` and
        ``boxes`` is divided by the source image's width and height.
    """
    image = tf.io.read_file(img_path)
    image = tf.image.decode_jpeg(image, channels=3)

    # Box coordinates are in source-image pixels, so read the real width and
    # height before resizing instead of assuming every image is 640x480.
    src_shape = tf.cast(tf.shape(image), tf.float32)
    src_h, src_w = src_shape[0], src_shape[1]

    image = tf.image.resize(image, (IMG_SIZE, IMG_SIZE))
    # Keep pixel values in [0, 255]: the Keras EfficientNet backbone contains
    # its own Rescaling/Normalization layers, so dividing by 255 here would
    # scale the input twice.
    image = tf.cast(image, tf.float32)

    scale = tf.stack([src_w, src_h, src_w, src_h])
    boxes = tf.cast(boxes, tf.float32) / scale  # normalize
    return image, (boxes, labels)

# Training pipeline. Shuffling happens before `map`, so the shuffle buffer only
# holds file paths and small target arrays rather than decoded images; a buffer
# as large as the dataset gives a full reshuffle every epoch.
train_ds = tf.data.Dataset.from_tensor_slices((train_imgs, padded_train_boxes, padded_train_labels))
train_ds = (
    train_ds
    .shuffle(max(len(train_imgs), 1), reshuffle_each_iteration=True)
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation pipeline: no shuffling, so evaluation order stays fixed.
val_ds = tf.data.Dataset.from_tensor_slices((val_imgs, padded_val_boxes, padded_val_labels))
val_ds = (
    val_ds
    .map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Build EfficientNet-based Detector

In [37]:
def build_detector():
    """Build the two-headed detector on an ImageNet-initialised EfficientNetB0.

    The backbone output is reduced to 128 channels and then merged top-down
    with two intermediate backbone activations (upsample, concatenate, 3x3
    convolution at each step). The merged map is globally average-pooled and
    fed to two sigmoid Dense heads, which are reshaped to one row per box slot.

    Returns:
        A Keras ``Model`` taking ``(IMG_SIZE, IMG_SIZE, 3)`` images and
        producing ``[boxes, classes]`` with shapes ``(MAX_BOXES, 4)`` and
        ``(MAX_BOXES, NUM_CLASSES)`` per sample.
    """
    backbone = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(IMG_SIZE, IMG_SIZE, 3))
    backbone.trainable = True

    def merge_with_skip(coarse, skip):
        # One top-down step: upsample the coarser map, join it with the skip
        # connection, then mix the channels with a 3x3 convolution.
        upsampled = UpSampling2D()(coarse)
        joined = Concatenate()([upsampled, skip])
        return Conv2D(128, 3, padding="same", activation="relu")(joined)

    # Intermediate backbone activations used as skip connections.
    skip_shallow = backbone.get_layer("block4a_expand_activation").output
    skip_deep = backbone.get_layer("block6a_expand_activation").output

    top = Conv2D(128, 1, padding="same")(backbone.output)
    merged_deep = merge_with_skip(top, skip_deep)
    merged_shallow = merge_with_skip(merged_deep, skip_shallow)

    pooled = GlobalAveragePooling2D()(merged_shallow)
    # NOTE(review): name="bbox"/"class" are attached to the Dense layers; the
    # model's outputs are the Reshape layers below, which keep auto-generated
    # names. Confirm before addressing outputs by name.
    box_flat = Dense(MAX_BOXES * 4, activation="sigmoid", name="bbox")(pooled)
    class_flat = Dense(MAX_BOXES * NUM_CLASSES, activation="sigmoid", name="class")(pooled)
    box_head = Reshape((MAX_BOXES, 4))(box_flat)
    class_head = Reshape((MAX_BOXES, NUM_CLASSES))(class_flat)

    return Model(inputs=backbone.input, outputs=[box_head, class_head])

# Instantiate the detector and print its layer-by-layer summary.
model = build_detector()
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_2 (InputLayer)           [(None, 320, 320, 3  0           []                               
                                )]                                                                
                                                                                                  
 rescaling_1 (Rescaling)        (None, 320, 320, 3)  0           ['input_2[0][0]']                
                                                                                                  
 normalization_1 (Normalization  (None, 320, 320, 3)  7          ['rescaling_1[0][0]']            
 )                                                                                                
                                                                                            

# Loss Functions

In [38]:
def ciou_loss(y_true, y_pred):
    """Return the mean squared difference between targets and predictions.

    NOTE(review): despite the name, no IoU term is computed here; the body is
    a plain mean-squared-error over all elements.

    Args:
        y_true: Target tensor.
        y_pred: Predicted tensor, broadcast-compatible with ``y_true``.

    Returns:
        A scalar tensor.
    """
    residual = y_true - y_pred
    return tf.reduce_mean(tf.square(residual))

def focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25):
    """Element-wise binary focal loss, averaged over all elements.

    The focal factor ``(1 - p_t) ** gamma`` is applied to each class
    probability on its own. Applying it to a cross-entropy already averaged
    over the class axis would let the many easy negatives hide the few hard
    entries the focal term is meant to emphasise.

    Args:
        y_true: Binary (one-hot / multi-hot) targets.
        y_pred: Predicted probabilities in ``[0, 1]``, same shape as ``y_true``.
        gamma: Focusing exponent; larger values down-weight easy examples more.
        alpha: Weight given to positive targets; negatives get ``1 - alpha``.

    Returns:
        A scalar tensor.
    """
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Clip so that log() never sees exactly 0 or 1.
    eps = tf.keras.backend.epsilon()
    y_pred = tf.clip_by_value(y_pred, eps, 1.0 - eps)

    bce = -(y_true * tf.math.log(y_pred) + (1.0 - y_true) * tf.math.log(1.0 - y_pred))
    # Probability the model assigned to the correct outcome of each element.
    p_t = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)
    alpha_t = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)
    return tf.reduce_mean(alpha_t * tf.pow(1.0 - p_t, gamma) * bce)

def total_loss(y_true, y_pred):
    """Sum the box loss on element 0 and the class loss on element 1.

    ``y_true`` and ``y_pred`` are each indexed with ``[0]`` (passed to
    ``ciou_loss``) and ``[1]`` (passed to ``focal_loss``), so both are expected
    to be ``(boxes, classes)`` pairs.

    NOTE(review): if this function is handed to Keras as the single ``loss``
    of a multi-output model, Keras calls it once per output with plain
    tensors; ``[0]`` and ``[1]`` would then pick batch samples rather than
    outputs. Confirm how it is invoked.

    Returns:
        A scalar tensor.
    """
    box_term = ciou_loss(y_true[0], y_pred[0])
    class_term = focal_loss(y_true[1], y_pred[1])
    return box_term + class_term

# Training

In [None]:
# The model has two outputs (boxes, classes). Keras applies a single loss
# function to each output separately, so a combined loss that indexes
# y_true[0] / y_true[1] would be slicing batch samples, not outputs. Passing
# one loss per output, in output order, lets Keras pair each head with its own
# target and minimise the sum of the two.
model.compile(
    optimizer=tf.keras.optimizers.Adam(1e-4),
    loss=[ciou_loss, focal_loss],
)
history = model.fit(train_ds, validation_data=val_ds, epochs=EPOCHS)

# Plot Loss

In [None]:
# Draw on an explicit Axes rather than the implicit pyplot state, and show the
# validation curve next to the training one so over-fitting is visible.
fig, ax = plt.subplots()
ax.plot(history.history['loss'], label='Loss')
if 'val_loss' in history.history:
    ax.plot(history.history['val_loss'], label='Validation Loss')
ax.set_title("Training Loss")
ax.set_xlabel("Epochs")
ax.set_ylabel("Loss")
ax.legend()
ax.grid()
plt.show()

# Export TFLite (float16)

In [None]:
# Save the trained model under "efficientnet_detector"; the converter below
# loads that same path as a SavedModel.
model.save("efficientnet_detector")
converter = tf.lite.TFLiteConverter.from_saved_model("efficientnet_detector")
# Default optimizations with float16 as the only supported type: this is the
# converter configuration for float16 post-training quantization.
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
# convert() returns the serialized model, which is written out as-is.
tflite_model = converter.convert()
with open("detector_float16.tflite", "wb") as f:
    f.write(tflite_model)
print("\u2705 Exported float16 TFLite model for Jetson Nano.")