In [None]:
### Import packages ###

import os
import shutil
import datetime

import tensorflow as tf
from tensorflow import keras
from keras import optimizers
import tensorflow_model_optimization as tfmot

import keras_cv
from keras_cv import utils

from IPython import display
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt

import mlflow
import mlflow.keras
import yaml
import cv2



# TensorBoard notebook extension
%load_ext tensorboard

print("TF version:", tf.__version__)
print("GPU is", "available" if tf.config.list_physical_devices('GPU') else "NOT AVAILABLE")

In [None]:
def parse_label_file(label_file):
    with open(label_file, 'r') as f:
        lines = f.readlines()
    labels = []
    for line in lines:
        if line.strip():  # Ignore empty lines
            class_id, x_center, y_center, width, height = map(float, line.strip().split())
            labels.append([class_id, x_center, y_center, width, height])
    return np.array(labels)

In [None]:
def load_dataset(images_dir, labels_dir):
    images = []
    bboxes = []
    class_ids = []
    for img_file in os.listdir(images_dir):
        if img_file.endswith('.jpg'):
            img_path = os.path.join(images_dir, img_file)
            label_path = os.path.join(labels_dir, img_file.replace('.jpg', '.txt'))
            
            image = cv2.imread(img_path)
            image = cv2.resize(image, (INPUT_SIZE, INPUT_SIZE))
            image = image / 255.0
            images.append(image)
            
            labels = parse_label_file(label_path)
            if labels.size > 0:
                bbox = labels[:, 1:]
                class_id = labels[:, 0]
            else:
                bbox = np.zeros((0, 4))
                class_id = np.zeros((0,))
            
            bboxes.append(bbox)
            class_ids.append(class_id)
    
    images = np.array(images)
    bboxes = [np.array(b) for b in bboxes]
    class_ids = [np.array(c) for c in class_ids]
    
    return images, bboxes, class_ids

In [None]:
train_images, train_bboxes, train_class_ids = load_dataset('../yolo_data_v2/train/images', '../yolo_data_v2/train/labels')
val_images, val_bboxes, val_class_ids = load_dataset('../yolo_data_v2/val/images', '../yolo_data_v2/val/labels')

In [None]:
def create_tf_dataset(images, bboxes, class_ids):
    def generator():
        for img, bbox, cls_id in zip(images, bboxes, class_ids):
            yield img, bbox, cls_id
    
    output_signature = (
        tf.TensorSpec(shape=(INPUT_SIZE, INPUT_SIZE, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None, 4), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
    )
    
    dataset = tf.data.Dataset.from_generator(generator, output_signature=output_signature)
    dataset = dataset.padded_batch(
        BATCH_SIZE,
        padded_shapes=(
            tf.TensorShape([INPUT_SIZE, INPUT_SIZE, 3]),
            tf.TensorShape([None, 4]),
            tf.TensorShape([None])
        ),
        padding_values=(
            tf.constant(0, dtype=tf.float32),
            tf.constant(0, dtype=tf.float32),
            tf.constant(-1, dtype=tf.float32)  # Use -1 for class_id padding
        )
    )
    dataset = dataset.prefetch(tf.data.AUTOTUNE)
    return dataset

In [None]:
train_ds = create_tf_dataset(train_images, train_bboxes, train_class_ids)
val_ds = create_tf_dataset(val_images, val_bboxes, val_class_ids)

In [None]:
import random
import matplotlib.pyplot as plt


In [None]:
def visualize_dataset(dataset, num_images=5):
    for images, bboxes, class_ids in dataset.take(1):
        images = images.numpy()
        bboxes = bboxes.numpy()
        class_ids = class_ids.numpy()

        for i in range(num_images):
            image = images[i]
            bbox = bboxes[i]
            class_id = class_ids[i]

            fig, ax = plt.subplots(1)
            ax.imshow(image)

            for j in range(len(bbox)):
                if class_id[j] == -1:  # Ignore padding
                    continue
                x_center, y_center, width, height = bbox[j]
                x_center *= INPUT_SIZE
                y_center *= INPUT_SIZE
                width *= INPUT_SIZE
                height *= INPUT_SIZE

                x1 = x_center - width / 2
                y1 = y_center - height / 2
                rect = plt.Rectangle((x1, y1), width, height, linewidth=1, edgecolor='r', facecolor='none')
                ax.add_patch(rect)
                plt.text(x1, y1, f'Class {int(class_id[j])}', color='white', bbox=dict(facecolor='red', alpha=0.5))

            plt.show()

In [None]:
visualize_dataset(train_ds, num_images=10)

In [24]:
from tensorflow.keras import layers, models

def build_model(input_size, num_classes):
    inputs = tf.keras.Input(shape=(input_size, input_size, 3))
    
    # Feature extractor
    x = layers.Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    x = layers.MaxPooling2D((2, 2))(x)
    
    x = layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    
    x = layers.Conv2D(256, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    
    x = layers.Conv2D(512, (3, 3), activation='relu', padding='same')(x)
    x = layers.MaxPooling2D((2, 2))(x)
    
    # Flatten and add dense layers
    x = layers.Flatten()(x)
    x = layers.Dense(1024, activation='relu')(x)
    
    # Bounding box predictions (4 coordinates: x, y, width, height)
    bbox_output = layers.Dense(4, name='bbox_output')(x)
    
    # Class predictions
    class_output = layers.Dense(num_classes, activation='sigmoid', name='class_output')(x)
    
    model = models.Model(inputs=inputs, outputs=[bbox_output, class_output])
    return model

model = build_model(INPUT_SIZE, NUM_CLASSES)
model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 250, 250, 3  0           []                               
                                )]                                                                
                                                                                                  
 conv2d (Conv2D)                (None, 250, 250, 32  896         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 max_pooling2d (MaxPooling2D)   (None, 125, 125, 32  0           ['conv2d[0][0]']                 
                                )                                                             

In [26]:
model.compile(
    optimizer='adam',
    loss={
        'bbox_output': 'mean_squared_error',
        'class_output': 'binary_crossentropy'  # Use binary_crossentropy for multi-label classification
    },
    metrics={
        'bbox_output': 'mean_squared_error',
        'class_output': 'accuracy'
    }
)



In [27]:
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
)

Epoch 1/50


ValueError: in user code:

    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/engine/training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/engine/training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/engine/training.py", line 1051, in train_step
        loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/engine/training.py", line 1109, in compute_loss
        return self.compiled_loss(
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/engine/compile_utils.py", line 265, in __call__
        loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/losses.py", line 142, in __call__
        losses = call_fn(y_true, y_pred)
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/losses.py", line 268, in call  **
        return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/losses.py", line 2156, in binary_crossentropy
        backend.binary_crossentropy(y_true, y_pred, from_logits=from_logits),
    File "/home/anish/Work/Personal/Thesis/stacc_repo/Turtlebot-ObjectDetection-SW/.venv/lib/python3.9/site-packages/keras/backend.py", line 5707, in binary_crossentropy
        return tf.nn.sigmoid_cross_entropy_with_logits(

    ValueError: `logits` and `labels` must have the same shape, received ((None, 3) vs (None, None, 4)).
