In [2]:
pip install pycocotools-windows


Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement pycocotools-windows (from versions: none)
ERROR: No matching distribution found for pycocotools-windows

[notice] A new release of pip is available: 24.0 -> 25.0
[notice] To update, run: C:\Users\HP\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [4]:
import os
import xml.etree.ElementTree as ET  # not used in this example since we use COCO API
import numpy as np
import tensorflow as tf
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense
from tensorflow.keras.models import Model
from pycocotools.coco import COCO

In [11]:
# Model input size and training hyper-parameters
IMG_HEIGHT, IMG_WIDTH = 224, 224      # Input image dimensions
BATCH_SIZE = 16
EPOCHS = 10

# Dataset paths: adjust DATASET_DIR only; every other path derives from it.
# The sub-paths are joined from relative components. Passing a second absolute
# path to os.path.join only works on Windows (where it discards the first
# argument) and produces a doubled path on other platforms.
DATASET_DIR = 'C:/xampp/htdocs/AI-ML_Kit/Datasets/VOC_Yolo'
TRAIN_DIR = os.path.join(DATASET_DIR, 'train')   # (Not used in this demo)
VALID_DIR = os.path.join(DATASET_DIR, 'valid')
ANNOT_FILE = os.path.join(VALID_DIR, '_annotations.coco.json')

In [12]:
# Load the COCO annotations for the validation split
# ========================
# The annotations JSON is read from ANNOT_FILE (located under VALID_DIR)
coco = COCO(ANNOT_FILE)
# IDs of every image listed in the annotation file
img_ids = coco.getImgIds()

# Map the COCO category IDs onto contiguous 0..N-1 class labels
cats = coco.loadCats(coco.getCatIds())
# Sorting the IDs makes the ID -> index assignment the same on every run
sorted_cat_ids = sorted(cat['id'] for cat in cats)
cat_id_to_idx = dict(zip(sorted_cat_ids, range(len(sorted_cat_ids))))
NUM_CLASSES = len(sorted_cat_ids)
print(f"Found {NUM_CLASSES} categories.")

loading annotations into memory...
Done (t=0.13s)
creating index...
index created!
Found 21 categories.


In [14]:
# Data Generator using COCO API
# ========================
def load_data_generator():
    """
    Yield one ``(image, (label, bbox))`` sample per usable image in ``img_ids``.

    Yields:
        image: float32 image of shape (IMG_HEIGHT, IMG_WIDTH, 3) with pixel
            values scaled to [0, 1].
        label: ``np.int32`` contiguous class index looked up in ``cat_id_to_idx``.
        bbox: float32 array ``[xmin, ymin, xmax, ymax]`` normalized by the
            width/height recorded in the annotation file and clipped to [0, 1].

    For simplicity only the first annotation of each image is used.
    Images are skipped (before any file is read) when they have no
    annotations, when the annotation's category is not in ``cat_id_to_idx``,
    or when the recorded image size is not positive.
    """
    for img_id in img_ids:
        # Resolve the annotation first so that skipped images are never read
        # from disk or decoded.
        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        if not anns:
            continue  # skip images with no annotations

        # Use the first annotation only for this demo
        ann = anns[0]

        # Convert category_id to contiguous label index
        label = cat_id_to_idx.get(ann['category_id'], -1)
        if label == -1:
            continue

        img_info = coco.loadImgs(img_id)[0]
        # Original dimensions are needed to normalize the bounding box
        orig_w, orig_h = img_info['width'], img_info['height']
        if orig_w <= 0 or orig_h <= 0:
            continue  # cannot normalize against a degenerate image size

        # COCO bounding box format is [x, y, width, height]
        xmin, ymin, box_width, box_height = ann['bbox']
        bbox_norm = np.array(
            [xmin / orig_w,
             ymin / orig_h,
             (xmin + box_width) / orig_w,
             (ymin + box_height) / orig_h],
            dtype=np.float32,
        )
        # The bbox head ends in a sigmoid, so targets outside [0, 1] (boxes that
        # spill over the image border) could never be matched; clip them.
        bbox_norm = np.clip(bbox_norm, 0.0, 1.0)

        # Read and preprocess the image
        img_path = os.path.join(VALID_DIR, img_info['file_name'])
        img_raw = tf.io.read_file(img_path)
        img = tf.image.decode_jpeg(img_raw, channels=3)
        img = tf.image.resize(img, (IMG_HEIGHT, IMG_WIDTH))
        # NOTE(review): Keras documents MobileNetV2's preprocess_input as scaling
        # to [-1, 1]; [0, 1] is kept here so any existing inference code that
        # divides by 255 stays consistent. Confirm which scaling is intended.
        img = img / 255.0  # Normalize pixel values to [0,1]

        yield img, (np.int32(label), bbox_norm)

# Element specs describing what the generator yields: (image, (label, bbox))
image_spec = tf.TensorSpec(shape=(IMG_HEIGHT, IMG_WIDTH, 3), dtype=tf.float32)
label_spec = tf.TensorSpec(shape=(), dtype=tf.int32)
bbox_spec = tf.TensorSpec(shape=(4,), dtype=tf.float32)
output_signature = (image_spec, (label_spec, bbox_spec))

In [15]:
# Input pipeline: generator samples -> shuffle -> batch -> prefetch
dataset = (
    tf.data.Dataset.from_generator(load_data_generator, output_signature=output_signature)
    .shuffle(1000)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)


In [16]:
# For demonstration, split dataset into training and validation sets (80/20)
SPLIT_SEED = 42  # fixed so the train/validation membership is reproducible

# Start from the raw per-sample generator stream instead of un-batching
# `dataset`: this keeps the cell re-runnable and guarantees that no upstream
# shuffle re-orders the samples between iterations.
samples = tf.data.Dataset.from_generator(load_data_generator, output_signature=output_signature)

# A generator-backed dataset has unknown cardinality, so count with one pass
total_samples = sum(1 for _ in samples)
train_count = int(0.8 * total_samples)
val_count = total_samples - train_count
if train_count == 0:
    raise ValueError(
        "Need at least 2 usable samples to build a train/validation split, "
        "got {}.".format(total_samples))

# Shuffle once, then split.
# reshuffle_each_iteration=False (plus a fixed seed) is essential: with the
# default of True the order changes on every pass, so take()/skip() would pick
# different, overlapping samples each epoch and validation data would leak
# into training.
dataset = samples.shuffle(total_samples, seed=SPLIT_SEED, reshuffle_each_iteration=False)
train_dataset = (
    dataset.take(train_count)
    .shuffle(min(1000, train_count))   # per-epoch reshuffle of training samples only
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)
val_dataset = dataset.skip(train_count).batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

print("Total samples: {}, Training: {}, Validation: {}".format(total_samples, train_count, val_count))

Total samples: 3302, Training: 2641, Validation: 661


In [17]:
# Pretrained MobileNetV2 backbone without its classifier top, kept frozen
base_model = MobileNetV2(
    weights='imagenet',
    include_top=False,
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
)
base_model.trainable = False

# Pool the backbone feature map into one feature vector per image
x = GlobalAveragePooling2D()(base_model.output)

# Two heads share the pooled features:
#   class_output: softmax probabilities over the NUM_CLASSES categories
#   bbox_output:  4 normalized box coordinates; sigmoid keeps them in [0,1]
class_head = Dense(NUM_CLASSES, activation='softmax', name='class_output')
bbox_head = Dense(4, activation='sigmoid', name='bbox_output')
class_output = class_head(x)
bbox_output = bbox_head(x)

model = Model(inputs=base_model.input, outputs=[class_output, bbox_output])
model.summary()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [18]:
# Loss and metric for each head, keyed by the output layer's name
head_losses = {
    'class_output': 'sparse_categorical_crossentropy',
    'bbox_output': 'mse',
}
head_metrics = {
    'class_output': 'accuracy',
    'bbox_output': 'mae',
}
model.compile(optimizer='adam', loss=head_losses, metrics=head_metrics)

# ========================
# Train the Model
# ========================
history = model.fit(
    train_dataset,
    epochs=EPOCHS,
    validation_data=val_dataset,
)

Epoch 1/10
    166/Unknown [1m111s[0m 415ms/step - bbox_output_loss: 0.0756 - bbox_output_mae: 0.2096 - class_output_accuracy: 0.4936 - class_output_loss: 1.8935 - loss: 1.9691



[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m161s[0m 718ms/step - bbox_output_loss: 0.0755 - bbox_output_mae: 0.2094 - class_output_accuracy: 0.4943 - class_output_loss: 1.8905 - loss: 1.9661 - val_bbox_output_loss: 0.0431 - val_bbox_output_mae: 0.1601 - val_class_output_accuracy: 0.7443 - val_class_output_loss: 0.8525 - val_loss: 0.9266
Epoch 2/10
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m110s[0m 526ms/step - bbox_output_loss: 0.0415 - bbox_output_mae: 0.1569 - class_output_accuracy: 0.7535 - class_output_loss: 0.8207 - loss: 0.8622 - val_bbox_output_loss: 0.0352 - val_bbox_output_mae: 0.1463 - val_class_output_accuracy: 0.7882 - val_class_output_loss: 0.6282 - val_loss: 0.6805
Epoch 3/10
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m104s[0m 530ms/step - bbox_output_loss: 0.0379 - bbox_output_mae: 0.1495 - class_output_accuracy: 0.8097 - class_output_loss: 0.6479 - loss: 0.6859 - val_bbox_output_loss: 0.0315 - val_bbox_output_mae: 0.1385 

In [19]:
# Persist the trained Keras model to disk
keras_model_filename = 'object_detection_model.h5'
model.save(keras_model_filename)
print(f"Model saved as {keras_model_filename}")



Model saved as object_detection_model.h5


In [20]:
tflite_filename = "object_detection_model.tflite"

print("Converting model to TensorFlow Lite format...")
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# Optimize.DEFAULT turns on the converter's optimizations such as quantization
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_model = converter.convert()

# Write the converted model bytes to disk
with open(tflite_filename, "wb") as tflite_file:
    tflite_file.write(tflite_model)
print("Model converted and saved as {}".format(tflite_filename))

Converting model to TensorFlow Lite format...
INFO:tensorflow:Assets written to: C:\Users\HP\AppData\Local\Temp\tmpfydvyfoc\assets


INFO:tensorflow:Assets written to: C:\Users\HP\AppData\Local\Temp\tmpfydvyfoc\assets


Saved artifact at 'C:\Users\HP\AppData\Local\Temp\tmpfydvyfoc'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32, name='keras_tensor')
Output Type:
  List[TensorSpec(shape=(None, 21), dtype=tf.float32, name=None), TensorSpec(shape=(None, 4), dtype=tf.float32, name=None)]
Captures:
  2294779372240: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2294779373776: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2294779373584: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2294779372816: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2294779371664: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2294779373968: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2294779372624: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2294779372432: TensorSpec(shape=(), dtype=tf.resource, name=None)
  2294779373200: TensorSpec(shape=(), dtype=tf.resource, name=None)
  22947793745