# Importing Libraries

In [23]:
import os
import re
import shutil
import sys
import tarfile
import urllib.request
import xml.etree.ElementTree as ET

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from absl import logging

# Setting Up for TensorFlow 2.0

In [2]:
# This notebook targets the TF2 Object Detection API, so stop early on TF1.
assert tf.__version__.startswith('2')

# Keep cell output readable: only errors from absl and TensorFlow get through.
logging.set_verbosity(logging.ERROR)
tf.get_logger().setLevel('ERROR')

# Dataset Preprocessing and Augmentation

### Directory paths

In [3]:
# Dataset root; the training and validation splits live directly beneath it.
base_dir = 'Object-detection-dataset'
train_dir = f'{base_dir}/train'
valid_dir = f'{base_dir}/valid'

### Extracting Label Map

In [4]:
# Extract labels from XML files
def extract_labels(directory):
    """Collect the distinct object class names used by the XML annotations in a folder.

    Args:
        directory: Folder to scan. Only entries whose name ends in ``.xml``
            are parsed; every other entry is ignored.

    Returns:
        A set holding the text of every ``<object>/<name>`` element found.
        ``<object>`` entries whose ``<name>`` is missing, empty or blank are
        skipped instead of aborting the scan or polluting the set with
        ``None``.

    Raises:
        xml.etree.ElementTree.ParseError: If an ``.xml`` file is malformed.
    """
    labels = set()
    for filename in os.listdir(directory):
        if not filename.endswith(".xml"):
            continue
        root = ET.parse(os.path.join(directory, filename)).getroot()
        for obj in root.findall('object'):
            # findtext gives None when <name> is absent and '' when it is
            # empty; neither is a usable class label.
            name = obj.findtext('name')
            if name and name.strip():
                labels.add(name)
    return labels

# Gather the class names seen in either split, in alphabetical order
train_labels = extract_labels(train_dir)
valid_labels = extract_labels(valid_dir)
all_labels = sorted(train_labels | valid_labels)

# Assign consecutive ids starting at 1; alphabetical position decides the id
label_map = dict(enumerate(all_labels, start=1))

# Show what was found, one class per line
print("Labels used in your annotations:")
for class_name in all_labels:
    print(class_name)

Labels used in your annotations:
aeroplane
apple
backpack
banana
baseball bat
baseball glove
bear
bed
bench
bicycle
bird
boat
book
bottle
bowl
broccoli
bus
cake
car
carrot
cat
cell phone
chair
clock
cow
cup
diningtable
dog
donut
elephant
fire hydrant
fork
frisbee
giraffe
hair drier
handbag
horse
hot dog
keyboard
kite
knife
laptop
microwave
motorbike
mouse
orange
oven
parking meter
person
pizza
pottedplant
refrigerator
remote
sandwich
scissors
sheep
sink
skateboard
skis
snowboard
sofa
spoon
sports ball
stop sign
suitcase
surfboard
teddy bear
tennis racket
tie
toaster
toilet
toothbrush
traffic light
train
truck
tvmonitor
umbrella
vase
wine glass
zebra


### Data Augmentation

In [5]:
def augment_image(image, bbox):
    """Apply random brightness/contrast jitter to an image, keeping its geometry.

    No geometric transform (such as a horizontal flip) is applied here:
    ``bbox`` is handed back untouched and the caller derives the augmented
    annotation from the original boxes, so moving pixels around would leave
    the boxes pointing at the wrong place.

    Args:
        image: Image tensor accepted by ``tf.image.random_brightness`` and
            ``tf.image.random_contrast``.
        bbox: Bounding-box data associated with ``image``; never inspected.

    Returns:
        A ``(image, bbox)`` tuple with the jittered image and the unchanged
        ``bbox`` argument.
    """
    image = tf.image.random_brightness(image, max_delta=0.1)
    image = tf.image.random_contrast(image, 0.9, 1.1)
    return image, bbox

### Apply and Save Augmented Images

In [13]:
aug_dir = os.path.join(base_dir, 'train_augmented')
os.makedirs(aug_dir, exist_ok=True)

# (height, width) every augmented image is resized to.
AUG_IMAGE_SIZE = (320, 320)


def _rescale_box(bndbox, x_scale, y_scale, max_x, max_y):
    """Scale the corner fields of one <bndbox> element in place, clamped to the image."""
    corners = (('xmin', x_scale, max_x), ('xmax', x_scale, max_x),
               ('ymin', y_scale, max_y), ('ymax', y_scale, max_y))
    for tag, scale, limit in corners:
        node = bndbox.find(tag)
        if node is None or not node.text:
            continue
        scaled = int(round(float(node.text) * scale))
        node.text = str(min(max(scaled, 0), limit))


def _write_augmented_annotation(src_xml, dst_xml, image_name, image_size):
    """Write a copy of an annotation file that describes the augmented image.

    The copy gets ``<filename>`` set to ``image_name``. When the source has a
    usable ``<size>`` element, ``<size>`` is set to ``image_size`` and every
    ``<bndbox>`` is rescaled by the same factors as the image. Without a
    usable ``<size>`` the boxes are left as they were.

    Args:
        src_xml: Path of the original annotation file.
        dst_xml: Path the adjusted annotation is written to.
        image_name: File name of the augmented image.
        image_size: ``(height, width)`` of the augmented image.
    """
    new_height, new_width = image_size
    tree = ET.parse(src_xml)
    root = tree.getroot()

    filename_node = root.find('filename')
    if filename_node is not None:
        filename_node.text = image_name

    size_node = root.find('size')
    if size_node is not None:
        try:
            old_width = float(size_node.findtext('width'))
            old_height = float(size_node.findtext('height'))
        except (TypeError, ValueError):
            # Missing or non-numeric size: no scale factor can be computed.
            old_width = old_height = 0.0
        if old_width > 0 and old_height > 0:
            x_scale = new_width / old_width
            y_scale = new_height / old_height
            for bndbox in root.iter('bndbox'):
                _rescale_box(bndbox, x_scale, y_scale, new_width, new_height)
            size_node.find('width').text = str(new_width)
            size_node.find('height').text = str(new_height)

    tree.write(dst_xml, encoding='utf-8', xml_declaration=True)


# NOTE(review): the annotation written below is derived from the original
# boxes, so augment_image must not change image geometry (e.g. by flipping).
for file in os.listdir(train_dir):
    if not file.endswith(('.jpg', '.png')):
        continue

    base_name = os.path.splitext(file)[0]
    img_path = os.path.join(train_dir, file)
    xml_path = os.path.join(train_dir, base_name + '.xml')

    if not os.path.exists(xml_path):
        continue  # Skip if annotation missing

    # decode_image detects the container format, so .png inputs are handled
    # as well as .jpg; expand_animations=False keeps the result 3-D.
    image = tf.io.decode_image(tf.io.read_file(img_path), channels=3,
                               expand_animations=False)
    image = tf.image.resize(image, AUG_IMAGE_SIZE)
    image = tf.cast(image, tf.uint8)

    aug_image, _ = augment_image(image, None)
    aug_image = tf.image.encode_jpeg(tf.cast(aug_image, tf.uint8))

    # Save augmented image
    aug_img_name = base_name + '_aug.jpg'
    aug_img_path = os.path.join(aug_dir, aug_img_name)
    tf.io.write_file(aug_img_path, aug_image)

    # Save an annotation that names the augmented image and matches its size
    aug_xml_name = base_name + '_aug.xml'
    _write_augmented_annotation(xml_path, os.path.join(aug_dir, aug_xml_name),
                                aug_img_name, AUG_IMAGE_SIZE)

### Combine original and augmented training data

In [7]:
train_combined_dir = os.path.join(base_dir, 'train_combined')
os.makedirs(train_combined_dir, exist_ok=True)

# Mirror every entry of the original and the augmented folder into one place.
for source_dir in (train_dir, aug_dir):
    for entry in os.listdir(source_dir):
        origin = os.path.join(source_dir, entry)
        target = os.path.join(train_combined_dir, entry)
        shutil.copy(origin, target)

### Generate train.txt and val.txt file lists

In [25]:
train_list_path = 'annotations/train.txt'
val_list_path = 'annotations/val.txt'


def _write_example_list(xml_dir, list_path):
    """Write the annotation base names found in a folder to a text file, one per line.

    Args:
        xml_dir: Folder whose ``.xml`` entries are listed.
        list_path: Text file to (over)write; its parent folder is created
            when missing.

    Returns:
        The sorted list of base names (file names without ``.xml``) written.
    """
    names = sorted({os.path.splitext(entry)[0]
                    for entry in os.listdir(xml_dir)
                    if entry.endswith('.xml')})
    # The annotations folder is not guaranteed to exist when this cell runs.
    os.makedirs(os.path.dirname(list_path) or '.', exist_ok=True)
    with open(list_path, 'w') as list_file:
        # One name per line; without the newline every name fuses into a
        # single unusable token.
        list_file.writelines(f"{name}\n" for name in names)
    return names


# Generate train.txt from combined directory
train_files = _write_example_list(train_combined_dir, train_list_path)

# Generate val.txt from validation directory
val_files = _write_example_list(valid_dir, val_list_path)

### Convert XML annotations to TFRecord files

In [26]:
!python models/research/object_detection/dataset_tools/create_pascal_tf_record.py \
  --label_map_path=annotations/label_map.pbtxt \
  --data_dir=Object-detection-dataset \
  --output_path=annotations/train.record \
  --examples_path=annotations/train.txt

!python models/research/object_detection/dataset_tools/create_pascal_tf_record.py \
  --label_map_path=annotations/label_map.pbtxt \
  --data_dir=Object-detection-dataset \
  --output_path=annotations/val.record \
  --examples_path=annotations/val.txt
train_combined_dir = os.path.join(base_dir, 'train_combined')
os.makedirs(train_combined_dir, exist_ok=True)

for src_dir in [train_dir, aug_dir]:
    for f in os.listdir(src_dir):
        shutil.copy(os.path.join(src_dir, f), os.path.join(train_combined_dir, f))

2025-06-12 23:02:01.860115: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2025-06-12 23:02:01.860166: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Traceback (most recent call last):
  File "C:\Users\fahad\Music\models\research\object_detection\dataset_tools\create_pascal_tf_record.py", line 37, in <module>
    from object_detection.utils import dataset_util
ModuleNotFoundError: No module named 'object_detection'
2025-06-12 23:02:06.943899: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2025-06-12 23:02:06.943934: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Traceback (most recent call last):
  File "C:\Users\fahad

# Download Pre-trained Model from the TF2 Detection Model Zoo

In [19]:
MODEL_NAME = 'ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8'
MODEL_DATE = '20200711'
# HTTPS so the archive cannot be altered in transit.
MODEL_BASE = f'https://download.tensorflow.org/models/object_detection/tf2/{MODEL_DATE}'
MODEL_TAR = f'{MODEL_NAME}.tar.gz'

# Download once; re-running the cell reuses the archive already on disk.
if not os.path.exists(MODEL_TAR):
    # Fetch under a temporary name so an interrupted download is never
    # mistaken for a complete archive on the next run.
    partial_tar = MODEL_TAR + '.part'
    urllib.request.urlretrieve(f'{MODEL_BASE}/{MODEL_TAR}', partial_tar)
    os.replace(partial_tar, MODEL_TAR)

with tarfile.open(MODEL_TAR) as tar:
    if hasattr(tarfile, 'data_filter'):
        # Extraction filters refuse members that would escape the target
        # directory; available on Python versions that define data_filter.
        tar.extractall(filter='data')
    else:
        tar.extractall()

# Write Label Map File

In [20]:
label_map_path = 'annotations/label_map.pbtxt'
os.makedirs(os.path.dirname(label_map_path), exist_ok=True)

# Write one `item { id, name }` entry per class.
with open(label_map_path, 'w') as f:
    # The loop variable is `class_id`, not `id`: a top-level loop variable
    # stays defined after the cell runs, and `id` would replace the builtin
    # for the rest of the kernel session.
    for class_id, name in label_map.items():
        f.write(f"item {{\n  id: {class_id}\n  name: '{name}'\n}}\n")

# Modify pipeline.config for Custom Training

In [21]:
pipeline_path = f'{MODEL_NAME}/pipeline.config'
new_pipeline_path = f'{MODEL_NAME}/custom_pipeline.config'


def _set_reader_input_path(config_text, reader, record_path):
    """Point the first ``input_path`` after the named reader block at ``record_path``.

    The old value is replaced whatever it is, so placeholder values are
    handled as well as real paths.

    Args:
        config_text: Full text of the pipeline config.
        reader: Name of the reader block, e.g. ``'train_input_reader'``.
        record_path: Value to store in that reader's ``input_path``.

    Returns:
        The config text with at most one ``input_path`` value replaced.
    """
    pattern = re.escape(reader) + r'\s*:?\s*\{.*?input_path:\s*'
    return re.sub(
        '(' + pattern + ')"[^"]*"',
        lambda match: f'{match.group(1)}"{record_path}"',
        config_text,
        count=1,
        flags=re.S,  # the reader header and input_path sit on different lines
    )


with open(pipeline_path, 'r') as f:
    config = f.read()

config = re.sub('fine_tune_checkpoint: ".*?"', f'fine_tune_checkpoint: "{MODEL_NAME}/checkpoint/ckpt-0"', config)
# The checkpoint above is a full detection model, so restore it as one.
config = re.sub('fine_tune_checkpoint_type: ".*?"', 'fine_tune_checkpoint_type: "detection"', config)
config = re.sub('label_map_path: ".*?"', 'label_map_path: "annotations/label_map.pbtxt"', config)
config = _set_reader_input_path(config, 'train_input_reader', 'annotations/train.record')
config = _set_reader_input_path(config, 'eval_input_reader', 'annotations/val.record')
config = re.sub('num_classes: [0-9]+', f'num_classes: {len(label_map)}', config)
config = re.sub('batch_size: [0-9]+', 'batch_size: 8', config)
config = re.sub('num_steps: [0-9]+', 'num_steps: 1000', config)

# A leftover placeholder means one of the substitutions above did not match.
assert 'PATH_TO_BE_CONFIGURED' not in config, 'pipeline config still contains unconfigured paths'

with open(new_pipeline_path, 'w') as f:
    f.write(config)

# Training the Model Using TF2 Detection API

In [22]:
sys.path.append(os.path.abspath("models"))
sys.path.append(os.path.abspath("models/research"))

!python models/research/object_detection/model_main_tf2.py \
    --pipeline_config_path=ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/custom_pipeline.config \
    --model_dir=training/ \
    --alsologtostderr

2025-06-12 22:52:14.883321: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'cudart64_110.dll'; dlerror: cudart64_110.dll not found
2025-06-12 22:52:14.883374: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Traceback (most recent call last):
  File "C:\Users\fahad\Music\models\research\object_detection\model_main_tf2.py", line 31, in <module>
    from object_detection import model_lib_v2
ModuleNotFoundError: No module named 'object_detection'


# Export Final SavedModel for Deployment

In [None]:
# Run the Object Detection API exporter script on the customised pipeline
# config. Checkpoints are read from training/ (the --model_dir used by the
# training cell) and the result is written under exported_model/, which is
# where the inference cell loads exported_model/saved_model from.
!python models/research/object_detection/exporter_main_v2.py \
    --input_type image_tensor \
    --pipeline_config_path=ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/custom_pipeline.config \
    --trained_checkpoint_dir=training/ \
    --output_directory=exported_model/

# Load Exported Model and Run Inference

In [None]:
# This import only resolves once the repository folders are on sys.path,
# which is why it lives here instead of in the import cell at the top.
from object_detection.utils import label_map_util, visualization_utils as viz_utils

detect_fn = tf.saved_model.load('exported_model/saved_model')
category_index = label_map_util.create_category_index_from_labelmap('annotations/label_map.pbtxt')

img_path = 'path/to/test/image.jpg'
image_bgr = cv2.imread(img_path)
# cv2.imread reports an unreadable path by returning None instead of raising.
if image_bgr is None:
    raise FileNotFoundError(f"Could not read image: {img_path}")

# OpenCV loads pixels in BGR order, while the training images were decoded
# by TensorFlow as RGB. Convert once so the model and the plot both get RGB.
image_np = cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB)
input_tensor = tf.convert_to_tensor(image_np[None, ...])  # add batch axis
detections = detect_fn(input_tensor)

# Draws the boxes onto image_np in place.
viz_utils.visualize_boxes_and_labels_on_image_array(
    image_np,
    detections['detection_boxes'][0].numpy(),
    detections['detection_classes'][0].numpy().astype(np.int32),
    detections['detection_scores'][0].numpy(),
    category_index,
    use_normalized_coordinates=True,
    max_boxes_to_draw=10,
    min_score_thresh=0.5
)

plt.figure(figsize=(10, 10))
plt.imshow(image_np)  # already RGB, no further conversion needed
plt.axis('off')
plt.show()
