# Importing Libraries

In [5]:
import os
import numpy as np
import tensorflow as tf
from object_detection.utils import dataset_util
import xml.etree.ElementTree as ET
from tensorflow.keras.optimizers.schedules import ExponentialDecay
import shutil

# Dataset Preprocessing and Augmentation

### Directory paths

In [6]:
# Root folder of the dataset. Set the OBJECT_DETECTION_DATASET_DIR environment
# variable to run the notebook on another machine; when it is unset or empty
# the original local path below is used.
base_dir = os.environ.get('OBJECT_DETECTION_DATASET_DIR') or (
    r'C:\Users\ACER\Jupyter_Notebook_Workplace\Object-detection-dataset'
)

# Sub-folders holding the two dataset splits.
train_dir = os.path.join(base_dir, 'train')
valid_dir = os.path.join(base_dir, 'valid')

# File the generated label map is written to.
output_label_map_path = os.path.join(base_dir, 'label_map.pbtxt')

### Extracting Label Map

In [7]:
# Extract labels from XML files
def extract_labels(directory):
    """Collect the distinct object class names annotated in a directory.

    Every file directly inside ``directory`` whose name ends with ``.xml`` is
    parsed, and the text of the ``<name>`` child of each ``<object>`` element
    is gathered with surrounding whitespace removed.

    Args:
        directory: Path of the folder holding the XML annotation files.

    Returns:
        set[str]: The unique, non-empty label names. Names are compared
        case-sensitively, so ``'Tom Tom'`` and ``'tom tom'`` are kept as two
        different labels.

    Raises:
        OSError: If ``directory`` cannot be listed or a file cannot be read.
        xml.etree.ElementTree.ParseError: If an annotation file is not
            well-formed XML.
    """
    labels = set()
    for filename in os.listdir(directory):
        if not filename.endswith(".xml"):
            continue
        root = ET.parse(os.path.join(directory, filename)).getroot()
        for obj in root.findall('object'):
            # findtext() returns None when <name> is absent and '' when it has
            # no text; neither is a usable class name, so such objects are
            # skipped instead of raising AttributeError or adding '' as a label.
            name = (obj.findtext('name') or '').strip()
            if name:
                labels.add(name)
    return labels

# Gather the class names of both splits and put them in sorted order
train_labels = extract_labels(train_dir)
valid_labels = extract_labels(valid_dir)
all_labels = sorted(train_labels | valid_labels)

# Number the classes starting at 1: {id: name}
label_map_dict = dict(enumerate(all_labels, start=1))

# Show the resulting id -> name assignment
print("Labels used in your annotations:")
for idx, name in label_map_dict.items():
    print(f"{idx}: {name}")

# Save to label_map.pbtxt
def save_label_map(label_map, filepath):
    """Write a label map as a sequence of ``item { id: ... name: '...' }`` entries.

    Args:
        label_map: Mapping of class id to class name; entries are written in
            the mapping's iteration order.
        filepath: Destination file. It is created, or overwritten if it
            already exists.

    Raises:
        OSError: If ``filepath`` cannot be opened for writing.
    """
    # Always write UTF-8 so non-ASCII class names do not depend on the
    # platform's default encoding.
    with open(filepath, 'w', encoding='utf-8') as f:
        for idx, name in label_map.items():
            # The name is emitted inside single quotes, so a backslash or a
            # single quote in it must be escaped (backslash first) to keep the
            # quoted string well-formed.
            escaped = str(name).replace("\\", "\\\\").replace("'", "\\'")
            f.write("item {\n")
            f.write(f"  id: {idx}\n")
            f.write(f"  name: '{escaped}'\n")
            f.write("}\n\n")

# Write the id -> name mapping to disk
save_label_map(label_map=label_map_dict, filepath=output_label_map_path)

# Report where the file was written
print(f"\nLabel map saved to: {output_label_map_path}")

Labels used in your annotations:
1: Tom Tom
2: bench
3: bin
4: bus
5: car
6: chair
7: cng
8: dog
9: door
10: glass partition
11: motorcycle
12: person
13: pillar
14: railing
15: rickshaw
16: shelf
17: stair
18: table
19: tempu
20: tom tom
21: tree
22: truck
23: umbrella
24: van

Label map saved to: C:\Users\ACER\Jupyter_Notebook_Workplace\Object-detection-dataset\label_map.pbtxt


### Data Augmentation

In [8]:
def augment_image(image, bbox):
    """Apply random photometric augmentation that leaves object positions intact.

    Brightness is shifted by a random delta of at most 0.1 and contrast is
    scaled by a random factor between 0.9 and 1.1.

    No geometric transform (such as a horizontal flip) is applied: a flip moves
    every object in the picture, so the boxes would have to be mirrored with
    it, whereas this function hands ``bbox`` back unchanged. Flipping only the
    image would pair mirrored pixels with un-mirrored annotations.

    Args:
        image: Image tensor accepted by the ``tf.image.random_*`` functions.
        bbox: Bounding-box data belonging to ``image``; may be ``None``. It is
            not inspected or modified.

    Returns:
        tuple: ``(augmented_image, bbox)`` where ``bbox`` is the very object
        that was passed in.
    """
    image = tf.image.random_brightness(image, max_delta=0.1)
    image = tf.image.random_contrast(image, 0.9, 1.1)
    return image, bbox

### Apply and Save Augmented Images

In [9]:
aug_dir = os.path.join(base_dir, 'train_augmented')
os.makedirs(aug_dir, exist_ok=True)

# (height, width) every augmented image is resized to before saving.
AUG_IMAGE_SIZE = (448, 448)
# Image extensions picked up from the training folder; matched case-insensitively
# so that files such as 'IMG_001.JPG' are not silently skipped.
AUG_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')

for file in os.listdir(train_dir):
    if not file.lower().endswith(AUG_IMAGE_EXTENSIONS):
        continue

    base_name = os.path.splitext(file)[0]
    img_path = os.path.join(train_dir, file)
    xml_path = os.path.join(train_dir, base_name + '.xml')

    if not os.path.exists(xml_path):
        continue  # Skip if annotation missing

    # Load the image. decode_image detects the encoding from the file content,
    # so PNG inputs are not pushed through the JPEG-specific decoder.
    image = tf.io.read_file(img_path)
    image = tf.image.decode_image(image, channels=3)
    image = tf.image.resize(image, AUG_IMAGE_SIZE)  # resize returns float32
    image = tf.cast(image, tf.uint8)

    # Augment; the bbox slot is unused here because the annotation lives in the XML.
    aug_image, _ = augment_image(image, None)
    # encode_jpeg needs uint8, so enforce it whatever augment_image returned.
    aug_image = tf.image.encode_jpeg(tf.cast(aug_image, tf.uint8))

    # Save augmented image (always as JPEG, whatever the source format was)
    aug_img_name = base_name + '_aug.jpg'
    aug_img_path = os.path.join(aug_dir, aug_img_name)
    tf.io.write_file(aug_img_path, aug_image)

    # Copy original XML with new name.
    # NOTE(review): the annotation is copied verbatim, so it is only correct if
    # the augmentation did not move any object and if downstream code interprets
    # its coordinates relative to the original image resolution rather than
    # AUG_IMAGE_SIZE - TODO confirm.
    aug_xml_name = base_name + '_aug.xml'
    shutil.copy(xml_path, os.path.join(aug_dir, aug_xml_name))

### Merge Original and Augmented Training Data

In [10]:
train_combined_dir = os.path.join(base_dir, 'train_combined')
os.makedirs(train_combined_dir, exist_ok=True)

# Copy every file of the original and the augmented training folders into one
# flat directory. A file whose name already exists in the destination is
# overwritten, so the later source folder wins on a name clash.
for src_dir in [train_dir, aug_dir]:
    for f in os.listdir(src_dir):
        src_path = os.path.join(src_dir, f)
        # shutil.copy() handles regular files only; skip anything else (for
        # example a sub-directory) instead of aborting the whole merge.
        if not os.path.isfile(src_path):
            continue
        shutil.copy(src_path, os.path.join(train_combined_dir, f))