Resources Used
- wget.download('https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/_downloads/da4babe668a8afb093cc7776d7e630f3/generate_tfrecord.py')
- Setup https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html

# 0. Setup Paths

In [1]:
WORKSPACE_PATH = 'Tensorflow/workspace'
SCRIPTS_PATH = 'Tensorflow/scripts'
APIMODEL_PATH = 'Tensorflow/models'
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
IMAGE_PATH = WORKSPACE_PATH+'/images'
MODEL_PATH = WORKSPACE_PATH+'/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'
CONFIG_PATH = MODEL_PATH+'/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/my_ssd_mobnet/'

In [35]:
labels = [{'name':'A', 'id':1}, {'name':'B', 'id':2}, {'name':'C', 'id':3}, {'name':'D', 'id':4}, {'name':'E', 'id':5}
          , {'name':'F', 'id':6}, {'name':'G', 'id':7}, {'name':'H', 'id':8}, {'name':'I', 'id':9}, {'name':'J', 'id':10}
          , {'name':'K', 'id':11}, {'name':'L', 'id':12}, {'name':'M', 'id':13}, {'name':'N', 'id':14}, {'name':'O', 'id':15}
          , {'name':'P', 'id':16}, {'name':'Q', 'id':17}, {'name':'R', 'id':18}, {'name':'S', 'id':19}, {'name':'T', 'id':20}
          , {'name':'U', 'id':21}, {'name':'V', 'id':22}, {'name':'W', 'id':23}, {'name':'X', 'id':24}, {'name':'Y', 'id':25}
          , {'name':'Z', 'id':26}, {'name':'yes', 'id':27}, {'name':'no', 'id':28}, {'name':'thanks', 'id':29}, {'name':'iloveyou', 'id':30}
          , {'name':'hello', 'id':31}, {'name':'1', 'id':32}, {'name':'2', 'id':33}, {'name':'3', 'id':34}, {'name':'4', 'id':35}
          , {'name':'5', 'id':36}, {'name':'6', 'id':37}, {'name':'7', 'id':38}, {'name':'8', 'id':39}, {'name':'9', 'id':40}]
with open(ANNOTATION_PATH + '\label_map.pbtxt', 'w') as f:
    for label in labels:
        f.write('item { \n')
        f.write('\tname:\'{}\'\n'.format(label['name']))
        f.write('\tid:{}\n'.format(label['id']))
        f.write('}\n')

# 2. Create TF records

In [3]:
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x {IMAGE_PATH + '/train'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/train.record'}
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x{IMAGE_PATH + '/test'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/test.record'}

Successfully created the TFRecord file: Tensorflow/workspace/annotations/train.record
Successfully created the TFRecord file: Tensorflow/workspace/annotations/test.record


# 3. Download TF Models Pretrained Models from Tensorflow Model Zoo

# 4. Copy Model Config to Training Folder

In [4]:
CUSTOM_MODEL_NAME = 'my_ssd_mobnet' 

In [5]:
# !mkdir {'Tensorflow/workspace/models/'+CUSTOM_MODEL_NAME}
# !copy {PRETRAINED_MODEL_PATH+"/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config"} {MODEL_PATH+"/"+CUSTOM_MODEL_NAME}

# 5. Update Config For Transfer Learning

In [6]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

In [7]:
CONFIG_PATH = MODEL_PATH+'/'+CUSTOM_MODEL_NAME+'/pipeline.config'

In [8]:
config = config_util.get_configs_from_pipeline_file(CONFIG_PATH)

In [9]:
config

{'model': ssd {
   num_classes: 40
   image_resizer {
     fixed_shape_resizer {
       height: 320
       width: 320
     }
   }
   feature_extractor {
     type: "ssd_mobilenet_v2_fpn_keras"
     depth_multiplier: 1.0
     min_depth: 16
     conv_hyperparams {
       regularizer {
         l2_regularizer {
           weight: 3.9999998989515007e-05
         }
       }
       initializer {
         random_normal_initializer {
           mean: 0.0
           stddev: 0.009999999776482582
         }
       }
       activation: RELU_6
       batch_norm {
         decay: 0.996999979019165
         scale: true
         epsilon: 0.0010000000474974513
       }
     }
     use_depthwise: true
     override_base_feature_extractor_hyperparams: true
     fpn {
       min_level: 3
       max_level: 7
       additional_layer_depth: 128
     }
   }
   box_coder {
     faster_rcnn_box_coder {
       y_scale: 10.0
       x_scale: 10.0
       height_scale: 5.0
       width_scale: 5.0
     }
   }
   matc

In [10]:
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(CONFIG_PATH, "r") as f:                                                                                                                                                                                                                     
    proto_str = f.read()                                                                                                                                                                                                                                          
    text_format.Merge(proto_str, pipeline_config)  

In [11]:
pipeline_config.model.ssd.num_classes = 40
pipeline_config.train_config.batch_size = 5
pipeline_config.train_config.fine_tune_checkpoint = PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0'
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
pipeline_config.train_input_reader.label_map_path= ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/train.record']
pipeline_config.eval_input_reader[0].label_map_path = ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/test.record']

In [12]:
config_text = text_format.MessageToString(pipeline_config)                                                                                                                                                                                                        
with tf.io.gfile.GFile(CONFIG_PATH, "wb") as f:                                                                                                                                                                                                                     
    f.write(config_text)   

# 6. Train The Model

In [13]:
print("""python {}/research/object_detection/model_main_tf2.py --model_dir={}/{} --pipeline_config_path={}/{}/pipeline.config --num_train_steps=10000""".format(APIMODEL_PATH, MODEL_PATH,CUSTOM_MODEL_NAME,MODEL_PATH,CUSTOM_MODEL_NAME))

python Tensorflow/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/my_ssd_mobnet --pipeline_config_path=Tensorflow/workspace/models/my_ssd_mobnet/pipeline.config --num_train_steps=10000


In [14]:
print("""python {}/research/object_detection/model_main_tf2.py --model_dir={}/{} --pipeline_config_path={}/{}/pipeline.config --checkpoint_dir={}/{}
""".format(APIMODEL_PATH, MODEL_PATH, CUSTOM_MODEL_NAME, MODEL_PATH, CUSTOM_MODEL_NAME, MODEL_PATH, CUSTOM_MODEL_NAME))

python Tensorflow/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/my_ssd_mobnet --pipeline_config_path=Tensorflow/workspace/models/my_ssd_mobnet/pipeline.config --checkpoint_dir=Tensorflow/workspace/models/my_ssd_mobnet



# 6. Evaluate the model

In [34]:
def parse_tfrecord_fn(example_proto):
    feature_description = {
        'image/encoded': tf.io.FixedLenFeature([], tf.string),
        'image/height': tf.io.FixedLenFeature([], tf.int64),
        'image/width': tf.io.FixedLenFeature([], tf.int64),
        'image/filename': tf.io.FixedLenFeature([], tf.string),
        'image/source_id': tf.io.FixedLenFeature([], tf.string),
        'image/objects/bbox/xmin': tf.io.VarLenFeature(tf.float32),
        'image/objects/bbox/ymin': tf.io.VarLenFeature(tf.float32),
        'image/objects/bbox/xmax': tf.io.VarLenFeature(tf.float32),
        'image/objects/bbox/ymax': tf.io.VarLenFeature(tf.float32),
        'image/objects/label': tf.io.VarLenFeature(tf.int64),
    }

    # Parse the example
    example = tf.io.parse_single_example(example_proto, feature_description)

    # Decode the image
    image = tf.image.decode_jpeg(example['image/encoded'], channels=3)

    # Normalize the image
    image = tf.image.convert_image_dtype(image, tf.float32)

    # Get the bounding boxes and labels
    boxes = tf.stack([
        tf.sparse.to_dense(example['image/objects/bbox/xmin']),
        tf.sparse.to_dense(example['image/objects/bbox/ymin']),
        tf.sparse.to_dense(example['image/objects/bbox/xmax']),
        tf.sparse.to_dense(example['image/objects/bbox/ymax']),
    ], axis=-1)
    classes = tf.sparse.to_dense(example['image/objects/label'])

    return image, classes, boxes

def get_id_from_name(name):
    for label in labels:
        if label['name'] == name:
            return label['id']
    return None

In [None]:
import os
import xml.etree.ElementTree as ET
import tensorflow as tf
import numpy as np

# Define the paths and parameters
TEST_PATH = IMAGE_PATH + '/test'
NUM_CLASSES = 40
tf.config.run_functions_eagerly(run_eagerly=True)
# Define a function to parse the XML files
def parse_xml_file(xml_file):
    tree = ET.parse(xml_file)
    root = tree.getroot()

    # Get the image filename
    filename = root.find('filename').text

    # Get the image size
    size = root.find('size')
    width = int(size.find('width').text)
    height = int(size.find('height').text)

    # Get the ground truth boxes
    boxes = []
    classes = []
    for obj in root.findall('object'):
        class_name = obj.find('name').text
        class_id = int(get_id_from_name(str(class_name)))
        xmin = int(obj.find('bndbox/xmin').text)
        ymin = int(obj.find('bndbox/ymin').text)
        xmax = int(obj.find('bndbox/xmax').text)
        ymax = int(obj.find('bndbox/ymax').text)
        boxes.append([ymin/height, xmin/width, ymax/height, xmax/width])  # normalized coordinates
        classes.append(class_id)

    # Load the image
    image = tf.io.read_file(os.path.join(TEST_PATH, filename))
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [height, width])  # resize to original size
    image = tf.cast(image, tf.float32) / 255.0

    # Convert to tensors
    image = tf.expand_dims(image, axis=0)  # add batch dimension
    groundtruth_classes = tf.one_hot(classes, depth=NUM_CLASSES)
    groundtruth_boxes = tf.constant(boxes, dtype=tf.float32)

    return image, groundtruth_classes, groundtruth_boxes

# Load the validation data
xml_files = os.listdir(TEST_PATH)
validation_dataset = []
for xml in xml_files:
    if ".xml" in xml:
        img, gt, gtb = parse_xml_file(os.path.join(TEST_PATH, xml))
        res = []
        res.append(img)
        res.append(gt)
        res.append(gtb)
        validation_dataset.append(res)
    
# Define the model and restore checkpoint
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
model_config = configs['model']
model_config.ssd.num_classes = NUM_CLASSES
detection_model = model_builder.build(model_config=model_config, is_training=False)
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'all-10000-5/ckpt-11')).expect_partial()

# Define the evaluation function
@tf.function
def evaluate(image, groundtruth_classes, groundtruth_boxes):
    # Make predictions
    detections = detection_model(image, training=False)

    # Compute metrics
    APs = []
    for i in range(NUM_CLASSES):
        # Compute AP for each class
        ap = tf.keras.metrics.AUC(
            num_thresholds=100,
            curve='PR',
        )
        ap.update_state(
            groundtruth_classes[:, i],
            detections['detection_scores'][:, i],
        )
        APs.append(ap.result().numpy())

    # Compute mAP
    mAP = sum(APs) / len(APs)

    # Return metrics
    return mAP

# Iterate over the validation dataset and compute metrics
mAP_total = 0
mAP_count = 0
for elem in validation_dataset:
    mAP = evaluate(elem[0], elem[1], elem[2])
    mAP_total += mAP
    mAP_count += 1
    tf.print('mAP:', mAP)

# Compute average metrics
mAP_avg = mAP_total / mAP_count
tf.print('Average mAP:', mAP_avg)

mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.0
mAP: 0.0
mAP: 0.0
mAP: 0.0
mAP: 0.0
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025
mAP: 0.025


# 7. Load Train Model From Checkpoint

In [14]:
import os
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

In [16]:
# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-11')).expect_partial()

@tf.function
def detect_fn(image):
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections

# 8. Detect in Real-Time

In [17]:
import cv2 
import numpy as np

In [18]:
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

In [19]:
# Setup capture
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

In [22]:
while True: 
    ret, frame = cap.read()
    image_np = np.array(frame)
    
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
    detections = detect_fn(input_tensor)
    
    num_detections = int(detections.pop('num_detections'))
    detections = {key: value[0, :num_detections].numpy()
                  for key, value in detections.items()}
    detections['num_detections'] = num_detections

    # detection_classes should be ints.
    detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

    label_id_offset = 1
    image_np_with_detections = image_np.copy()

    viz_utils.visualize_boxes_and_labels_on_image_array(
                image_np_with_detections,
                detections['detection_boxes'],
                detections['detection_classes']+label_id_offset,
                detections['detection_scores'],
                category_index,
                use_normalized_coordinates=True,
                max_boxes_to_draw=3,
                min_score_thresh=.1,
                agnostic_mode=False)

    cv2.imshow('object detection',  cv2.resize(image_np_with_detections, (800, 600)))
    
    if cv2.waitKey(1) & 0xFF == ord('q'):
        cap.release()
        break

KeyboardInterrupt: 

In [12]:
import os
import shutil

labels = ["1", "2", "3", "4", "5", "6", "7", "8", "9"
          , "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "S", "T", "U", "V", "W", "X"
          , "Y", "Z", "Hello", "No", "ThankYou", "Yes", "ILoveYou"]

dst_path = "C:\\Users\\Aya\\Desktop\\GP\\Handwave\\Tensorflow\\workspace\\images\\train-aya"

def main(path):
    for filename in os.listdir(path):
        new_filename = "Aya_" + filename
        my_source =path + "\\" + filename
        my_dest =path + "\\" + new_filename
        os.rename(my_source, my_dest)

In [13]:
for l in labels:
    src_path = "F:\\Data\\train" + "\\" + l
    for f in os.listdir(src_path):
        src = src_path + "\\" + f
        dst = dst_path + "\\" + f
        shutil.copy(src, dst)

In [6]:
labels = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "S", "T", "U", "V", "X"
          , "Y", "Z", "Hello", "No", "ThankYou", "Yes", "ILoveYou"]

import xml.etree.ElementTree as ET

for l in labels:
    src_path = "F:\\Data\\train" + "\\" + l
    for filename in os.listdir(src_path):
        if(".xml" in filename):
            src = src_path + "\\" + filename
            # Load the XML file
            tree = ET.parse(src)

            # Get the root element of the XML tree
            root = tree.getroot()

            # Find the element you want to modify by its tag name
            element_to_modify = root.find('filename')
            new_value = filename.replace(".xml", ".jpg")

            # Change the value of the element
            element_to_modify.text = new_value

            # Write the modified XML back to the file
            tree.write(src)