Resources Used
- wget.download('https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/_downloads/da4babe668a8afb093cc7776d7e630f3/generate_tfrecord.py')
- Setup https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/latest/install.html

# 0. LOAD virtual environment

In [None]:
# terminal command - isntall requirements txt
pip install -r requirements.txt

# 1. SETUP paths

In [21]:
WORKSPACE_PATH = 'Tensorflow/workspace'
SCRIPTS_PATH = 'Tensorflow/scripts'
APIMODEL_PATH = 'Tensorflow/models'
ANNOTATION_PATH = WORKSPACE_PATH+'/annotations'
IMAGE_PATH = WORKSPACE_PATH+'/images'
MODEL_PATH = WORKSPACE_PATH+'/models'
PRETRAINED_MODEL_PATH = WORKSPACE_PATH+'/pre-trained-models'
CONFIG_PATH = MODEL_PATH+'/my_ssd_mobnet/pipeline.config'
CHECKPOINT_PATH = MODEL_PATH+'/my_ssd_mobnet/'

# 2. CREATE label maps

In [24]:
# create the dictionary entries for all glosses the model shall be trained on - MAKE SURE the naming exactly matches labelimg - ad add sequential IDs
labels = [
    {'name':'hello', 'id':1}, 
    {'name':'yes', 'id':2}, 
    {'name':'no', 'id':3}, 
    {'name':'thanks', 'id':4}, 
    {'name':'i love you', 'id':5}, 
    # {'name':'montag', 'id':6},    
    # {'name':'auch', 'id':7},  
    # {'name':'mehr', 'id':8},  
    # {'name':'wolke', 'id':9},  
    # {'name':'als', 'id':10},  
    # {'name':'sonne', 'id':11},  
    # {'name':'ueberwiegend', 'id':12},  
    # {'name':'regen', 'id':13},  
    # {'name':'gewitter', 'id':14},  
    ]

with open(ANNOTATION_PATH + '/label_map.pbtxt', 'w') as f:
    for label in labels:
        f.write('item {\n') # space removed
        f.write('\tname:\'{}\'\n'.format(label['name']))
        f.write('\tid:{}\n'.format(label['id']))
        f.write('}\n')

# 3. CREATE tfrecord file

In [3]:
# downloading pretrained tf models from tf model zoo
!cd Tensorflow && git clone https://github.com/tensorflow/models

fatal: destination path 'models' already exists and is not an empty directory.


In [4]:
# installing object detection module (see command box below, keeping comment & command in same code created issues)

In [25]:
%%bash
cd Tensorflow/models/research/
protoc object_detection/protos/*.proto --python_out=.
cp object_detection/packages/tf1/setup.py . # updated to tf1 
python -m pip install .

Processing /Users/maximilianscheel/neuefische/capstone_project/capstone_sl_txt_voice/sl_rtod/Tensorflow/models/research
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: object_detection
  Building wheel for object_detection (setup.py): started
  Building wheel for object_detection (setup.py): finished with status 'done'
  Created wheel for object_detection: filename=object_detection-0.1-py3-none-any.whl size=1656739 sha256=deb39b2c43ad10514e6fd6b56ef02bfca8053d840cd247fee7c37f58447c86e7
  Stored in directory: /private/var/folders/y5/l0vddhks2fj4y_tkywmly1240000gn/T/pip-ephem-wheel-cache-1fajb1zk/wheels/ed/f4/6c/bcb9019e476a83fa88035201dd62b698e5d335557fa45a84a6
Successfully built object_detection
Installing collected packages: object_detection
  Attempting uninstall: object_detection
    Found existing installation: object_detection 0.1
    Uninstalling object_detection-0.1:
      Successfully 


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [26]:
# update line of code in .venv/lib/python3.10/site-packages/official/vision/image_classification/augment.py

"""
replace
from tensorflow.python.keras.layers.preprocessing import image_preprocessing as image_ops 
with
from tensorflow.keras.preprocessing import image as image_ops
"""


import os

# Define the path to the Python file
file_path = ".venv/lib/python3.10/site-packages/official/vision/image_classification/augment.py"

# Define the lines to be replaced
old_line = "from tensorflow.python.keras.layers.preprocessing import image_preprocessing as image_ops"
new_line = "from tensorflow.keras.preprocessing import image as image_ops"

# Read the file and copy the old line as a comment, then add the new line
with open(file_path, 'r') as file:
    lines = file.readlines()

with open(file_path, 'w') as file:
    for line in lines:
        if old_line in line:
            file.write("# " + line)  # Comment out the old line
            file.write(new_line + '\n')  # Add the new line
        else:
            file.write(line)

print(f"Updated {file_path} with new import statement and commented out the old one.")

Updated .venv/lib/python3.10/site-packages/official/vision/image_classification/augment.py with new import statement and commented out the old one.


In [27]:
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x {IMAGE_PATH + '/train'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/train.record'}
!python {SCRIPTS_PATH + '/generate_tfrecord.py'} -x{IMAGE_PATH + '/test'} -l {ANNOTATION_PATH + '/label_map.pbtxt'} -o {ANNOTATION_PATH + '/test.record'}

Successfully created the TFRecord file: Tensorflow/workspace/annotations/train.record
Successfully created the TFRecord file: Tensorflow/workspace/annotations/test.record


# 4. COPY model config to training folder

In [28]:
CUSTOM_MODEL_NAME = 'my_ssd_mobnet' # model rerun with augmented training data - folder Tensorflow/models/my_ssd_mobnet needs to be deleted otherwise training model script will throw an error

In [29]:
!mkdir -p {'Tensorflow/workspace/models/' + CUSTOM_MODEL_NAME}
!cp {PRETRAINED_MODEL_PATH + '/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/pipeline.config'} {MODEL_PATH + '/' + CUSTOM_MODEL_NAME}

# 5. UPDATE config for transfer learning

In [30]:
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

In [31]:
CONFIG_PATH = MODEL_PATH+'/'+CUSTOM_MODEL_NAME+'/pipeline.config'

In [32]:
config = config_util.get_configs_from_pipeline_file(CONFIG_PATH)

In [33]:
config

{'model': ssd {
   num_classes: 90
   image_resizer {
     fixed_shape_resizer {
       height: 320
       width: 320
     }
   }
   feature_extractor {
     type: "ssd_mobilenet_v2_fpn_keras"
     depth_multiplier: 1.0
     min_depth: 16
     conv_hyperparams {
       regularizer {
         l2_regularizer {
           weight: 4e-05
         }
       }
       initializer {
         random_normal_initializer {
           mean: 0.0
           stddev: 0.01
         }
       }
       activation: RELU_6
       batch_norm {
         decay: 0.997
         scale: true
         epsilon: 0.001
       }
     }
     use_depthwise: true
     override_base_feature_extractor_hyperparams: true
     fpn {
       min_level: 3
       max_level: 7
       additional_layer_depth: 128
     }
   }
   box_coder {
     faster_rcnn_box_coder {
       y_scale: 10.0
       x_scale: 10.0
       height_scale: 5.0
       width_scale: 5.0
     }
   }
   matcher {
     argmax_matcher {
       matched_threshold: 0.5
   

In [34]:
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(CONFIG_PATH, "r") as f:                                                                                                                                                                                                                     
    proto_str = f.read()                                                                                                                                                                                                                                          
    text_format.Merge(proto_str, pipeline_config)  

In [35]:
pipeline_config.model.ssd.num_classes = 5 # UPDATE to the number of classes your model shall be trained on
pipeline_config.train_config.batch_size = 4
pipeline_config.train_config.fine_tune_checkpoint = PRETRAINED_MODEL_PATH+'/ssd_mobilenet_v2_fpnlite_320x320_coco17_tpu-8/checkpoint/ckpt-0'
pipeline_config.train_config.fine_tune_checkpoint_type = "detection"
pipeline_config.train_input_reader.label_map_path= ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.train_input_reader.tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/train.record']
pipeline_config.eval_input_reader[0].label_map_path = ANNOTATION_PATH + '/label_map.pbtxt'
pipeline_config.eval_input_reader[0].tf_record_input_reader.input_path[:] = [ANNOTATION_PATH + '/test.record']

In [36]:
config_text = text_format.MessageToString(pipeline_config)                                                                                                                                                                                                        
with tf.io.gfile.GFile(CONFIG_PATH, "wb") as f:                                                                                                                                                                                                                     
    f.write(config_text)   

# 6. TRAIN the model

In [37]:
# no. of steps set to 10000
print("""python {}/research/object_detection/model_main_tf2.py --model_dir={}/{} --pipeline_config_path={}/{}/pipeline.config --num_train_steps=10000""".format(APIMODEL_PATH, MODEL_PATH,CUSTOM_MODEL_NAME,MODEL_PATH,CUSTOM_MODEL_NAME))

python Tensorflow/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/my_ssd_mobnet --pipeline_config_path=Tensorflow/workspace/models/my_ssd_mobnet/pipeline.config --num_train_steps=10000


In [38]:
# copy paste python command to run the model 
!python Tensorflow/models/research/object_detection/model_main_tf2.py --model_dir=Tensorflow/workspace/models/my_ssd_mobnet --pipeline_config_path=Tensorflow/workspace/models/my_ssd_mobnet/pipeline.config --num_train_steps=10000



TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
I0203 18:52:56.382397 8350614080 mirrored_strategy.py:419] Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
INFO:tensorflow:Maybe overwriting train_steps: 10000
I0203 18:52:56.395046 8350614080 config_util.py:552] Maybe overwriting train_steps: 10000
INFO:tensorflow:Maybe overwriting use_bfloat16: False
I0203 18:52:56.395141 8350614080 config_util.py:552] Maybe overwriting use_bfloat16: False
Instructions for updating:
rename to distribute_datasets_from_

# 7. LOAD trained model from checkpoint

In [39]:
import os
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as viz_utils
from object_detection.builders import model_builder

In [40]:
# Load pipeline config and build a detection model
configs = config_util.get_configs_from_pipeline_file(CONFIG_PATH)
detection_model = model_builder.build(model_config=configs['model'], is_training=False)

# Restore checkpoint
ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)
# UPDATE ckpt-XX to latest checkpoint
ckpt.restore(os.path.join(CHECKPOINT_PATH, 'ckpt-11')).expect_partial() 

@tf.function
def detect_fn(image):
    image, shapes = detection_model.preprocess(image)
    prediction_dict = detection_model.predict(image, shapes)
    detections = detection_model.postprocess(prediction_dict, shapes)
    return detections

# 8 - RTOD pretrained sign language gloss detection

In [41]:
import cv2
import numpy as np
# added to ensure stream closes properly
import tensorflow as tf
from object_detection.utils import visualization_utils as viz_utils

In [42]:
category_index = label_map_util.create_category_index_from_labelmap(ANNOTATION_PATH+'/label_map.pbtxt')

In [43]:
# Setup capture
cap = cv2.VideoCapture(0)
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

In [50]:
# start webcam and log detected objects with confidence score

import cv2
import numpy as np
import tensorflow as tf
from object_detection.utils import visualization_utils as viz_utils
import time

# Define a function to log detected objects
def log_detected_objects(detections, category_index):
    log = []
    for i in range(detections['num_detections']):
        if detections['detection_scores'][i] >= 0.8:
            class_id = detections['detection_classes'][i]
            if class_id in category_index:
                class_name = category_index[class_id]['name']
                score = detections['detection_scores'][i]
                log.append((class_name, score))
    return log

# Initialize logging list
detected_objects_log = []
last_log_time = time.time()

# Initialize the camera
cap = cv2.VideoCapture(0)

if not cap.isOpened():
    print("Error: Could not open video source.")
else:
    try:
        while True: 
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame.")
                break

            image_np = np.array(frame)
            
            input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)
            detections = detect_fn(input_tensor)
            
            num_detections = int(detections.pop('num_detections'))
            detections = {key: value[0, :num_detections].numpy()
                          for key, value in detections.items()}
            detections['num_detections'] = num_detections

            # detection_classes should be ints.
            detections['detection_classes'] = detections['detection_classes'].astype(np.int64)

            label_id_offset = 1
            image_np_with_detections = image_np.copy()

            viz_utils.visualize_boxes_and_labels_on_image_array(
                        image_np_with_detections,
                        detections['detection_boxes'],
                        detections['detection_classes'] + label_id_offset,
                        detections['detection_scores'],
                        category_index,
                        use_normalized_coordinates=True,
                        max_boxes_to_draw=5,
                        min_score_thresh=.5,
                        agnostic_mode=False)

            cv2.imshow('object detection', cv2.resize(image_np_with_detections, (800, 600)))

            # Log detected objects with a score of over 80%, but only once every second
            current_time = time.time()
            if current_time - last_log_time >= 1:
                logged_objects = log_detected_objects(detections, category_index)
                detected_objects_log.extend(logged_objects)
                print(f"Logged objects: {logged_objects}")  # Print the logged objects with confidence scores
                last_log_time = current_time
            
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

    finally:
        cap.release()
        cv2.destroyAllWindows()
        # Print the log of detected objects in list format
        detected_objects = [(entry[0], entry[1]) for entry in detected_objects_log]
        print(f"\ndetected_objects_log = {detected_objects}")


Logged objects: []
Logged objects: []
Logged objects: []
Logged objects: [('yes', 0.81171584)]
Logged objects: [('thanks', 0.96582884)]
Logged objects: [('thanks', 0.8597149)]
Logged objects: [('thanks', 0.9724091)]
Logged objects: []
Logged objects: []
Logged objects: []
Logged objects: [('yes', 0.88370776)]
Logged objects: []

detected_objects_log = [('yes', 0.81171584), ('thanks', 0.96582884), ('thanks', 0.8597149), ('thanks', 0.9724091), ('yes', 0.88370776)]


In [53]:
# remove duplicates/ objects appearing multiple times after each other
def remove_consecutive_duplicates(detected_objects):
    return [detected_objects[i] for i in range(len(detected_objects)) if i == 0 or detected_objects[i][0] != detected_objects[i-1][0]]

# Create cleaned_detected_objects_log
cleaned_detected_objects_log = remove_consecutive_duplicates(detected_objects_log)

# Print the cleaned objects
print(f"\ncleaned_detected_objects_log = {cleaned_detected_objects_log}")



cleaned_detected_objects_log = [('yes', 0.81171584), ('thanks', 0.96582884), ('yes', 0.88370776)]
