# Training Script

In [56]:
import os
import time

import object_detection
import tensorflow as tf
from object_detection.utils import config_util
from object_detection.protos import pipeline_pb2
from google.protobuf import text_format

## Set up file structure and download chosen base model from Model Zoo

In [57]:
# Name of the directory (under TF/workspace/models) that holds this training run.
modelName = "640x640_model"

# Base model downloaded from the Model Zoo; the archive URL is derived from its name
# so the two cannot drift apart.
pretrainedModel = "ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8"
pretrainedModelURL = "http://download.tensorflow.org/models/object_detection/tf2/20200711/{}.tar.gz".format(pretrainedModel)

# File names of the TFRecord conversion script and of the label map.
# Script source: https://tensorflow-object-detection-api-tutorial.readthedocs.io/en/tensorflow-1.14/training.html
tfRecordScript = "generate_tfrecord.py"
labelMap = "label_map.pbtxt"

In [58]:
# Directory layout used by the rest of the notebook. All paths are relative to the
# notebook's working directory and share two base directories.
workspaceDir = os.path.join("TF", "workspace")
modelsDir = os.path.join(workspaceDir, "models")

filePaths = {
    "WORKSPACE": workspaceDir,
    "SCRIPTS": os.path.join("TF", "scripts"),
    "APIMODEL": os.path.join("TF", "models"),
    "ANNOTATIONS": os.path.join(workspaceDir, "annotations"),
    "IMAGES": os.path.join(workspaceDir, "images"),
    "PRETRAINED_MODELS": os.path.join(workspaceDir, "pretrained_models"),
    "MODELS": modelsDir,
    # Per-run directory: holds pipeline.config and is passed as --model_dir when training.
    "CHECKPOINTS": os.path.join(modelsDir, modelName),
}

In [66]:
# Individual files that later cells read or write, located inside the directories
# declared in filePaths.
files = {
    "PIPELINE_CONFIG": os.path.join(filePaths["CHECKPOINTS"], "pipeline.config"),
    "TF_RECORD_SCRIPT": os.path.join(filePaths["SCRIPTS"], tfRecordScript),
    "LABELMAP": os.path.join(filePaths["ANNOTATIONS"], labelMap),
}

In [67]:
# Create every project directory up front. os.makedirs(..., exist_ok=True) creates
# missing parent directories and is a no-op when the directory already exists, so it
# matches `mkdir -p` without depending on a POSIX shell or on shell quoting of the path.
for path in filePaths.values():
    os.makedirs(path, exist_ok=True)

In [68]:
if not os.path.exists(os.path.join(filePaths['APIMODEL'], 'research', 'object_detection')):
    !git clone https://github.com/tensorflow/models {filePaths['APIMODEL']}

In [69]:
!wget {pretrainedModelURL}
!mv {pretrainedModel+'.tar.gz'} {filePaths['PRETRAINED_MODELS']}
!cd {filePaths['PRETRAINED_MODELS']} && tar -zxvf {pretrainedModel+'.tar.gz'}
!cp {os.path.join(filePaths['PRETRAINED_MODELS'], pretrainedModel, 'pipeline.config')} {os.path.join(filePaths['CHECKPOINTS'])}

--2023-03-09 13:35:25--  http://download.tensorflow.org/models/object_detection/tf2/20200711/ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz
Resolving download.tensorflow.org (download.tensorflow.org)... 172.217.169.80, 2a00:1450:4009:815::2010
Connecting to download.tensorflow.org (download.tensorflow.org)|172.217.169.80|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 20518283 (20M) [application/x-tar]
Saving to: ‘ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz’


2023-03-09 13:35:25 (32.4 MB/s) - ‘ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8.tar.gz’ saved [20518283/20518283]

ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/
ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint/
ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint/ckpt-0.data-00000-of-00001
ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint/checkpoint
ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/checkpoint/ckpt-0.index
ssd_mobilenet_v2_fpnlite_640x640_coco17_tpu-8/pipeli

# Create label mapping

In [70]:
# Classes the detector is trained on; each entry carries the class name and its integer id.
objectLabels = [
    {'name': 'resistor', 'id': 1},
    {'name': 'electrolytic_capacitor', 'id': 2},
    {'name': 'LED', 'id': 3},
    {'name': 'ceramic_capacitor', 'id': 4},
    {'name': 'IC', 'id': 5},
]

# Render one `item { ... }` entry per class, then write the whole label map in one go.
labelMapEntries = [
    'item { \n'
    + '\tname:\'{}\'\n'.format(entry['name'])
    + '\tid:{}\n'.format(entry['id'])
    + '}\n'
    for entry in objectLabels
]
with open(files['LABELMAP'], 'w') as labelMapFile:
    labelMapFile.write(''.join(labelMapEntries))

## Convert XML annotations to TFRecord files

In [71]:
!python {files['TF_RECORD_SCRIPT']} -x {os.path.join(filePaths['IMAGES'], 'train')} -l {files['LABELMAP']} -o {os.path.join(filePaths['ANNOTATIONS'], 'train.record')} 
!python {files['TF_RECORD_SCRIPT']} -x {os.path.join(filePaths['IMAGES'], 'test')} -l {files['LABELMAP']} -o {os.path.join(filePaths['ANNOTATIONS'], 'test.record')} 

Successfully created the TFRecord file: TF/workspace/annotations/train.record
Successfully created the TFRecord file: TF/workspace/annotations/test.record


## Set up model config file

In [72]:
# Parse the pipeline.config copied from the pretrained model into a protobuf message.
pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
with tf.io.gfile.GFile(files['PIPELINE_CONFIG'], "r") as configFile:
    proto_str = configFile.read()
text_format.Merge(proto_str, pipeline_config)

# Paths the pipeline has to point at for this project.
fineTuneCheckpoint = os.path.join(filePaths['PRETRAINED_MODELS'], pretrainedModel, 'checkpoint', 'ckpt-0')
trainRecordPath = os.path.join(filePaths['ANNOTATIONS'], 'train.record')
testRecordPath = os.path.join(filePaths['ANNOTATIONS'], 'test.record')

# One output class per entry in the label map.
pipeline_config.model.ssd.num_classes = len(objectLabels)

# Training settings: small batch size, starting from the pretrained detection checkpoint.
trainConfig = pipeline_config.train_config
trainConfig.batch_size = 4
trainConfig.fine_tune_checkpoint = fineTuneCheckpoint
trainConfig.fine_tune_checkpoint_type = "detection"

# Training and evaluation inputs share the label map but read different record files.
trainReader = pipeline_config.train_input_reader
trainReader.label_map_path = files['LABELMAP']
trainReader.tf_record_input_reader.input_path[:] = [trainRecordPath]

evalReader = pipeline_config.eval_input_reader[0]
evalReader.label_map_path = files['LABELMAP']
evalReader.tf_record_input_reader.input_path[:] = [testRecordPath]

# Serialise the edited message back over the same file.
config_text = text_format.MessageToString(pipeline_config)
with tf.io.gfile.GFile(files['PIPELINE_CONFIG'], "wb") as configFile:
    configFile.write(config_text)


# Training

In [73]:
import time

trainerScript = os.path.join(filePaths['APIMODEL'], 'research', 'object_detection', 'model_main_tf2.py')

trainCommand = "python {} --model_dir={} --pipeline_config_path={} --num_train_steps=10000".format(trainerScript, filePaths['CHECKPOINTS'],files['PIPELINE_CONFIG'])

# Trained using an Nvidia RTX 3060 GPU (12GB VRAM) for acceleration.
start = time.time()
!{trainCommand}
end = time.time()
elapsed = end - start
elapsedMins = elapsed / 60
print("Training time: {} seconds! ({} minutes)".format(round(elapsed, 2), round(elapsedMins, 0)))

2023-03-09 13:35:39.974640: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-09 13:35:41.494285: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-09 13:35:41.495896: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-09 13:35:41.496001: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least on

Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.
W0309 13:35:46.585071 139666653136704 deprecation.py:350] From /home/ben/.conda/envs/tf/lib/python3.10/site-packages/tensorflow/python/util/dispatch.py:1176: sample_distorted_bounding_box (from tensorflow.python.ops.image_ops_impl) is deprecated and will be removed in a future version.
Instructions for updating:
`seed2` arg is deprecated.Use sample_distorted_bounding_box_v2 instead.
Instructions for updating:
Use `tf.cast` instead.
W0309 13:35:47.360509 139666653136704 deprecation.py:350] From /home/ben/.conda/envs/tf/lib/python3.10/site-packages/tensorflow/python/util/dispatch.py:1176: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
2023-03-09 13:35:59.549906: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8800
2023-03-09 13:36:00.223219: I tensorf

INFO:tensorflow:Step 600 per-step time 0.141s
I0309 13:37:46.759302 139666653136704 model_lib_v2.py:705] Step 600 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.24850117,
 'Loss/localization_loss': 0.21593525,
 'Loss/regularization_loss': 0.15055695,
 'Loss/total_loss': 0.61499333,
 'learning_rate': 0.0586664}
I0309 13:37:46.759494 139666653136704 model_lib_v2.py:708] {'Loss/classification_loss': 0.24850117,
 'Loss/localization_loss': 0.21593525,
 'Loss/regularization_loss': 0.15055695,
 'Loss/total_loss': 0.61499333,
 'learning_rate': 0.0586664}
INFO:tensorflow:Step 700 per-step time 0.141s
I0309 13:38:00.904025 139666653136704 model_lib_v2.py:705] Step 700 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.09859046,
 'Loss/localization_loss': 0.06890924,
 'Loss/regularization_loss': 0.15013768,
 'Loss/total_loss': 0.31763738,
 'learning_rate': 0.0639998}
I0309 13:38:00.904266 139666653136704 model_lib_v2.py:708] {'Loss/classification_loss': 0.098

INFO:tensorflow:Step 2100 per-step time 0.146s
I0309 13:41:19.857867 139666653136704 model_lib_v2.py:705] Step 2100 per-step time 0.146s
INFO:tensorflow:{'Loss/classification_loss': 0.06004992,
 'Loss/localization_loss': 0.031413525,
 'Loss/regularization_loss': 0.14075793,
 'Loss/total_loss': 0.23222138,
 'learning_rate': 0.07990056}
I0309 13:41:19.858016 139666653136704 model_lib_v2.py:708] {'Loss/classification_loss': 0.06004992,
 'Loss/localization_loss': 0.031413525,
 'Loss/regularization_loss': 0.14075793,
 'Loss/total_loss': 0.23222138,
 'learning_rate': 0.07990056}
INFO:tensorflow:Step 2200 per-step time 0.141s
I0309 13:41:33.995940 139666653136704 model_lib_v2.py:705] Step 2200 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.063009724,
 'Loss/localization_loss': 0.03139003,
 'Loss/regularization_loss': 0.13999099,
 'Loss/total_loss': 0.23439074,
 'learning_rate': 0.07988167}
I0309 13:41:33.996182 139666653136704 model_lib_v2.py:708] {'Loss/classification_lo

INFO:tensorflow:Step 3600 per-step time 0.141s
I0309 13:44:52.016838 139666653136704 model_lib_v2.py:705] Step 3600 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.048478693,
 'Loss/localization_loss': 0.015829762,
 'Loss/regularization_loss': 0.12996386,
 'Loss/total_loss': 0.19427231,
 'learning_rate': 0.079445526}
I0309 13:44:52.017084 139666653136704 model_lib_v2.py:708] {'Loss/classification_loss': 0.048478693,
 'Loss/localization_loss': 0.015829762,
 'Loss/regularization_loss': 0.12996386,
 'Loss/total_loss': 0.19427231,
 'learning_rate': 0.079445526}
INFO:tensorflow:Step 3700 per-step time 0.141s
I0309 13:45:06.131914 139666653136704 model_lib_v2.py:705] Step 3700 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.07125929,
 'Loss/localization_loss': 0.035937462,
 'Loss/regularization_loss': 0.129253,
 'Loss/total_loss': 0.23644975,
 'learning_rate': 0.07940216}
I0309 13:45:06.132153 139666653136704 model_lib_v2.py:708] {'Loss/classification_

INFO:tensorflow:Step 5100 per-step time 0.144s
I0309 13:48:24.618540 139666653136704 model_lib_v2.py:705] Step 5100 per-step time 0.144s
INFO:tensorflow:{'Loss/classification_loss': 0.050454568,
 'Loss/localization_loss': 0.012826384,
 'Loss/regularization_loss': 0.119827196,
 'Loss/total_loss': 0.18310815,
 'learning_rate': 0.07862595}
I0309 13:48:24.618765 139666653136704 model_lib_v2.py:708] {'Loss/classification_loss': 0.050454568,
 'Loss/localization_loss': 0.012826384,
 'Loss/regularization_loss': 0.119827196,
 'Loss/total_loss': 0.18310815,
 'learning_rate': 0.07862595}
INFO:tensorflow:Step 5200 per-step time 0.139s
I0309 13:48:38.563651 139666653136704 model_lib_v2.py:705] Step 5200 per-step time 0.139s
INFO:tensorflow:{'Loss/classification_loss': 0.048859842,
 'Loss/localization_loss': 0.013676367,
 'Loss/regularization_loss': 0.11920569,
 'Loss/total_loss': 0.1817419,
 'learning_rate': 0.07855851}
I0309 13:48:38.563891 139666653136704 model_lib_v2.py:708] {'Loss/classificatio

INFO:tensorflow:Step 6600 per-step time 0.141s
I0309 13:51:56.390858 139666653136704 model_lib_v2.py:705] Step 6600 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.044282496,
 'Loss/localization_loss': 0.016768891,
 'Loss/regularization_loss': 0.11057805,
 'Loss/total_loss': 0.17162944,
 'learning_rate': 0.077449396}
I0309 13:51:56.391065 139666653136704 model_lib_v2.py:708] {'Loss/classification_loss': 0.044282496,
 'Loss/localization_loss': 0.016768891,
 'Loss/regularization_loss': 0.11057805,
 'Loss/total_loss': 0.17162944,
 'learning_rate': 0.077449396}
INFO:tensorflow:Step 6700 per-step time 0.141s
I0309 13:52:10.508697 139666653136704 model_lib_v2.py:705] Step 6700 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.055280495,
 'Loss/localization_loss': 0.015116592,
 'Loss/regularization_loss': 0.11006581,
 'Loss/total_loss': 0.1804629,
 'learning_rate': 0.077358514}
I0309 13:52:10.508894 139666653136704 model_lib_v2.py:708] {'Loss/classificati

INFO:tensorflow:Step 8100 per-step time 0.145s
I0309 13:55:28.988266 139666653136704 model_lib_v2.py:705] Step 8100 per-step time 0.145s
INFO:tensorflow:{'Loss/classification_loss': 0.0409474,
 'Loss/localization_loss': 0.009080457,
 'Loss/regularization_loss': 0.102302745,
 'Loss/total_loss': 0.1523306,
 'learning_rate': 0.07592674}
I0309 13:55:28.988422 139666653136704 model_lib_v2.py:708] {'Loss/classification_loss': 0.0409474,
 'Loss/localization_loss': 0.009080457,
 'Loss/regularization_loss': 0.102302745,
 'Loss/total_loss': 0.1523306,
 'learning_rate': 0.07592674}
INFO:tensorflow:Step 8200 per-step time 0.141s
I0309 13:55:43.107630 139666653136704 model_lib_v2.py:705] Step 8200 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.045611415,
 'Loss/localization_loss': 0.018685361,
 'Loss/regularization_loss': 0.10178531,
 'Loss/total_loss': 0.16608208,
 'learning_rate': 0.075813256}
I0309 13:55:43.107823 139666653136704 model_lib_v2.py:708] {'Loss/classification_lo

INFO:tensorflow:Step 9600 per-step time 0.141s
I0309 13:59:01.389113 139666653136704 model_lib_v2.py:705] Step 9600 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.03922488,
 'Loss/localization_loss': 0.009085844,
 'Loss/regularization_loss': 0.09507908,
 'Loss/total_loss': 0.1433898,
 'learning_rate': 0.07407206}
I0309 13:59:01.389348 139666653136704 model_lib_v2.py:708] {'Loss/classification_loss': 0.03922488,
 'Loss/localization_loss': 0.009085844,
 'Loss/regularization_loss': 0.09507908,
 'Loss/total_loss': 0.1433898,
 'learning_rate': 0.07407206}
INFO:tensorflow:Step 9700 per-step time 0.141s
I0309 13:59:15.524654 139666653136704 model_lib_v2.py:705] Step 9700 per-step time 0.141s
INFO:tensorflow:{'Loss/classification_loss': 0.042265058,
 'Loss/localization_loss': 0.011493522,
 'Loss/regularization_loss': 0.09459212,
 'Loss/total_loss': 0.1483507,
 'learning_rate': 0.073937014}
I0309 13:59:15.524847 139666653136704 model_lib_v2.py:708] {'Loss/classification_los

# Evaluation

In [74]:
evalCommand = "python {} --model_dir={} --pipeline_config_path={} --checkpoint_dir={}".format(trainerScript, filePaths['CHECKPOINTS'],files['PIPELINE_CONFIG'], filePaths['CHECKPOINTS'])

!{evalCommand}

2023-03-09 14:55:43.995372: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-03-09 14:55:45.546470: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-09 14:55:45.547999: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2023-03-09 14:55:45.548103: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:981] successful NUMA node read from SysFS had negative value (-1), but there must be at least on

Instructions for updating:
Use `tf.cast` instead.
W0309 14:55:48.313072 139832377866048 deprecation.py:350] From /home/ben/.conda/envs/tf/lib/python3.10/site-packages/tensorflow/python/util/dispatch.py:1176: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.cast` instead.
INFO:tensorflow:Waiting for new checkpoint at TF/workspace/models/640x640_model
I0309 14:55:49.491962 139832377866048 checkpoint_utils.py:140] Waiting for new checkpoint at TF/workspace/models/640x640_model
INFO:tensorflow:Found new checkpoint at TF/workspace/models/640x640_model/ckpt-11
I0309 14:55:49.492387 139832377866048 checkpoint_utils.py:149] Found new checkpoint at TF/workspace/models/640x640_model/ckpt-11
2023-03-09 14:56:00.295915: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:428] Loaded cuDNN version 8800
2023-03-09 14:56:00.301248: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:630] TensorFloat

^C
Traceback (most recent call last):
  File "/home/ben/Documents/Computer_Science/Hope/Core2/CircuitDetector/TF/models/research/object_detection/model_main_tf2.py", line 114, in <module>
    tf.compat.v1.app.run()
  File "/home/ben/.conda/envs/tf/lib/python3.10/site-packages/tensorflow/python/platform/app.py", line 36, in run
    _run(main=main, argv=argv, flags_parser=_parse_flags_tolerate_undef)
  File "/home/ben/.conda/envs/tf/lib/python3.10/site-packages/absl/app.py", line 308, in run
    _run_main(main, args)
  File "/home/ben/.conda/envs/tf/lib/python3.10/site-packages/absl/app.py", line 254, in _run_main
    sys.exit(main(argv))
  File "/home/ben/Documents/Computer_Science/Hope/Core2/CircuitDetector/TF/models/research/object_detection/model_main_tf2.py", line 81, in main
    model_lib_v2.eval_continuously(
  File "/home/ben/.conda/envs/tf/lib/python3.10/site-packages/object_detection/model_lib_v2.py", line 1135, in eval_continuously
    for latest_checkpoint in tf.