In [1]:
import shutil
import os
import sys
import re

# Define the home folder for your project
HOMEFOLDER = '/root/MultiAgent/FTCTraining/'  # Update this path if different

# Set environment variable
os.environ["HOMEFOLDER"] = HOMEFOLDER

# Define other important directories
MODELS_DIR = os.path.join(HOMEFOLDER, 'models')
RESEARCH_DIR = os.path.join(MODELS_DIR, 'research')
SCRIPTS_DIR = os.path.join(MODELS_DIR, 'research', 'object_detection', 'builders', 'model_builder_tf2_test.py')
FINALOUTPUTFOLDER_DIRNAME = 'final_output'
FINALOUTPUTFOLDER = os.path.join(HOMEFOLDER, FINALOUTPUTFOLDER_DIRNAME)
TRAINING_PROGRESS_DIR = os.path.join(HOMEFOLDER, 'training_progress')

# Print paths for verification
print(f"HOMEFOLDER set to: {HOMEFOLDER}")
print(f"Models Directory: {MODELS_DIR}")
print(f"Research Directory: {RESEARCH_DIR}")
print(f"Final Output Folder: {FINALOUTPUTFOLDER}")
print(f"Training Progress Directory: {TRAINING_PROGRESS_DIR}")


HOMEFOLDER set to: /root/MultiAgent/FTCTraining/
Models Directory: /root/MultiAgent/FTCTraining/models
Research Directory: /root/MultiAgent/FTCTraining/models/research
Final Output Folder: /root/MultiAgent/FTCTraining/final_output
Training Progress Directory: /root/MultiAgent/FTCTraining/training_progress


In [2]:
# Remove existing 'models' directory if it exists to avoid Git clone errors
if os.path.exists(MODELS_DIR):
    shutil.rmtree(MODELS_DIR)
    print(f"Removed existing directory: {MODELS_DIR}")
else:
    print(f"Directory does not exist: {MODELS_DIR}")

# Remove and recreate 'training_progress' and 'final_output' directories
for directory in [TRAINING_PROGRESS_DIR, FINALOUTPUTFOLDER]:
    if os.path.exists(directory):
        shutil.rmtree(directory)
        print(f"Removed existing directory: {directory}")
    os.makedirs(directory, exist_ok=True)
    print(f"Created directory: {directory}")


Removed existing directory: /root/MultiAgent/FTCTraining/models
Removed existing directory: /root/MultiAgent/FTCTraining/training_progress
Created directory: /root/MultiAgent/FTCTraining/training_progress
Removed existing directory: /root/MultiAgent/FTCTraining/final_output
Created directory: /root/MultiAgent/FTCTraining/final_output


In [3]:
# Install required Python packages
!pip install tf_slim
!pip install pillow
!pip install lvis
!pip install Cython
!pip install contextlib2
!pip install matplotlib

# Install protocol buffers compiler if not already installed
!sudo apt-get update
!sudo apt-get install -y protobuf-compiler


Get:1 file:/var/nv-tensorrt-local-repo-ubuntu2404-10.7.0-cuda-12.6  InRelease [1572 B]
Get:1 file:/var/nv-tensorrt-local-repo-ubuntu2404-10.7.0-cuda-12.6  InRelease [1572 B]
Hit:2 http://archive.ubuntu.com/ubuntu noble InRelease
Get:3 http://archive.ubuntu.com/ubuntu noble-updates InRelease [126 kB]
Hit:4 http://security.ubuntu.com/ubuntu noble-security InRelease               
Hit:5 http://archive.ubuntu.com/ubuntu noble-backports InRelease               
Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64  InRelease
Fetched 126 kB in 1s (206 kB/s)
Reading package lists... Done
W: https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/InRelease: Key is stored in legacy trusted.gpg keyring (/etc/apt/trusted.gpg), see the DEPRECATION section in apt-key(8) for details.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
protobuf-compiler is already the newest version (3.21.12-8.2build1).
0 upgra

In [4]:
# Clone the TensorFlow Models repository
!git clone --depth 1 https://github.com/tensorflow/models {MODELS_DIR}

# Navigate to the models directory
%cd {MODELS_DIR}

# Checkout the specific commit to ensure consistency
!git fetch --depth 1 origin ad1f7b56943998864db8f5db0706950e93bb7d81
!git checkout ad1f7b56943998864db8f5db0706950e93bb7d81

# Verify the current commit
!git rev-parse HEAD


Cloning into '/root/MultiAgent/FTCTraining/models'...
remote: Enumerating objects: 4306, done.[K
remote: Counting objects: 100% (4306/4306), done.[K
remote: Compressing objects: 100% (3325/3325), done.[K
remote: Total 4306 (delta 1208), reused 2115 (delta 908), pack-reused 0 (from 0)[K
Receiving objects: 100% (4306/4306), 53.17 MiB | 11.46 MiB/s, done.
Resolving deltas: 100% (1208/1208), done.
/root/MultiAgent/FTCTraining/models


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


remote: Enumerating objects: 3055, done.[K
remote: Counting objects: 100% (3055/3055), done.[K
remote: Compressing objects: 100% (1359/1359), done.[K
remote: Total 1824 (delta 1223), reused 701 (delta 446), pack-reused 0 (from 0)[K
Receiving objects: 100% (1824/1824), 10.05 MiB | 13.47 MiB/s, done.
Resolving deltas: 100% (1223/1223), completed with 739 local objects.
From https://github.com/tensorflow/models
 * branch            ad1f7b56943998864db8f5db0706950e93bb7d81 -> FETCH_HEAD
Note: switching to 'ad1f7b56943998864db8f5db0706950e93bb7d81'.

You are in 'detached HEAD' state. You can look around, make experimental
changes and commit them, and you can discard any commits you make in this
state without impacting any branches by switching back to a branch.

If you want to create a new branch to retain commits you create, you may
do so (now or later) by using -c with the switch command. Example:

  git switch -c <new-branch-name>

Or undo this operation with:

  git switch -

Turn o

In [5]:
# Navigate to the research directory
%cd {RESEARCH_DIR}

# Compile the protocol buffers
!protoc object_detection/protos/*.proto --python_out=.

# Add the research and slim directories to PYTHONPATH
sys.path.append(os.path.join(MODELS_DIR, 'research'))
sys.path.append(os.path.join(MODELS_DIR, 'research', 'slim'))

print("Protocol buffers compiled and PYTHONPATH updated.")


/root/MultiAgent/FTCTraining/models/research
Protocol buffers compiled and PYTHONPATH updated.


In [6]:
# Install the Object Detection API
!cp object_detection/packages/tf2/setup.py .

# Install the package
!python -m pip install .

print("Object Detection API installed successfully.")


Processing /root/MultiAgent/FTCTraining/models/research
  Preparing metadata (setup.py) ... [?25ldone
Building wheels for collected packages: object_detection
  Building wheel for object_detection (setup.py) ... [?25ldone
[?25h  Created wheel for object_detection: filename=object_detection-0.1-py3-none-any.whl size=1655500 sha256=897d516382f5b79bb599a4925a99f24a4286a21618c3a8ad8319910b1d75ee15
  Stored in directory: /tmp/pip-ephem-wheel-cache-5nzzna51/wheels/b6/2f/03/c8b84fe9815921b7e43b436d3a5ec0a12f379883962017df92
Successfully built object_detection
Installing collected packages: object_detection
  Attempting uninstall: object_detection
    Found existing installation: object_detection 0.1
    Uninstalling object_detection-0.1:
      Successfully uninstalled object_detection-0.1
Successfully installed object_detection-0.1
[0mObject Detection API installed successfully.


In [7]:
# Verify the installation by running the test script
!python object_detection/builders/model_builder_tf2_test.py


2025-02-01 11:13:10.262971: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-01 11:13:10.290184: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-01 11:13:10.290238: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-01 11:13:10.290931: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-01 11:13:10.295564: I tensorflow/core/platform/cpu_feature_guar

In [8]:
# Define the path to tfexample_decoder.py
tf_slim_path = '/root/miniconda/envs/FTCTrainingEnv/lib/python3.11/site-packages/tf_slim/data/tfexample_decoder.py'

# Check if the file exists
if os.path.exists(tf_slim_path):
    with open(tf_slim_path, 'r') as file:
        content = file.read()
        # Add TensorFlow import and replace deprecated functions
        content = re.sub(r'import abc', 'import tensorflow as tf\n\nimport abc', content)
        content = re.sub(r'control_flow_ops\.case', 'tf.case', content)
        content = re.sub(r'control_flow_ops\.cond', 'tf.compat.v1.cond', content)
    with open(tf_slim_path, 'w') as file:
        file.write(content)
    print(f"File {tf_slim_path} updated successfully.")
else:
    print(f"Warning: Could not find {tf_slim_path}. Please verify the path.")


File /root/miniconda/envs/FTCTrainingEnv/lib/python3.11/site-packages/tf_slim/data/tfexample_decoder.py updated successfully.


In [9]:
import os
import shutil
import zipfile

# Define dataset variables
# Assuming you have already downloaded 'samples-specimens.tfrecord.zip' locally
# Update the path to where your .tfrecord.zip is located
TFR_ZIP_PATH = "/root/MultiAgent/FTCTraining/FTCYolo.v7i.tfrecord.zip"  # Adjust if different
EXTRACT_DIR = os.path.join(HOMEFOLDER, 'train')

# Do NOT remove the 'models' directory here, as it contains the cloned TensorFlow models repository
# Commenting out the deletion of MODELS_DIR:
# if os.path.exists(MODELS_DIR):
#     shutil.rmtree(MODELS_DIR)
#     print(f"Removed existing directory: {MODELS_DIR}")

# Unzip the TFRecord file
if os.path.exists(TFR_ZIP_PATH):
    with zipfile.ZipFile(TFR_ZIP_PATH, 'r') as zip_ref:
        zip_ref.extractall(EXTRACT_DIR)
    print(f"Extracted {TFR_ZIP_PATH} to {EXTRACT_DIR}")
else:
    print(f"TFRecord zip file not found at {TFR_ZIP_PATH}. Please upload it to the specified directory.")


Extracted /root/MultiAgent/FTCTraining/FTCYolo.v7i.tfrecord.zip to /root/MultiAgent/FTCTraining/train


In [10]:
import fnmatch

def find_files(directory, pattern):
    for root, dirs, files in os.walk(directory):
        for basename in files:
            if fnmatch.fnmatch(basename, pattern):
                filename = os.path.join(root, basename)
                yield filename

def set_tfrecord_variables(directory):
    train_record_fname = ''
    val_record_fname = ''
    label_map_pbtxt_fname = ''

    for tfrecord_file in find_files(directory, '*.tfrecord'):
        if '/train/' in tfrecord_file:
            train_record_fname = tfrecord_file
        elif '/valid/' in tfrecord_file:
            val_record_fname = tfrecord_file
        elif '/test/' in tfrecord_file:
            pass

    for label_map_file in find_files(directory, '*_label_map.pbtxt'):
        label_map_pbtxt_fname = label_map_file  # Assuming one common label map file

    return train_record_fname, val_record_fname, label_map_pbtxt_fname

train_record_fname, val_record_fname, label_map_pbtxt_fname = set_tfrecord_variables(EXTRACT_DIR)

print("Train Record File:", train_record_fname)
print("Validation Record File:", val_record_fname)
print("Label Map File:", label_map_pbtxt_fname)


Train Record File: /root/MultiAgent/FTCTraining/train/train/samples-specimens.tfrecord
Validation Record File: 
Label Map File: /root/MultiAgent/FTCTraining/train/train/samples-specimens_label_map.pbtxt


In [11]:
import tarfile

# Define the chosen model and its configurations
chosen_model = 'ssd-mobilenet-v2'
MODELS_CONFIG = {
    'ssd-mobilenet-v2': {
        'model_name': 'ssd_mobilenet_v2_320x320_coco17_tpu-8',
        'base_pipeline_file': 'limelight_ssd_mobilenet_v2_320x320_coco17_tpu-8.config',
        'pretrained_checkpoint': 'limelight_ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz',
    },
}

model_name = MODELS_CONFIG[chosen_model]['model_name']
pretrained_checkpoint = MODELS_CONFIG[chosen_model]['pretrained_checkpoint']
base_pipeline_file = MODELS_CONFIG[chosen_model]['base_pipeline_file']

# Create "mymodel" folder for pre-trained weights and configuration files
mymodel_dir = os.path.join(MODELS_DIR, 'mymodel')
os.makedirs(mymodel_dir, exist_ok=True)
print(f"Created directory: {mymodel_dir}")

# Download pre-trained model weights
download_tar = 'https://downloads.limelightvision.io/models/' + pretrained_checkpoint
checkpoint_tar_path = os.path.join(mymodel_dir, pretrained_checkpoint)

if not os.path.exists(checkpoint_tar_path):
    !wget {download_tar} -P {mymodel_dir}
    print(f"Downloaded {pretrained_checkpoint} to {mymodel_dir}")
else:
    print(f"Pre-trained checkpoint already exists at {checkpoint_tar_path}")

# Extract the checkpoint
with tarfile.open(checkpoint_tar_path) as tar:
    tar.extractall(path=mymodel_dir)
    print(f"Extracted {pretrained_checkpoint} in {mymodel_dir}")

# Download training configuration file for model
download_config = 'https://downloads.limelightvision.io/models/' + base_pipeline_file
pipeline_config_path = os.path.join(mymodel_dir, base_pipeline_file)

if not os.path.exists(pipeline_config_path):
    !wget {download_config} -P {mymodel_dir}
    print(f"Downloaded {base_pipeline_file} to {mymodel_dir}")
else:
    print(f"Pipeline config already exists at {pipeline_config_path}")


Created directory: /root/MultiAgent/FTCTraining/models/mymodel
--2025-02-01 11:13:32--  https://downloads.limelightvision.io/models/limelight_ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz
Resolving downloads.limelightvision.io (downloads.limelightvision.io)... 18.160.200.119, 18.160.200.4, 18.160.200.95, ...
Connecting to downloads.limelightvision.io (downloads.limelightvision.io)|18.160.200.119|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 46042990 (44M) [application/x-gzip]
Saving to: ‘/root/MultiAgent/FTCTraining/models/mymodel/limelight_ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz’


2025-02-01 11:13:35 (20.1 MB/s) - ‘/root/MultiAgent/FTCTraining/models/mymodel/limelight_ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz’ saved [46042990/46042990]

Downloaded limelight_ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz to /root/MultiAgent/FTCTraining/models/mymodel
Extracted limelight_ssd_mobilenet_v2_320x320_coco17_tpu-8.tar.gz in /root/MultiAgent/FTCTraining/models

In [12]:
from object_detection.utils import label_map_util

def get_num_classes(pbtxt_fname):
    label_map = label_map_util.load_labelmap(pbtxt_fname)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return len(category_index.keys())

def get_classes(pbtxt_fname):
    label_map = label_map_util.load_labelmap(pbtxt_fname)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    class_names = [category['name'] for category in category_index.values()]
    return class_names

def create_label_file(filename, labels):
    with open(filename, 'w') as file:
        for label in labels:
            file.write(label + '\n')

num_classes = get_num_classes(label_map_pbtxt_fname)
classes = get_classes(label_map_pbtxt_fname)

print('Total classes:', num_classes)
print('Classes:', classes)

# Generate labels file
labels_file_path = os.path.join(HOMEFOLDER, "limelight_neural_detector_labels.txt")
create_label_file(labels_file_path, classes)
print(f"Labels file created at: {labels_file_path}")


2025-02-01 11:13:36.791557: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-01 11:13:36.815796: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-01 11:13:36.815819: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-01 11:13:36.816513: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-01 11:13:36.821500: I tensorflow/core/platform/cpu_feature_guar

Total classes: 3
Classes: ['blue-specimen', 'red-specimen', 'yellow-specimen']
Labels file created at: /root/MultiAgent/FTCTraining/limelight_neural_detector_labels.txt


In [13]:
# Define training parameters
num_steps = 40000
checkpoint_every = 2000
batch_size = 16

# Paths
fine_tune_checkpoint = os.path.join(mymodel_dir, model_name, 'checkpoint/ckpt-0')  # Adjust if necessary

# Custom pipeline configuration file
custom_pipeline_config = os.path.join(HOMEFOLDER, 'models/mymodel', 'pipeline_file.config')

print('Writing custom configuration file...')

with open(pipeline_config_path, 'r') as f:
    s = f.read()

with open(custom_pipeline_config, 'w') as f:
    # Set fine_tune_checkpoint path
    s = re.sub('fine_tune_checkpoint: ".*?"',
               f'fine_tune_checkpoint: "{fine_tune_checkpoint}"', s)

    # Set tfrecord files for train and test datasets
    s = re.sub(
        r'input_path: ".*?PATH_TO_BE_CONFIGURED/train.*?"',
        f'input_path: "{train_record_fname}"', s)
    s = re.sub(
        r'input_path: ".*?PATH_TO_BE_CONFIGURED/val.*?"',
        f'input_path: "{val_record_fname}"', s)

    # Set label_map_path
    s = re.sub(
        'label_map_path: ".*?"', f'label_map_path: "{label_map_pbtxt_fname}"', s)

    # Set batch_size
    s = re.sub('batch_size: [0-9]+',
               f'batch_size: {batch_size}', s)

    # Set training steps, num_steps
    s = re.sub('num_steps: [0-9]+',
               f'num_steps: {num_steps}', s)

    # Set number of classes num_classes
    s = re.sub('num_classes: [0-9]+',
               f'num_classes: {num_classes}', s)

    # Change fine-tune checkpoint type from "classification" to "detection"
    s = re.sub(
        'fine_tune_checkpoint_type: "classification"',
        'fine_tune_checkpoint_type: "detection"', s)

    # If using ssd-mobilenet-v2, reduce learning rate
    if chosen_model == 'ssd-mobilenet-v2':
        s = re.sub('learning_rate_base: .8',
                   'learning_rate_base: 0.004', s)
        s = re.sub('warmup_learning_rate: 0.13333',
                   'warmup_learning_rate: 0.0016666', s)

    # Write the modified config to the custom pipeline file
    f.write(s)

print(f"Custom pipeline configuration written to: {custom_pipeline_config}")


Writing custom configuration file...
Custom pipeline configuration written to: /root/MultiAgent/FTCTraining/models/mymodel/pipeline_file.config


In [14]:
val_record_fname = "/root/MultiAgent/FTCTraining/valid/samples-specimens.tfrecord"

def validate_paths():
    required_files = {
        "Training TFRecord": train_record_fname,
        "Validation TFRecord": val_record_fname,
        "Label Map": label_map_pbtxt_fname,
        "Pipeline Config": custom_pipeline_config
    }

    missing = False
    for name, path in required_files.items():
        if not os.path.exists(path):
            print(f"❌ Missing {name}: {path}")
            missing = True
        else:
            print(f"✅ Found {name}: {path}")

    if missing:
        raise FileNotFoundError("Missing required training files. Check paths above.")

validate_paths()


✅ Found Training TFRecord: /root/MultiAgent/FTCTraining/train/train/samples-specimens.tfrecord
✅ Found Validation TFRecord: /root/MultiAgent/FTCTraining/valid/samples-specimens.tfrecord
✅ Found Label Map: /root/MultiAgent/FTCTraining/train/train/samples-specimens_label_map.pbtxt
✅ Found Pipeline Config: /root/MultiAgent/FTCTraining/models/mymodel/pipeline_file.config


In [15]:
# Navigate to research directory
%cd {RESEARCH_DIR}


# Start training
!python object_detection/model_main_tf2.py \
    --pipeline_config_path={custom_pipeline_config} \
    --model_dir={TRAINING_PROGRESS_DIR} \
    --alsologtostderr \
    --checkpoint_every_n={checkpoint_every} \
    --num_train_steps={num_steps} \
    --num_workers=2 \
    --sample_1_of_n_eval_examples=1


/root/MultiAgent/FTCTraining/models/research


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


2025-02-01 11:13:38.508315: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-01 11:13:38.536248: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-01 11:13:38.536303: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-01 11:13:38.537037: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-01 11:13:38.541295: I tensorflow/core/platform/cpu_feature_guar

In [16]:
import os
import shutil
import zipfile
import re
import random
import glob
import io
from PIL import Image
import tensorflow as tf

# --------------------------
# Variables (assumed defined previously)
# --------------------------
# HOMEFOLDER = '/root/MultiAgent/FTCTraining/'          # (your project home folder)
# MODELS_DIR = os.path.join(HOMEFOLDER, 'models')         # Cloned TF models repo folder
# TRAINING_PROGRESS_DIR = os.path.join(HOMEFOLDER, 'training_progress')
# custom_pipeline_config = os.path.join(HOMEFOLDER, 'models/mymodel', 'pipeline_file.config')
# train_record_fname, val_record_fname, label_map_pbtxt_fname defined earlier

# FINALOUTPUTFOLDER is the directory to store exported models and TFLite outputs.
FINALOUTPUTFOLDER = os.path.join(HOMEFOLDER, 'final_output')

In [17]:
# Remove final output folder if it exists
if os.path.exists(FINALOUTPUTFOLDER) and os.path.isdir(FINALOUTPUTFOLDER):
    shutil.rmtree(FINALOUTPUTFOLDER)
    print(f"Removed existing directory: {FINALOUTPUTFOLDER}")

# Create a new final output folder
!mkdir {FINALOUTPUTFOLDER}
print("Final output folder:", FINALOUTPUTFOLDER)

Removed existing directory: /root/MultiAgent/FTCTraining/final_output
Final output folder: /root/MultiAgent/FTCTraining/final_output


In [18]:
# --------------------------
# 2. Export the TFLite Graph
# --------------------------
# Path to training directory (the exporter will choose the highest checkpoint)
last_model_path = os.path.join(HOMEFOLDER, 'training_progress')

# Exporter script (using the TensorFlow models repo)
exporter_path = os.path.join(MODELS_DIR, 'research', 'object_detection', 'export_tflite_graph_tf2.py')
output_directory = FINALOUTPUTFOLDER

!python {exporter_path} \
    --trained_checkpoint_dir {last_model_path} \
    --output_directory {output_directory} \
    --pipeline_config_path {custom_pipeline_config}

2025-02-01 11:30:37.656899: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-02-01 11:30:38.042193: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-02-01 11:30:38.042248: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-02-01 11:30:38.045541: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-02-01 11:30:38.056924: I tensorflow/core/platform/cpu_feature_guar

In [19]:
# --------------------------
# 3. Convert Exported SavedModel to 32-bit TFLite Model
# --------------------------
# Convert the saved_model to TFLite Flatbuffer
saved_model_dir = os.path.join(FINALOUTPUTFOLDER, 'saved_model')
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
tflite_model = converter.convert()
model_path_32bit = os.path.join(FINALOUTPUTFOLDER, 'limelight_neural_detector_32bit.tflite')
with open(model_path_32bit, 'wb') as f:
    f.write(tflite_model)
print("32-bit TFLite model saved to:", model_path_32bit)

# Copy the labels file and the pipeline configuration file to the final output folder
!cp {HOMEFOLDER}limelight_neural_detector_labels.txt {FINALOUTPUTFOLDER}
!cp {custom_pipeline_config} {FINALOUTPUTFOLDER}

2025-02-01 11:31:29.415079: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-01 11:31:29.500802: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-01 11:31:29.501195: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-01 11:31:29.514920: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:887] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2025-02-01 11:31:29.514972: I external/local_xla/xla/stream_executor

32-bit TFLite model saved to: /root/MultiAgent/FTCTraining/final_output/limelight_neural_detector_32bit.tflite


In [20]:
# --------------------------
# 5. Extract Sample Images from the TFRecord
# --------------------------
def extract_images_from_tfrecord(tfrecord_path, output_folder, num_samples=100):
    """Extracts images from a TFRecord file and saves them to an output folder."""
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    saved_images = 0
    raw_dataset = tf.data.TFRecordDataset(tfrecord_path)
    for raw_record in raw_dataset.take(num_samples):
        example = tf.train.Example()
        example.ParseFromString(raw_record.numpy())
        # Adjust the key below if your TFRecord uses a different name
        image_data = example.features.feature['image/encoded'].bytes_list.value[0]
        image = Image.open(io.BytesIO(image_data))
        image.save(os.path.join(output_folder, f'image_{saved_images}.png'))
        saved_images += 1
        if saved_images >= num_samples:
            break
    print(f"Extracted {saved_images} images to {output_folder}")

# Define the output folder for extracted images
extracted_sample_folder = os.path.join(HOMEFOLDER, 'extracted_samples')

# Remove the sample folder if it exists
if os.path.exists(extracted_sample_folder) and os.path.isdir(extracted_sample_folder):
    shutil.rmtree(extracted_sample_folder)
    print(f"Removed existing directory: {extracted_sample_folder}")

# Extract images from the training TFRecord
extract_images_from_tfrecord(train_record_fname, extracted_sample_folder)

Extracted 100 images to /root/MultiAgent/FTCTraining/extracted_samples


In [21]:
# --------------------------
# 6. Create a Representative Dataset for Quantization
# --------------------------
# Gather list of sample images from the extracted samples folder
quant_image_list = []
quant_image_list.extend(glob.glob(os.path.join(extracted_sample_folder, '*.jpg')))
quant_image_list.extend(glob.glob(os.path.join(extracted_sample_folder, '*.jpeg')))
quant_image_list.extend(glob.glob(os.path.join(extracted_sample_folder, '*.JPG')))
quant_image_list.extend(glob.glob(os.path.join(extracted_sample_folder, '*.png')))
quant_image_list.extend(glob.glob(os.path.join(extracted_sample_folder, '*.bmp')))

print("Pulling samples from:", extracted_sample_folder)
print("Number of samples found:", len(quant_image_list))

Pulling samples from: /root/MultiAgent/FTCTraining/extracted_samples
Number of samples found: 100


In [22]:
# --------------------------
# 7. Representative Data Generator Function
# --------------------------
# Initialize an interpreter for the 32-bit model to obtain input size details
interpreter = tf.lite.Interpreter(model_path=model_path_32bit)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
# Assume input shape is [1, height, width, channels]
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]

def representative_data_gen():
    """A generator function that yields batches for quantization calibration."""
    quant_num = 300  # Number of images to use for calibration
    for i in range(quant_num):
        pick_me = random.choice(quant_image_list)
        print("Using sample:", pick_me)
        image = tf.io.read_file(pick_me)
        if pick_me.lower().endswith('.jpg') or pick_me.lower().endswith('.jpeg'):
            image = tf.io.decode_jpeg(image, channels=3)
        elif pick_me.lower().endswith('.png'):
            image = tf.io.decode_png(image, channels=3)
        elif pick_me.lower().endswith('.bmp'):
            image = tf.io.decode_bmp(image, channels=3)
        image = tf.image.resize(image, [width, height])
        image = tf.cast(image / 255.0, tf.float32)
        image = tf.expand_dims(image, 0)
        yield [image]

INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


In [23]:
# --------------------------
# 8. Quantize the Model (8-bit Full Integer)
# --------------------------
# Reinitialize the converter using the saved model directory
converter = tf.lite.TFLiteConverter.from_saved_model(saved_model_dir)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that only quantizable operations are allowed
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, 
                                         tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.target_spec.supported_types = [tf.int8]
converter.inference_input_type = tf.uint8
converter.inference_output_type = tf.float32
print("Beginning full integer quantization...")
tflite_quant_model = converter.convert()
quant_model_path = os.path.join(FINALOUTPUTFOLDER, 'limelight_neural_detector_8bit.tflite')
with open(quant_model_path, 'wb') as f:
    f.write(tflite_quant_model)
print("8-bit quantized model saved to:", quant_model_path)

Beginning full integer quantization...


2025-02-01 11:34:12.010243: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:378] Ignored output_format.
2025-02-01 11:34:12.010284: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:381] Ignored drop_control_dependency.
2025-02-01 11:34:12.010426: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /root/MultiAgent/FTCTraining/final_output/saved_model
2025-02-01 11:34:12.033374: I tensorflow/cc/saved_model/reader.cc:51] Reading meta graph with tags { serve }
2025-02-01 11:34:12.033399: I tensorflow/cc/saved_model/reader.cc:146] Reading SavedModel debug info (if present) from: /root/MultiAgent/FTCTraining/final_output/saved_model
2025-02-01 11:34:12.094001: I tensorflow/cc/saved_model/loader.cc:233] Restoring SavedModel bundle.
2025-02-01 11:34:12.455400: I tensorflow/cc/saved_model/loader.cc:217] Running initialization op on SavedModel bundle at path: /root/MultiAgent/FTCTraining/final_output/saved_model
2025-02-01 11:34:12.585554: I

Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_88.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_10.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_66.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_39.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_64.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_20.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_94.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_45.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_72.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_35.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_67.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_74.png
Using sample: /root/MultiAgent/FTCTraining/extracted_samples/image_49.png
Using sample: /root/MultiAgent/FTCTrai

fully_quantize: 0, inference_type: 6, input_inference_type: UINT8, output_inference_type: FLOAT32
2025-02-01 11:35:19.166559: I tensorflow/compiler/mlir/lite/flatbuffer_export.cc:2989] Estimated count of arithmetic ops: 1.300 G  ops, equivalently 0.650 G  MACs


In [24]:
# --------------------------
# 9. Compile the Quantized Model for Edge TPU
# --------------------------
!curl https://packages.cloud.google.com/apt/doc/apt-key.gpg | sudo apt-key add -
!echo "deb https://packages.cloud.google.com/apt coral-edgetpu-stable main" | sudo tee /etc/apt/sources.list.d/coral-edgetpu.list
!sudo apt-get update
!sudo apt-get install -y edgetpu-compiler

# Run the Edge TPU compiler on the 8-bit model
!cd {FINALOUTPUTFOLDER} && edgetpu_compiler limelight_neural_detector_8bit.tflite && mv limelight_neural_detector_8bit_edgetpu.tflite limelight_neural_detector_coral.tflite && rm limelight_neural_detector_8bit_edgetpu.log


  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100  1022  100  1022    0     0   3150      0 --:--:-- --:--:-- --:--:--  3154
OK
deb https://packages.cloud.google.com/apt coral-edgetpu-stable main
Get:1 file:/var/nv-tensorrt-local-repo-ubuntu2404-10.7.0-cuda-12.6  InRelease [1572 B]
Get:1 file:/var/nv-tensorrt-local-repo-ubuntu2404-10.7.0-cuda-12.6  InRelease [1572 B]
Hit:2 http://archive.ubuntu.com/ubuntu noble InRelease                         
Hit:3 http://archive.ubuntu.com/ubuntu noble-updates InRelease                 
Hit:4 http://archive.ubuntu.com/ubuntu noble-backports InRelease               
Hit:5 https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64  InRelease
Hit:6 http://security.ubuntu.com/ubuntu noble-security InRelease    
Get:7 https://packages.cloud.google.com/apt coral-edgetpu-stable InRelease [1423 B]
Get:8 https://packages.cloud.goog

In [28]:
!sudo apt update
!sudo apt install zip -y


Get:1 file:/var/nv-tensorrt-local-repo-ubuntu2404-10.7.0-cuda-12.6  InRelease [1572 B]
Get:1 file:/var/nv-tensorrt-local-repo-ubuntu2404-10.7.0-cuda-12.6  InRelease [1572 B]
Hit:2 http://archive.ubuntu.com/ubuntu noble InRelease                         [0m[33m[33m
Hit:3 http://security.ubuntu.com/ubuntu noble-security InRelease    [0m       
Hit:4 http://archive.ubuntu.com/ubuntu noble-updates InRelease      [0m[33m
Hit:5 http://archive.ubuntu.com/ubuntu noble-backports InRelease    [0m[33m
Hit:6 https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64  InRelease
Hit:7 https://packages.cloud.google.com/apt coral-edgetpu-stable InRelease
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
115 packages can be upgraded. Run 'apt list --upgradable' to see them.
[1;33mW: [0mhttps://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/InRelease: Key is stored in legacy trusted.gpg keyring (/etc/apt/trusted

In [29]:
# --------------------------
# 10. Package and Download the Final Detectors
# --------------------------
# Remove any previous zip file
!rm {HOMEFOLDER}limelight_detectors.zip
# Zip the final output folder
!zip -r {HOMEFOLDER}limelight_detectors.zip {FINALOUTPUTFOLDER}

# # If running in Google Colab, download the zip file
# from google.colab import files
# files.download(os.path.join(HOMEFOLDER, 'limelight_detectors.zip'))

rm: cannot remove '/root/MultiAgent/FTCTraining/limelight_detectors.zip': No such file or directory
  adding: root/MultiAgent/FTCTraining/final_output/ (stored 0%)
  adding: root/MultiAgent/FTCTraining/final_output/saved_model/ (stored 0%)
  adding: root/MultiAgent/FTCTraining/final_output/saved_model/saved_model.pb (deflated 91%)
  adding: root/MultiAgent/FTCTraining/final_output/saved_model/variables/ (stored 0%)
  adding: root/MultiAgent/FTCTraining/final_output/saved_model/variables/variables.data-00000-of-00001 (deflated 8%)
  adding: root/MultiAgent/FTCTraining/final_output/saved_model/variables/variables.index (deflated 77%)
  adding: root/MultiAgent/FTCTraining/final_output/saved_model/assets/ (stored 0%)
  adding: root/MultiAgent/FTCTraining/final_output/saved_model/fingerprint.pb (stored 0%)
  adding: root/MultiAgent/FTCTraining/final_output/pipeline_file.config (deflated 67%)
  adding: root/MultiAgent/FTCTraining/final_output/limelight_neural_detector_labels.txt (deflated 35

In [34]:
!sudo apt-get update
!sudo apt-get install libedgetpu1-std


Get:1 file:/var/nv-tensorrt-local-repo-ubuntu2404-10.7.0-cuda-12.6  InRelease [1572 B]
Get:1 file:/var/nv-tensorrt-local-repo-ubuntu2404-10.7.0-cuda-12.6  InRelease [1572 B]
Hit:2 http://security.ubuntu.com/ubuntu noble-security InRelease               
Hit:3 https://packages.cloud.google.com/apt coral-edgetpu-stable InRelease     
Hit:4 https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64  InRelease
Hit:5 http://archive.ubuntu.com/ubuntu noble InRelease
Hit:6 http://archive.ubuntu.com/ubuntu noble-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu noble-backports InRelease
Reading package lists... Done
W: https://packages.cloud.google.com/apt/dists/coral-edgetpu-stable/InRelease: Key is stored in legacy trusted.gpg keyring (/etc/apt/trusted.gpg), see the DEPRECATION section in apt-key(8) for details.
W: https://developer.download.nvidia.com/compute/cuda/repos/wsl-ubuntu/x86_64/InRelease: Key is stored in legacy trusted.gpg keyring (/etc/apt/trusted.gpg), se

In [35]:
import cv2
import numpy as np
import tensorflow as tf

# Path to your quantized 8-bit model compiled for the Edge TPU
MODEL_PATH = '/root/MultiAgent/FTCTraining/final_output/limelight_neural_detector_coral.tflite'

# Try to load the Edge TPU delegate
try:
    edgetpu_delegate = tf.lite.experimental.load_delegate('libedgetpu.so.1')
    interpreter = tf.lite.Interpreter(model_path=MODEL_PATH,
                                        experimental_delegates=[edgetpu_delegate])
    print("Edge TPU delegate loaded successfully.")
except Exception as e:
    print("Failed to load Edge TPU delegate:", e)
    # Fall back to the default interpreter if needed.
    interpreter = tf.lite.Interpreter(model_path=MODEL_PATH)

interpreter.allocate_tensors()

# Get input and output details from the model
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

# Get input dimensions (assuming input tensor shape is [1, height, width, channels])
input_shape = input_details[0]['shape']
model_height = input_shape[1]
model_width = input_shape[2]

# Define a minimum detection threshold (adjust as needed)
DETECTION_THRESHOLD = 0.5

# Open the default camera (change 0 to another index if necessary)
cap = cv2.VideoCapture(0)
print("Starting video stream... Press 'q' to exit.")

while True:
    ret, frame = cap.read()
    if not ret:
        print("Failed to grab frame.")
        break

    # Preprocess the frame: resize to model dimensions and convert to uint8 (for quantized models)
    resized_frame = cv2.resize(frame, (model_width, model_height))
    input_data = np.expand_dims(resized_frame, axis=0).astype(np.uint8)
    
    # Run inference
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    
    # Retrieve detection results.
    # (Assumes the model output structure is similar to the TF2 Object Detection API)
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]       # shape: [num_detections, 4]
    classes = interpreter.get_tensor(output_details[1]['index'])[0]     # shape: [num_detections]
    scores = interpreter.get_tensor(output_details[2]['index'])[0]      # shape: [num_detections]
    num_detections = int(interpreter.get_tensor(output_details[3]['index'])[0])
    
    # Draw detections on the original frame
    imH, imW, _ = frame.shape
    for i in range(num_detections):
        if scores[i] >= DETECTION_THRESHOLD:
            # Convert normalized box coordinates to pixel values.
            ymin, xmin, ymax, xmax = boxes[i]
            xmin = int(xmin * imW)
            xmax = int(xmax * imW)
            ymin = int(ymin * imH)
            ymax = int(ymax * imH)

            # Draw the bounding box and label
            cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
            label = f'ID:{int(classes[i])} {scores[i]:.2f}'
            cv2.putText(frame, label, (xmin, ymin - 10), 
                        cv2.FONT_HERSHEY_SIMPLEX, 0.5, (10, 255, 0), 2)

    # Display the frame with detections
    cv2.imshow('8-bit Quantized Edge TPU Model Inference', frame)
    
    # Exit the loop when 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Clean up resources
cap.release()
cv2.destroyAllWindows()


Failed to load Edge TPU delegate: Failed to load delegate from libedgetpu.so.1



RuntimeError: Encountered unresolved custom op: edgetpu-custom-op.
See instructions: https://www.tensorflow.org/lite/guide/ops_custom Node number 0 (edgetpu-custom-op) failed to prepare.Encountered unresolved custom op: edgetpu-custom-op.
See instructions: https://www.tensorflow.org/lite/guide/ops_custom Node number 0 (edgetpu-custom-op) failed to prepare.