In [1]:
import tensorflow as tf

2025-11-29 00:40:12.159341: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-29 00:40:12.181566: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-29 00:40:12.188125: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-29 00:40:12.206843: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
from mobilenetv2ssd.core.config import load_config

In [3]:
from mobilenetv2ssd.models.ssd.ops.match_ops_tf import match_priors
from mobilenetv2ssd.models.ssd.ops.encode_ops_tf import encode_boxes_core, encode_boxes_batch

In [4]:
from typing import Any

## Config Files To Test Orchestration

In [5]:
# YAML config locations (train / model / data / eval) that are handed to
# load_config in the next cell.
main_cfg_path, model_cfg_path, data_cfg_path, eval_cfg_path = (
    "configs/train/default.yaml",
    "configs/model/mobilenetv2_ssd_voc.yaml",
    "configs/data/voc_224.yaml",
    "configs/eval/default.yaml",
)

In [48]:
# Load the four YAML files into one config object; later cells index it
# by section name ('train', 'model').
config = load_config(
    main_cfg_path,
    model_cfg_path,
    data_cfg_path,
    eval_cfg_path,
)

In [49]:
# Hand-built fixture: a batch of 2 images, each padded to 3 GT slots.
# Box values match the xyxy layout expected by the `gt_boxes_xyxy` argument
# they are later passed as.
gt_boxes_batched = tf.constant(
    [
        # Image 0: A, B, PAD
        [
            [0.1, 0.1, 0.3, 0.3],   # A (class 3)
            [0.6, 0.6, 0.9, 0.9],   # B (class 2)
            [0.0, 0.0, 0.0, 0.0],   # padding
        ],
        # Image 1: one GT C, then PAD, PAD
        [
            [0.15, 0.15, 0.35, 0.35],  # C (class 1)
            [0.0,  0.0,  0.0,  0.0],   # padding
            [0.0,  0.0,  0.0,  0.0],   # padding
        ],
    ],
    dtype=tf.float32,
)  # shape [2, 3, 4]

# Class id per GT slot, aligned row-for-row with gt_boxes_batched.
gt_labels_batched = tf.constant(
    [
        # Image 0 labels: A, B, PAD
        [3, 2, 0],  # 0 on the padded slot is only a placeholder; that slot is flagged False in valid_mask
        # Image 1 labels: C, PAD, PAD
        [1, 0, 0],
    ],
    dtype=tf.int32,
)  # shape [2, 3]

# True for real GT slots, False for the zero-padded ones above.
valid_mask = tf.constant(
    [
        [True,True,False],
        [True,False,False]
    ],
    dtype= tf.bool
)  # shape [2, 3]

# Six priors shared by both images, written as (cx, cy, w, h) per the variable name.
# The IoU notes refer to image 0's boxes A and B; each prior shares its centre
# with the named box, so IoU is the area ratio (e.g. 0.16^2 / 0.20^2 = 0.64).
priors_cxcywh = tf.constant(
    [
        [0.2,  0.2,  0.20, 0.20],   # P0 -> IoU 1.00 with A
        [0.2,  0.2,  0.16, 0.16],   # P1 -> IoU 0.64 with A
        [0.2,  0.2,  0.12, 0.12],   # P2 -> IoU 0.36 with A (under both the 0.4 fallback and the 0.5 configured negative threshold, so not in the ignore band)
        [0.75, 0.75, 0.30, 0.30],   # P3 -> IoU 1.00 with B
        [0.75, 0.75, 0.24, 0.24],   # P4 -> IoU 0.64 with B
        [0.05, 0.90, 0.10, 0.10],   # P5 -> IoU 0.00 (negative)
    ],
    dtype=tf.float32,
)  # shape [6, 4]

In [50]:
# Sanity check: image 0's GT slots (two real boxes followed by one all-zero padding row).
gt_boxes_batched[0]

<tf.Tensor: shape=(3, 4), dtype=float32, numpy=
array([[0.1, 0.1, 0.3, 0.3],
       [0.6, 0.6, 0.9, 0.9],
       [0. , 0. , 0. , 0. ]], dtype=float32)>

In [51]:
# Inspect the 'train' section of the loaded config (its 'sampler' entries are read by the helper below).
config['train']

{'seed': 1337,
 'device': 'auto',
 'epochs': 50,
 'batch_size': 3,
 'grad_accum_steps': 1,
 'optimizer': {'name': 'sgd',
  'lr': 0.001,
  'momentum': 0.9,
  'weight_decay': 0.0005,
  'nesterov': True},
 'scheduler': {'name': 'cosine', 'warmup': {'epochs': 5, 'factor': 0.1}},
 'loss': {'cls_loss_type': 'ce_softmax',
  'from_logits': True,
  'ignore_index': -1,
  'label_smoothing': 0.0,
  'use_sigmoid': False,
  'class_weights': None,
  'reg_loss_type': 'smooth_l1',
  'smooth_l1_beta': 1.0,
  'bbox_norm': 'none',
  'alpha': 1.0,
  'gamma': 2.0,
  'cls_weight': 1.0,
  'reg_weight': 1.0,
  'normalization': {'type': 'num_pos', 'epsilon': '1e-6'}},
 'sampler': {'neg_pos_ratio': 3.0, 'min_neg': 0, 'max_neg': None},
 'targets': {'filters': {'min_box_area': 0.0,
   'clip_to_image': True,
   'drop_invalid': True}},
 'diagnostics': False}

## Helper Functions For Orchestration

In [52]:
def _extract_information_from_train_config(config : dict[str, Any]):
    model_config = config['model']
    train_config = config['train']
    target_config = {
        "variances": model_config['priors'].get("variances",[0.1,0.2]),
        'image_size': model_config.get("input_size",[224,224]),
        'iou_threshold_pos': model_config['matcher'].get("iou_threshold_pos",0.5),
        'iou_threshold_neg': model_config['matcher'].get("iou_threshold_neg",0.4),
        'allow_low_quality_matches': model_config['matcher'].get("allow_low_quality_matches",True), # Bipartite flag
        'center_in_gt': model_config['matcher'].get("center_in_gt",False),
        'neg_pos_ratio': train_config['sampler'].get('neg_pos_ratio',3.0),
        'min_neg': train_config['sampler'].get('min_neg',0),
        'max_neg': train_config['sampler'].get('max_neg',None),
        'diagnostics': True
    }

    return target_config

In [53]:
# Show the flattened settings the helper derives from the loaded config.
_extract_information_from_train_config(config)

{'variances': [0.1, 0.2],
 'image_size': [224, 224],
 'iou_threshold_pos': 0.7,
 'iou_threshold_neg': 0.5,
 'allow_low_quality_matches': False,
 'center_in_gt': False,
 'neg_pos_ratio': 3.0,
 'min_neg': 0,
 'max_neg': None,
 'diagnostics': True}

## Orchestration Function

In [54]:
def building_training_targets(config: dict[str, Any], priors_cxcywh: tf.Tensor, gt_boxes_xyxy: tf.Tensor, gt_labels: tf.Tensor, gt_valid_mask : tf.Tensor):
    """Orchestrate the construction of training targets for a batch.

    Pipeline:
      1. Pull the matcher / encoder settings out of ``config``.
      2. Run ``match_priors`` once per image by mapping over the leading axis
         of the GT tensors with ``tf.map_fn`` (the priors are shared).
      3. Encode the matched GT boxes against the priors with
         ``encode_boxes_batch``.
      4. Bundle diagnostics when the extracted settings ask for them.

    GT box sanitisation is not done here; whether it is needed or belongs in
    preprocessing is still an open question.

    Args:
        config: Configuration mapping, forwarded to
            ``_extract_information_from_train_config``.
        priors_cxcywh: Priors shared by every image in the batch.
        gt_boxes_xyxy: Batched ground-truth boxes (leading axis = image).
        gt_labels: Batched ground-truth labels (leading axis = image).
        gt_valid_mask: Batched validity flags for the GT slots.

    Returns:
        Tuple ``(localization_targets, classification_targets, pos_mask,
        neg_mask, ignore_mask, diagnostics)``. ``diagnostics`` is a dict with
        ``matched_iou``, ``num_pos`` and ``matched_gt_idx`` when enabled,
        otherwise ``None``.
    """
    settings = _extract_information_from_train_config(config)

    # Structure of what match_priors yields for ONE image; tf.map_fn stacks
    # each entry along a new leading (batch) axis.
    per_image_signature = {
        "matched_gt_xyxy": tf.TensorSpec(shape=(None, 4), dtype=tf.float32),
        "matched_gt_labels": tf.TensorSpec(shape=(None,), dtype=tf.int32),
        "pos_mask": tf.TensorSpec(shape=(None,), dtype=tf.bool),
        "neg_mask": tf.TensorSpec(shape=(None,), dtype=tf.bool),
        "ignore_mask": tf.TensorSpec(shape=(None,), dtype=tf.bool),
        "matched_gt_idx": tf.TensorSpec(shape=(None,), dtype=tf.int32),
        "num_pos": tf.TensorSpec(shape=(), dtype=tf.int32),
        "matched_iou": tf.TensorSpec(shape=(None,), dtype=tf.float32),
    }

    def _match_single_image(image_gt):
        # image_gt is the (boxes, labels, valid_mask) triple of one image.
        boxes_xyxy, labels, valid = image_gt
        return match_priors(
            priors_cxcywh=priors_cxcywh,
            gt_boxes_xyxy=boxes_xyxy,
            gt_labels=labels,
            gt_valid_mask=valid,
            positive_iou_thresh=settings['iou_threshold_pos'],
            negative_iou_thresh=settings['iou_threshold_neg'],
            max_pos_per_gt=None,
            allow_low_qual_matches=settings['allow_low_quality_matches'],
            center_in_gt=settings['center_in_gt'],
            return_iou=settings['diagnostics'],
        )

    matches = tf.map_fn(
        _match_single_image,
        elems=(gt_boxes_xyxy, gt_labels, gt_valid_mask),
        fn_output_signature=per_image_signature,
    )

    # Regression targets: matched GT boxes expressed relative to the priors.
    loc_targets = encode_boxes_batch(
        matched_gt_xyxy=matches['matched_gt_xyxy'],
        priors_cxcywh=priors_cxcywh,
        variances=tuple(settings['variances']),
    )

    diagnostics = None
    if settings['diagnostics']:
        diagnostics = {
            'matched_iou': matches["matched_iou"],
            'num_pos': matches["num_pos"],
            "matched_gt_idx": matches["matched_gt_idx"],
        }

    return (
        loc_targets,
        matches["matched_gt_labels"],
        matches["pos_mask"],
        matches["neg_mask"],
        matches["ignore_mask"],
        diagnostics,
    )

In [55]:
# Smoke-run the orchestrator on the hand-built fixture batch.
building_training_targets(
    config=config,
    priors_cxcywh=priors_cxcywh,
    gt_boxes_xyxy=gt_boxes_batched,
    gt_labels=gt_labels_batched,
    gt_valid_mask=valid_mask,
)

(<tf.Tensor: shape=(2, 6, 4), dtype=float32, numpy=
 array([[[ 0.0000000e+00,  0.0000000e+00,  5.9604639e-07,  5.9604639e-07],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
         [ 0.0000000e+00,  0.0000000e+00, -8.9406973e-07, -8.9406973e-07],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00]],
 
        [[ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00],
         [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00,  0.0000000e+00]]],
       dtype=float32)>,
 <tf.Tensor: shape=