In [1]:
import os
# Ask TensorFlow to use the `cuda_malloc_async` GPU allocator. This cell runs
# before the cell that imports TensorFlow so the variable is already present
# in the process environment when TF starts up.
os.environ['TF_GPU_ALLOCATOR'] = 'cuda_malloc_async'

In [2]:
import tensorflow as tf

2025-11-29 19:44:25.621227: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-11-29 19:44:25.639825: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-11-29 19:44:25.645081: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-11-29 19:44:25.659636: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
from mobilenetv2ssd.core.config import load_config

In [4]:
from typing import Any

In [5]:
from mobilenetv2ssd.models.ssd.ops.loss_ops_tf import softmax_cross_entropy_loss

## Config Files To Test Orchestration

In [6]:
# YAML files handed to load_config in the next cell: training, model, data and
# evaluation settings. The paths are relative — presumably resolved against the
# notebook's working directory (TODO confirm in load_config).
main_cfg_path = "configs/train/default.yaml"
model_cfg_path = "configs/model/mobilenetv2_ssd_voc.yaml"
data_cfg_path = "configs/data/voc_224.yaml"
eval_cfg_path = "configs/eval/default.yaml"

In [7]:
# Load the experiment config from the four YAML files declared above.
config = load_config(
    main_cfg_path,
    model_cfg_path,
    data_cfg_path,
    eval_cfg_path,
)

In [8]:
# Echo the loaded config as a sanity check before it is fed to the loss helpers.
config

{'experiment': {'name': 'voc_mnv2_baseline',
  'description': 'MobileNetV2-SSD baseline on PASCAL VOC 224x224',
  'tags': ['ssd', 'mobilenetv2', 'voc', 'baseline']},
 'include': {'model_cfg': 'configs/model/mobilenetv2_ssd_voc.yaml',
  'data_cfg': 'configs/data/voc_224.yaml',
  'eval_cfg': 'configs/eval/default.yaml'},
 'train': {'seed': 1337,
  'device': 'auto',
  'epochs': 50,
  'batch_size': 3,
  'grad_accum_steps': 1,
  'optimizer': {'name': 'sgd',
   'lr': 0.001,
   'momentum': 0.9,
   'weight_decay': 0.0005,
   'nesterov': True},
  'scheduler': {'name': 'cosine', 'warmup': {'epochs': 5, 'factor': 0.1}},
  'loss': {'cls_loss_type': 'softmax_ce',
   'from_logits': True,
   'ignore_index': -1,
   'label_smoothing': 0.0,
   'use_sigmoid': False,
   'class_weights': None,
   'reg_loss_type': 'smooth_l1',
   'smooth_l1_beta': 1.0,
   'bbox_norm': 'none',
   'alpha': 1.0,
   'gamma': 2.0,
   'cls_weight': 1.0,
   'reg_weight': 1.0,
   'reduction': 'sum',
   'normalization': {'type': 'nu

## Helper Functions For Orchestration

In [9]:
def _extract_information_from_train_config(config : dict[str, Any]):
    train_config = config['train']
    loss_config = train_config.get("loss",{})
    loss_config = {
        "cls_loss_type": loss_config.get("cls_loss_type","ce_softmax"),
        "from_logits": loss_config.get("from_logits",False),
        "ignore_index": loss_config.get("ignore_index",-1),
        "use_sigmoid": loss_config.get("use_sigmoid",False),
    }

    return loss_config

In [10]:
# Run the helper on the loaded config; the four values shown should mirror
# the corresponding entries under config['train']['loss'].
_extract_information_from_train_config(config)

{'cls_loss_type': 'softmax_ce',
 'from_logits': True,
 'ignore_index': -1,
 'use_sigmoid': False}

## Orchestration Function

In [11]:
def build_conf_loss(config: dict[str,Any], predicted_logits: tf.Tensor, classification_targets: tf.Tensor, pos_mask: tf.Tensor, neg_mask: tf.Tensor, ignore_mask: tf.Tensor):
    """Compute the per-prior confidence (classification) loss.

    Steps:
      1. Read the loss settings from ``config``.
      2. Build the valid mask (priors not flagged by ``ignore_mask``).
      3. Compute the classification loss for every prior.
      4. Overwrite the loss of ignored priors with NaN.

    Label smoothing and class weights are not implemented in this function.

    Args:
        config: Experiment config; loss settings are extracted with
            ``_extract_information_from_train_config``.
        predicted_logits: Class scores. The loss helper is mapped over the
            leading axis; in the notebook example the shape is
            (images, priors, classes).
        classification_targets: Integer class label per prior, same leading
            axes as ``predicted_logits``. Labels at ignored priors are
            replaced with 0 before the loss is evaluated.
        pos_mask: Not used by the current implementation; kept so the
            signature stays stable for callers.
        neg_mask: Boolean mask of priors matched as negatives.
        ignore_mask: Boolean mask of priors to exclude.

    Returns:
        A tuple ``(conf_loss, candidate_negative_mask)``: the per-prior loss
        with NaN at ignored priors, and ``neg_mask`` with ignored priors
        removed.

    Raises:
        ValueError: If the configured ``cls_loss_type`` is not ``'softmax_ce'``.
    """
    loss_config = _extract_information_from_train_config(config)

    # Fail early with a clear message; otherwise per_class_loss would never be
    # assigned and the code below would die with an UnboundLocalError.
    cls_loss_type = loss_config['cls_loss_type']
    if cls_loss_type != 'softmax_ce':
        raise ValueError(
            f"Unsupported cls_loss_type {cls_loss_type!r}; only 'softmax_ce' is implemented."
        )

    # Safeguard: upstream matching should already exclude ignored priors.
    valid_mask = tf.logical_not(ignore_mask)
    candidate_negative_mask = tf.logical_and(neg_mask, valid_mask)

    # Ignored priors can carry a placeholder label such as -1. Substitute
    # label 0 so the loss op receives a usable class index; the value computed
    # for those priors is discarded below.
    ignored_labels = tf.zeros_like(classification_targets)
    valid_labels = tf.where(valid_mask, classification_targets, ignored_labels)

    # One call of the loss helper per element of the leading axis; each call
    # returns a 1-D float32 vector with one loss value per prior.
    per_class_loss = tf.map_fn(
        lambda inputs: softmax_cross_entropy_loss(inputs[0], inputs[1], reduction="none"),
        elems=(predicted_logits, valid_labels),
        fn_output_signature=tf.TensorSpec(shape=(None,), dtype=tf.float32),
    )

    # Mark ignored priors with NaN so they cannot be mistaken for real losses.
    nan = tf.constant(float("nan"), dtype=per_class_loss.dtype)
    conf_loss = tf.where(valid_mask, per_class_loss, tf.fill(tf.shape(per_class_loss), nan))

    return conf_loss, candidate_negative_mask

In [12]:
# Toy batch for exercising build_conf_loss: 2 images x 6 priors x 4 classes.
# In the targets below, class 0 marks background priors and -1 marks ignored
# priors; the trailing comment on each logits row gives the prior index and role.
pred_logits_b = tf.constant(
    [
        # -------- image 0 --------
        [[ 2.0, -1.0,  0.5,  0.0],   # 0 background
         [ 0.1,  2.0, -0.5, -1.0],   # 1 (pos class 1)
         [ 1.5, -0.2,  0.0, -0.3],   # 2 background
         [-0.5,  0.2,  2.5,  0.1],   # 3 (pos class 2)
         [ 0.0,  0.0,  0.0,  0.0],   # 4 ignored
         [ 2.5,  0.1, -0.2,  0.3]],  # 5 background

        # -------- image 1 (different distribution) --------
        [[ 0.3,  1.8, -0.5,  0.0],   # 0 (pos class 1)
         [ 2.2, -0.1,  0.1, -0.8],   # 1 background
         [ 0.0,  0.0,  0.0,  0.0],   # 2 ignored
         [ 0.1, -0.2,  2.0,  0.3],   # 3 (pos class 2)
         [ 1.2,  0.4, -0.3, -0.5],   # 4 background
         [ 0.5,  0.7,  0.2, -0.1]]   # 5 background
    ],
    dtype=tf.float32
)

# Target class per prior: 0 = background, 1/2 = object classes, -1 = ignored.
cls_targets_b = tf.constant(
    [
        # image 0
        [0, 1, 0, 2, -1, 0],
        # image 1
        [1, 0, -1, 2, 0, 0],
    ],
    dtype=tf.int32
)


# True where the target is an object class (target > 0).
pos_mask_b = tf.constant(
    [
        # image 0: priors 1, 3
        [False, True,  False, True,  False, False],
        # image 1: priors 0, 3
        [True,  False, False, True,  False, False],
    ],
    dtype=tf.bool
)

# True where the target is background (target == 0); ignored priors are False.
neg_mask_b = tf.constant(
    [
        # image 0: raw negatives at 0,2,5
        [True,  False, True,  False, False, True],
        # image 1: raw negatives at 1,4,5 (2 is ignored)
        [False, True,  False, False, True,  True],
    ],
    dtype=tf.bool
)

# True where the target is -1; build_conf_loss reports NaN for these priors.
ignore_mask_b = tf.constant(
    [
        # image 0: prior 4 ignored
        [False, False, False, False, True,  False],
        # image 1: prior 2 ignored
        [False, False, True,  False, False, False],
    ],
    dtype=tf.bool
)

I0000 00:00:1764463468.090479   84456 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1764463468.189310   84456 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1764463468.189506   84456 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1764463468.193628   84456 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1764463468.193785   84456 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

In [13]:
# Run the orchestrator on the toy batch; ignored priors should come back as NaN.
build_conf_loss(
    config=config,
    predicted_logits=pred_logits_b,
    classification_targets=cls_targets_b,
    pos_mask=pos_mask_b,
    neg_mask=neg_mask_b,
    ignore_mask=ignore_mask_b,
)

(<tf.Tensor: shape=(2, 6), dtype=float32, numpy=
 array([[0.34234956, 0.24798493, 0.45178396, 0.21572715,        nan,
         0.23801371],
        [0.3978952 , 0.24098527,        nan, 0.36676258, 0.6179616 ,
         1.2559102 ]], dtype=float32)>,
 <tf.Tensor: shape=(2, 6), dtype=bool, numpy=
 array([[ True, False,  True, False, False,  True],
        [False,  True, False, False,  True,  True]])>)