In [48]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Define the ResNet backbone (using ResNet50 as an example)
def resnet_backbone(input_shape, trainable=True):
    base_model = tf.keras.applications.ResNet50(
        input_shape=input_shape,
        include_top=False,
        # weights='imagenet',
    )
    base_model.trainable = trainable  # Freeze the layers for transfer learning
    return base_model

# Define SSD detection head
def ssd_head(num_classes, input_shape):
    # Layers to predict bounding boxes and class probabilities
    def ssd_block(x, filters, name):
        x = layers.Conv2D(filters, (3, 3), padding='same', name=name + '_conv')(x)
        x = layers.BatchNormalization(name=name + '_bn')(x)
        x = layers.ReLU(name=name + '_relu')(x)
        return x

    input_layer = layers.Input(shape=input_shape, name="input")
    backbone = resnet_backbone(input_shape)

    # Feature map from different layers of ResNet
    x = backbone(input_layer)
    x = ssd_block(x, 512, 'ssd_block1')
    x = ssd_block(x, 256, 'ssd_block2')

    # Bounding box predictions (location)
    locs = layers.Conv2D(4 * 4, (3, 3), padding='same', activation='linear', name="loc_conv")(x)  # 4 coordinates per box
    locs = layers.Reshape((-1, 4), name="loc_reshape")(locs)

    # Class predictions
    confs = layers.Conv2D(num_classes * 4, (3, 3), padding='same', activation='softmax', name="conf_conv")(x)  # num_classes per box
    confs = layers.Reshape((-1, num_classes), name="conf_reshape")(confs)

    # Concatenate location and confidence predictions
    output = layers.Concatenate(axis=-1, name="detection_output")([locs, confs])

    # model = models.Model(inputs=input_layer, outputs=(backbone.output, output))
    model = models.Model(inputs=input_layer, outputs=output)
    # model = models.Model(inputs=input_layer, outputs=(locs, confs))
    return model

# Define the SSD model
def ssd_resnet(num_classes, input_shape):
    # input_shape = (300, 300, 3)  # Input image size
    model = ssd_head(num_classes, input_shape)
    return model

# Example usage
num_classes = 3
ssd_model = ssd_resnet(num_classes, input_shape=(300, 300, 3))

# Compile the model with a loss for object detection
ssd_model.compile(optimizer='adam', loss='categorical_crossentropy')
ssd_model.summary()

In [40]:
print(type(ssd_model.output))
print(ssd_model.output)


<class 'keras.src.backend.common.keras_tensor.KerasTensor'>
<KerasTensor shape=(None, 400, 7), dtype=float32, sparse=False, name=keras_tensor_2787>


In [None]:
import tensorflow as tf
import numpy as np

# Assume model output is of shape (batch_size, num_boxes, 4 + num_classes)
output = ssd_model.output  # This is the output of the SSD model

# Separate the location and class prediction components
# Location predictions (bounding boxes)
locs = output[:, :, :4]  # First 4 values per box are for location coordinates

# Class predictions (class scores)
confs = output[:, :, 4:]  # Remaining values are for class confidence scores

# Apply softmax to class scores to get probabilities
confs_softmax = tf.nn.softmax(confs, axis=-1)  # (batch_size, num_boxes, num_classes)

# Get the predicted class for each box (highest probability class)
predicted_classes = tf.argmax(confs_softmax, axis=-1)  # (batch_size, num_boxes)

# Get the confidence of the predicted class
predicted_confidences = tf.reduce_max(confs_softmax, axis=-1)  # (batch_size, num_boxes)

# Example on how to use this during inference
# Assuming batch_size = 1 for simplicity during inference
def extract_classes_and_boxes(model_output):
    locs = model_output[:, :, :4]
    confs = model_output[:, :, 4:]
    
    confs_softmax = tf.nn.softmax(confs, axis=-1)
    predicted_classes = tf.argmax(confs_softmax, axis=-1)
    predicted_confidences = tf.reduce_max(confs_softmax, axis=-1)

    return predicted_classes, predicted_confidences, locs

# Example output from model (for one image, with 10 boxes and 21 classes)
dummy_output = np.random.rand(1, 10, 4 + num_classes)  # Fake data for illustration
pred_classes, pred_confidences, pred_boxes = extract_classes_and_boxes(dummy_output)

print("Predicted Classes:", pred_classes)
print("Predicted Confidences:", pred_confidences)
print("Predicted Boxes:", pred_boxes)


In [50]:
# Assuming you have already compiled and trained the model
# Perform inference to get raw outputs (as NumPy arrays)
# predictions = np.random.rand(1, 10, 4 + num_classes)  # Fake data for illustration

input_data = np.random.rand(1, 300, 300, 3).astype(np.float32)
predictions = ssd_model.predict(input_data)

# # Extract the bounding boxes and class confidences from the output
locs = predictions[:, :, :4]  # Location predictions (bounding boxes)
confs = predictions[:, :, 4:]  # Class predictions (class scores)

# locs = predictions[0]
# confs = predictions[1]

# Now apply softmax using TensorFlow (or NumPy) on these NumPy arrays
import numpy as np

# Apply softmax to class predictions to get probabilities
confs_softmax = tf.nn.softmax(confs, axis=-1).numpy()  # Convert to NumPy if necessary

# Get the predicted class for each box (highest probability class)
predicted_classes = np.argmax(confs_softmax, axis=-1)

# Get the confidence of the predicted class
predicted_confidences = np.max(confs_softmax, axis=-1)

# Continue with further processing (e.g., NMS)...

print("Predicted Classes:", predicted_classes)
print("Predicted Confidences:", predicted_confidences)
print("Predicted Boxes:", locs)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 57ms/step
Predicted Classes: [[1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 0 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2
  2 1 0 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2 1 1 2 2
  1 0 2 2 2 0 2 2 1 1 2 2 1 0 2 2 1 0 2 2 1 0 2 0 1 0 2 0 1 1 2 2 1 1 2 2
  1 1 2 2 1 0 2 2 1 2 2 2 1 0 2 2 1 0 2 2 1 0 2 2 1 0 2 2 1 0 2 2 1 1 2 2
  1 1 2 2 1 1 2 2 1 0 2 2 2 2 2 2 1 0 2 2 1 0 2 2 1 0 2 2 1 0 2 2 1 2 2 2
  1 1 2 2 1 1 2 2 1 1 2 2 1 0 2 2 2 2 2 2 1 0 2 2 2 0 2 2 2 0 2 2 2 0 2 2
  1 2 2 2 1 2 2 2 1 0 2 2 1 1 2 2 1 0 2 0 1 2 2 0 1 0 2 2 2 0 2 2 2 1 2 2
  1 2 2 2 1 2 2 2 1 2 2 2 1 1 2 2 1 2 2 2 1 0 2 2 1 2 2 0 2 1 2 2 2 1 2 2
  2 2 2 2 1 2 2 2 1 2 2 2 1 2 2 2 1 0 2 2 2 2 2 2 1 0 2 2 2 2 2 2 1 0 2 2
  1 0 2 2 1 0 2 2 1 2 2 2 1 0 2 2 1 1 2 2 1 0 2 2 1 2 2 2 1 2 2 0 1 2 2 0
  1 0 2 0 1 0 2 0 1 0 2 0 1 0 2 0 1 0 2 1 1 0 2 1 1 0 2 0 1 0 2 0 2 2 2 1
  2 0 2 0]]
Predicted Confidences: [[0.3401969  0.3366033  0.337292   0.34583762 0.3424758  0.3

In [51]:
from src.models.resnet_ssd_v0 import ssd_resnet as ssd_resnet_v0

ssd_model_v0 = ssd_resnet_v0(num_classes, input_shape=(300, 300, 3))