# Per Bounding Box Embeddings Example

This notebook demonstrates how to extract embeddings for bounding boxes in `tlc` Tables using a pretrained EfficientNet model. The generated embeddings are then reduced in dimensionality and stored as metrics in a run.

Since the example uses a classification model, we can also extract class probabilities for each bounding box. The predicted labels are also stored as metrics in the run.

In [None]:
# Names handed to tlc: PROJECT_NAME is used for the table URL, the table and the run;
# DATASET_NAME is passed to the table only.
PROJECT_NAME = "Bounding Box Embeddings"
DATASET_NAME = "Balloons"
# When True, the install cell pip-installs torch, torchvision, timm and tlc[umap]
INSTALL_DEPENDENCIES = False
# Directory in which a local classifier checkpoint (bb_classifier.pth) is looked up
TRANSIENT_DATA_PATH = "./transient_data"
# Root directory under which the balloons images and annotations file are located
TEST_DATA_PATH = "../tests/test_data/data"
# NOTE(review): not referenced by any visible cell — presumably read by external tooling; confirm
TLC_PUBLIC_EXAMPLES_DEVELOPER_MODE = True

In [None]:
%%capture
# Optional dependency installation, skipped unless INSTALL_DEPENDENCIES is True.
# The %%capture cell magic above keeps the installer output out of the notebook.
if INSTALL_DEPENDENCIES:
    # torch and torchvision are fetched from the PyTorch cu118 wheel index
    %pip --quiet install torch --index-url https://download.pytorch.org/whl/cu118
    %pip --quiet install torchvision --index-url https://download.pytorch.org/whl/cu118
    %pip --quiet install timm
    %pip --quiet install tlc[umap]

In [None]:
from __future__ import annotations

from io import BytesIO

import tqdm
from PIL import Image
import numpy as np
import torch
from torchvision import transforms

import tlc

## Set Up Input Table

We will use a `TableFromCoco` to load the "Balloons" dataset from an annotations file and a folder of images.

In [None]:
# Resolve the image folder and the annotations file below the test data directory
images_dir = tlc.Url(TEST_DATA_PATH + "/balloons/train").to_absolute()
annotations_file = tlc.Url(TEST_DATA_PATH + "/balloons/train/train-annotations.json").to_absolute()

# The table itself is written below the default table write location
table_url = tlc.Table.default_write_location() / f"{PROJECT_NAME}/table_from_coco.json"

# Collect the constructor arguments first, then build the table from them
coco_table_args = dict(
    url=table_url,
    dataset_name=DATASET_NAME,
    project_name=PROJECT_NAME,
    input_url=annotations_file.to_relative(),
    image_folder_url=images_dir.to_relative(),
    row_cache_url="../table_from_coco.parquet",
)
input_table = tlc.TableFromCoco(**coco_table_args)

print(input_table.url)

In [None]:
# Start a run in the project and register the table as one of its input tables
run = tlc.init(project_name=PROJECT_NAME)
run.add_input_table(input_table)

In [None]:
import json

# Walk the table schema down to the bounding box list: rows -> bbs -> bb_list
rows_schema = input_table.schema.values["rows"]
bbs_schema = rows_schema.values["bbs"]
bb_schema = bbs_schema.values["bb_list"]

# Value map of the "bbs" column, i.e. the labels used by the input table
label_map = input_table.get_value_map_for_column("bbs")
label_count = len(label_map)
label_map_json = json.dumps(label_map, indent=2)
print(f"Input table uses {label_count} unique label(s): {label_map_json}")

## Initialize the Model

Now we load the EfficientNet model. If a pretrained model is available locally, it will be loaded. Otherwise, we'll download a pretrained version.

In [None]:
import timm

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Initialize a pretrained classifier model.
# First try the checkpoint stored below TRANSIENT_DATA_PATH; if that fails,
# fall back to creating the model with `pretrained=True`.
# NOTE(review): the two branches use different `num_classes` (2 vs. len(label_map)) —
# confirm this difference is intended.
try:
    model = timm.create_model("efficientnet_b0", num_classes=2, checkpoint_path=TRANSIENT_DATA_PATH + "/bb_classifier.pth").to(device)
    print("Loaded pretrained model")
except Exception as checkpoint_error:
    # `except Exception` instead of a bare `except:` so that KeyboardInterrupt and
    # SystemExit still propagate. The reason is printed so that a genuine problem
    # (e.g. a corrupt or mismatching checkpoint) is not silently hidden.
    print(f"Could not load local checkpoint: {checkpoint_error!r}")
    print("Downloading pretrained model")
    model = timm.create_model("efficientnet_b0", num_classes=len(label_map), pretrained=True).to(device)

# Put the model in evaluation mode; the trailing `;` suppresses the cell's repr output
model.eval();


In [None]:
# The hidden layer we will use as embeddings: the `flatten` attribute of the model's
# `global_pool`. A forward hook is registered on it further down to capture its output.
hidden_layer = model.global_pool.flatten

## Collecting Bounding Box Embeddings

In this section, we'll walk through the process of extracting embeddings for each bounding box present in our input images.


In [None]:
# Image Preprocessing
# Channel-wise normalization statistics (the commonly used ImageNet values)
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Steps applied, in order, to every bounding box crop before it reaches the model
preprocess_steps = [
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
]
preprocess = transforms.Compose(preprocess_steps)

# Initialize empty lists to store all embeddings and predicted labels.
# - all_embeddings: the model's direct outputs per crop (the values that are argmax'ed into labels)
# - all_labels: the argmax class index per crop
# - all_hidden_outputs: the hooked hidden-layer output per crop
all_embeddings: list[np.ndarray] = []
all_labels: list[int] = []
all_hidden_outputs: list[np.ndarray] = []

# Register a hook to pick up the hidden layer output
# (the hook function appends every captured output to this list)
output_list: list[torch.Tensor] = []

def hook_fn(module, input, output):
    """Store the output of the hooked layer.

    Appends `output` to the module-level `output_list`. The `module` and
    `input` arguments are accepted but not used.
    """
    output_list.append(output)

# Attach the hook to the hidden layer; the returned handle is used to remove it after inference
hook_handle = hidden_layer.register_forward_hook(hook_fn)

# Batched inference setup
batch_size = 4  # number of crops collected before a forward pass is triggered
mini_batch: list[torch.Tensor] = []  # preprocessed crops waiting for the next forward pass
# One entry per bounding box: the index of the table row the box belongs to
batch_to_image_map: list[int] = []

def run_inference_on_batch(mini_batch: list[torch.Tensor]) -> None:
    """Run the model on one mini-batch and record the results.

    The tensors are stacked into a single batch, moved to `device` and passed
    through `model` without gradient tracking. Results are appended to the
    module-level accumulators:

    - `all_hidden_outputs`: the hidden-layer output captured by the forward hook
    - `all_embeddings`: the model's direct outputs
    - `all_labels`: the argmax class index of each output, as plain Python ints

    Args:
        mini_batch: Tensors to run inference on. They are combined with
            `torch.stack`, so they must all have the same shape.

    Raises:
        IndexError: If the forward pass did not put anything into `output_list`
            (for example because no hook is registered).
    """
    mini_batch_tensor = torch.stack(mini_batch).to(device)
    with torch.no_grad():
        mini_batch_embeddings = model(mini_batch_tensor)

    # Collect and clear the hook outputs. Taking the newest entry and then emptying
    # the list keeps captured tensors from piling up if the hook fired more than once.
    mini_batch_hidden = output_list[-1].cpu().numpy()
    output_list.clear()
    all_hidden_outputs.extend(mini_batch_hidden)

    all_embeddings.extend(mini_batch_embeddings.cpu().numpy())
    mini_batch_labels = torch.argmax(mini_batch_embeddings, dim=1)
    # `tolist()` yields Python ints, matching the declared `list[int]` of `all_labels`
    all_labels.extend(mini_batch_labels.tolist())

# Run every bounding box crop through the model in mini-batches.
# The hook is removed in `finally`, so an error part-way through the loop does not
# leave a stale hook on the layer that would fire again when the cell is re-run.
try:
    for row_idx, row in tqdm.tqdm(enumerate(input_table.table_rows), total=len(input_table), desc="Running inference on table"):
        image_bbs = row["bbs"]["bb_list"]
        if len(image_bbs) == 0:
            continue
        image_filename = row["image"]
        image_bytes = tlc.Url(image_filename).read()
        # Force three channels: the normalization step uses 3-channel statistics, so
        # grayscale, palette or RGBA images would otherwise fail during preprocessing.
        image = Image.open(BytesIO(image_bytes)).convert("RGB")
        w, h = image.size

        for bb in image_bbs:
            bb_crop = tlc.BBCropInterface.crop(image, bb, bb_schema, h, w)
            bb_crop_tensor = preprocess(bb_crop)

            # Check if adding this bb_crop_tensor will overfill the mini_batch
            if len(mini_batch) >= batch_size:
                run_inference_on_batch(mini_batch)
                mini_batch.clear()

            mini_batch.append(bb_crop_tensor)
            batch_to_image_map.append(row_idx)

    # Run inference on remaining items in mini_batch if it's not empty
    if len(mini_batch) > 0:
        run_inference_on_batch(mini_batch)
        mini_batch.clear()
finally:
    # Remove the hook
    hook_handle.remove()

## Dimensionality Reduction

Once the embeddings are collected, the next step is to reduce their dimensionality for easier analysis.

In [None]:
import umap

# Stack the collected hidden-layer outputs into a single array
all_embeddings_np = np.vstack(all_hidden_outputs)
print(f"UMAP input shape: {all_embeddings_np.shape}")

# Fit UMAP with a fixed seed so that re-running the notebook on a fresh kernel
# produces the same 3D projection (UMAP is stochastic when no seed is given).
# NOTE(review): a fixed random_state may make umap run single-threaded — confirm this is acceptable.
UMAP_RANDOM_STATE = 42
reducer = umap.UMAP(n_components=3, random_state=UMAP_RANDOM_STATE)
embedding_3d = reducer.fit_transform(all_embeddings_np)

## Store Metrics in a Run

Finally, we add these reduced embeddings and predicted labels to our current run for further analysis in the Dashboard.

In [None]:
# Repack embeddings and labels into groups per image
grouped_embeddings: list[list[np.ndarray]] = [[] for _ in range(len(input_table))]
grouped_labels: list[list[int]] = [[] for _ in range(len(input_table))]

for img_idx, embed, label in zip(batch_to_image_map, embedding_3d, all_labels):
    grouped_labels[img_idx].append(label)
    grouped_embeddings[img_idx].append(embed)

In [None]:

# Create a schema for the embeddings
embedding_value = tlc.Float32Value()
embedding_size = tlc.DimensionNumericValue(value_min=3, value_max=3)  # 3D embedding
embedding_count = tlc.DimensionNumericValue(value_min=0, value_max=1000)  # Max 1000 bbs per image
float_list_list_schema = tlc.Schema(value=embedding_value, size0=embedding_size, size1=embedding_count)

# Create a schema with a label map for the labels: the input table's labels,
# followed by one extra "background" entry keyed by the number of labels
label_value_map = {float(k): tlc.MapElement(v) for k, v in label_map.items()}
label_value_map[len(label_map)] = tlc.MapElement("background")

label_value = tlc.Int32Value(value_map=label_value_map)
label_count = tlc.DimensionNumericValue(value_min=0, value_max=1000)
label_schema = tlc.Schema(value=label_value, size0=label_count)

# Column name -> per-image values, and column name -> schema overriding the default
data = {"per_bb_embeddings": grouped_embeddings, "per_bb_labels": grouped_labels}
schemas = {"per_bb_embeddings": float_list_list_schema, "per_bb_labels": label_schema}

# Add the computed per-bb embeddings and labels as metrics to the run
run.add_metrics_data(data, override_column_schemas=schemas, input_table_url=input_table.url)