# Add Embeddings to an Existing Table

In this example we will extend an existing table with embeddings computed from a pre-trained model.

- Write an initial table containing a single column of image URLs.
- Write a new table containing the input URLs and the embeddings computed from a pre-trained model.
- Apply dimensionality reduction to the extended table to get a final table containing the URLs, the embeddings, and the reduced embeddings.

In [1]:
from transformers import ViTImageProcessor, ViTModel
import torch
from torchvision import transforms
from PIL import Image
from pathlib import Path
import os
import tlc
import tqdm

In [2]:
# Number of images per inference batch. Used both as the DataLoader batch size
# and as the chunk size when slicing the list of image paths.
batch_size = 4

## Write the initial table

We write a simple table containing a single column of image URLs from our COCO-128 dataset.

In [3]:
# Absolute path (with forward slashes) to the folder holding the COCO-128 images.
data_path = Path("../data/coco128/images").absolute().as_posix()
project_name = "add-image-metrics"

# Writer for the initial table: one "image" column containing image paths.
table_writer = tlc.TableWriter(
    table_name="coco128",
    dataset_name="coco128",
    project_name=project_name,
    description="COCO128 dataset",
    column_schemas={
        "image": tlc.ImagePath
    }
)

# Paths are collected in the same order as the rows are written, so that later
# cells can pair each table row with its path by position.
image_paths = []

# os.listdir() returns entries in arbitrary order; sorting makes the row order
# (and therefore image_paths) reproducible across runs and platforms.
for image_name in sorted(os.listdir(data_path)):
    image_path = os.path.join(data_path, image_name)
    image_paths.append(image_path)
    table_writer.add_row({"image": image_path})

table = table_writer.finalize()

## Extend the table with embeddings from a pre-trained model

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the image processor and the ViT model from the same checkpoint, then
# move the model to the selected device.
model_name = "google/vit-base-patch16-224"
image_processor = ViTImageProcessor.from_pretrained(model_name)
model = ViTModel.from_pretrained(model_name)
model = model.to(device)

# Preprocessing pipeline: resize to 224x224, convert to a tensor, and normalize
# with the mean/std reported by the image processor.
preprocess_steps = [
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=image_processor.image_mean, std=image_processor.image_std),
]
preprocess = transforms.Compose(preprocess_steps)

# Loading the checkpoint warns that 'vit.pooler.dense.bias' and
# 'vit.pooler.dense.weight' are newly initialized. That is harmless here: the
# embeddings are read from the last hidden state, so the pooler layer is never used.

Some weights of ViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit.pooler.dense.bias', 'vit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Schema of the per-image embedding column: float32 values tagged with the
# NN-embedding number role, 768 entries per row.
# NOTE(review): 768 presumably matches the hidden size of the loaded ViT model;
# confirm if the checkpoint is changed.
# NOTE(review): sample_type="hidden" is presumably why printing a row of the
# resulting table shows only the "image" key; confirm against the 3LC docs.
embedding_schema = tlc.Schema(
    value=tlc.Float32Value(number_role=tlc.NUMBER_ROLE_NN_EMBEDDING),
    size0=tlc.DimensionNumericValue(768),
    sample_type="hidden",
    writable=False,
)

# Writer for the extended table: the original image paths plus their embeddings.
# NOTE(review): this table is written to project "add-embeddings", while the
# initial table uses project "add-image-metrics"; confirm the split is intended.
extended_table_writer = tlc.TableWriter(
    table_name="added-embeddings",
    dataset_name="coco128",
    project_name="add-embeddings",
    description="COCO128 dataset with added embeddings",
    column_schemas={"image": tlc.ImagePath, "embedding": embedding_schema},
)

def _load_model_input(row):
    """Open the image referenced by ``row["image"]``, force RGB, and preprocess it."""
    return preprocess(Image.open(row["image"]).convert("RGB"))


# NOTE(review): re-running this cell calls table.map again on the same table;
# presumably that registers the mapping a second time. Confirm before re-running.
table.map(_load_model_input)

# Sequential, single-process loading so that batches follow the table's row order.
dataloader = torch.utils.data.DataLoader(
    dataset=table,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

num_batches = len(dataloader)


def batched_iterator(list_, batch_size, num_batches):
    """Lazily yield ``num_batches`` consecutive slices of ``list_``.

    Slice ``i`` covers ``list_[i * batch_size : (i + 1) * batch_size]``. Slices
    that extend past the end of ``list_`` come back shortened or empty.

    Args:
        list_: Any sliceable sequence.
        batch_size: Number of items per slice.
        num_batches: Number of slices to produce.

    Returns:
        A generator over the slices.
    """
    batch_indices = range(num_batches)

    def _slices():
        for batch_index in batch_indices:
            start = batch_index * batch_size
            yield list_[start : start + batch_size]

    return _slices()


# Walk the dataloader and the list of image paths in lockstep: each batch of
# preprocessed images is paired with the slice of paths at the same position.
for batch, filenames_batch in tqdm.tqdm(
    zip(
        dataloader,
        batched_iterator(image_paths, batch_size, num_batches),
    ),
    total=num_batches,
    desc="Running inference on batches",
):
    with torch.no_grad():
        outputs = model(batch.to(device))
        # Keep the token at sequence position 0 (the [CLS] token for ViT).
        # The indexing already yields a 2-D (batch, hidden) tensor, so no
        # squeeze is applied: squeezing would collapse a batch of size 1 to
        # 1-D and break the one-embedding-per-filename pairing below.
        # .cpu() is required before .numpy() when the model runs on CUDA.
        embeddings = outputs.last_hidden_state[:, 0, :].cpu().numpy()

    extended_table_writer.add_batch(
        {
            "image": filenames_batch,
            "embedding": embeddings.tolist(),
        }
    )

extended_table = extended_table_writer.finalize()

print(extended_table[0])

Running inference on batches: 100%|██████████| 32/32 [00:30<00:00,  1.04it/s]

{'image': 'C:/Project/notebook-examples/data/coco128/images/000000000009.jpg'}





In [6]:
# Options for the dimensionality reduction: UMAP down to 2 components with a
# Euclidean metric, keeping the source embedding column in the output table.
# NOTE(review): no random seed is set here, so the reduced coordinates may
# differ between runs; check whether tlc.reduce_embeddings accepts a seed.
reduction_options = {
    "method": "umap",
    "n_components": 2,
    "metric": "euclidean",
    "retain_source_embedding_column": True,
}
urls = tlc.reduce_embeddings([extended_table], **reduction_options)

# The result is indexed by the input table's URL to get the reduced table's URL.
reduced_table_url = urls[extended_table.url]

3lc: Fitting UMAP model on column 'embedding' for table ../added-embeddings


Epochs completed:   0%|            0/500 [00:00]

3lc: Transforming column 'embedding' in table ../added-embeddings with UMAP model ./added-embeddings_reduced_embedding_00.pkl


In [7]:
# Load the table produced by the reduction step and show its first row.
reduced_table = tlc.Table.from_url(reduced_table_url)
first_reduced_row = reduced_table[0]
print(first_reduced_row)

{'image': 'C:/Project/notebook-examples/data/coco128/images/000000000009.jpg', 'embedding_umap': [3.49639892578125, 11.783905982971191]}


In [8]:
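# DELETE ME! Scratch cell, not part of the tutorial flow: it tries out
# tlc.EmbeddingsMetricsCollector and tlc.Predictor on a single batch.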

def flatten_function(layer_output):
    """Return the slice at index 0 along the second axis of ``layer_output``.

    Equivalent to ``layer_output[:, 0, :]``: the second axis is removed, so a
    3-D input of shape (a, b, c) gives a 2-D result of shape (a, c).
    """
    first_position = (slice(None), 0, slice(None))
    return layer_output[first_position]

# Index of the model layer whose output is collected.
# NOTE(review): 223 is a magic number; which module it refers to cannot be
# determined from this cell. Document or derive it if this code is kept.
embedding_layer = 223

collector = tlc.EmbeddingsMetricsCollector(
    layers=[embedding_layer],
    reshape_strategy={embedding_layer: flatten_function},
)
predictor = tlc.Predictor(model, layers=[embedding_layer])

# Only the first batch is processed: the loop exits right after it.
for batch in dataloader:
    model_output = predictor(batch)
    metrics = collector(batch, model_output)

    # table_writer.add_batch(metrics, ...)
    break

print(metrics)


{'embeddings_223': tensor([[ 0.6929, -0.1778,  1.4126,  ..., -0.2675, -0.2489,  0.6796],
        [-0.0734, -0.2110,  0.3797,  ...,  0.5803,  0.7378,  0.1924],
        [-1.5251, -1.0982,  0.9778,  ...,  1.3487, -0.2712, -0.5566],
        [-0.3959, -0.5170,  0.3551,  ..., -0.4861, -0.4202, -0.7321]])}
