# Get image embeddings using SAM

This example shows how to create a 3LC Run containing image embeddings extracted with the Segment Anything Model (SAM) for a set of images, and how to reduce the dimensionality of those embeddings afterwards.

## Constants

In [1]:
from pathlib import Path

# 3LC project in which the table is looked up and the Run is created.
PROJECT_NAME = "3LC Tutorials"
# Key into segment_anything's sam_model_registry selecting which SAM variant to build.
MODEL_TYPE = "vit_b"
# URL the checkpoint is downloaded from when CHECKPOINT does not exist on disk yet.
MODEL_URL = "https://dl.fbaipublicfiles.com/segment_anything/sam_vit_b_01ec64.pth"
# Local checkpoint path; relative, so it is resolved against the notebook's working directory.
CHECKPOINT = "../../transient_data/sam_vit_b_01ec64.pth"
# Torch device to run on; None means auto-select (cuda:0, then mps, then cpu).
DEVICE = None
# Number of components the collected embeddings are reduced to.
EMBEDDING_DIM = 3
# Dimensionality-reduction method; "umap" and "pacmap" each have an install branch in the next cell.
REDUCTION_METHOD = "umap"
# NOTE(review): not referenced by any cell visible in this notebook — confirm whether it
# was meant to be forwarded to the metrics-collection step.
BATCH_SIZE = 4
# Set to True to pip-install the notebook's dependencies in the next cell.
INSTALL_DEPENDENCIES = False

## Install dependencies

In [2]:
%%capture
# The cell magic above must stay on the first line; it captures this cell's output so pip logs are not shown.
# Nothing is installed unless INSTALL_DEPENDENCIES is set to True in the constants cell.
if INSTALL_DEPENDENCIES:
    %pip --quiet install 3lc segment_anything opencv-python
    # Only the reduction backend selected by REDUCTION_METHOD is installed.
    if REDUCTION_METHOD == "umap":
        %pip --quiet install umap-learn joblib
    elif REDUCTION_METHOD == "pacmap":
        %pip --quiet install pacmap joblib

## Imports

In [3]:
import tlc
import torch
from segment_anything import sam_model_registry
from segment_anything.utils.transforms import ResizeLongestSide
import cv2

## Infer which device to use

In [4]:
# An explicit DEVICE always wins; otherwise prefer CUDA, then Apple MPS, and
# fall back to the CPU when no accelerator is available.
if DEVICE is not None:
    device = DEVICE
elif torch.cuda.is_available():
    device = "cuda:0"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

## Download model weights

In [5]:
# Download the SAM checkpoint only once; later runs reuse the file already on disk.
if not Path(CHECKPOINT).exists():
    # download_url_to_file does not create missing directories, so make sure the
    # destination folder exists before starting the download.
    Path(CHECKPOINT).parent.mkdir(parents=True, exist_ok=True)
    torch.hub.download_url_to_file(MODEL_URL, CHECKPOINT)

## Set up model and preprocessing

In [6]:
def create_model():
    """Build the SAM variant selected by MODEL_TYPE, ready for inference.

    The builder registered under MODEL_TYPE is called with the CHECKPOINT path,
    and the resulting model is moved to `device` and put in evaluation mode.

    Returns:
        The model instance created by the `sam_model_registry` builder.
    """
    build_sam = sam_model_registry[MODEL_TYPE]
    # Both .to() and .eval() return the module itself, so the calls can be chained.
    return build_sam(checkpoint=CHECKPOINT).to(device).eval()

In [7]:
# Build the model once; the cells below reuse this instance.
sam_model = create_model()
# Resize transform configured with the image encoder's img_size attribute.
RESIZE_TRANSFORM = ResizeLongestSide(sam_model.image_encoder.img_size)
# The model's own preprocess method is used as the final preprocessing step.
PREPROCESS_TRANSFORM = sam_model.preprocess

def transform_to_sam_format(sample):
    """Load the image referenced by a table sample and convert it to SAM's input format.

    Args:
        sample: Mapping whose "image" entry is the path of an image file.

    Returns:
        A dict with a single "image" key holding the preprocessed image tensor,
        created on `device`.

    Raises:
        FileNotFoundError: If OpenCV cannot load an image from the given path.
    """
    image_path = sample["image"]
    bgr_image = cv2.imread(image_path)
    if bgr_image is None:
        # cv2.imread reports failure by returning None rather than raising, which
        # would otherwise surface as an opaque error inside cvtColor.
        raise FileNotFoundError(f"Could not read image: {image_path!r}")

    # OpenCV loads images as BGR; convert to RGB before resizing.
    image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
    image = RESIZE_TRANSFORM.apply_image(image)
    # Move the last (channel) axis to the front before handing the tensor to the model's preprocessing.
    image = torch.as_tensor(image, device=device).permute(2, 0, 1).contiguous()
    image = PREPROCESS_TRANSFORM(image)

    return {"image": image}

  state_dict = torch.load(f)


## Create 3LC Table and Run

In [8]:
# Reuse the COCO128 table from ../1-create-tables/create-table-from-coco and apply the transformation defined above
# NOTE(review): this lookup presumably requires that notebook to have been run for PROJECT_NAME first — confirm.
table = tlc.Table.from_names("initial", "COCO128", PROJECT_NAME).map(transform_to_sam_format)

# Initialize a 3LC Run
# The returned handle is kept in `run` because the reduction step at the end of the notebook calls a method on it.
run = tlc.init(
    project_name=PROJECT_NAME,
    run_name="Collect embeddings",
    description="Collect embeddings for the COCO128 dataset using the SAM model",
)

3lc: Created new run at C:/Users/gudbrand/AppData/Local/3LC/3LC/projects/3LC Tutorials/runs/Collect embeddings


## Collect embeddings using SAM

In [9]:

# Metrics collector for embeddings; `layers` selects which layer(s) to record.
# NOTE(review): layers=[0] presumably indexes a layer of the wrapped module — confirm against the 3LC docs.
embeddings_metrics_collector = tlc.EmbeddingsMetricsCollector(layers=[0])

# Wrap only SAM's image encoder (not the full model); `layers` is kept in sync with the collector above.
predictor = tlc.Predictor(
    sam_model.image_encoder,
    layers=[0],
    unpack_dicts=True,  # presumably needed because the mapped table yields {"image": tensor} dicts — TODO confirm
    device=device,
)

# Run the predictor over the table and collect the embeddings with the collector defined above.
tlc.collect_metrics(
    table,
    embeddings_metrics_collector,
    predictor,
)

Output()

## Reduce dimensionality of embeddings

In [10]:
# Reduce the embeddings collected for `table` to EMBEDDING_DIM components using REDUCTION_METHOD.
# The call is the cell's last expression, so its return value is displayed as the cell output.
run.reduce_embeddings_by_foreign_table_url(
    table.url,
    method=REDUCTION_METHOD,
    n_components=EMBEDDING_DIM
)

3lc: Fitting UMAP model on column 'embeddings_0' for table ../metrics_0000


Epochs completed:   0%|            0/500 [00:00]

3lc: Transforming column 'embeddings_0' in table ../metrics_0000 with UMAP model ./reduced_0000.pkl


{Url('C:/Users/gudbrand/AppData/Local/3LC/3LC/projects/3LC Tutorials/runs/Collect embeddings/metrics_0000'): Url('C:/Users/gudbrand/AppData/Local/3LC/3LC/projects/3LC Tutorials/runs/Collect embeddings/reduced_0000')}