# Embedding Generation
This example generates a single embedding vector from a 16x16 Sentinel-2 RGB image patch using TerraMind.

In [37]:
from pathlib import Path

import torch
import rioxarray as rxr
from huggingface_hub import hf_hub_download
from terratorch.registry import BACKBONE_REGISTRY

# Pick the compute device: prefer CUDA, fall back to MPS, otherwise use the CPU.
device = (
    'cuda' if torch.cuda.is_available()
    else 'mps' if torch.backends.mps.is_available()
    else 'cpu'
)

In [38]:
# Build the pretrained TerraMind base backbone from the registry and move it
# to the selected device.
# To feed all S2 bands instead of RGB only, use modalities=["S2L2A"].
model = BACKBONE_REGISTRY.build(
    "terramind_v1_base",
    pretrained=True,
    modalities=["S2RGB"],  # S2 RGB bands
).to(device)

In [39]:
# Switch the model to evaluation mode. eval() returns the module itself, so as
# the last expression of the cell its structure is printed as the cell output.
model.eval()

TerraMindViT(
  (encoder_embeddings): ModuleDict(
    (untok_sen2rgb@224): ImageEncoderEmbedding(
      (proj): Linear(in_features=768, out_features=768, bias=False)
    )
  )
  (encoder): ModuleList(
    (0-11): 12 x Block(
      (norm1): LayerNorm()
      (attn): Attention(
        (qkv): Linear(in_features=768, out_features=2304, bias=False)
        (attn_drop): Dropout(p=0.0, inplace=False)
        (proj): Linear(in_features=768, out_features=768, bias=False)
        (proj_drop): Dropout(p=0.0, inplace=False)
      )
      (drop_path): Identity()
      (norm2): LayerNorm()
      (mlp): GatedMlp(
        (fc1): Linear(in_features=768, out_features=2048, bias=False)
        (act): SiLU()
        (fc2): Linear(in_features=2048, out_features=768, bias=False)
        (fc3): Linear(in_features=768, out_features=2048, bias=False)
      )
    )
  )
  (encoder_norm): LayerNorm()
  (tokenizer): ModuleDict()
)

In [48]:
# Fetch the example scene from the Hugging Face Hub unless a local copy already exists.
image_path = Path('examples/S2L2A/Santiago.tif')
if not image_path.exists():
    hf_hub_download(
        repo_id='ibm-esa-geospatial/Examples',
        repo_type='dataset',
        filename='S2L2A/Santiago.tif',
        local_dir='examples/',
    )

In [53]:
# Read the raster as an array and keep three bands over the first 16 rows and
# 16 columns, giving a channel-first (3, 16, 16) patch.
# NOTE(review): 4, 3, 2 are 0-based positions along the file's band axis, not
# Sentinel-2 band numbers - confirm they really are the red, green and blue
# bands of this file.
data = rxr.open_rasterio(image_path).values[[4, 3, 2], :16, :16]
data.shape

(3, 16, 16)

In [55]:
# Convert the patch to a float32 tensor on the selected device and prepend a
# batch dimension of size 1.
# NOTE: the name shadows the builtin input(); it is kept because later cells
# refer to it.
input = torch.tensor(data, dtype=torch.float32, device=device)[None]
input.shape

torch.Size([1, 3, 16, 16])

## Retrieve Embedding

In [57]:
# Run the pretrained model on the input patch and keep the last entry of its
# output, i.e. the output of the final layer.
# This is pure inference, so autograd tracking is disabled to avoid building a
# computation graph and holding on to intermediate activations.
with torch.no_grad():
    embedding = model(input)[-1]

In [58]:
# Display the shape of the output embedding.
# The recorded output is [1, 1, 768] - presumably (batch, tokens, embedding
# dimension), i.e. a single 768-dimensional vector for the 16x16 patch.
embedding.shape

torch.Size([1, 1, 768])