## Embeddings

In [28]:
from arcgis.learn import Embeddings 

### Embeddings for images

In [2]:
# Build the embedding extractor for image data. No backbone is passed here;
# the download log below shows resnet34 weights being fetched.
emb_image = Embeddings(
    'image',
)

Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to C:\Users\Neh13173/.cache\torch\hub\checkpoints\resnet34-b627a593.pth
100%|█████████████████████████████████████████████████████████████████████████████| 83.3M/83.3M [00:04<00:00, 19.7MB/s]


#### Get supported backbones

In [3]:
# List the backbones available for image embeddings. Called on the class
# (not on an instance) to match the text example later in this notebook,
# which shows that no instance is needed for this query.
Embeddings.supported_backbones('image')

['resnet18',
 'resnet34',
 'resnet50',
 'resnet101',
 'resnet152',
 'densenet121',
 'densenet169',
 'densenet161',
 'densenet201',
 'vgg11',
 'vgg11_bn',
 'vgg13',
 'vgg13_bn',
 'vgg16',
 'vgg16_bn',
 'vgg19',
 'vgg19_bn',
 'mobilenet_v2']

#### Get embeddings

In [10]:
# Folder containing the images to embed.
image_dir = r"D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\trial"
# return_embeddings=True hands the embeddings back as an array; with
# return_embeddings=False the call returns the path of a saved .h5 file
# instead (see the "Save embeddings" cell).
ret_emb = emb_image.get(image_dir, return_embeddings=True, show_progress=True)
ret_emb

array([[0.044039, 0.014694, 0.016457, 0.0682  , ..., 0.024395, 0.019469, 0.008133, 0.013546]])

#### Get normalized embeddings

In [12]:
# Folder containing the images to embed.
image_dir = r"D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\trial"
# Same call as the previous cell, with normalize=True added.
# NOTE(review): the values displayed below are identical to the output of the
# un-normalized run above - confirm that `normalize` actually has an effect.
ret_emb = emb_image.get(image_dir, return_embeddings=True, normalize=True)
ret_emb

array([[0.044039, 0.014694, 0.016457, 0.0682  , ..., 0.024395, 0.019469, 0.008133, 0.013546]])

#### Save embeddings as .h5 file (embeddings folder should be present at the notebook location)

In [13]:
# With return_embeddings=False the call returns the path of the .h5 file that
# was written (shown in the output below) rather than the embedding array.
emb_image.get(
    r"D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\trial",
    return_embeddings=False,
)

'D:\\Work\\Certification220\\arcgis-python-api\\samples\\04_gis_analysts_data_scientists\\certification\\embeddings\\embeddings_2025-04-04_13-44-30.h5'

#### Load saved embeddings

In [14]:
# Read a previously saved embeddings file back in with a fresh extractor.
emb_image_load = Embeddings('image')
# Raw string with single backslashes: the original mixed the r'' prefix with
# escaped "\\" separators, which put literal double backslashes into the path.
load = emb_image_load.load(r'D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\embeddings\embeddings_2025-04-04_13-44-30.h5')
# The result displayed below is a tuple: (embedding array, array of source file paths).
load

(array([[0.044039, 0.014694, 0.016457, 0.0682  , ..., 0.024395, 0.019469, 0.008133, 0.013546]]),
 array([b'D:\\Work\\Certification220\\arcgis-python-api\\samples\\04_gis_analysts_data_scientists\\certification\\trial\\6135838727f4170005130771.tif'],
       dtype=object))

In [19]:
# Visualize the saved embeddings in 2 dimensions, grouped into 5 clusters.
# NOTE: the recorded run below raised
#   ValueError: n_components=2 must be between 0 and min(n_samples, n_features)=1
# because the saved file holds a single embedding. Point this at an .h5 file
# with at least `dimensions` samples (presumably also at least `n_clusters`).
# The path uses single backslashes; the original combined the r'' prefix with
# escaped "\\" separators, which put literal double backslashes into the path.
emb_image.visualize(
    r'D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\embeddings\embeddings_2025-04-04_13-44-30.h5',
    visualize_with_items=True,
    n_clusters=5,
    dimensions=2,
)

ValueError: n_components=2 must be between 0 and min(n_samples, n_features)=1 with svd_solver='full'

### Embeddings for text

In [18]:
# List the backbones available for text embeddings; the query is made on the
# class itself, so no extractor instance has to be created first.
Embeddings.supported_backbones(
    'text',
)

['sentence-transformers/distilbert-base-nli-stsb-mean-tokens',
 'sentence-transformers/bert-base-nli-max-tokens',
 'sentence-transformers/bert-base-nli-cls-token',
 'See all `TextEmbedding` models at https://huggingface.co/sentence-transformers']

In [7]:
# Text extractor backed by the DistilBERT mean-tokens sentence-transformer.
# NOTE: `emb_text` is reassigned by the next two cells, so when the notebook is
# run top to bottom this instance is not the one used by the later `get` calls.
emb_text = Embeddings(
    'text',
    backbone='sentence-transformers/distilbert-base-nli-stsb-mean-tokens',
)

config.json:   0%|          | 0.00/555 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/505 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [8]:
# Text extractor backed by the BERT max-tokens sentence-transformer.
# NOTE: `emb_text` is reassigned again by the next cell, so when the notebook is
# run top to bottom this instance is not the one used by the later `get` calls.
emb_text = Embeddings(
    'text',
    backbone='sentence-transformers/bert-base-nli-max-tokens',
)

config.json:   0%|          | 0.00/624 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/397 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

In [9]:
# Text extractor backed by the BERT CLS-token sentence-transformer. This is the
# last assignment to `emb_text`, so it is the instance the following cells use
# when the notebook is run top to bottom.
emb_text = Embeddings(
    'text',
    backbone='sentence-transformers/bert-base-nli-cls-token',
)

config.json:   0%|          | 0.00/623 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/395 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.00 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

#### Get text embeddings from text files

In [20]:
# Folder passed to the text extractor (the same `trial` folder used for the images above).
text_dir = r'D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\trial'
# return_embeddings=True returns the embeddings as an array.
ret_emb_text = emb_text.get(text_dir, return_embeddings=True)

In [21]:
# Display the text embeddings computed in the previous cell.
ret_emb_text

array([[-0.017518,  0.011196,  0.031311, -0.022087, ...,  0.00259 ,  0.008869, -0.003223,  0.017029]])

#### Get text embeddings from CSV files

In [25]:
# Folder holding the CSV input; the output below contains multiple embedding rows.
csv_dir = r'D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\trial\csv'
ret_emb_csv = emb_text.get(csv_dir, return_embeddings=True)
ret_emb_csv

array([[-0.030914, -0.021163,  0.033397, -0.008598, ...,  0.013314,  0.00919 ,  0.000365,  0.01363 ],
       [ 0.006984, -0.003774,  0.054507,  0.002138, ...,  0.00448 , -0.005641,  0.010384, -0.005792],
       [ 0.006984, -0.003774,  0.054507,  0.002138, ...,  0.00448 , -0.005641,  0.010384, -0.005792],
       [ 0.007554, -0.013317,  0.013438, -0.009092, ..., -0.010879,  0.0103  , -0.025905, -0.022846],
       ...,
       [ 0.006984, -0.003774,  0.054507,  0.002138, ...,  0.00448 , -0.005641,  0.010384, -0.005792],
       [ 0.006984, -0.003774,  0.054507,  0.002138, ...,  0.00448 , -0.005641,  0.010384, -0.005792],
       [-0.016861, -0.002968,  0.025342,  0.018305, ...,  0.004125, -0.014327,  0.00789 ,  0.010198],
       [-0.016861, -0.002968,  0.025342,  0.018305, ...,  0.004125, -0.014327,  0.00789 ,  0.010198]])

#### Get text embeddings from JSON files

In [27]:
# Folder holding the JSON input.
# NOTE(review): the recorded run below failed with
#   JSONDecodeError: Expecting property name enclosed in double quotes
# so the JSON in this folder is presumably malformed - verify the input
# file(s) before re-running.
json_dir = r'D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\trial\json'
ret_emb_json = emb_text.get(json_dir, return_embeddings=True)
ret_emb_json

JSONDecodeError: Expecting property name enclosed in double quotes: line 2 column 1 (char 2)

#### Save text embeddings as .h5 file (an `embeddings` folder should be present at the notebook location)

In [22]:
# Ask the text extractor to write the embeddings to disk instead of returning them.
# NOTE(review): the recorded run below failed with
#   NameError: name 'img_list' is not defined
# i.e. saving did not work for text input in this environment - TODO confirm
# against the installed arcgis version.
emb_text.get(
    r'D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\trial',
    return_embeddings=False,
)

NameError: name 'img_list' is not defined

In [26]:
# Same save request for the CSV folder.
# NOTE(review): the recorded run below failed with
#   NameError: name 'img_list' is not defined
# i.e. saving did not work for CSV input either - TODO confirm against the
# installed arcgis version.
emb_text.get(
    r'D:\Work\Certification220\arcgis-python-api\samples\04_gis_analysts_data_scientists\certification\trial\csv',
    return_embeddings=False,
)

NameError: name 'img_list' is not defined