In [1]:
## Since GeoSquare V2.0 has not yet been released as open source (as of 2024-06-30), we use a demo here to
## simulate the storage, retrieval, and other interactive behavior of web services in this application case.

# Specifically, we use the ArcGIS toolbox functions, which have corresponding ArcPy interfaces.
# All web services are stored in the file 'WebServices.csv'.
# We perform sentence embedding on the description of each web service,
# resulting in a 'webservices.pth' file that contains a dictionary {"key": "value"},
# where each 'key' is the name of a tool, and 'value' is the embedding of its description.

import torch
from transformers import AutoTokenizer, AutoModel

# Checkpoint identifier passed to the tokenizer and model loaders below.
model_name = 'BAAI/bge-m3'
# The demo runs inference on the CPU.
device = 'cpu'

tokenizer = AutoTokenizer.from_pretrained(model_name)
# nn.Module.to() returns the module itself, so loading and device placement
# can be chained into a single assignment.
model = AutoModel.from_pretrained(model_name).to(device)

def generate_embedding(text):
    """Return the L2-normalized first-token embedding of *text*.

    The text is tokenized (with padding and truncation enabled), moved to the
    module-level ``device`` and run through the module-level ``model`` with
    gradient tracking disabled.  The hidden state at token position 0 of the
    last layer is taken as the sentence representation and scaled to unit
    L2 norm along the feature axis.

    Args:
        text: The text to embed.  Presumably a single string (or a list of
            strings, which the tokenizer would batch) -- callers in this file
            pass one description string at a time.

    Returns:
        A 2-D ``torch.Tensor`` with one row per input sequence.
    """
    encoded = tokenizer(
        text,
        return_tensors='pt',
        padding=True,
        truncation=True,
    ).to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        hidden_states = model(**encoded).last_hidden_state
        first_token = hidden_states[:, 0, :]
        unit_vector = torch.nn.functional.normalize(first_token, p=2, dim=1)

    return unit_vector

import pandas as pd

# Load the CSV file
file_path = r'D:\Work\Data\Paper_Projects\GEEKnowledgeTemplate\demo\template4WebService\data\WebServices.csv'
df = pd.read_csv(file_path)

# Keep every tool paired with the description from its *own* row.
# Filtering and de-duplicating the two columns independently (dropna/unique
# on each column separately) shifts the lists against each other as soon as
# one description is missing or two tools share a description, so tools end
# up with the wrong text or the positional lookup runs out of range.
# Rows lacking either field are dropped; for a repeated tool name the first
# occurrence wins.
pairs = (
    df[['Tool Name', 'Description']]
    .dropna()
    .drop_duplicates(subset='Tool Name')
)
tools = pairs['Tool Name'].tolist()
descriptions = pairs['Description'].tolist()

# Map each tool name to the embedding of its description.
embeddings_dict = {
    tool: generate_embedding(description)
    for tool, description in zip(tools, descriptions)
}

# Persist the {tool name: embedding tensor} dictionary for later retrieval.
torch.save(embeddings_dict, 'webservices.pth')

['Add Surface Information', 'Aspect', 'Clip', 'Contour', 'Euclidean Distance', 'Fill', 'Flow Direction', 'Interpolate Shape', 'Kriging', 'Reclassify', 'Slope', 'Zonal Statistics', 'Slice', 'Kernel Density', 'Intersect', 'Plus', 'Times', 'Con', 'Buffer', 'Dissolve', 'Spatial Join', 'IDW', 'Extract by Mask', 'Divide', 'NDVI'] ['Attributes features with spatial information derived from a surface.', 'Derives the aspect from each cell of a raster surface. The aspect identifies the compass direction that the downhill slope faces for each location.', 'Cuts out a portion of a raster dataset, mosaic dataset, or image service layer/ Extracts input features that overlay the clip features.', 'Creates a feature class of contours from a raster surface.', 'Calculates, for each cell, the Euclidean distance to the closest source.', 'Fills sinks in a surface raster to remove small imperfections in the data.', 'Creates a raster of flow direction from each cell to its downslope neighbor, or neighbors, usi