In [None]:
!pip install txtai
!pip install

import constants
import requests
import json
from txtai.embeddings import Embeddings
import pandas as pd

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting txtai
  Downloading txtai-5.4.0-py3-none-any.whl (166 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m166.7/166.7 KB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu>=1.7.1.post2
  Downloading faiss_cpu-1.7.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.0/17.0 MB[0m [31m75.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting transformers>=4.22.0
  Downloading transformers-4.27.4-py3-none-any.whl (6.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.8/6.8 MB[0m [31m79.6 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollec

In [None]:
def getAuthContent():
    """Request the auth payload needed to call the TFS API endpoint.

    Posts a ``password`` grant request, built from the values in
    ``constants``, to ``constants.AUTH_CALLBACK_URL``.

    Returns:
        dict | None: the decoded JSON body when the response is HTTP 200, is
        declared as JSON, decodes to an object, and carries both
        ``access_token`` and ``instance_url``. ``None`` in every other case.
    """
    # NOTE(review): the credentials are sent as URL query parameters
    # (``params=``), so they can end up in proxy/server logs. Confirm whether
    # the token endpoint accepts them in the form body (``data=``) instead.
    auth_res = requests.post(
        constants.AUTH_CALLBACK_URL,
        params={
            'grant_type': 'password',
            'client_id': constants.AUTH_CONSUMER_KEY,
            'client_secret': constants.AUTH_CONSUMER_SECRET,
            'username': constants.AUTH_USER,
            'password': constants.AUTH_PASS
        }
    )

    # A response without a Content-Type header yields None from .get(); fall
    # back to "" so the membership test cannot raise TypeError.
    content_type = auth_res.headers.get("Content-Type") or ""
    if auth_res.status_code != 200 or "application/json" not in content_type:
        return None

    # A body that is labelled JSON but does not decode is treated like any
    # other failed authentication.
    try:
        auth_content = json.loads(auth_res.content)
    except ValueError:
        return None

    # Only a JSON object can carry the two required fields.
    if not isinstance(auth_content, dict):
        return None
    if auth_content.get("access_token") is None or auth_content.get("instance_url") is None:
        return None

    return auth_content

In [None]:
def getAPIContent(auth_content, query):
    """Fetch APIs from the TFS API Portal endpoint for the given query.

    Args:
        auth_content: mapping with ``'access_token'`` and ``'instance_url'``
            entries (the shape ``getAuthContent`` returns on success).
        query: value sent as the ``q`` query parameter.

    Returns:
        The decoded JSON body when the response is HTTP 200, is declared as
        JSON, and decodes successfully; ``None`` otherwise.

    Raises:
        KeyError: if ``auth_content`` lacks one of the two required entries.
    """
    access_token = auth_content["access_token"]
    instance_url = auth_content["instance_url"]

    # Make call to get APIs from TFS endpoint
    api_res = requests.get(
        instance_url + constants.TFS_API_ENDPOINT,
        params={'q': query},
        headers={'Authorization': "Bearer " + access_token}
    )

    # A response without a Content-Type header yields None from .get(); fall
    # back to "" so the membership test cannot raise TypeError.
    content_type = api_res.headers.get("Content-Type") or ""
    if api_res.status_code != 200 or "application/json" not in content_type:
        return None

    # A body that is labelled JSON but does not decode is reported as a
    # failed call rather than an exception.
    try:
        return json.loads(api_res.content)
    except ValueError:
        return None

In [None]:
def apiToPlainText(api):
    """Reduce one API record to a ``(name, description)`` pair.

    ``api`` is expected to be an SObject of type acm_pkg__CommunityApi__c,
    indexable by ``constants.API_NAME`` and ``constants.API_DESC``.
    When the description entry is ``None`` the name is used in its place.
    """
    label = api[constants.API_NAME]
    summary = api[constants.API_DESC]
    # Only a None description triggers the fallback; other values pass through.
    return label, (label if summary is None else summary)

In [None]:
# Authenticate, fetch the API records, tabulate them, then build and save a
# txtai embeddings index over the API descriptions.

# Both helpers return None on failure; stop with a clear message instead of
# failing later with an opaque TypeError.
auth_content = getAuthContent()
if auth_content is None:
    raise RuntimeError("Authentication failed: no usable auth content was returned")

api_content = getAPIContent(auth_content, constants.API_QUERY_UTDPORTAL)
if api_content is None:
    raise RuntimeError("API request failed: no JSON content was returned")

data = api_content['records']
response = []

for api in data:
    # apiToPlainText returns exactly (name, description); unpacking three
    # values from it raised ValueError on the first record.
    name, desc = apiToPlainText(api)
    # NOTE(review): assumes each record exposes its identifier under 'Id';
    # .get() yields None when it does not. Confirm the key against the query.
    response.append([name, desc, api.get('Id')])

# Passing the column names to the constructor also works for zero records,
# where assigning df.columns afterwards fails with a length mismatch.
df = pd.DataFrame(response, columns=['Name', 'Description', 'Id'])
df = df.drop_duplicates(subset=['Name'])

embeddings = Embeddings({"path": "sentence-transformers/nli-mpnet-base-v2", "content": True})
# Index ids are the positions of the de-duplicated descriptions (0..n-1), not
# the DataFrame index labels.
embeddings.index([(uid, text, None) for uid, text in enumerate(df['Description'])])
embeddings.save('newModel')

[{'id': '11',
  'text': 'This API sends customer Activity and Case information to BI (Business Intelligence) system for reports.',
  'score': 0.5227376818656921},
 {'id': '21',
  'text': 'SFMC Transactional Email & SMS API provides functionality for sending Low Latency Email and SMS to individuals or groups of contacts.',
  'score': 0.46152830123901367},
 {'id': '29',
  'text': 'The operation purpose of this service is to provide Data Market Place and other domains an access to consume AXON related facets and information that can be leveraged in their development process.',
  'score': 0.42580646276474}]

In [None]:
# Write the index held in `embeddings` to '/content/index'.
# NOTE(review): '/content' looks like a Colab-specific directory — confirm the
# path exists when this notebook is run elsewhere.
embeddings.save('/content/index')