In [None]:
# # import packages
! pip install openai
! pip install python-dotenv
! pip install semantic-kernel

In [1]:
# import libraries 
import requests
import json
import semantic_kernel as sk

from semantic_kernel.ai.open_ai import AzureTextEmbedding

## Set up the Semantic Kernel

In [None]:
# Create the kernel that the embedding calls in later cells go through.
kernel = sk.Kernel()

# Only the API key and endpoint are needed from the .env settings; the first
# element of the returned tuple is discarded.
_, api_key, endpoint = sk.azure_openai_settings_from_dot_env()

# Register the embedding backend under the name "ada".
ada_embedding_backend = AzureTextEmbedding("text-embedding-ada-002", endpoint, api_key)
kernel.config.add_embedding_backend("ada", ada_embedding_backend)

# Attach a memory store so that `kernel.memory` is available to later cells.
kernel.register_memory_store(memory_store=sk.memory.VolatileMemoryStore())

In [15]:
# Load the sample documents from text-sample.json into `input_data`.
with open('../data/text-sample.json', mode='r', encoding='utf-8') as sample_file:
    input_data = json.load(sample_file)

## Create embeddings and structure input data format for Azure Search

In [17]:
for item in input_data:
    title = item['title']
    content = item['content']
    title_embeddings = await kernel.memory._embeddings_generator.generate_embeddings_async([title])
    content_embeddings = await kernel.memory._embeddings_generator.generate_embeddings_async([content])
    item['titleVector'] = list(title_embeddings[0])
    item['contentVector'] = list(content_embeddings[0])
    item['@search.action'] = 'upload'

In [18]:
# Persist the documents (now carrying their vectors) to sk_docVectors.json.
with open("../output/sk_docVectors.json", mode="w") as docs_file:
    docs_file.write(json.dumps(input_data))

In [21]:
# Generate a query embedding
ask = "what is Azure Search"
embedding = await kernel.memory._embeddings_generator.generate_embeddings_async([ask])

In [25]:
# Save the query vector to sk_queryVector.json, then show the raw embedding.
with open("../output/sk_queryVector.json", mode="w") as vector_file:
    query_vector = list(embedding[0])
    vector_file.write(json.dumps(query_vector))

print(embedding)

[[-0.00343422 -0.01066914  0.03192004 ...  0.00686359  0.01894039
   0.00243186]]


# Azure Search Setup

In [None]:
# Connection settings for the Azure Cognitive Search service used by the cells below.
cognitive_search_name = "" # TODO: fill in the name of your Cognitive Search service
index_name = "" # TODO: fill in the name of the index to create and query
# NOTE: this reassigns `api_key`, which the kernel setup cell also assigned.
api_key = "" # TODO: fill in the admin API key of your Cognitive Search service
# `url` targets the `docs/index` endpoint of the index named above.
url = f"https://{cognitive_search_name}.search.windows.net/indexes/{index_name}/docs/index?api-version=2023-07-01-Preview"

In [30]:
# Number of components in the query embedding; the index payload below uses
# this value as the `dimensions` of its vector field.
first_vector = embedding[0]
EMBEDDING_LENGTH = len(first_vector)
length_message = "Embedding length: {}"
print(length_message.format(EMBEDDING_LENGTH))

Embedding length: 1536


## Create Index

In [None]:
# Create (or update) the search index.
# The vector fields take their `dimensions` from EMBEDDING_LENGTH, so nothing
# has to be edited by hand here when the embedding model changes.

# Index definitions are PUT to the index resource itself. `url` points at the
# `docs/index` endpoint, which only accepts document uploads, so a dedicated
# URL is built for this request.
index_url = f"https://{cognitive_search_name}.search.windows.net/indexes/{index_name}?api-version=2023-07-01-Preview"

payload = json.dumps({
  "name": index_name,
  "fields": [
    {
      "name": "id",
      "type": "Edm.String",
      "key": True,
      "filterable": True
    },
    {
      "name": "title",
      "type": "Edm.String",
      "searchable": True,
      "retrievable": True
    },
    {
      "name": "content",
      "type": "Edm.String",
      "searchable": True,
      "retrievable": True
    },
    {
      "name": "category",
      "type": "Edm.String",
      "filterable": True,
      "searchable": True,
      "retrievable": True
    },
    {
      # Every document carries a `titleVector`; the index must declare the
      # field or the upload of those documents is rejected.
      "name": "titleVector",
      "type": "Collection(Edm.Single)",
      "searchable": True,
      "retrievable": True,
      "dimensions": EMBEDDING_LENGTH,
      "vectorSearchConfiguration": "my-vector-config"
    },
    {
      "name": "contentVector",
      "type": "Collection(Edm.Single)",
      "searchable": True,
      "retrievable": True,
      "dimensions": EMBEDDING_LENGTH,
      # NOTE(review): 2023-07-01-Preview names this property
      # `vectorSearchConfiguration` (and `kind` below) - confirm against the
      # API reference for the api-version in use.
      "vectorSearchConfiguration": "my-vector-config"
    }
  ],
  "vectorSearch": {
    "algorithmConfigurations": [
      {
        "name": "my-vector-config",
        "kind": "hnsw",
        "hnswParameters": {
          "m": 4,
          "efConstruction": 400,
          "metric": "cosine"
        }
      }
    ]
  },
  "semantic": {
    "configurations": [
      {
        "name": "my-semantic-config",
        "prioritizedFields": {
          "titleField": {
            "fieldName": "title"
          },
          "prioritizedContentFields": [
            {
              "fieldName": "content"
            }
          ],
          "prioritizedKeywordsFields": [
            {
              "fieldName": "category"
            }
          ]
        }
      }
    ]
  }
})
headers = {
  'Content-Type': 'application/json',
  'api-key': api_key
}

response = requests.request("PUT", index_url, headers=headers, data=payload)

print(response.text)
print(response.status_code)

## Insert Entries

In [None]:
# Upload the documents that were written to sk_docVectors.json.
# `json.loads` parses a JSON *string*, so the file has to be opened and read
# with `json.load` instead of passing its path.
with open("../output/sk_docVectors.json", "r", encoding="utf-8") as docs_file:
    documents = json.load(docs_file)

# The indexing endpoint expects a JSON body of the form {"value": [...]};
# each document already carries its own "@search.action".
payload = json.dumps({"value": documents})

headers = {
  'Content-Type': 'application/json',
  'api-key': api_key,
}

# `url` is the `docs/index` endpoint, which is the right target for uploads.
response = requests.request("POST", url, headers=headers, data=payload)

print(response.text)
print(response.status_code)

## Retrieve Entries

In [None]:
# Run a vector query for the question embedded earlier and print the best match.

# Queries are POSTed to the `docs/search` endpoint. `url` points at
# `docs/index` (document upload), so a dedicated URL is built for this request.
search_url = f"https://{cognitive_search_name}.search.windows.net/indexes/{index_name}/docs/search?api-version=2023-07-01-Preview"

payload = json.dumps({
  "vector": {
    "value": list(embedding[0]),   # the query embedding
    "fields": "contentVector",     # vector field to compare against
    "k": 1                         # number of nearest neighbours requested
  }
})
headers = {
  'Content-Type': 'application/json',
  'api-key': api_key,
}

response = requests.request("POST", search_url, headers=headers, data=payload)

print(response.text)
print(response.status_code)