In [None]:
# install packages
! pip install openai
! pip install python-dotenv
! pip install semantic-kernel


In [None]:
# import libraries
import requests
import json
import os
import semantic_kernel as sk

from semantic_kernel.ai.open_ai import AzureTextEmbedding
from dotenv import load_dotenv
# Load environment variables from the local .env file so that the
# os.getenv() lookups further down can resolve the Azure settings.
load_dotenv()


## Set up the Semantic Kernel

In [80]:
# Build the kernel, register an Azure OpenAI embedding backend under the
# name "ada", and attach a volatile memory store.
kernel = sk.Kernel()

# The first element of the settings tuple is not used in this notebook.
# The key gets its own name because `api_key` is assigned further down to the
# Azure Cognitive Search admin key; one name for two different secrets makes
# out-of-order cell execution silently send the wrong credential.
_, openai_api_key, endpoint = sk.azure_openai_settings_from_dot_env()

kernel.config.add_embedding_backend(
    "ada",
    AzureTextEmbedding("text-embedding-ada-002", endpoint, openai_api_key),
)
kernel.register_memory_store(memory_store=sk.memory.VolatileMemoryStore())


In [82]:
# Load the sample documents from text-sample.json (decoded as UTF-8).
sample_path = '../data/text-sample.json'
with open(sample_path, mode='r', encoding='utf-8') as sample_file:
    input_data = json.loads(sample_file.read())


## Create embeddings and structure input data format for Azure Cognitive Search

In [None]:
for item in input_data:
    title = item['title']
    content = item['content']
    title_embeddings = await kernel.memory._embeddings_generator.generate_embeddings_async([title])
    content_embeddings = await kernel.memory._embeddings_generator.generate_embeddings_async([content])
    item['titleVector'] = list(title_embeddings[0])
    item['contentVector'] = list(content_embeddings[0])
    item['@search.action'] = 'upload'


In [85]:
# Write the documents (now carrying their vectors) to sk_docVectors.json.
# Create the output folder first: open() in "w" mode does not create missing
# parent directories and would raise FileNotFoundError on a fresh checkout.
os.makedirs("../output", exist_ok=True)
with open("../output/sk_docVectors.json", "w", encoding="utf-8") as f:
    json.dump(input_data, f)


In [86]:
# Generate a query embedding
# The query text is passed as a one-element list, which is why later cells
# read the query vector as embedding[0].
ask = "what is Azure Cognitive Search"
embedding = await kernel.memory._embeddings_generator.generate_embeddings_async([ask])


In [None]:
# Write the query vector to sk_queryVector.json as a plain JSON array.
# Create the output folder first: open() in "w" mode does not create missing
# parent directories and would raise FileNotFoundError on a fresh checkout.
os.makedirs("../output", exist_ok=True)
with open("../output/sk_queryVector.json", "w", encoding="utf-8") as f:
    json.dump(list(embedding[0]), f)

print(embedding)


# Azure Cognitive Search Setup

In [88]:
# Azure Cognitive Search settings are read from environment variables
# (define them in the .env file): service name, index name and admin key.
cognitive_search_name = os.getenv("AZURE_SEARCH_SERVICE_NAME")
index_name = os.getenv("AZURE_SEARCH_SERVICE_INDEX_NAME")
api_key = os.getenv("AZURE_SEARCH_SERVICE_ADMIN_KEY")

# All three endpoints share the same index root and REST API version.
SEARCH_API_VERSION = "2023-07-01-Preview"
index_base_url = f"https://{cognitive_search_name}.search.windows.net/indexes/{index_name}"

create_index_url = f"{index_base_url}?api-version={SEARCH_API_VERSION}"
insert_entries_url = f"{index_base_url}/docs/index?api-version={SEARCH_API_VERSION}"
search_url = f"{index_base_url}/docs/search?api-version={SEARCH_API_VERSION}"


In [89]:
# Take the vector dimension from the query embedding; the index definition
# uses this value for the "dimensions" of its vector fields.
EMBEDDING_LENGTH = len(embedding[0])
print(f"Embedding length: {EMBEDDING_LENGTH}")


Embedding length: 1536


## Create Index

In [None]:
# Index definition: an id key, three text fields, and two vector fields.
# The vector "dimensions" come from EMBEDDING_LENGTH, so nothing has to be
# edited by hand when a different embedding model is used.

payload = json.dumps({
    "name": index_name,
    "fields": [
        {
            "name": "id",
            "type": "Edm.String",
            "key": True,
            "filterable": True
        },
        {
            "name": "title",
            "type": "Edm.String",
            "searchable": True,
            "retrievable": True
        },
        {
            "name": "content",
            "type": "Edm.String",
            "searchable": True,
            "retrievable": True
        },
        {
            "name": "category",
            "type": "Edm.String",
            "filterable": True,
            "searchable": True,
            "retrievable": True
        },
        {
            "name": "titleVector",
            "type": "Collection(Edm.Single)",
            "searchable": True,
            "retrievable": True,
            "dimensions": EMBEDDING_LENGTH,
            "algorithmConfiguration": "my-vector-config"
        },
        {
            "name": "contentVector",
            "type": "Collection(Edm.Single)",
            "searchable": True,
            "retrievable": True,
            "dimensions": EMBEDDING_LENGTH,
            "algorithmConfiguration": "my-vector-config"
        }
    ],
    "vectorSearch": {
        # Both vector fields reference this configuration by name.
        "algorithmConfigurations": [
            {
                "name": "my-vector-config",
                "algorithm": "hnsw",
                "hnswParameters": {
                    "m": 4,
                    "efConstruction": 400,
                    "metric": "cosine"
                }
            }
        ]
    },
    "semantic": {
        "configurations": [
            {
                "name": "my-semantic-config",
                "prioritizedFields": {
                    "titleField": {
                        "fieldName": "title"
                    },
                    "prioritizedContentFields": [
                        {
                            "fieldName": "content"
                        }
                    ],
                    "prioritizedKeywordsFields": [
                        {
                            "fieldName": "category"
                        }
                    ]
                }
            }
        ]
    }
})
headers = {
    'Content-Type': 'application/json',
    'api-key': api_key
}

# requests applies no timeout unless one is given, so an unreachable service
# would block this cell forever; 30 seconds is an arbitrary, generous limit.
response = requests.put(create_index_url, headers=headers, data=payload,
                        timeout=30)

print(response.status_code)
if not response.ok:
    # The status code alone does not say what was rejected; show the body too.
    print(response.text)


## Insert Documents

In [None]:
# Load the documents saved in sk_docVectors.json and send them to the index
# in a single request, wrapped in the {"value": [...]} envelope.
with open("../output/sk_docVectors.json", encoding="utf-8") as f:
    data = json.load(f)

payload = {
    "value": data
}

headers = {
    'Content-Type': 'application/json',
    'api-key': api_key,
}

# requests applies no timeout unless one is given, so an unreachable service
# would block this cell forever; 60 seconds is an arbitrary, generous limit
# for an upload that carries every document vector.
response = requests.post(
    insert_entries_url, headers=headers, json=payload, timeout=60)

print(response.text)
print(response.status_code)


## Vector Searches

In [None]:
# Vector query: search the "contentVector" field with the query embedding
# and ask for the single nearest document ("k": 1).
payload = json.dumps({
    "vector": {
        "value": list(embedding[0]),
        "fields": "contentVector",
        "k": 1
    }
})
headers = {
    'Content-Type': 'application/json',
    'api-key': api_key,
}

# requests applies no timeout unless one is given, so an unreachable service
# would block this cell forever; 30 seconds is an arbitrary, generous limit.
response = requests.post(search_url, headers=headers, data=payload,
                         timeout=30)

print(response.text)
print(response.status_code)
