# Generating your product search index
Thereis notebook is designed to automatically create the product search index for you. It uses the [product catalog](products.csv) file to create the index. In order to do so it needs names ane keys for the following services:

- Azure Search Service
- Azure OpenAI Service

You can find the names and keys in the Azure Portal. These need to be entered in a `.env` file in the root of this repository. The `.env` file is not checked in to source control. You can use the `local.env` file as a template.

In [5]:
import os
import pandas as pd
import requests
import json
from typing import List, Dict, Any
from openai import AzureOpenAI
from pathlib import Path
from dotenv import load_dotenv

load_dotenv()

cog_srch_api_version = "api-version=2023-07-01-Preview"

In [6]:
def delete_index(search_service: str, search_index: str, search_api_key: str):
    print(f"Deleting index {search_index} in {search_service}...")
    response = requests.delete(
        f"https://{search_service}.search.windows.net/indexes/{search_index}",
        headers={"api-key": search_api_key},
    )
    print(response.status_code)
    return response.status_code

In [7]:
def create_index(search_service: str, search_index: str, search_api_key: str):
    print(f"Creating index {search_index} in {search_service}...")
    index_definition = json.loads(Path("index.json").read_text(encoding="utf8"))
    index_definition["name"] = search_index
    index_definition["semantic"]["configurations"][0]["name"] = f"semantic-{search_index}"

    headers = {
        "Content-Type": "application/json",
        "api-key": search_api_key
    }

    response = requests.get(f"https://{search_service}.search.windows.net/indexes/{search_index}?{cog_srch_api_version}", 
                            headers={ "api-key": search_api_key })
    
    print(response.status_code)
    if response.status_code == 404:
        print("Index not found, creating...")
        response = requests.post(f"https://{search_service}.search.windows.net/indexes?{cog_srch_api_version}", headers=headers, json=index_definition)
        print(response.status_code)
        if response.status_code >= 400:
            print("Create index failed:")
            print(response.json())
            exit(1)


In [8]:
def index_docs(
    search_service: str,
    search_index: str,
    search_api_key: str,
    docs: List[Dict[str, any]],
):
    print(f"Indexing documents into {search_index} in {search_service}...")
    headers = {"Content-Type": "application/json", "api-key": search_api_key}
    items = [
        {
            "@search.action": "upload",
            "id": item["id"],
            "content": item["content"],
            "title": item["title"],
            "filename": item["filename"],
            "embedding": item["embedding"],
            "url": item["url"],
        }
        for item in docs
    ]

    response = requests.post(
        f"https://{search_service}.search.windows.net/indexes/{search_index}/docs/index?{cog_srch_api_version}",
        headers=headers,
        json={"value": items},
    )

    print(response.status_code)
    if response.status_code >= 400:
        print("Error indexing documents:")
        print(response.json())

In [9]:
def index(
    search_service: str,
    search_index: str,
    search_api_key: str,
    docs: List[Dict[str, any]],
):
    delete_index(search_service, search_index, search_api_key)
    create_index(search_service, search_index, search_api_key)
    index_docs(search_service, search_index, search_api_key, docs)

In [10]:
def gen_contoso_products(
    path: str,
) -> List[Dict[str, any]]:
    openai_service_endoint = os.environ["CONTOSO_AI_SERVICES_ENDPOINT"]
    openai_deployment = "text-embedding-ada-002"

    # openai.Embedding.create() -> client.embeddings.create()
    client = AzureOpenAI(
        api_version="2023-07-01-preview",
        azure_endpoint=openai_service_endoint,
        azure_deployment=openai_deployment,
        api_key=os.environ["CONTOSO_AI_SERVICES_KEY"],
    )

    products = pd.read_csv(path)
    items = []
    for product in products.to_dict("records"):
        content = product["description"]
        id = str(product["id"])
        title = product["name"]
        url = f"/products/{title.lower().replace(' ', '-')}"
        emb = client.embeddings.create(input=content, model=openai_deployment)
        rec = {
            "id": id,
            "content": content,
            "title": title,
            "filename": f"{title.lower().replace(' ', '-')}",
            "embedding": emb.data[0].embedding,
            "url": url,
        }
        items.append(rec)

    return items

In [11]:
contoso_search = os.environ["CONTOSO_SEARCH_SERVICE"]
contoso_search_key = os.environ["CONTOSO_SEARCH_KEY"]
index_name = "contoso-products"

contoso_products = gen_contoso_products("products.csv")
index(
    contoso_search,
    index_name,
    contoso_search_key,
    contoso_products,
)

Deleting index contoso-products-test in contoso-search...
400
Creating index contoso-products-test in contoso-search...
200
Indexing documents into contoso-products-test in contoso-search...
200
