<div align="center">
    <h1>Populating the Index</h1>
</div>

**Get Started**

In [None]:
!pip install -r ../requirements.txt

In [None]:
import hashlib
import json
import os
import random
import string
from datetime import datetime, timezone

from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from dotenv import load_dotenv
from langchain_openai import AzureChatOpenAI, AzureOpenAIEmbeddings

load_dotenv()

**Create the Client Connections**

In [None]:
# --- Azure AI Search ---------------------------------------------------------
# Both settings are mandatory: indexing os.environ raises KeyError if one is unset.
endpoint = os.environ["AZURE_SEARCH_ENDPOINT"]
key = os.environ["AZURE_SEARCH_KEY"]
index_name = "pwc-cw"

# Key-based credential, and the search client bound to the target index.
credential = AzureKeyCredential(key)
client = SearchClient(endpoint=endpoint, index_name=index_name, credential=credential)

# --- Azure OpenAI embeddings -------------------------------------------------
# Read leniently: an unset variable yields None here instead of raising.
aoai_endpoint = os.getenv("AOAI_ENDPOINT")
aoai_key = os.getenv("AOAI_KEY")

# Embeddings client used to vectorize chunk text before it is indexed.
embeddings_model = AzureOpenAIEmbeddings(
    azure_endpoint=aoai_endpoint,
    api_key=aoai_key,
    azure_deployment="text-embedding-3-large",
)

**Load the Data Using the JSON File**

In [None]:
# Read the chunked-document JSON file (path is relative to the working directory)
# and parse it into `data`.
with open('Chunked Output v1.json', mode='r', encoding='utf-8') as chunk_file:
    data = json.loads(chunk_file.read())

In [None]:
# Chunks embedded and uploaded per request. Azure AI Search limits the size of a
# single indexing batch, and embedding vectors are large once serialized, so the
# batch is kept deliberately small.
UPLOAD_BATCH_SIZE = 50

for batch_start in range(0, len(data), UPLOAD_BATCH_SIZE):
    batch = data[batch_start:batch_start + UPLOAD_BATCH_SIZE]

    # Embed the whole batch in one call instead of one call per chunk.
    vectors = embeddings_model.embed_documents(
        [document["chunk_text"] for document in batch]
    )

    new_documents = []
    for document, vector in zip(batch, vectors):
        # Deterministic key derived from the chunk itself: re-running this cell
        # overwrites the same index documents (upload acts as an upsert) instead
        # of adding a fresh copy of every chunk under a new random id.
        key_source = document["doc_name"] + "\n" + document["chunk_text"]
        new_documents.append({
            "id": hashlib.sha256(key_source.encode("utf-8")).hexdigest(),
            "source_file": document["doc_name"],
            "content": document["chunk_text"],
            "content_vector": vector,
            "created_date": datetime.now(timezone.utc).isoformat(),
        })

    # Upload the batch and surface per-document failures, which the service
    # reports in the returned results rather than by raising.
    upload_results = client.upload_documents(documents=new_documents)
    failed_keys = [result.key for result in upload_results if not result.succeeded]
    if failed_keys:
        raise RuntimeError(
            f"Failed to index {len(failed_keys)} document(s) in the batch starting "
            f"at position {batch_start}: {failed_keys}"
        )

In [None]:
# Sanity check: run a match-all query ("*") limited to the first five hits.
results = client.search(
    search_text="*",
    top=5,
)

In [None]:
# Print each document returned by the query above, one per line.
for hit in results:
    print(hit)