In [None]:
from dotenv import load_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
import os
import json

# Load settings from the .env file into the process environment.
load_dotenv()


def _require_env(name):
    """Return the value of environment variable `name`.

    Raises:
        RuntimeError: if the variable is unset or empty, so a missing setting
            fails here with a clear message instead of producing a bogus
            endpoint such as "https://None.search.windows.net/" later on.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Required setting {name!r} is missing or empty; define it in the .env file."
        )
    return value


# AZURE_SEARCH_SERVICE_ENDPOINT is expected to hold the bare service name, from
# which the endpoint URL is built. A value that is already a full URL (contains
# a scheme) is used as-is rather than being wrapped a second time.
_service_setting = _require_env("AZURE_SEARCH_SERVICE_ENDPOINT")
if "://" in _service_setting:
    search_service_name = _service_setting
else:
    search_service_name = f"https://{_service_setting}.search.windows.net/"

search_service_api_key = _require_env("AZURE_SEARCH_ADMIN_KEY")
index_name = _require_env("INDEX_NAME")
chucks_file = _require_env("CHUNKS_FILE")

# The chunks file is resolved relative to the PARENT of the current working
# directory (os.path.dirname of the cwd), not the cwd itself.
directory = os.path.dirname(os.getcwd())
data_file_path = os.path.join(directory, chucks_file)

# Create a client bound to the target index, authenticated with the admin key.
credential = AzureKeyCredential(search_service_api_key)
client = SearchClient(endpoint=search_service_name, credential=credential, index_name=index_name)

In [None]:
# Azure AI Search limits a single indexing request to 1000 documents, so the
# upload below is split into batches of at most this size.
MAX_BATCH_SIZE = 1000

# Read the JSONL file: one JSON document per line. The file is read as UTF-8
# explicitly (the platform default encoding may differ), and blank lines --
# e.g. a trailing empty line -- are skipped instead of failing json.loads.
with open(data_file_path, 'r', encoding='utf-8') as f:
    documents = [json.loads(line) for line in f if line.strip()]

# Coerce 'id' and 'page' to strings before upload.
# NOTE(review): presumably the index declares both as string fields -- confirm
# against the index schema.
for document in documents:
    document['id'] = str(document['id'])
    document['page'] = str(document['page'])

# Insert the documents into the Azure Search index, one batch at a time.
# `result` accumulates the per-document indexing results of every batch; it is
# an empty list when the file contained no documents.
result = []
for start in range(0, len(documents), MAX_BATCH_SIZE):
    batch = documents[start:start + MAX_BATCH_SIZE]
    result.extend(client.upload_documents(documents=batch))

# Check the results. Might want to comment this out if the list of items is too long.
for r in result:
    if not r.succeeded:
        print(f"Failed to insert document {r.key}: {r.error_message}")
    else:
        print(f"Successfully inserted document {r.key}")