In [None]:
from azure.search.documents.indexes.models import (
    SimpleField,
    SearchFieldDataType,
    SearchableField,
    SearchField,
    VectorSearch,
    HnswAlgorithmConfiguration,
    VectorSearchProfile,
    SearchIndex,
    SemanticConfiguration,
    SemanticPrioritizedFields,
    SemanticField,
    SemanticSearch,
)
import os
from dotenv import load_dotenv

load_dotenv()

# The index name comes from the environment; the vector search profile name
# is derived from it.
index_name = os.getenv("SEARCH_INDEX_NAME")
vector_search_profile_name = f"{index_name}_profile"

# Index schema, in order: document key, full-text review fields, the
# filterable/sortable date, filterable hotel metadata, and the embedding.
fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),
    # Both review text fields share the same type and analyzer.
    *[
        SearchableField(
            name=text_field,
            type=SearchFieldDataType.String,
            analyzer_name="en.lucene",
        )
        for text_field in ("review_text", "review_title")
    ],
    SimpleField(
        name="dateAdded",
        type=SearchFieldDataType.DateTimeOffset,
        filterable=True,
        sortable=True,
    ),
    # Plain string metadata that is only used for filtering.
    *[
        SimpleField(
            name=filter_field,
            type=SearchFieldDataType.String,
            filterable=True,
        )
        for filter_field in ("city", "hotel_name", "hotel_state")
    ],
    SearchField(
        name="embedding",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        vector_search_dimensions=1536,  # Adjust this to match your embedding size
        vector_search_profile_name=vector_search_profile_name,
    ),
]

# Vector search configuration
hnsw_algorithm_name = f"{index_name}_hnsw"
vector_search = VectorSearch(

In [None]:
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient

import os
from dotenv import load_dotenv

load_dotenv()

# Azure Search service details, read from the environment
service_name = os.getenv("SERVICE_NAME")
admin_key = os.getenv("SEARCH_ADMIN_KEY")
index_name = os.getenv("SEARCH_INDEX_NAME")

# Fail fast with a clear message. Without this check an unset variable is
# formatted into the endpoint URL as the literal text "None" (or passed to
# the credential as None), and the real cause only surfaces later.
# Only variable names are reported, never their values.
required_settings = {
    "SERVICE_NAME": service_name,
    "SEARCH_ADMIN_KEY": admin_key,
    "SEARCH_INDEX_NAME": index_name,
}
missing_settings = [name for name, value in required_settings.items() if not value]
if missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(missing_settings)
    )

# Initialize the search index client
endpoint = f"https://{service_name}.search.windows.net/"
credential = AzureKeyCredential(admin_key)
index_client = SearchIndexClient(endpoint=endpoint, credential=credential)

In [None]:
import os
import json
from azure.search.documents import SearchClient


# Push the index definition to the service (creates it, or updates it if it
# already exists) before any documents are sent to it.
index_client.create_or_update_index(index)

# Client bound to that index, used for the document uploads below.
search_client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
)

# Function to read and process JSON files
def process_json_file(file_path):
    """Load one document from a JSON file and normalize its embedding.

    Args:
        file_path: Path to a JSON file whose top-level object has an
            'embedding' entry that can be iterated and converted to floats.

    Returns:
        The parsed JSON object, with 'embedding' replaced by a list of floats.

    Raises:
        KeyError: If the document has no 'embedding' entry.
        ValueError: If the file is not valid JSON or an embedding value
            cannot be converted to float.
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default,
    # which can mis-decode or reject non-ASCII text on some systems.
    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Ensure the embedding is a list of floats
    data['embedding'] = [float(x) for x in data['embedding']]

    return data

# Directory containing the JSON files
directory = '../data/embedded'

# Maximum number of documents sent in a single upload request
BATCH_SIZE = 1000


def upload_batch(batch):
    """Upload one batch of documents and report the per-document outcome.

    Each entry returned by the service carries its own `succeeded` flag, so
    the number of results is not the number of documents actually indexed.

    Args:
        batch: List of document dicts to upload via `search_client`.

    Returns:
        The list of per-document results returned by `upload_documents`.
    """
    results = search_client.upload_documents(batch)
    failures = [item for item in results if not item.succeeded]
    print(f"Uploaded {len(results) - len(failures)} documents")
    for item in failures:
        print(f"Failed to upload document {item.key}: {item.error_message}")
    return results


# List to store the documents of the batch currently being assembled
documents = []

# Iterate through all JSON files in the directory; sorted so that the
# processing order is the same on every run
for filename in sorted(os.listdir(directory)):
    if filename.endswith('.json'):
        print(f"Processing {filename}")
        file_path = os.path.join(directory, filename)
        document = process_json_file(file_path)
        documents.append(document)

        # Once a full batch has accumulated, upload it and start a new one
        if len(documents) >= BATCH_SIZE:
            result = upload_batch(documents)
            documents = []  # Clear the list for the next batch


# Upload any remaining documents
if documents:
    result = upload_batch(documents)

print("Data upload completed.")