In [None]:
import os
from dotenv import load_dotenv, find_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    ComplexField,
    CorsOptions,
    SearchIndex,
    SearchFieldDataType,
    SimpleField,
    SearchableField
)

In [None]:
# Resolve the application's .env file first, then load its variables into the
# process environment for the cells below.
dotenv_path = find_dotenv('../application/.env')
load_dotenv(dotenv_path)

In [None]:
# Read the Azure Cognitive Search connection settings from the environment.
# Indexing os.environ raises KeyError right here if a variable is missing.
endpoint = os.environ["SEARCH_ENDPOINT"]
key = os.environ["SEARCH_API_KEY"]

# Never print the API key itself: cell output is stored in the notebook file
# and would leak the secret to anyone the notebook is shared with.
print("SEARCH_API_KEY loaded.")

# Index-management client (create / list indexes) authenticated with the API key.
client = SearchIndexClient(endpoint, AzureKeyCredential(key))

In [None]:
# Name of the search index that the following cells create and populate.
name = "restaurant"

# Nested "address" object, declared separately to keep the field list flat.
address_field = ComplexField(
    name="address",
    fields=[
        SimpleField(name="streetAddress", type=SearchFieldDataType.String),
        SimpleField(name="city", type=SearchFieldDataType.String),
    ],
)

# Index schema: "restaurantId" is the document key and "description" is the
# only field declared as searchable.
fields = [
    SimpleField(name="restaurantId", type=SearchFieldDataType.String, key=True),
    SimpleField(name="averageCost", type=SearchFieldDataType.Double),
    SearchableField(name="description", type=SearchFieldDataType.String),
    address_field,
]

In [None]:
from azure.core.exceptions import HttpResponseError

# CORS settings for the index: any origin is allowed, max age is 60 seconds.
cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
scoring_profiles = []  # no custom scoring profiles are defined

# Full index definition assembled from the name and schema declared above.
index = SearchIndex(
    name=name,
    fields=fields,
    scoring_profiles=scoring_profiles,
    cors_options=cors_options)


try:
    # Check for the index explicitly so "already exists" is reported only when
    # it is true, rather than being assumed for every HTTP failure.
    if name in client.list_index_names():
        print(f"Index '{name}' already exists.")
    else:
        result = client.create_index(index)
        print(f"Index '{name}' created.")
except HttpResponseError as e:
    # Authentication, quota or schema errors end up here: show the real cause
    # and stop, because the following cells depend on the index being present.
    print(f"Index '{name}' could not be created: {e}")
    raise


### Add documents to the index

In [None]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

index_name = "restaurant"
endpoint = os.environ["SEARCH_ENDPOINT"]
key = os.environ["SEARCH_API_KEY"]

# Sample restaurants as compact rows:
# (restaurantId, averageCost, description, streetAddress, city)
restaurant_rows = [
    ('1', 50.0, 'Traditional Italian cuisine with a modern twist.', '123 Via Roma', 'Rome'),
    ('2', 70.0, 'Family-friendly Italian restaurant with classic dishes.', '456 Via Milano', 'Milan'),
    ('3', 35.0, 'Cozy trattoria offering regional specialties.', '789 Via Napoli', 'Naples'),
]

# Expand each row into a document whose keys mirror the index schema.
documents = [
    {
        'restaurantId': restaurant_id,
        'averageCost': average_cost,
        'description': description,
        'address': {
            'streetAddress': street_address,
            'city': city,
        },
    }
    for restaurant_id, average_cost, description, street_address, city in restaurant_rows
]

# Document-level client bound to the "restaurant" index; upload the batch and
# keep the per-document results for the next cell.
credential = AzureKeyCredential(key)
search_client = SearchClient(endpoint, index_name, credential)
result = search_client.upload_documents(documents=documents)


In [None]:
# Report, one line per uploaded document, whether the upload succeeded.
for upload_outcome in result:
    document_key, succeeded = upload_outcome.key, upload_outcome.succeeded
    print(f"Upload of document with ID '{document_key}' succeeded: {succeeded}")

Now we can retrieve documents from Azure Cognitive Search (ACS).

In [None]:
# Queries must go through the document-level SearchClient (`search_client`);
# `client` is the SearchIndexClient used for index management and has no
# `search` method.
results = search_client.search(search_text="Family friendly?")

# Use a dedicated loop variable so the upload `result` list is not overwritten.
for hit in results:
    print(hit)

### Using LangChain with ACS

In [None]:
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch

In [None]:
# Embedding client; its `embed_query` method is handed to the vector store
# below as the embedding function.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(deployment="text-embedding-ada-002", chunk_size=1)

# The LangChain example uses its own index, separate from "restaurant".
index_name: str = "langchain-example"

# Index os.environ directly (as the earlier cells do) so a missing setting
# raises KeyError here instead of passing None on to AzureSearch.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=os.environ["SEARCH_ENDPOINT"],
    azure_search_key=os.environ["SEARCH_API_KEY"],
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)

In [None]:
from langchain.document_loaders import DirectoryLoader, TextLoader

# Load every file under ./restaurant that matches the "**/*.txt" glob,
# reading each one with TextLoader.
loader = DirectoryLoader(
    path='./restaurant',
    glob="**/*.txt",
    loader_cls=TextLoader,
)
data = loader.load()

# Show how many documents were loaded.
loaded_count = len(data)
print(loaded_count)

In [None]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings: sizes are measured with `len`, and separators are
# treated as plain strings rather than regular expressions.
splitter_options = {
    "chunk_size": 120,
    "chunk_overlap": 20,
    "length_function": len,
    "is_separator_regex": False,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded documents into chunks and show how many were produced.
docs = text_splitter.split_documents(data)
chunk_count = len(docs)
print(chunk_count)

In [None]:
# Add the split chunks to the vector store; the call's return value is shown
# as the cell output.
vector_store.add_documents(docs)

In [None]:

# Store the hits under their own name: rebinding `docs` here would replace the
# splitter chunks, so re-running the add_documents cell afterwards would index
# these search hits instead of the original chunks.
similar_docs = vector_store.similarity_search(
    query="When are the opening hours of the restaurant?",
    k=3,
    search_type="similarity",
)
print(similar_docs)


In [None]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Build a "stuff"-type RetrievalQA chain from a chat model and a retriever
# backed by the vector store.
chat_model = ChatOpenAI()
retriever = vector_store.as_retriever()
qa = RetrievalQA.from_chain_type(
    llm=chat_model,
    chain_type="stuff",
    retriever=retriever,
)

# Ask the question; the answer is displayed as the cell's output.
question = "When are the opening hours of the restaurant?"
qa.run(question)