In [None]:
import os
from dotenv import load_dotenv, find_dotenv
from azure.core.credentials import AzureKeyCredential
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
    ComplexField,
    CorsOptions,
    SearchIndex,
    ScoringProfile,
    SearchFieldDataType,
    SimpleField,
    SearchableField
)

In [None]:
# Locate a .env file with find_dotenv() and load its variables into the process environment.
load_dotenv(dotenv_path=find_dotenv())

In [None]:
# Both settings are mandatory: indexing os.environ raises KeyError when one is missing.
endpoint, key = os.environ["SEARCH_ENDPOINT"], os.environ["SEARCH_API_KEY"]

# Service-level client used below to create the index
client = SearchIndexClient(
    endpoint=endpoint,
    credential=AzureKeyCredential(key),
)

In [None]:
# Index name and schema: a string key, a numeric rate, a searchable
# description, and a nested "address" object with two string sub-fields.
name = "hotels"
fields = [
    SimpleField(name="hotelId", type=SearchFieldDataType.String, key=True),
    SimpleField(name="baseRate", type=SearchFieldDataType.Double),
    SearchableField(name="description", type=SearchFieldDataType.String),
    ComplexField(
        name="address",
        fields=[
            SimpleField(name=sub_field, type=SearchFieldDataType.String)
            for sub_field in ("streetAddress", "city")
        ],
    ),
]

In [None]:
from azure.core.exceptions import HttpResponseError, ResourceExistsError

# CORS settings for the index: any origin allowed, max age of 60 seconds.
cors_options = CorsOptions(allowed_origins=["*"], max_age_in_seconds=60)
# No scoring profiles are defined for this index.
scoring_profiles = []

index = SearchIndex(
    name=name,
    fields=fields,
    scoring_profiles=scoring_profiles,
    cors_options=cors_options)


# Best-effort creation: the cell keeps running on failure, but the message
# now reflects what actually went wrong.
try:
    result = client.create_index(index)
    print(f"Index '{name}' created.")
except ResourceExistsError:
    print(f"Index '{name}' already exists.")
except HttpResponseError as e:
    # Authentication, schema and quota failures also arrive as
    # HttpResponseError, so report the service's own message instead of
    # assuming the index already exists.
    print(f"Index '{name}' was not created: {e.message}")


### Add documents to the index

In [None]:
import os
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient

index_name = "hotels"
endpoint, key = os.environ["SEARCH_ENDPOINT"], os.environ["SEARCH_API_KEY"]

# Sample hotels, shaped to match the index fields:
# (hotelId, baseRate, description, streetAddress, city)
documents = [
    {
        'hotelId': hotel_id,
        'baseRate': base_rate,
        'description': description,
        'address': {
            'streetAddress': street_address,
            'city': city,
        },
    }
    for hotel_id, base_rate, description, street_address, city in (
        ('1', 199.0, 'Deluxe two-room suite perfect for business or leisure travelers.',
         '12345 Main St', 'Seattle'),
        ('2', 299.0, 'Spacious suite with modern design and amenities.',
         '67890 Side St', 'Bellevue'),
        ('3', 99.0, 'Cozy suite perfect for leisure travelers on a budget.',
         '54321 Budget Rd', 'Seattle'),
    )
]

# Document-level client bound to the "hotels" index
search_client = SearchClient(endpoint, index_name, AzureKeyCredential(key))

# Send the batch; `result` holds the upload response that the next cell iterates over.
result = search_client.upload_documents(documents=documents)



In [None]:
# Walk the upload response and print each entry's key and success flag.
for res in result:
    print(
        f"Upload of document with ID '{res.key}' succeeded: {res.succeeded}"
    )

Now we can retrieve documents from Azure Cognitive Search (ACS).

In [None]:
# Build a SearchClient for querying; note that this rebinds `client`,
# which previously referred to the SearchIndexClient.
credential = AzureKeyCredential(key)
client = SearchClient(
    endpoint=endpoint,
    index_name=index_name,
    credential=credential,
)

In [None]:
# Query the index for the term "suite"
results = client.search(search_text="suite")

# The loop variable is `hit`, not `result`, so it does not overwrite the
# upload response stored in `result` that the upload-report cell reads.
for hit in results:
    print(hit)

### Using LangChain with ACS

In [None]:
import os
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch

In [None]:
# Embedding client; its embed_query method is handed to the vector store below.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(deployment="text-embedding-ada-002", chunk_size=1)
# NOTE: this rebinds index_name, which the earlier cells set to "hotels".
index_name: str = "langchain-vector-demo"
vector_store: AzureSearch = AzureSearch(
    # Index os.environ directly (as the earlier cells do) so a missing setting
    # raises KeyError here instead of passing None into AzureSearch.
    azure_search_endpoint=os.environ["SEARCH_ENDPOINT"],
    azure_search_key=os.environ["SEARCH_API_KEY"],
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)

In [None]:
from langchain.document_loaders import DirectoryLoader, TextLoader

# Load every .txt file under ./hotels (recursive glob), one TextLoader per file.
loader = DirectoryLoader(
    './hotels',
    glob="**/*.txt",
    loader_cls=TextLoader,
)
data = loader.load()

In [57]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Splitter settings: chunk size 200 with an overlap of 20, measured with len();
# separators are treated as literal strings, not regular expressions.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
    length_function=len,
    is_separator_regex=False,
)

# Split the loaded files into chunks for the vector store
docs = text_splitter.split_documents(data)

: 

In [None]:
# Add the split chunks to the vector store; the call's return value is shown as the cell output.
vector_store.add_documents(
    documents=docs,
)

In [None]:

# Store the hits under their own name: `docs` holds the chunk list passed to
# add_documents, and overwriting it would make re-running that cell upload
# search hits instead of the original chunks.
matches = vector_store.similarity_search(
    query="Which suite is the cheapest?",
    k=3,
    search_type="similarity",
)

# An empty result would make matches[0] raise IndexError, so guard it.
if matches:
    print(matches[0].page_content)
else:
    print("No matching documents found.")

In [None]:
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI

# Build a "stuff"-type RetrievalQA chain that uses the vector store as its retriever.
qa = RetrievalQA.from_chain_type(
    llm=ChatOpenAI(),
    chain_type="stuff",
    retriever=vector_store.as_retriever(),
)

# Ask the question; the returned value is displayed as the cell output.
qa.run("Which suite is the cheapest?")