### Azure Cognitive Search Setup with Langchain

In [18]:
import openai
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

# Load variables from a local .env file (located via find_dotenv) into the environment.
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())


def _require_env(name: str) -> str:
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Without this check a
            missing variable surfaces further down as an opaque
            ``TypeError: str expected, not NoneType`` when it is copied into
            ``os.environ``.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; "
            "define it in your .env file or shell before running this notebook."
        )
    return value


# Point the openai module at the Azure OpenAI resource.
openai.api_type = "azure"
openai.api_base = _require_env("AZURE_OAI_ENDPOINT")
# Alternative API version used previously: "2023-03-15-preview"
openai.api_version = "2023-05-15"
openai.api_key = _require_env("AZURE_OAI_KEY")

# Model name and Azure deployment names (deployment names come from the environment).
llm_name = "gpt-3.5-turbo"
deployment_name_gpt = os.getenv("AZURE_OAI_MODEL_GPT_3")
deployment_name_ada = os.getenv("AZURE_OAI_MODEL_ADA")

# Azure Cognitive Search endpoint and admin key, passed to AzureSearch in later cells.
# NOTE(review): these are not validated here; either may be None if the variable is unset.
vector_store_address: str = os.getenv("AZURE_SEARCH_ENDPOINT")
vector_store_password: str = os.getenv("AZURE_SEARCH_ADMIN_KEY")

# Mirror the openai settings into OPENAI_* environment variables
# (presumably read by langchain's OpenAI integrations -- confirm against the langchain docs).
os.environ["OPENAI_API_TYPE"] = openai.api_type
os.environ["OPENAI_API_BASE"] = openai.api_base
os.environ["OPENAI_API_KEY"] = openai.api_key
os.environ["OPENAI_API_VERSION"] = openai.api_version

### Create Vector Index in Azure Cognitive Search

In [19]:
# Name of the Azure Cognitive Search index used throughout this notebook.
index_name: str = "facts-about-snakes"

# Embedding client bound to the ada deployment.
# NOTE(review): chunk_size=1 presumably limits each embeddings request to a
# single text -- confirm against the OpenAIEmbeddings documentation.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
    chunk_size=1,
    deployment=deployment_name_ada,
)

# Vector store handle for `index_name`; queries are embedded with embeddings.embed_query.
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
)

### Load data and split it into chunks

In [20]:
# Source PDF, given relative to the notebook's working directory.
pdf_path = "../data/snakes.pdf"
loader = PyPDFLoader(pdf_path)
# `documents` is the list returned by the loader; its length is inspected in the next cell.
documents = loader.load()

In [21]:
# Number of documents returned by the PDF loader.
len(documents)

2

In [25]:
# Splitter settings: split on newlines, target chunk size 300, overlap 4.
# chunk_size is a target rather than a hard cap: the split cell's output warns
# about a chunk of size 434.
separator = '\n'
chunk_size = 300
chunk_overlap = 4
text_splitter = CharacterTextSplitter(
    separator=separator,
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)

In [26]:
# Split the loaded documents into chunks; `docs` is what the upload cell passes to the vector store.
docs = text_splitter.split_documents(documents)
# Number of chunks produced by the splitter.
len(docs)

Created a chunk of size 434, which is longer than the specified 300


20

### Upload the docs in vector form into the Azure Cognitive Search vector database

In [27]:
# Upload the chunks to the vector store. The returned list of strings (presumably the
# ids of the uploaded entries) is displayed as the cell output.
# NOTE(review): re-running this cell likely uploads the same chunks again -- confirm.
vector_store.add_documents(documents=docs)

['MTNhODQyYzctNzc5ZC00NzE4LWEwNDAtMjdjOTgxZTQ0NTFm',
 'ODZlMTQ0ZDQtNjFjNy00YTNiLWE0YzEtYzJlMTRkMGE2NjRj',
 'ZmYyYzdmMTQtMDMzOC00YjdmLThlOWItYjgzODUyMmQwNGFl',
 'ZDI4ZjQ1MjItZGM5Yi00ODE2LTkwNmUtNDU3NWM2NWI0MDNk',
 'YzMzZmE2ZDUtNTE3Zi00MDNjLTllNDAtODc1MmVhMWQ0ZDgw',
 'NzhiZDA2MTYtZmYzOS00OTgwLTg1YjgtMjNlOWMyOThjMGUy',
 'Y2Y1YWMzNzQtZTQ4Ni00YmRjLTljNGEtMTM4ODIxNGJhZTRm',
 'MjU0MTQxMDctZjgxYy00ZDM0LTk1MWEtNjA4NjhjOTAzNTlm',
 'NzM0NTI2Y2MtNDdhNi00NzAzLWE2YjEtZjNlMjYxNzFkNDBk',
 'NTU4MWUyMWQtOWU4Ni00ZGEwLWEwZTctMzk1ZjcyZjdkZDMx',
 'YmJiYTlkMjItYWFmZC00YzkxLWIxNGItNTcwZDIzNDQ0YmQ4',
 'NTJhMjg0ZDctM2Q4Ny00NTk2LTk3MmYtMzI0MzJkODhiMDYx',
 'N2Q2MDFhZmUtOWMyZC00OTFhLTg3ZDQtNDFmZTU4ODU0YTZi',
 'ZTY5ODE2NjAtNDI1MS00OGFmLWI2YjUtYjNlZGIyZjZiZmY4',
 'MjRkOGJiZWItYjJhYy00NTU1LWE3NDktODM4ZGI1ODdkNDRj',
 'NTc4MTQ5YzUtYzM1NS00NTExLWFiOTktYzQ1NzA0NGMxZTIy',
 'NzBhY2U5YzktMWRiNy00MWNlLWI0ZjktMTZmNzIzMzg0ZGUx',
 'YjFmYTdiZTYtMWE3Zi00MmMwLWEyMjktZTY5NWM1ODQ0ZGMy',
 'NzFjNDBmMTItMWFmZC00NjBlLWI3MDAtNzljMDUwYjNk

### Perform Vector Similarity Search

In [29]:
# Pure vector similarity search: fetch the top 3 matches for the question.
# The hits get their own name instead of rebinding `docs`, because `docs` holds
# the chunks that the upload cell passes to add_documents; overwriting it would
# make a re-run of that cell upload search hits instead of the original chunks.
similarity_hits = vector_store.similarity_search(
    query="What are the venomous snakes in north carolina",
    k=3,
    search_type="similarity",
)
# Show the text of the best-ranked hit.
print(similarity_hits[0].page_content)

Eastern Coral Snake. The Copperhead is the most common 
and widespread venomous snake in North Carolina, occur-
ring in both rural and urban environments. Four of the six 
are protected species in North Carolina, and as such, should 
not be handled or disturbed: the Timber and Pigmy Rattle-


### Perform Hybrid Search

In [30]:
# Hybrid search: same question and k as the similarity search, with search_type="hybrid".
# The hits get their own name instead of rebinding `docs`, because `docs` holds
# the chunks that the upload cell passes to add_documents; overwriting it would
# make a re-run of that cell upload search hits instead of the original chunks.
hybrid_hits = vector_store.similarity_search(
    query="What are the venomous snakes in north carolina",
    k=3,
    search_type="hybrid",
)
# Show the text of the best-ranked hit.
print(hybrid_hits[0].page_content)

How many snakes in North Carolina are 
Venomous?
Out of the thirty-eight different species of snakes in North 
Carolina, only six are venomous. Venomous snakes include 
the Copperhead, Cottonmouth, Timber Rattlesnake, Eastern 
Diamondback Rattlesnake, Pigmy Rattlesnake, and the


### Semantic Search

In [44]:
# Vector store set up for semantic search.
# Prerequisites on the Azure Cognitive Search service that hosts the index:
#   - semantic search must be enabled on the service, and
#   - the queried index must have a semantic configuration named "default".
index_name: str = "facts-about-snakes"

# Rebinds `vector_store` to an instance that carries the semantic configuration name.
vector_store: AzureSearch = AzureSearch(
    index_name=index_name,
    semantic_configuration_name="default",
    embedding_function=embeddings.embed_query,
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
)

In [45]:
# Semantic hybrid search: same question and k, with search_type="semantic_hybrid".
# The hits get their own name instead of rebinding `docs`, because `docs` holds
# the chunks that the upload cell passes to add_documents; overwriting it would
# make a re-run of that cell upload search hits instead of the original chunks.
semantic_hits = vector_store.similarity_search(
    query="What are the venomous snakes in north carolina",
    k=3,
    search_type="semantic_hybrid",
)
# Show the text of the best-ranked hit.
print(semantic_hits[0].page_content)

How many snakes in North Carolina are 
Venomous?
Out of the thirty-eight different species of snakes in North 
Carolina, only six are venomous. Venomous snakes include 
the Copperhead, Cottonmouth, Timber Rattlesnake, Eastern 
Diamondback Rattlesnake, Pigmy Rattlesnake, and the
