### Azure Cognitive Search Setup with LangChain

In [12]:
import openai
import os
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores.azuresearch import AzureSearch
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

# Read the local .env file so the Azure settings below are visible to os.getenv.
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

# Fail fast with a readable message when a setting this notebook relies on is
# missing. Without this check an unset AZURE_OAI_ENDPOINT / AZURE_OAI_KEY only
# surfaces further down as an opaque TypeError, because os.environ values must
# be strings and os.getenv returns None for an unset variable.
_required_settings = (
    "AZURE_OAI_ENDPOINT",
    "AZURE_OAI_KEY",
    "AZURE_OAI_MODEL_ADA",
    "AZURE_SEARCH_ENDPOINT",
    "AZURE_SEARCH_ADMIN_KEY",
)
_missing_settings = [name for name in _required_settings if not os.getenv(name)]
if _missing_settings:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(_missing_settings)
        + ". Define them in your .env file or shell environment."
    )

# Point the openai module at the Azure OpenAI resource.
openai.api_type = "azure"
openai.api_base = os.getenv("AZURE_OAI_ENDPOINT")
openai.api_version = "2023-05-15"
openai.api_key = os.getenv("AZURE_OAI_KEY")

# Model name and Azure deployment names. AZURE_OAI_MODEL_GPT_3 is not validated
# above because nothing in the cells below reads deployment_name_gpt.
llm_name = "gpt-3.5-turbo"
deployment_name_gpt = os.getenv("AZURE_OAI_MODEL_GPT_3")
deployment_name_ada = os.getenv("AZURE_OAI_MODEL_ADA")

# Endpoint and admin key of the Azure Cognitive Search service.
vector_store_address: str = os.getenv("AZURE_SEARCH_ENDPOINT")
vector_store_password: str = os.getenv("AZURE_SEARCH_ADMIN_KEY")

# Mirror the openai settings into OPENAI_* environment variables.
# NOTE(review): presumably this is how the LangChain OpenAI wrappers pick up
# the Azure configuration - confirm against the installed LangChain version.
os.environ["OPENAI_API_TYPE"] = openai.api_type
os.environ["OPENAI_API_BASE"] = openai.api_base
os.environ["OPENAI_API_KEY"] = openai.api_key
os.environ["OPENAI_API_VERSION"] = openai.api_version

### Create Vector Index in Azure Cognitive Search

In [13]:
# Name of the Azure Cognitive Search index used throughout this notebook.
index_name: str = "facts-about-snakes"

# Embedding client bound to the ada deployment configured in the setup cell.
# NOTE(review): chunk_size=1 presumably limits each embedding request to a
# single text - confirm against the OpenAIEmbeddings documentation.
embeddings: OpenAIEmbeddings = OpenAIEmbeddings(
    chunk_size=1,
    deployment=deployment_name_ada,
)

# Vector store handle for the index; queries are embedded with embed_query.
vector_store: AzureSearch = AzureSearch(
    embedding_function=embeddings.embed_query,
    index_name=index_name,
    azure_search_key=vector_store_password,
    azure_search_endpoint=vector_store_address,
)

### Load data and split it into chunks

In [14]:
# Load the snake fact sheet; the path is relative to the notebook's directory.
pdf_path = "../data/snakes.pdf"
loader = PyPDFLoader(pdf_path)
documents = loader.load()

In [15]:
# Show how many Document objects the loader returned.
len(documents)

2

In [16]:
# Chunking parameters: target chunk length, overlap between neighbouring
# chunks, and the separator the text is split on.
chunk_size, chunk_overlap, separator = 300, 4, '\n'

# chunk_size is not a hard cap: the split cell's output reports a chunk of
# size 434 against the specified 300.
text_splitter = CharacterTextSplitter(
    separator=separator,
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)

In [17]:
# Break the loaded documents into chunks with the splitter configured above,
# then display the resulting chunk count.
docs = text_splitter.split_documents(documents=documents)
len(docs)

Created a chunk of size 434, which is longer than the specified 300


20

### Upload the docs in vector form into the Azure Cognitive Search vector database

In [18]:
# Upload the chunks in `docs` to the index; the value displayed below is the
# list of string ids returned by add_documents.
# NOTE(review): presumably every run inserts new entries, so re-running this
# cell may duplicate the documents in the index - confirm before re-running.
vector_store.add_documents(documents=docs)

['NjFhOGNmNzAtNjVhNS00YTg5LWI3MzAtODk3ZDZmNmIxYTAy',
 'ZGJiMjk0NGYtOWVhYS00MjBmLWE5YTktYmQwMjAwMmRhMWQ2',
 'NTY3ZjhjZTUtMTczMC00NTgyLWJlYTgtZDg1ODhjZjY5YWE2',
 'NDUxOTA3YjctNWNiOS00ZGRkLWEzODMtN2FjOTQyMTE5MDhk',
 'ZTE5ZDc0OTYtYzMzZi00NjBjLTk1ZmMtZTdhYTljNzQ0NDBh',
 'N2U4NjJkNWItOGNiNy00MmQ1LTg1YzItZjMxY2IyZWExNWMy',
 'ZWY5M2RjNzItMGZiNi00NWI4LTgwYzUtZDk1OWEwMDU1ODNi',
 'YTI4YTYyY2ItNzFjYS00ZjUxLTk0ZDQtZWUxYTM0MTczZjUz',
 'MWZiZTI2NjUtZmU4Yi00Y2Y3LWI3NWEtZmZlNGQzZGI0MjMx',
 'N2M3YjM4ODItZGY1NC00NDk0LWI3YTItYzUwMDViNmE0ZWY1',
 'ZWU0YzBhOTgtZTllYi00YzFlLWI5ZTItMWY3OWUxNGMzYjBi',
 'NDI0ZDczZmUtNTNmNS00MGEwLWI3NGUtNjY1NWRhNWI5ZWY3',
 'NGJhZmIwNTgtZGU1MS00ODJkLWI0MDEtMjYyN2FkMjZiNTRm',
 'NzFkMjlmYTktOWQyNy00NjAxLTliODMtZGE1ZDY0ODM3NGQ0',
 'N2M3NjlhMzgtZWExYS00MDM0LTliM2YtMDRiNTYwN2ZhYmVh',
 'YjA0ODNlMjItNWI2NS00N2E4LWI2MTYtODdmMGFhOTljNTU3',
 'MGZiZTIwMmEtY2ZmNy00MDVhLTk3MTQtNzRmMWI1MmFjMjQw',
 'Mzg0ZGI5MDMtMDVkMi00NGZiLTllZjQtYzhkNjUwNWI4OTRk',
 'MDNiYzdlMzctYWU5ZS00ODBlLWIwN2EtMGI0MTRkNmI5

### Perform Vector Similarity Search

In [19]:
# Vector similarity search against the index (search_type="similarity"),
# asking for k=3 matches.
# The hits are stored under their own name instead of `docs`: `docs` holds the
# chunk list consumed by the upload cell, and overwriting it here would make a
# later re-run of that cell upload search hits instead of the chunks.
vector_results = vector_store.similarity_search(
    query="What are the venomous snakes in north carolina",
    k=3,
    search_type="similarity",
)
# Print the text of the first hit.
print(vector_results[0].page_content)

Eastern Coral Snake. The Copperhead is the most common 
and widespread venomous snake in North Carolina, occur-
ring in both rural and urban environments. Four of the six 
are protected species in North Carolina, and as such, should 
not be handled or disturbed: the Timber and Pigmy Rattle-


### Perform Hybrid Search

In [20]:
# Same query as the vector search, this time with search_type="hybrid".
# NOTE(review): presumably hybrid combines keyword and vector scoring - confirm
# against the AzureSearch vector store documentation.
# The hits are stored under their own name instead of `docs`: `docs` holds the
# chunk list consumed by the upload cell, and overwriting it here would make a
# later re-run of that cell upload search hits instead of the chunks.
hybrid_results = vector_store.similarity_search(
    query="What are the venomous snakes in north carolina",
    k=3,
    search_type="hybrid",
)
# Print the text of the first hit.
print(hybrid_results[0].page_content)

Eastern Coral Snake. The Copperhead is the most common 
and widespread venomous snake in North Carolina, occur-
ring in both rural and urban environments. Four of the six 
are protected species in North Carolina, and as such, should 
not be handled or disturbed: the Timber and Pigmy Rattle-


### Semantic Search

In [21]:
# Vector store setup for semantic search.
# Prerequisites on the Azure side:
#   * semantic search must be enabled in the Azure Cognitive Search service;
#   * the index being queried must have a semantic configuration named "default".
# The index itself lives in the Azure Cognitive Search service.
index_name: str = "facts-about-snakes"

# Rebinds `vector_store` to a handle that carries the semantic configuration.
vector_store: AzureSearch = AzureSearch(
    embedding_function=embeddings.embed_query,
    semantic_configuration_name="default",
    index_name=index_name,
    azure_search_key=vector_store_password,
    azure_search_endpoint=vector_store_address,
)

In [22]:
# Same query with search_type="semantic_hybrid"; this relies on `vector_store`
# having been built with a semantic configuration name.
# The hits are stored under their own name instead of `docs`: `docs` holds the
# chunk list consumed by the upload cell, and overwriting it here would make a
# later re-run of that cell upload search hits instead of the chunks.
semantic_results = vector_store.similarity_search(
    query="What are the venomous snakes in north carolina",
    k=3,
    search_type="semantic_hybrid",
)
# Print the text of the first hit.
print(semantic_results[0].page_content)

How many snakes in North Carolina are 
Venomous?
Out of the thirty-eight different species of snakes in North 
Carolina, only six are venomous. Venomous snakes include 
the Copperhead, Cottonmouth, Timber Rattlesnake, Eastern 
Diamondback Rattlesnake, Pigmy Rattlesnake, and the
