In [1]:
import os

from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_openai import AzureOpenAIEmbeddings, OpenAIEmbeddings

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Azure OpenAI connection settings.
# The endpoint and API key are looked up in the process environment; each is
# None when its variable is not set. The API version and the embedding
# deployment name are fixed values for this notebook.
azure_deployment: str = "text-embedding-3-small"
azure_openai_api_version: str = "2023-05-15"
azure_openai_api_key: str = os.environ.get("AZURE_OPENAI_API_KEY")
azure_endpoint: str = os.environ.get("AZURE_OPENAI_ENDPOINT")

In [3]:
# Address and key for the vector store, taken from the environment.
# Either value is None when the corresponding variable is not set.
vector_store_password: str = os.environ.get("VECTOR_STORE_PASSWORD_Free")
vector_store_address: str = os.environ.get("VECTOR_STORE_ADDRESS_Free")

In [4]:
# Embedding client backed by an Azure account (AzureOpenAIEmbeddings).
# The keyword arguments are collected first so the constructor call stays short.
_embedding_kwargs = {
    "azure_deployment": azure_deployment,
    "openai_api_version": azure_openai_api_version,
    "azure_endpoint": azure_endpoint,
    "api_key": azure_openai_api_key,
}
embeddings: AzureOpenAIEmbeddings = AzureOpenAIEmbeddings(**_embedding_kwargs)

In [5]:
index_name: str = "langchain-vector-demo"

# Extra options forwarded to the Azure client; here the maximum number of
# retries. Other available settings are listed at
# https://github.com/Azure/azure-sdk-for-python/blob/main/sdk/core/azure-core/README.md#configurations
_search_client_options = {"retry_total": 4}

# Vector store over the index named above; queries are embedded with
# `embeddings.embed_query`.
vector_store: AzureSearch = AzureSearch(
    embedding_function=embeddings.embed_query,
    index_name=index_name,
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    additional_search_client_options=_search_client_options,
)

#### Read documents (NLP-based PDF parsing)

In [6]:
from langchain_community.document_loaders import UnstructuredPDFLoader

# Location of the PDF to load. Setting the AGENCY_PDF_PATH environment
# variable overrides it, so the notebook is not tied to one machine's
# directory layout; when the variable is unset the original path is used
# unchanged.
file_path = os.getenv(
    "AGENCY_PDF_PATH",
    "/Users/apahuja/MacWorkspace/cloud_genAI/Sample_Data/Agency-files-documents/Agency Manual MA Combines Files/AP-2 12 22.pdf",
)

# Loader for that file; `loader.load()` is called in a later cell.
loader = UnstructuredPDFLoader(file_path)

In [None]:
import os

import nltk

# Add an "nltk_data" directory under the current working directory to NLTK's
# search path. The membership check keeps the cell idempotent: re-running it
# no longer appends the same entry again.
local_nltk_dir = os.path.join(os.getcwd(), "nltk_data")
if local_nltk_dir not in nltk.data.path:
    nltk.data.path.append(local_nltk_dir)

# Core tokenizers (older resource name)
nltk.download("punkt")

# On newer NLTK (≥3.9) the tokenizer tables are looked up under this name.
nltk.download("punkt_tab")

# POS tagger (older name used by many libs)
nltk.download("averaged_perceptron_tagger")

# If you're on newer NLTK (≥3.9), also grab the new name just in case:
nltk.download("averaged_perceptron_tagger_eng")




In [13]:
# Run the PDF loader and keep the documents it returns.
docs = loader.load()
# Show the first document (its metadata and extracted page_content).
docs[0]



Document(metadata={'source': '/Users/apahuja/MacWorkspace/cloud_genAI/Sample_Data/Agency-files-documents/Agency Manual MA Combines Files/AP-2 12 22.pdf'}, page_content='COMMERCIAL AUTO\n\nAP-2 (12/22)\n\nTHIS ENDORSEMENT CHANGES THE POLICY. PLEASE READ IT CAREFULLY.\n\nAUTO ENHANCEMENT ENDORSEMENT\n\nThis endorsement modifies insurance provided under the following:\n\nBUSINESS AUTO COVERAGE FORM\n\n1. AIRBAG COVERAGE\n\nThe following is added to SECTION III - PHYSICAL DAMAGE COVERAGE, B.3.a. – Exclusions:\n\nMechanical breakdown does not include the accidental discharge of an airbag.\n\n2. AMENDED DEFINITION OF BODILY INJURY (Not Applicable in New York)\n\nUnder SECTION V – DEFINITIONS, the definition of bodily injury is deleted in its entirety and replaced by the following:\n\n“Bodily injury” means bodily injury, sickness or disease sustained by a person, including mental anguish or death resulting from any of these. Mental anguish damages are considered the direct result of and payab

In [20]:
# Number of characters in the first document's extracted text.
len(docs[0].page_content)

13218