In [9]:
from dotenv import load_dotenv
from azure.identity import DefaultAzureCredential
import os

# Load environment variables from .env file
load_dotenv(override=True)

# Retrieve environment variables
endpoint = os.getenv("AZURE_SEARCH_SERVICE_ENDPOINT")
key_credential = os.getenv("AZURE_SEARCH_ADMIN_KEY")
index_name = os.getenv("AZURE_SEARCH_INDEX")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_ENDPOINT")
azure_openai_key = os.getenv("AZURE_OPENAI_API_KEY")
azure_openai_embedding_deployment = os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT")
azure_openai_api_version = os.getenv("AZURE_OPENAI_API_VERSION")
case_id = "1"

# Use DefaultAzureCredential if key_credential is not available
credential = key_credential if key_credential and len(key_credential) > 0 else DefaultAzureCredential()

# Print statements for debugging (optional)
print(f"Endpoint: {endpoint}")
print(f"Key Credential: {key_credential}")
print(f"Index Name: {index_name}")
print(f"Azure OpenAI Endpoint: {azure_openai_endpoint}")
print(f"Azure OpenAI Key: {azure_openai_key}")
print(f"Azure OpenAI Embedding Deployment: {azure_openai_embedding_deployment}")
print(f"Azure OpenAI API Version: {azure_openai_api_version}")

# Now you can proceed with using these variables in your Azure services logic

Endpoint: https://azure-searchservicemed.search.windows.net
Key Credential: 4zHTIQTj5ERssSk6MkggjYolbJey3AMgQytWnUj4vVAzSeBpOCzt
Index Name: meddoc
Azure OpenAI Endpoint: https://marlonopenai.openai.azure.com/
Azure OpenAI Key: 63f217470de1428992c267a747c05d33
Azure OpenAI Embedding Deployment: MedDocEmbeddings
Azure OpenAI API Version: 2024-05-13


In [10]:
from langchain_openai import AzureOpenAIEmbeddings
from azure.identity import DefaultAzureCredential, get_bearer_token_provider

openai_credential = DefaultAzureCredential()
token_provider = get_bearer_token_provider(openai_credential, "https://cognitiveservices.azure.com/.default")



# Use API key if provided, otherwise use RBAC authentication
embeddings = AzureOpenAIEmbeddings(
    azure_deployment=azure_openai_embedding_deployment,
    openai_api_version=azure_openai_api_version,
    azure_endpoint=azure_openai_endpoint,
    api_key=azure_openai_key,
    azure_ad_token_provider=token_provider if not azure_openai_key else None
)

In [12]:
from langchain.vectorstores.azuresearch import AzureSearch
from azure.search.documents.indexes.models import (
    ScoringProfile,
    SearchableField,
    SearchField,
    SearchFieldDataType,
    SimpleField
    
)

fields = [
    SimpleField(
        name="id",
        type=SearchFieldDataType.String,
        key=True,
        filterable=True,
    ),
    SearchableField(
        name="content",
        type=SearchFieldDataType.String,
        searchable=True,
    ),
    SearchField(
        name="content_vector",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        vector_search_dimensions=1536,
        vector_search_profile_name="myHnswProfile",
    ),
    SearchableField(
        name="metadata",
        type=SearchFieldDataType.String,
        searchable=True,
    ),
    SearchableField(
        name="case_id",
        type=SearchFieldDataType.String,
        filterable=True,
        searchable=True,
    ),
     SimpleField(
        name="source",
        type=SearchFieldDataType.String,
        filterable=True,
    ),
]

vector_store = AzureSearch(
    azure_search_endpoint=endpoint,
    azure_search_key=key_credential,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
    fields=fields
)



name is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
type is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
key is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
searchable is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
filterable is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
facetable is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
sortable is not a known attribute of class <class 'azure.search.documents.indexes.models._index.SearchField'> and will be ignored
hidden is not a known attribute of class <class 'azure.search.documents.indexes.models._index.Sear

NotFoundError: Error code: 404 - {'error': {'code': '404', 'message': 'Resource not found'}}

In [228]:
from azure.search.documents import SearchClient
from azure.core.credentials import AzureKeyCredential

search_client = SearchClient(endpoint, index_name, AzureKeyCredential(key_credential))


filter_expression = "case_id eq '" + case_id + "'"

results = search_client.search("", filter=filter_expression)
document_ids = [result["id"] for result in results]

for doc_id in document_ids:
    search_client.delete_documents(documents=[{"id": doc_id}])

print(document_ids)


[]


In [229]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import TokenTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.prompts import PromptTemplate


loader = DirectoryLoader('data/', glob="*.txt", loader_cls=TextLoader, loader_kwargs={'autodetect_encoding': True})

documents = loader.load()
text_splitter = TokenTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = text_splitter.split_documents(documents)

texts = [doc.page_content for doc in docs]
metadata = [{"case_id": case_id, "source": "ContractFile"} for doc in docs]

vector_store.add_texts(
    texts,
    metadata,
)


['YzI5OWEzZjMtMmEyMC00ZDNkLWI3YzAtMzRkMzMzZDBlOThi',
 'MDgzY2NjZjUtNzZkOS00ZTE3LWE1NWItZDQ1YmMxOWNiZDVk']

In [230]:
from langchain_openai import AzureChatOpenAI
llm = AzureChatOpenAI(deployment_name="casify")


In [237]:
CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template("""Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question.
Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:""")


filter_expression = "case_id eq '2'"

retrieverSim = vector_store.as_retriever(
       search_type="similarity",
       k=10,
       search_kwargs={'filters': filter_expression}
)

qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retrieverSim,
    condense_question_prompt=CONDENSE_QUESTION_PROMPT,
    return_source_documents=True,
    verbose=False
)

print(qa)

combine_docs_chain=StuffDocumentsChain(llm_chain=LLMChain(prompt=ChatPromptTemplate(input_variables=['context', 'question'], messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], template="Use the following pieces of context to answer the user's question. \nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\n----------------\n{context}")), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['question'], template='{question}'))]), llm=AzureChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x000001C63E9BD890>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x000001C63D7D5050>, openai_api_key=SecretStr('**********'), openai_proxy='', azure_endpoint='https://casify-ai-dev.openai.azure.com/', deployment_name='casify', openai_api_version='2024-05-01-preview', openai_api_type='azure')), document_variable_name='context') question_generator=LLMChain(prom

In [238]:
chat_history = []
query = "What is the principal objective of this contract"
result = qa({"question": query, "chat_history": chat_history})
metadata_list = [obj.metadata["source"] for obj in result["source_documents"]]

print("Question:", query)
print("Answer:", result["answer"])
print("Source Documents:", metadata_list)


Question: What is the principal objective of this contract
Answer: I'm sorry, but without the specific contract you are referring to, I can't provide the principal objective. Contracts can vary widely in their purpose and terms. If you can provide more details or context about the contract, I may be able to assist you better.
Source Documents: []


In [215]:
chat_history = [(query, result["answer"])]
print("chat_history:", chat_history)


query = "based on the history, What is my age?"
result = qa({"question": query, "chat_history": chat_history})
print("Question:", query)
print("Answer:", result["answer"])
print("Source Documents:", metadata_list)


chat_history: [('My name is Marlon Sanguino Jimenez and I born in 1989 , What is the principal objective of this contract', 'The principal objective of this contract is for EXPO KIDS FASHION WEEK, represented by the CONTRATANTE, to provide a process of specialized instruction and training for the runway model. This training aims to perfect human and social relationships through the development of psychological, cognitive, and physical skills necessary for modeling activities. The training will take place at the Hotel Viaggio in Medellín, Antioquia.')]
Question: based on the history, What is my age?
Answer: The provided information does not mention Marlon Sanguino Jimenez or his age. Therefore, I do not know Marlon Sanguino Jimenez's age.
Source Documents: ['ContractFile', 'ContractFile']
