In [50]:
from langchain_community.llms import OpenAI
from langchain_community.vectorstores import Chroma
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.chains import RetrievalQA
import os

## Load PDF document

In [51]:
# Read PDF using PyPDF2
loader = PyPDFLoader(file_path="C:/Documents/AI/GenAI/Langchain/Claims_Chatbot/pdf_files/claims_flow.pdf")
pages = loader.load_and_split()

In [52]:
len(pages)

50

## Split into chunks

In [63]:
doc_splitter = RecursiveCharacterTextSplitter(
        chunk_size = 1000,
        chunk_overlap  = 50,
    )
split_pages = doc_splitter.split_documents(pages)


In [64]:
len(split_pages)

131

In [65]:
split_pages[100]

Document(page_content='intervention rules, the claim pends. The user is presented with the options of (1) accepting the way\nClaims processed the claim, (2) denying the entire claim or (3) rerouting the claim to an earlier step in\nthe flow in order to change the claim’s fields or results.\nFinalization\nThe most important part of finalizing a claim is the detection of concurrent use of counters and cases.\nConcurrent use must be detected in order to prevent a counter from exceeding its limit due to the\nsimultaneous processing of multiple claims. Likewise, the creation of the identical overlapping cases is\ndetected. Once a claim is cleared and confirmed not to be in conflict with other claims, a snapshot of\nthe claim, that is, a claim transaction, is stored in the Claims T ransaction R epository.\nFinalized claims can be unfinalized through the user interface or an integration point. Unfinalizing a\nclaim means that the claim is open for changes, but the claim will need to go throug

## Store chunks in vector DB

In [66]:
embeddings = OpenAIEmbeddings()
# Create vector db to store raw documents
vectordb = Chroma.from_documents(
        documents=split_pages,
        embedding=embeddings,
    )

# Retrieve relevant chunks and store them in a variable
#retriever = vectordb.as_retriever(k=3)

In [71]:
retriever = vectordb.as_retriever()

In [72]:
llm = OpenAI(temperature=0.2, max_tokens=500)


  warn_deprecated(


In [73]:
qa_chain = RetrievalQA.from_chain_type(
llm=llm,
chain_type="stuff",
retriever=retriever,
return_source_documents=True,
output_key='Answer'
)

In [81]:
question = "What is claim finaliztion?"

In [82]:
output = qa_chain(question)

In [83]:
print(output)

{'query': 'What is claim finaliztion?', 'Answer': ' Claim finalization is the process of detecting concurrent use of counters and cases in order to prevent a counter from exceeding its limit and creating identical overlapping cases. Once a claim is cleared and confirmed not to be in conflict with other claims, a snapshot of the claim is stored in the Claims Transaction Repository. Finalized claims can be unfinalized through the user interface or an integration point.', 'source_documents': [Document(page_content='intervention rules, the claim pends. The user is presented with the options of (1) accepting the way\nClaims processed the claim, (2) denying the entire claim or (3) rerouting the claim to an earlier step in\nthe flow in order to change the claim’s fields or results.\nFinalization\nThe most important part of finalizing a claim is the detection of concurrent use of counters and cases.\nConcurrent use must be detected in order to prevent a counter from exceeding its limit due to 

In [70]:
context = vectordb.similarity_search(question,k=3)

In [33]:
question_answering_prompt = ChatPromptTemplate.from_messages(
        [
            (
                "user",
                '''
                You are an expert claims processor. You are a subject matter expert in claims adjudication system named oracle health insurance.
                Your job is to answer user's question based on the context given below delimited by triple backticks.
                ```{context}```                
                If the question is not related to the context, just say I don't know.

                ''',
            ),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )