In [2]:
import os
import getpass

from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.document_loaders import TextLoader
from langchain import hub
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
from langchain_core.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.tools.retriever import create_retriever_tool
from langchain.agents import AgentExecutor, create_react_agent
from langchain.agents.format_scratchpad import format_log_to_str
# from langchain_text_splitters import CharacterTextSplitter

from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [173]:
RunnablePassthrough.assign(agent_scratchpad=lambda x: format_log_to_str(x["intermediate_steps"]))

RunnableAssign(mapper={
  agent_scratchpad: RunnableLambda(lambda x: format_log_to_str(x['intermediate_steps']))
})

In [3]:
from dotenv import load_dotenv
load_dotenv()

True

In [4]:
vector_store_address: str = os.environ["YOUR_AZURE_SEARCH_ENDPOINT"]
vector_store_password: str = os.environ["YOUR_AZURE_SEARCH_ADMIN_KEY"]

In [5]:
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass("Provide your Google API key here")

# print(os.environ["GOOGLE_API_KEY"])

In [6]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [7]:
index_name: str = "langchain-vector-demo"
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)

In [17]:
loader = PyPDFLoader("D:\MusaddiqueHussainLabs\omniqhub\data\Benefit_Options.pdf")
documents = loader.load_and_split()
documents

[Document(page_content='Contoso Electronics \nPlan and Benefit Packages', metadata={'source': 'D:\\MusaddiqueHussainLabs\\omniqhub\\data\\Benefit_Options.pdf', 'page': 0}),
 Document(page_content='This document contains information generated using a language model (Azure OpenAI). The information \ncontained in this document is only for demonstration purposes and does not reflect the opinions or \nbeliefs of Microsoft. Microsoft makes no representations or warranties of any kind, express or implied, \nabout the completeness, accuracy, reliability, suitability or availability with respect to the information \ncontained in this document. \nAll rights reserved to Microsoft', metadata={'source': 'D:\\MusaddiqueHussainLabs\\omniqhub\\data\\Benefit_Options.pdf', 'page': 1}),
 Document(page_content='Welcome to Contoso  Electronics ! We are excited to offer our employees two comprehensive health \ninsurance plans through Northwind Health. \nNorthwind Health Plus  \nNorthwind Health Plus is a co

In [18]:
vector_store.add_documents(documents=documents)

['ZTBiYTUyZjktOTkwMi00NDlkLWEzYmMtYzJiZGQ5ZDVhZmY0',
 'M2FkMjBiOTEtMWE1NC00NDMwLTgwMmQtNGJlY2JkYWQ4NGY0',
 'YjllYjBkMzctM2IzYS00MGNhLWFlZmMtNTkyNWVmMjY0NjBl',
 'MjVmNDMzZDUtNzRmNC00YWIyLThmOGUtMjFkMDI4NzcxMjdj']

In [47]:
# Perform a similarity search
docs = vector_store.similarity_search(
    query="What is included in my Northwind Health Plus plan that is not in standard?",
    k=3,
    search_type="hybrid",
)

docs[0].metadata

{'source': 'C:\\Users\\musad\\AppData\\Local\\Temp\\tmptsmcfi_m/omnihub-container/Northwind_Health_Plus_Benefits_Details.pdf'}

In [41]:
# Retrieve and generate using the relevant snippets of the blog.
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
prompt = hub.pull("rlm/rag-prompt")
llm = ChatGoogleGenerativeAI(model="gemini-pro")

In [42]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [46]:
from langchain.globals import set_verbose, set_debug

set_debug(False)
set_verbose(False)

# result = rag_chain.invoke("WHAT DO I DO IF I'M OUTSIDE WASHINGTON?")
# print(result)

In [65]:
# Define your desired data structure.
class Joke(BaseModel):
    answer: str = Field(description="the answer to the question. If no source available, put the answer as I don't know.")
    # thoughts: str = Field(description="brief thoughts on how you came up with the answer, e.g. what sources you used, what you thought about, etc.")
    thoughts: str = Field(description="brief thoughts on how you came up with the answer, you should always think about what to do, the action to take, the result of the action and the finaly return the complete thoughts process to the original input question")
#     thoughts: str = Field(description="""Use the following format:

# Question: the input question you must answer
# Thought: you should always think about what to do
# Action: the action to take,
# Action Input: the input to the action
# Observation: the result of the action
# ... (this Thought/Action/Action Input/Observation can repeat N times)
# Thought: I now know the final answer
# Final Answer: the final answer to the original input question

# Begin!

# Question: 
# Thoughts:""")

In [66]:
# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=Joke)

In [70]:
template = """You are a system assistant who helps the company employees with their questions. Be brief in your answers.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

{format_instructions}

Question: {question}"""
# custom_rag_prompt = PromptTemplate.from_template(template)
custom_rag_prompt = PromptTemplate(
    template=template,
    input_variables=["question"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | parser
)



In [None]:
query = "provide some tips related to Northwind Health Plus plan?"

rag_chain.invoke(query)

In [71]:
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
    | custom_rag_prompt
    | llm
    | parser
)

rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)



In [72]:
result = rag_chain_with_source.invoke("what are all services covered under Northwind Health Plus plan?")
result

{'context': [Document(page_content='Right To And Payment Of Benefits OTHER INFORMATION ABOUT THIS PLAN: Right To And Payment Of Benefits\n\nUnder the Northwind Health Plus plan, members are entitled to receive the benefits listed in the plan documents. These benefits can be received from any in-network provider, as long as all eligibility requirements are met. The plan pays for covered health care services and supplies, including preventive care services, at the rates specified in the plan documents.\n\nThe plan pays for covered services only after the member has met their annual deductible. The plan pays for covered services after the member has met the annual deductible, up to the maximum out-of-pocket limit. The maximum out-of-pocket limit will be specified in the plan documents. For covered services, Northwind Health Plus pays either a percentage of the cost or a fixed dollar amount, whichever is less.\n\nNorthwind Health Plus also pays for services that are not listed in the plan 

In [None]:
from langchain.schema.document import Document

Document()

In [16]:
docs = result['context']
doc_name = set()

for doc in docs:

    path = doc.metadata['source']
    pdf_name = os.path.basename(path)

    doc_name.add(pdf_name)

doc_name.add('Northwind_Health_Plus.pdf')
doc_name_list = list(doc_name)
doc_name_list

['Northwind_Health_Plus.pdf', 'Northwind_Health_Plus_Benefits_Details.pdf']

In [81]:
str_doc = ""
for singdoc in doc_name_list:
    str_doc += "["+singdoc+"]"

str_doc

ans = "this is final ans "
ans += str_doc
ans

'this is final ans [Northwind_Health_Plus_Benefits_Details.pdf][Northwind_Health_Plus.pdf]'

In [73]:

tool = create_retriever_tool(
    retriever,
    "Search_Northwind_Health_Plus_Benefits_Details",
    "Search Northwind Health Plus Benefits and Details and return answer to employee question",
)
tools = [tool]

# Get the prompt to use - you can modify this!
prompt = hub.pull("hwchase17/react")

# Construct the ReAct agent
agent = create_react_agent(llm, tools, prompt)

In [100]:
# Create an agent executor by passing in the agent and tools
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=False, return_intermediate_steps=True)

In [104]:
agent_result = agent_executor.invoke({"input": "what are all health plan services covered?"})

In [109]:
agent_result

{'input': 'what are all health plan services covered?',
 'output': 'The Northwind Health Plus plan covers a wide range of health care services and supplies, including preventive care services, medical, vision, and dental services, prescription drug coverage, mental health and substance abuse coverage, and coverage for skilled nursing facility services and spinal and other manipulations.',
 'intermediate_steps': [(AgentAction(tool='Search_Northwind_Health_Plus_Benefits_Details', tool_input='What are all health plan services covered?', log='Action: Search_Northwind_Health_Plus_Benefits_Details\nAction Input: What are all health plan services covered?'),
   "Right To And Payment Of Benefits OTHER INFORMATION ABOUT THIS PLAN: Right To And Payment Of Benefits\n\nUnder the Northwind Health Plus plan, members are entitled to receive the benefits listed in the plan documents. These benefits can be received from any in-network provider, as long as all eligibility requirements are met. The plan 

In [111]:
agent_action = agent_result["intermediate_steps"]
a, b = agent_action[0]
print(b)

Right To And Payment Of Benefits OTHER INFORMATION ABOUT THIS PLAN: Right To And Payment Of Benefits

Under the Northwind Health Plus plan, members are entitled to receive the benefits listed in the plan documents. These benefits can be received from any in-network provider, as long as all eligibility requirements are met. The plan pays for covered health care services and supplies, including preventive care services, at the rates specified in the plan documents.

The plan pays for covered services only after the member has met their annual deductible. The plan pays for covered services after the member has met the annual deductible, up to the maximum out-of-pocket limit. The maximum out-of-pocket limit will be specified in the plan documents. For covered services, Northwind Health Plus pays either a percentage of the cost or a fixed dollar amount, whichever is less.

Northwind Health Plus also pays for services that are not listed in the plan documents, if the health care provider det

In [249]:
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
llm = ChatGoogleGenerativeAI(model="gemini-pro")

tool = create_retriever_tool(
    retriever,
    "Search_Northwind_Health_Plus_Benefits_Details",
    "Search Northwind Health Plus Benefits and Details and return answer to employee question",
)
tools = [tool]

template = """You are a system assistant who helps the company employees with their questions. Be brief in your answers.
Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Use three sentences maximum and keep the answer as concise as possible.
Always say "thanks for asking!" at the end of the answer.

{context}

{format_instructions}

Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}"""

custom_rag_prompt = PromptTemplate(
    template=template,
    input_variables=["input"],
    partial_variables={"format_instructions": parser.get_format_instructions(), "tools": tools, "tool_names": ", ".join([t.name for t in tools])}
)

rag_chain = (
    {"context": retriever | format_docs, "input": RunnablePassthrough(), "agent_scratchpad": RunnablePassthrough()}
    | custom_rag_prompt
    | llm
    | parser
)

# rag_chain.invoke("what are all plans covered ?")


In [251]:
rag_chain.invoke("does the Northwind Health Plus covers hearning exams?")

{'answer': 'I am sorry, I cannot answer your question as the provided text does not contain information about hearing exams.',
 'thoughts': []}

In [232]:
print(custom_rag_prompt)

input_variables=['agent_scratchpad', 'context', 'input', 'tool_names', 'tools'] partial_variables={'format_instructions': 'The output should be formatted as a JSON instance that conforms to the JSON schema below.\n\nAs an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}\nthe object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.\n\nHere is the output schema:\n```\n{"properties": {"answer": {"title": "Answer", "description": "the answer to the question, add a source or document name reference to the end of each sentence. e.g. Apple is a fruit [reference1.pdf][reference2.pdf]. If no source available, put the answer as I don\'t know.", "type": "string"}, "thoughts": {"title": "Thoughts", "description": "brief thoughts on how you came up with the answer, e.g. what sources you use

In [233]:
# Construct the ReAct agent
agent = create_react_agent(llm, tools, custom_rag_prompt)

In [234]:
agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)

In [237]:
agent_executor.invoke({"input": "what are all plans covered?"})



[1m> Entering new AgentExecutor chain...[0m


KeyError: "Input to PromptTemplate is missing variables {'context'}.  Expected: ['agent_scratchpad', 'context', 'input'] Received: ['input', 'intermediate_steps', 'agent_scratchpad']"

In [204]:
prompt = hub.pull("hwchase17/react")


In [205]:
print(prompt.template)

Answer the following questions as best you can. You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}


In [207]:
print(prompt)

input_variables=['agent_scratchpad', 'input', 'tool_names', 'tools'] template='Answer the following questions as best you can. You have access to the following tools:\n\n{tools}\n\nUse the following format:\n\nQuestion: the input question you must answer\nThought: you should always think about what to do\nAction: the action to take, should be one of [{tool_names}]\nAction Input: the input to the action\nObservation: the result of the action\n... (this Thought/Action/Action Input/Observation can repeat N times)\nThought: I now know the final answer\nFinal Answer: the final answer to the original input question\n\nBegin!\n\nQuestion: {input}\nThought:{agent_scratchpad}'


In [113]:
import sys

for chunk in llm.stream("Tell me a short poem about snow"):
    print(chunk)
    

content="White blankets the ground,\nA winter's soft embrace,\nSnow falls"
content=" gently down,\nA world transformed with grace.\n\nFlurries dance and swirl,\nA symphony of sound,\nNature's gentle purl,"
content="\nAs winter's charm is found."


In [1]:
from langchain_core.prompts import ChatPromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI bot. Your name is {name}."),
        ("human", "Hello, how are you doing?"),
        ("ai", "I'm doing well, thanks!"),
        ("human", "{user_input}"),
    ]
)

messages = chat_template.format_messages(name="Bob", user_input="What is your name?")
messages

[SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
 HumanMessage(content='Hello, how are you doing?'),
 AIMessage(content="I'm doing well, thanks!"),
 HumanMessage(content='What is your name?')]

In [14]:
from typing import List

from langchain.retrievers.multi_query import MultiQueryRetriever

from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic.v1 import BaseModel, Field


# Output parser will split the LLM result into a list of queries
class LineList(BaseModel):
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")


class LineListOutputParser(PydanticOutputParser):
    def __init__(self) -> None:
        super().__init__(pydantic_object=LineList)

    def parse(self, text: str) -> LineList:
        lines = text.strip().split("\n")
        return LineList(lines=lines)


output_parser = LineListOutputParser()

llm = ChatGoogleGenerativeAI(model="gemini-pro", convert_system_message_to_human=True)

QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate five 
    different versions of the given user question to retrieve relevant documents from a vector 
    database. By generating multiple perspectives on the user question, your goal is to help
    the user overcome some of the limitations of the distance-based similarity search. 
    Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

# Chain
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

# Other inputs
question = "What are all services included in my Northwind Health Plus plan?"

# Run
retriever = MultiQueryRetriever(
    retriever=vector_store.as_retriever(), llm_chain=llm_chain, parser_key="lines"
)  # "lines" is the key (attribute name) of the parsed output

# Results
unique_docs = retriever.get_relevant_documents(
    query=question
)
# len(unique_docs)

OutputParserException: Invalid json output: - What are the comprehensive benefits covered under my Northwind Health Plus insurance plan?
- List all healthcare services encompassed by my Northwind Health Plus insurance policy.
- Provide a detailed breakdown of the medical services available to me as a Northwind Health Plus subscriber.
- What healthcare procedures and treatments are included in the coverage of my Northwind Health Plus plan?
- What are the specific medical services and treatments that I am eligible to receive under my Northwind Health Plus insurance coverage?