In [1]:
import os
import getpass

from langchain_community.vectorstores.azuresearch import AzureSearch
from langchain_core.output_parsers import StrOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_google_genai import ChatGoogleGenerativeAI

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv
# Load key/value pairs from a .env file into os.environ so the cells below
# can read the Azure and Google credentials from the environment.
load_dotenv()

True

In [3]:
# Azure AI Search connection settings, read from the process environment.
# Indexing os.environ directly raises KeyError if either variable is missing,
# so a misconfigured environment fails here rather than later.
vector_store_address: str = os.environ["AZURE_SEARCH_ENDPOINT"]
vector_store_password: str = os.environ["AZURE_SEARCH_ADMIN_KEY"]

In [4]:
# Prompt for the Google API key only when it is not already present in the
# environment. `getpass` is imported as a module at the top of the notebook,
# so the prompt function is `getpass.getpass`; calling the module object
# itself raises "TypeError: 'module' object is not callable".
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = getpass.getpass("Provide your Google API key here")

# print(os.environ["GOOGLE_API_KEY"])

In [5]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# Embedding client; its `embed_query` method is handed to the vector store
# below as the embedding function.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [6]:
# Name of the Azure AI Search index backing the vector store.
index_name: str = "langchain-vector-demo"
# Vector store wired to the endpoint/key read from the environment above and
# to the Google embedding function used to embed queries.
vector_store: AzureSearch = AzureSearch(
    azure_search_endpoint=vector_store_address,
    azure_search_key=vector_store_password,
    index_name=index_name,
    embedding_function=embeddings.embed_query,
)

In [53]:
from typing import List

from langchain.retrievers.multi_query import MultiQueryRetriever

from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain_core.output_parsers import BaseOutputParser
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_core.prompts import PromptTemplate
from pydantic.v1 import BaseModel, Field

import logging

# Install a default root handler, then raise the multi-query retriever's
# logger to INFO so its "Generated queries" messages show up in cell output.
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

from langchain_google_genai import ChatGoogleGenerativeAI

# Schema with a single `lines` field holding a list of strings.
# NOTE(review): not referenced by any other visible cell; the parser actually
# used below is LineListOutputParser — confirm whether this model is still needed.
class LineList(BaseModel):
    # "lines" is the key (attribute name) of the parsed output
    lines: List[str] = Field(description="Lines of text")


class LineListOutputParser(BaseOutputParser[List[str]]):
    """Output parser that splits raw LLM text into one entry per line.

    Blank and whitespace-only lines are dropped so that an answer containing
    empty lines (or an empty answer) never produces empty query strings.
    Any bullet or numbering prefix the model emits is left untouched.
    """

    def parse(self, text: str) -> List[str]:
        """Return the non-empty lines of ``text``.

        Args:
            text: Raw completion text returned by the LLM.

        Returns:
            The lines of ``text`` in their original order, each with leading
            and trailing whitespace removed; blank lines are omitted, so an
            empty or whitespace-only ``text`` yields an empty list.
        """
        stripped_lines = (line.strip() for line in text.splitlines())
        return [line for line in stripped_lines if line]



# Parser that turns the raw LLM completion into a list of query strings.
output_parser = LineListOutputParser()

# Gemini chat model used to generate the additional queries.
llm = ChatGoogleGenerativeAI(model="gemini-pro", convert_system_message_to_human=True)

# Prompt asking the model for three newline-separated questions derived from
# the user's question.
QUERY_PROMPT = PromptTemplate(
    input_variables=["question"],
    template="""You are an AI language model assistant. Your task is to generate three 
    follow up questions of the given user question. Provide these alternative questions separated by newlines.
    Original question: {question}""",
)

# Query-generation chain: prompt -> LLM -> list of lines.
llm_chain = LLMChain(llm=llm, prompt=QUERY_PROMPT, output_parser=output_parser)

# User question that seeds the query generation.
question = "What are all services included in my Northwind Health Plus plan?"

# Multi-query retriever: uses `llm_chain` to produce queries and the Azure
# vector store retriever to fetch documents for them.
retriever = MultiQueryRetriever(
    retriever=vector_store.as_retriever(), llm_chain=llm_chain
)

# Run the retrieval. `invoke` is the Runnable entry point for retrievers;
# `get_relevant_documents` is deprecated in recent langchain-core releases.
unique_docs = retriever.invoke(question)
unique_docs

INFO:langchain.retrievers.multi_query:Generated queries: ['- Are there any additional services that can be added to my Northwind Health Plus plan?', '- What are the specific details and limitations of each service included in the Northwind Health Plus plan?', '- Is there a comprehensive list or document that outlines all the covered services under the Northwind Health Plus plan?']


[Document(page_content="offers a wider range of prescription drug coverage than Northwind Standard. Both plans offer coverage for vision and dental services, as well as medical services.\n\nCost Comparison Contoso Electronics deducts the employee's portion of the healthcare cost from each paycheck. This means that the cost of the health insurance will be spread out over the course of the year, rather than being paid in one lump sum. The employee's portion of the cost will be calculated based on the selected health plan and the number of people covered by the insurance. The table below shows a cost comparison between the different health plans offered by Contoso Electronics:\n\nNext Steps We hope that this information has been helpful in understanding the differences between Northwind Health Plus and Northwind Standard. We are confident that you will find the right plan for you and your family. Thank you for choosing Contoso Electronics!", metadata={'source': 'C:\\Users\\musad\\AppData\

In [29]:
import logging

# Make the multi-query retriever log the queries it generates.
logging.basicConfig()
logging.getLogger("langchain.retrievers.multi_query").setLevel(logging.INFO)

# NOTE(review): this cell's execution count (29) is lower than the cell that
# defines `retriever` above (53), and the recorded output lists five numbered
# queries rather than three — presumably it was last run against a different
# `retriever` object. Confirm with Restart Kernel -> Run All.
# `invoke` is the Runnable entry point for retrievers; `get_relevant_documents`
# is deprecated in recent langchain-core releases.
unique_docs = retriever.invoke(question)
unique_docs

INFO:langchain.retrievers.multi_query:Generated queries: ['1. List the services covered by Northwind Health Plus plan.', '2. Retrieve documents containing a comprehensive overview of Northwind Health Plus plan benefits.', '3. Provide information about the scope of services offered under Northwind Health Plus plan.', '4. Identify and extract details regarding the services included in Northwind Health Plus plan membership.', '5. Show me documents that describe the benefits and services available to Northwind Health Plus plan subscribers.']


[Document(page_content="offers a wider range of prescription drug coverage than Northwind Standard. Both plans offer coverage for vision and dental services, as well as medical services.\n\nCost Comparison Contoso Electronics deducts the employee's portion of the healthcare cost from each paycheck. This means that the cost of the health insurance will be spread out over the course of the year, rather than being paid in one lump sum. The employee's portion of the cost will be calculated based on the selected health plan and the number of people covered by the insurance. The table below shows a cost comparison between the different health plans offered by Contoso Electronics:\n\nNext Steps We hope that this information has been helpful in understanding the differences between Northwind Health Plus and Northwind Standard. We are confident that you will find the right plan for you and your family. Thank you for choosing Contoso Electronics!", metadata={'source': 'C:\\Users\\musad\\AppData\

In [56]:
from langchain_core.output_parsers import BaseOutputParser

class LineListOutputParser(BaseOutputParser[List[str]]):
    """Output parser that splits raw LLM text into one entry per line.

    Blank and whitespace-only lines are dropped so that an answer containing
    empty lines (or an empty answer) never produces empty query strings.
    Any bullet or numbering prefix the model emits is left untouched.
    """

    def parse(self, text: str) -> List[str]:
        """Return the non-empty lines of ``text``.

        Args:
            text: Raw completion text returned by the LLM.

        Returns:
            The lines of ``text`` in their original order, each with leading
            and trailing whitespace removed; blank lines are omitted, so an
            empty or whitespace-only ``text`` yields an empty list.
        """
        stripped_lines = (line.strip() for line in text.splitlines())
        return [line for line in stripped_lines if line]

# Shared parser instance; the chain built in the CustomRetriever class body
# below ends with this object.
output_parser = LineListOutputParser()

In [65]:
from typing import Any
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.callbacks import (
    CallbackManagerForRetrieverRun
)
from langchain_core.retrievers import BaseRetriever
from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate

# run_manager = CallbackManagerForRetrieverRun()

class CustomRetriever(BaseRetriever):
    """Retriever-shaped wrapper that generates questions grounded in retrieved context.

    The class body builds a chain that fetches documents from ``vector_store``
    (``k=3``), joins their ``page_content`` into one context string, fills the
    prompt with that context and the incoming question, calls the Gemini chat
    model, and passes the result through the module-level ``output_parser``.

    NOTE(review): ``_get_relevant_documents`` returns the generated question
    strings, not retrieved documents — confirm this is the intended contract.
    """

    # Prompt text; `{context}` and `{question}` are filled in by the chain below.
    template = """You are an AI language model assistant. Your task is to generate three 
    questions based only on the following context:

    {context}

    Question: {question}

    use the same keywords present in the context to generate new questions.
    Provide these alternative questions separated by newlines.
    """
    prompt = ChatPromptTemplate.from_template(template)
    model = ChatGoogleGenerativeAI(model="gemini-pro")
    # Vector-store retriever limited to three results per search.
    retriever = vector_store.as_retriever(search_kwargs={"k": 3})

    # Defined without `self`: it is used as a plain function while the class
    # body executes (piped after `retriever` below), not called as a method.
    def format_docs(docs):
        """Join the ``page_content`` of each document, separated by blank lines."""
        return "\n\n".join([d.page_content for d in docs])


    # "context" comes from retrieving documents for the input and formatting
    # them; "question" is fed by RunnablePassthrough() (presumably the chain
    # input unchanged). The dict feeds prompt -> model -> output_parser.
    chain = (
        {"context": retriever | format_docs, "question": RunnablePassthrough()}
        | prompt
        | model
        | output_parser
    )

    def _get_relevant_documents(
        self, query: str, *, run_manager: CallbackManagerForRetrieverRun
    ) -> List[str]:
        """Return the questions generated for ``query``.

        Delegates to ``generate_queries`` and returns its result unchanged.
        """
        queries = self.generate_queries(query, run_manager)
        return queries
    
    def generate_queries(
        self, question: str, run_manager: CallbackManagerForRetrieverRun
    ) -> List[str]:
        """Generate questions for the user input by invoking ``self.chain``.

        Args:
            question: user query, passed as the chain input
            run_manager: callback manager supplied by the caller; not used
                in this method

        Returns:
            The value produced by ``self.chain``, i.e. the output of
            ``output_parser`` applied to the model response
        """
        response = self.chain.invoke(
            question
        )
        # print(response)
        # lines = response["text"]
        return response

# User question used as input for the context-grounded question generation.
question = "What are all services included in my Northwind Health Plus plan?"

# Replace the multi-query retriever with the custom one defined in this cell.
retriever = CustomRetriever()

# `invoke` is the Runnable entry point for retrievers; `get_relevant_documents`
# is deprecated in recent langchain-core releases.
# Despite its name, `unique_docs` holds the generated question strings here,
# because CustomRetriever returns the chain output rather than documents.
unique_docs = retriever.invoke(question)
unique_docs

['1. What is the difference in prescription drug coverage between Northwind Health Plus and Northwind Standard plans?',
 '2. What is the difference in medical service coverage between Northwind Health Plus and Northwind Standard plans?',
 '3. What services are not covered under the Northwind Standard plan?']