In [1]:
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
from pydantic import BaseModel, Field
from typing import List

from dotenv import load_dotenv
load_dotenv()

True

In [10]:
# Structured-output schema shared by the chains in this notebook: callers read
# the generated queries from the `.sub_queries` attribute of the chain result.
# NOTE(review): the class docstring and the Field description are presumably
# forwarded to the LLM as the schema/tool description by
# `with_structured_output` -- treat them as prompt text and confirm before
# rewording them.
class MultiQuerySchema(BaseModel):
    """List of all queries generated by a LLM from a original query."""
    sub_queries: List[str] = Field(description="list of all queries generated by a LLM from a original query.")

In [11]:
# Base Groq chat model used by every chain below.
# NOTE(review): ChatGroq presumably picks up its API key from the environment
# populated by load_dotenv() -- confirm. Also verify that this model id is still
# served by Groq; hosted model ids get retired over time.
llm = ChatGroq(model="llama3-70b-8192")

# Same model, constrained to return a MultiQuerySchema instance instead of
# free-form text.
llm_with_structure = llm.with_structured_output(schema=MultiQuerySchema)

## Query-Transformation

In [15]:
# Few-shot demonstrations for multi-query generation: each entry pairs an
# original question with two reworded variants, serialized as a list literal.
multi_query_examples = [
    {
        "original_query": "What are the effects of chronic insomnia on cognitive performance?",
        "response": """["What are the cognitive consequences of chronic sleep deprivation and insomnia?", "Does chronic insomnia lead to a decline in specific cognitive abilities like decision-making or memory?"]""",
    },
    {
        "original_query": "What are the effects of insomnia on adolescents' mental health?",
        "response": """["How does insomnia influence the mental well-being of adolescents, including conditions like anxiety and depression?", "What mental health challenges do adolescents face as a result of chronic insomnia?"]""",
    },
]

# How a single example is rendered: one human turn followed by one AI turn.
example_multi_query_prompt = ChatPromptTemplate.from_messages([
    ("human", "Original Query: {original_query}"),
    ("ai", "{response}"),
])

# Expands every example through the template above.
# (Variable name kept as-is so other cells that reference it keep working.)
few_short_multi_query_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_multi_query_prompt,
    examples=multi_query_examples,
)

# System instructions for the model. Fix: "ky terms" -> "key terms"; this text
# is sent verbatim to the LLM, so the typo was part of the prompt.
multi_query_system = """You are an AI assistant. Generate two distinct yet related queries about insomnia based on the user's original query to maximize the chances of retrieving relevant documents that can provide better answers. The generated queries should:
- Rephrase the original query to explore different ways it can be interpreted.
- Focus on the key terms or topics within the query to emphasize specific aspects.
- Broaden or narrow the scope of the query to capture related but potentially overlooked information.
Your response must be a list of new queries.
"""

# Full prompt: system instructions, then the few-shot turns, then the live query.
multi_query_prompt = ChatPromptTemplate.from_messages([
    ("system", multi_query_system),
    few_short_multi_query_prompt,
    ("human", "Original Query: {original_query}"),
])

# Prompt piped into the structured-output model; invoking it yields an object
# with a `sub_queries` list.
multi_query_chain = multi_query_prompt | llm_with_structure

In [16]:
def generate_multi_query(query, chain):
    """Return the original query followed by the variants produced by ``chain``.

    Args:
        query: The user's original query; passed unchanged to ``chain.invoke``.
        chain: Any object whose ``invoke(query)`` result exposes a
            ``sub_queries`` list.

    Returns:
        A new list with ``query`` first and the generated sub-queries after it.
    """
    result = chain.invoke(query)
    # Keep the original question in front so it is always part of the result.
    return [query] + result.sub_queries

# Demo: expand a sample sleep-schedule question. The bare call on the last line
# makes the notebook display the returned list.
query = (
    "I'm 22 years old and I usually sleep at 2 AM. "
    "How can this affect my health?"
)
generate_multi_query(query=query, chain=multi_query_chain)


["I'm 22 years old and I usually sleep at 2 AM. How can this affect my health?",
 'What are the potential health consequences of consistently going to bed at 2 AM?',
 'How does a late bedtime like 2 AM impact physical and mental health in young adults?']

## Query Decomposition

In [18]:
# Few-shot demonstrations for query decomposition: each entry pairs a compound
# question with the list of focused sub-questions it breaks down into.
decompose_examples = [
    {
        "original_query": "What are the most effective treatments for chronic insomnia, and how do they compare in terms of side effects and long-term outcomes?",
        "response": """["What are the most commonly used treatments for chronic insomnia?", "What are the success rates of these treatments for chronic insomnia?", "What are the potential side effects of these treatments for chronic insomnia?", "How do the long-term outcomes of different insomnia treatments compare?", "Are there any studies that directly compare multiple treatments for chronic insomnia?"]""",
    },
    {
        "original_query": "What are the factors that contribute to the development of insomnia, and how do age and gender affect its prevalence?",
        "response": """["What are the primary factors that contribute to the development of insomnia?", "How does age influence the prevalence of insomnia?", "How does gender influence the prevalence of insomnia?", "Are there any genetic factors associated with insomnia?", "How do lifestyle factors such as diet and exercise affect insomnia?"]""",
    },
]

# How a single example is rendered: one human turn followed by one AI turn.
example_decompose_prompt = ChatPromptTemplate.from_messages([
    ("human", "Original Query: {original_query}"),
    ("ai", "{response}"),
])

# Fix: build the few-shot block from the decomposition template and examples
# defined in this cell. It previously reused `example_multi_query_prompt` and
# `multi_query_examples`, so the decompose chain was primed with rephrasing
# examples and `decompose_examples` was never used.
# (Variable name kept as-is so other cells that reference it keep working.)
few_short_decompose_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_decompose_prompt,
    examples=decompose_examples,
)

# System instructions for the model; sent verbatim to the LLM.
decompose_system = """You are an AI assistant. Given a complex query related to insomnia, your task is to break down the query into simpler, more specific sub-queries. The goal is to generate decomposed queries that target different aspects or steps needed to answer the original query effectively. Follow these guidelines:
- Analyze the original query and identify its main components.
- Decompose the query into a set of smaller, manageable queries.
- Ensure each sub-query is specific and focused on an individual aspect of the original query.
- Avoid redundancy or overlap between sub-queries.
- Prioritize breaking down any multifaceted or ambiguous parts into clear questions.
Your response must be a list of new sub-queries."""

# Full prompt: system instructions, then the few-shot turns, then the live query.
decompose_prompt = ChatPromptTemplate.from_messages([
    ("system", decompose_system),
    few_short_decompose_prompt,
    ("human", "Original Query: {original_query}"),
])

# Prompt piped into the structured-output model; invoking it yields an object
# with a `sub_queries` list.
decompose_chain = decompose_prompt | llm_with_structure

In [19]:
def generate_decompose_queries(query, chain):
    """Run ``chain`` on ``query`` and return only the generated sub-queries.

    Args:
        query: The user's original query; passed unchanged to ``chain.invoke``.
        chain: Any object whose ``invoke(query)`` result exposes a
            ``sub_queries`` attribute.

    Returns:
        The ``sub_queries`` value of the chain result; the original query is
        not included.
    """
    return chain.invoke(query).sub_queries

In [20]:
# Demo: decompose a two-part question (health impact + habit change). The bare
# call on the last line makes the notebook display the returned list.
query = (
    "I usually drink coffee in the evening to work on my deadlines. "
    "Is it bad for my health? "
    "What can I do to change this habit?"
)
generate_decompose_queries(query, decompose_chain)

['What are the negative effects of consuming coffee in the evening on sleep quality?',
 'How can I adjust my caffeine intake to avoid disrupting my sleep schedule?',
 'What are some healthy alternatives to coffee that can help me stay focused and productive in the evening?',
 'How can I develop a relaxing bedtime routine to improve my sleep hygiene?']