### Query Expansion:

In [2]:
#importing libraries

from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain.chat_models import init_chat_model
from langchain_core.prompts import PromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains import create_retrieval_chain
from langchain_core.runnables import RunnableMap

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
#ingesting data and dividing into chunks for context window

documents = TextLoader(file_path='langchain_crewai.txt', encoding="utf-8").load()
print(f"Data is ingested successfully")

splitter = RecursiveCharacterTextSplitter(chunk_size=200, chunk_overlap=50)
chunks = splitter.split_documents(documents)
print(f"Number of chunks: {len(chunks)}")

Data is ingested successfully
Number of chunks: 62


In [4]:
#embedding model and vectorstore retriever

embedding_model = HuggingFaceEmbeddings(model="sentence-transformers/all-MiniLM-L6-v2")

retriever = FAISS.from_documents(documents=chunks,
                                 embedding=embedding_model).as_retriever(search_type='mmr', serach_kargs={"k":4})

retriever



VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001AC8E0ECA90>, search_type='mmr', search_kwargs={})

In [5]:
#model

model = init_chat_model(model="groq:llama-3.1-8b-instant")
model

ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 8192, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000001ACE1F66ED0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001ACE2026890>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [6]:
#query expansion prompt

query_expansion_prompt= PromptTemplate.from_template(
    ''' given a query, expand the following query to improve document retrieval by adding relevant words, synonyms or technical terms.
    only provide enhanced query, no explanation needed
    query: {query}
    enhanced_query:
    '''
)

query_expansion_prompt

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template=' given a query, expand the following query to improve document retrieval by adding relevant words, synonyms or technical terms.\n    only provide enhanced query, no explanation needed\n    query: {query}\n    enhanced_query:\n    ')

In [7]:
query_enhance_chain = query_expansion_prompt | model | StrOutputParser()
result=query_enhance_chain.invoke({"query": "LangChain memory"})
print(result)

"(LangChain OR Chain) AND (memory OR data_storage OR knowledge_retrieval) AND (memory_networks OR graph_memory OR cognitive_architecture)"


In [8]:
#qa prompt template to get context information
qa_prompt = PromptTemplate.from_template(
    '''Answer user's question based on  only given context information.
    context: {context}
    question: {input}
    '''
)

qa_prompt

PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="Answer user's question based on  only given context information.\n    context: {context}\n    question: {input}\n    ")

In [9]:
#creating stuff documents chain

documents_chain = create_stuff_documents_chain(llm=model,
                                               prompt=qa_prompt)
documents_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="Answer user's question based on  only given context information.\n    context: {context}\n    question: {input}\n    ")
| ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 8192, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000001ACE1F66ED0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001ACE2026890>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_docu

In [10]:
#creating retrieval chain with query expansion

rag_pipeline = (
    RunnableMap( {"input" : lambda x : x['input'],
                  "context": lambda x : retriever.invoke(query_enhance_chain.invoke({"query": x['input']}))
    }
    )
| documents_chain
)


rag_pipeline

{
  input: RunnableLambda(...),
  context: RunnableLambda(...)
}
| RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
    context: RunnableLambda(format_docs)
  }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
  | PromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, template="Answer user's question based on  only given context information.\n    context: {context}\n    question: {input}\n    ")
  | ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 8192, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': False, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000001ACE1F66ED0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001ACE2026890>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**

In [11]:
#query run

query= {"input": " compare langchain and crewai"}
print(query_enhance_chain.invoke({"query": query}))
response = rag_pipeline.invoke(query)
print("Answer: \n", response)

{'input': '(compare langchain OR compare crewai) AND (langchain OR chain models OR transformer models) AND (crewai OR conversational AI OR ai assistant)'}
Answer: 
 LangChain provides the infrastructure for retrieval and reasoning systems, while CrewAI is designed for higher-level orchestration of agents, focusing on coordination and flexibility to work with different backends and frameworks.
