# Query Enhancement - Query Expansion Techniques

In [3]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_classic.chat_models import init_chat_model
from langchain_core.prompts import PromptTemplate
from langchain_core.documents import Document
from langchain_core.output_parsers import StrOutputParser

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Load the raw text file into LangChain documents.
loader = TextLoader("langchain_crewai.txt")
documents = loader.load()

# Split into overlapping chunks; separators are tried from coarsest
# (blank line) to finest (empty string) so chunks break at natural boundaries
# where possible.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
    separators=["\n\n", "\n", " ", ""],
)

docs = splitter.split_documents(documents)

In [5]:
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used to vectorise the chunks.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build a FAISS index over the split documents using that embedding model.
vector_store = FAISS.from_documents(
    docs,
    embedding=embedding_model,
)

In [6]:
# Expose the vector store as a retriever using MMR search with k=5.
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 5})

# Display the retriever configuration as the cell output.
retriever

VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001DBCF8620D0>, search_type='mmr', search_kwargs={'k': 5})

In [7]:
# Prompt text asking the model to rewrite a user query with synonyms,
# technical terms, and context before it is sent to the retriever.
QUERY_EXPANSION_TEMPLATE = """
You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.

Original Query: "{query}"

Expanded query:
"""

query_expansion_prompt = PromptTemplate.from_template(QUERY_EXPANSION_TEMPLATE)

# Chat model selected by its "provider:model" identifier.
llm = init_chat_model("groq:openai/gpt-oss-20b")

# prompt -> chat model -> plain string output
query_expansion_chain = query_expansion_prompt | llm | StrOutputParser()

# Display the composed chain as the cell output.
query_expansion_chain

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.\n\nOriginal Query: "{query}"\n\nExpanded query:\n')
| ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x000001DBF3867AD0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000001DBF3ABC390>, model_name='openai/gpt-oss-20b', model_kwargs={}, groq_api_key=SecretStr('**********'))
| StrOutputParser()

In [8]:
# Try the expansion chain on a sample question and show the expanded text.
sample_expansion = query_expansion_chain.invoke(
    {"query": "How to analyze a large number of papers quickly"}
)
print(sample_expansion)

**Expanded Query**

```
("analyze" OR "review" OR "evaluate" OR "scrutinize" OR "assess")
AND
("papers" OR "articles" OR "publications" OR "journals")
AND
("large number" OR "massive set" OR "thousands" OR "hundreds" OR "extensive collection")
AND
("quickly" OR "rapidly" OR "efficiently" OR "fast" OR "in short time")
AND
("text mining" OR "natural language processing" OR "machine learning" OR
 "topic modeling" OR "LDA" OR "BERT" OR "transformer" OR "embedding" OR
 "semantic analysis" OR "document clustering" OR "document classification" OR
 "automatic summarization" OR "citation analysis" OR "bibliometric analysis" OR
 "knowledge graph" OR "semantic web" OR "ontology mapping" OR
 "information retrieval" OR "text classification" OR "topic detection" OR
 "abstract summarization" OR "keyword extraction" OR "text similarity" OR
 "clustering algorithms" OR "scikit-learn" OR "spaCy" OR "gensim" OR "NLTK" OR
 "TensorFlow" OR "PyTorch" OR "Apache Spark" OR "Hadoop")
```

This Boolean expressio

In [13]:
from langchain_core.runnables import RunnableMap
from langchain_classic.chains.combine_documents import create_stuff_documents_chain

# Answering prompt: the model is told to rely only on the retrieved context.
QA_TEMPLATE = """
Answer the question based on the context provided. Stick to the context and do not add information outside it in your answers

Context:
{context}

Question: {input}
"""

qa_prompt = PromptTemplate.from_template(QA_TEMPLATE)

# Chain that fills the prompt's {context} slot with the retrieved documents
# and sends the result to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)


def _expanded_context(payload):
    """Expand the user's question with the LLM, then retrieve documents for the expanded text."""
    expanded_query = query_expansion_chain.invoke({"query": payload["input"]})
    return retriever.invoke(expanded_query)


# The original question is passed through unchanged as "input", while
# "context" is retrieved using the expanded version of that question.
rag_pipeline = (
    RunnableMap(
        {
            "input": lambda payload: payload["input"],
            "context": _expanded_context,
        }
    )
    | document_chain
)

In [14]:
# Run Query
# rag_pipeline takes a dict with an "input" key; the expansion chain's prompt
# takes the raw question string under "query".
query = {"input": "How to analyze a large number of scientific papers quickly"}

# Pass the question text, not the whole dict. Passing `query` itself caused the
# dict's repr ({'input': '...'}) to be substituted into the prompt as the
# "Original Query". This print is for inspection only: rag_pipeline performs
# its own expansion call internally.
print(query_expansion_chain.invoke({"query": query["input"]}))

response = rag_pipeline.invoke(query)

print(f"Answer:\n{response}")

**Expanded query:**

"How to rapidly analyze a large volume of scientific papers, research articles, or academic publications efficiently using automated text mining, natural language processing (NLP), machine learning, deep learning, topic modeling, clustering, bibliometric analysis, citation‑network analysis, and AI‑driven summarization for fast systematic reviews, rapid evidence synthesis, and meta‑analyses (paper triage, high‑throughput literature screening, rapid review methodology, evidence mapping, automated systematic review tools, big‑data literature analysis, computational literature review, and research synthesis techniques)."
Answer:
**How to analyze a large number of scientific papers quickly**

1. **Use LangChain to extract and summarize**  
   * Run LangChain on the entire dataset to pull out key information (methods, results, citations).  
   * Let LangChain generate concise summaries for each paper, so you can skim the main points at a glance.

2. **Answer specific que

In [None]:
# rag_pipeline takes a dict with an "input" key; the expansion chain's prompt
# takes the raw question string under "query".
query = {"input": "Crew AI Agents"}

# Pass the question text, not the whole dict. Passing `query` itself caused the
# dict's repr ({'input': '...'}) to be substituted into the prompt as the
# "Original Query". This print is for inspection only: rag_pipeline performs
# its own expansion call internally.
print(query_expansion_chain.invoke({"query": query["input"]}))

response = rag_pipeline.invoke(query)

print(f"Answer:\n{response}")

**Expanded Query (to boost document retrieval for “Crew AI Agents”):**

```
(
  "Crew AI Agents" OR
  "AI crew management" OR
  "AI crew scheduling" OR
  "AI crew optimization" OR
  "AI crew automation" OR
  "AI crew resource planning" OR
  "AI crew coordination" OR
  "AI crew assistant" OR
  "AI crew system" OR
  "AI‑driven crew management" OR
  "AI‑based crew operations" OR
  "AI crew tools" OR
  "AI crew platform" OR
  "AI crew simulation" OR
  "AI crew integration" OR
  "AI crew algorithms" OR
  "AI crew agent" OR
  "AI autonomous agent for crew" OR
  "intelligent agent for crew" OR
  "AI agent for crew management" OR
  "crew AI agent system" OR
  "crew AI agent architecture" OR
  "crew AI agent deployment"
)
AND (
  "aircraft crew" OR
  "flight crew" OR
  "pilot crew" OR
  "maritime crew" OR
  "ship crew" OR
  "space crew" OR
  "crew members" OR
  "crew operations"
)
AND (
  "human‑AI collaboration" OR
  "AI‑human teamwork" OR
  "human‑in‑the‑loop" OR
  "AI assistance for crew"
)
