In [1]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chat_models import init_chat_model
from langchain.prompts import PromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap

In [3]:
## step1 : Load and split the dataset
# Read the raw text file into LangChain document objects.
loader = TextLoader("langchain_crewai_dataset.txt")
raw_docs = loader.load()

# Cut the documents into overlapping chunks so that each piece is small
# enough to embed and neighbouring chunks share some context.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=300,
    chunk_overlap=50,
)
chunks = splitter.split_documents(raw_docs)


In [4]:
### step 2: Vector Store
# Embed every chunk with the all-MiniLM-L6-v2 model and index the result in FAISS.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectorstore = FAISS.from_documents(chunks, embedding_model)

## step 3:MMR Retriever
# Ask the store for an MMR-based retriever that returns 5 documents per query.
mmr_search_kwargs = {"k": 5}
retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=mmr_search_kwargs,
)
retriever


  from .autonotebook import tqdm as notebook_tqdm


VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x3090f3a70>, search_type='mmr', search_kwargs={'k': 5})

In [5]:
## step 4 : LLM and Prompt

import os
from dotenv import load_dotenv

# Pull variables from a local .env file (if any) into the process environment.
load_dotenv()

# Fail fast with a readable message when the key is missing. The previous
# `os.environ[...] = os.getenv(...)` self-assignment did nothing when the key
# was present and raised an opaque TypeError (environ values must be str)
# when it was absent.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it or add it to your .env file "
        "before running this cell."
    )

# Chat model used by the query-expansion chain defined in the next cell.
llm = init_chat_model("openai:o4-mini")
llm


ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x32147c6b0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x328274d10>, root_client=<openai.OpenAI object at 0x3101931d0>, root_async_client=<openai.AsyncOpenAI object at 0x32147c9e0>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [6]:
# Query expansion
# Template that asks the model to rewrite a user query into a richer search query.
# The single placeholder is {query}.
QUERY_EXPANSION_TEMPLATE = """
You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.

Original query: "{query}"

Expanded query:
"""
query_expansion_prompt = PromptTemplate.from_template(QUERY_EXPANSION_TEMPLATE)

# prompt -> chat model -> plain string
query_expansion_chain = query_expansion_prompt | llm | StrOutputParser()
query_expansion_chain

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.\n\nOriginal query: "{query}"\n\nExpanded query:\n')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x32147c6b0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x328274d10>, root_client=<openai.OpenAI object at 0x3101931d0>, root_async_client=<openai.AsyncOpenAI object at 0x32147c9e0>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser()

In [7]:
# Smoke-test the expansion chain on a short query and display the raw string result.
sample_expansion = query_expansion_chain.invoke({"query": "Langchain memory"})
sample_expansion

'Expanded query:\n\n“LangChain memory” OR “Lang Chain memory” OR “LangChain memory modules” OR “LangChain memory management” OR “LangChain conversation memory” OR “persistent memory in LangChain” OR “session state management” OR “buffer memory” OR “summary memory” OR “retrospective memory” OR “context retention” OR “stateful agent memory” OR “chat history storage” OR “memory retriever” OR “VectorStoreMemory” OR “RedisMemory” OR “InMemoryMemory” OR “SQLMemory” OR “embeddings cache” OR “vector embeddings” OR “RAG (retrieval-augmented generation)” OR “prompt context window” OR “memory plugin”'

In [9]:
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

# Define your prompt
# NOTE(review): `prompt` is not referenced by any later cell visible in this
# notebook (the document chain is built from `answer_prompt`) — confirm whether
# it is still needed.
PROMPT_TEMPLATE = """
Answer the question based on context below.
Context: {context}
Question: {input}
"""
prompt = PromptTemplate.from_template(PROMPT_TEMPLATE)

# Initialize the LLM
# NOTE(review): this rebinds `llm`. The query-expansion chain built earlier
# keeps the model object it was constructed with; cells below this point see
# the gpt-3.5-turbo model.
llm = ChatOpenAI(
    model="gpt-3.5-turbo",
    temperature=0.4,
)

llm


ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x3282a5fd0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x328277980>, root_client=<openai.OpenAI object at 0x3282a72c0>, root_async_client=<openai.AsyncOpenAI object at 0x3282a7350>, temperature=0.4, model_kwargs={}, openai_api_key=SecretStr('**********'))

In [10]:
# RAG answering prompt
# {context} receives the retrieved documents, {input} the user's question.
ANSWER_TEMPLATE = """
Answer the question based on the context below.

Context:
{context}

Question: {input}
"""
answer_prompt = PromptTemplate.from_template(ANSWER_TEMPLATE)

# Chain that stuffs the retrieved documents into the prompt and calls the LLM.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=answer_prompt,
)

In [11]:
# Step 5: Full RAG pipeline with query expansion
def _pass_through_question(payload):
    """Return the user's question from the pipeline input unchanged."""
    return payload["input"]


def _retrieve_with_expansion(payload):
    """Expand the question via the LLM, then retrieve documents for the expanded text."""
    expanded_query = query_expansion_chain.invoke({"query": payload["input"]})
    return retriever.invoke(expanded_query)


# Build the {"input", "context"} mapping the document chain expects, then answer.
rag_pipeline = RunnableMap(
    {
        "input": _pass_through_question,
        "context": _retrieve_with_expansion,
    }
) | document_chain

In [12]:
# Step 6: Run query
query = {"input": "What types of memory does LangChain support?"}

# Show what the expansion step produces for this question. The prompt's
# {query} slot must receive the question text, not the whole `query` dict
# (passing the dict would put its repr, e.g. "{'input': ...}", in the prompt).
# This is a separate LLM call from the one made inside rag_pipeline, so the
# printed expansion may differ from the one actually used for retrieval.
print(query_expansion_chain.invoke({"query": query["input"]}))

# Run the full pipeline: expand -> retrieve -> answer.
response = rag_pipeline.invoke(query)
print("✅ Answer:\n", response)

Expanded query:
“LangChain memory support, memory types and modules in the LangChain Python framework; include short-term vs. long-term memory, ephemeral vs. persistent memory, ConversationBufferMemory, ConversationSummaryMemory, CombinedMemory, CompressedMemory, VectorStoreRetrieverMemory, RedisMemory, SQLMemory, file-based memory, token-window memory, timeline/multi-session memory, retrieval-augmented memory; context management, conversation state storage, chat history persistence, memory backends and strategies in LangChain.”
✅ Answer:
 LangChain supports memory modules like ConversationBufferMemory and ConversationSummaryMemory.


In [13]:
# Step 7: Run a second query
query = {"input": "CrewAI agents?"}

# Show what the expansion step produces for this question. The prompt's
# {query} slot must receive the question text, not the whole `query` dict
# (passing the dict would put its repr, e.g. "{'input': ...}", in the prompt).
# This is a separate LLM call from the one made inside rag_pipeline, so the
# printed expansion may differ from the one actually used for retrieval.
print(query_expansion_chain.invoke({"query": query["input"]}))

# Run the full pipeline: expand -> retrieve -> answer.
response = rag_pipeline.invoke(query)
print("✅ Answer:\n", response)

Expanded query:

("CrewAI" OR "Crew AI" OR "Crew-AI") AND (agent OR agents OR assistant OR assistants OR bot OR bots OR “autonomous agent” OR “autonomous agents” OR “intelligent agent” OR “intelligent agents” OR “virtual assistant” OR “virtual assistants” OR “digital agent” OR “digital agents” OR “multi-agent system” OR “agent-based model”) AND (crew management OR crew scheduling OR task allocation OR resource planning OR operations support OR “human-AI teaming” OR “decision support” OR “workload management” OR automation OR orchestration) AND (aviation OR maritime OR space OR “field service” OR “industrial operations” OR “logistics” OR “transportation”) AND (AI OR “artificial intelligence” OR machine learning OR “reinforcement learning” OR “deep learning” OR NLP OR “natural language processing” OR robotics OR “collaborative robotics”)
✅ Answer:
 CrewAI agents are autonomous agents that have defined roles within a structured workflow, such as researcher, planner, or executor. They oper