In [1]:
import os

from dotenv import load_dotenv
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain
from langchain.chat_models import init_chat_model
from langchain.document_loaders import TextLoader
from langchain.prompts import PromptTemplate
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableMap
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
## Step 1: Load the dataset and split it into chunks
# Read the source text file into LangChain documents.
loader = TextLoader("langchain_crewai_dataset.txt")
raw_docs = loader.load()

# Split with chunk_size=300 and chunk_overlap=50 so neighbouring chunks
# share some text across the cut.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=300,
)
chunks = splitter.split_documents(raw_docs)


In [3]:
# Sanity check: how many chunks the splitter produced.
len(chunks)

241

In [4]:
## Step 2: Vector store
# Embedding model used to turn each chunk into a vector.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build a FAISS index over all chunks using that embedding model.
vectorstore = FAISS.from_documents(
    documents=chunks,
    embedding=embedding_model,
)

  from .autonotebook import tqdm as notebook_tqdm


In [5]:
## Step 3: MMR retriever
# Wrap the vector store as a retriever configured with
# search_type="mmr" and k=5 results per query.
retriever = vectorstore.as_retriever(
    search_kwargs={"k": 5},
    search_type="mmr",
)
retriever


VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001741EEBFB60>, search_type='mmr', search_kwargs={'k': 5})

In [6]:
## Step 4: LLM and prompt

# Load variables from a local .env file into the process environment.
load_dotenv()

# Fail fast with an actionable message when the key is absent.
# The previous `os.environ[...] = os.getenv(...)` round-trip was a no-op when
# the key existed and raised an opaque TypeError (str expected, not NoneType)
# when it did not.
if not os.getenv("OPENAI_API_KEY"):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to your .env file or export it "
        "in the environment before running this cell."
    )

# Chat model used for both query expansion and answer generation.
llm = init_chat_model("openai:o4-mini")
llm


ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000017444AAD7F0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000017444AAE270>, root_client=<openai.OpenAI object at 0x0000017444826E40>, root_async_client=<openai.AsyncOpenAI object at 0x0000017444AADFD0>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))

In [7]:
# Query expansion: have the LLM rewrite the user's query with extra
# synonyms, technical terms and context before it is used for retrieval.
QUERY_EXPANSION_TEMPLATE = """
You are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.

Original query: "{query}"

Expanded query:
"""

# The template's only input variable is "query".
query_expansion_prompt = PromptTemplate.from_template(QUERY_EXPANSION_TEMPLATE)

# prompt -> chat model -> plain string
query_expansion_chain = (
    query_expansion_prompt
    | llm
    | StrOutputParser()
)
query_expansion_chain

PromptTemplate(input_variables=['query'], input_types={}, partial_variables={}, template='\nYou are a helpful assistant. Expand the following query to improve document retrieval by adding relevant synonyms, technical terms, and useful context.\n\nOriginal query: "{query}"\n\nExpanded query:\n')
| ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x0000017444AAD7F0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x0000017444AAE270>, root_client=<openai.OpenAI object at 0x0000017444826E40>, root_async_client=<openai.AsyncOpenAI object at 0x0000017444AADFD0>, model_name='o4-mini', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser()

In [8]:
# Smoke-test the expansion chain on a short query; the prompt's only
# input variable is "query", and the chain returns a string.
query_expansion_chain.invoke({"query":"Langchain memory"})

'Expanded query:\n\n“LangChain memory” OR “LangChain memory modules” OR “LangChain context persistence” OR “LangChain state management” OR “conversational AI memory in LangChain” OR “chat history storage” OR “session memory”  \nAND (“ConversationBufferMemory” OR “ConversationSummaryMemory” OR “CombinedMemory” OR “VectorStoreRetrieverMemory” OR “EntityMemory”)  \nAND (“retrieval-augmented generation (RAG)” OR “embedding-based memory retrieval” OR “LLM chain memory” OR “long-term memory”)  \nAND (“Python LangChain library” OR “vector database” OR “Chroma” OR “Pinecone” OR “Weaviate” OR “Redis”)  \nAND (“best practices” OR “architecture” OR “implementation” OR “debugging” OR “performance”)'

In [9]:
# RAG answering prompt: the retrieved documents fill {context} and the
# user's question fills {input}.
ANSWER_TEMPLATE = """
Answer the question based on the context below.

Context:
{context}

Question: {input}
"""

answer_prompt = PromptTemplate.from_template(ANSWER_TEMPLATE)

# Chain that stuffs the documents into the prompt and sends it to the LLM.
document_chain = create_stuff_documents_chain(
    prompt=answer_prompt,
    llm=llm,
)

In [10]:
# Step 5: Full RAG pipeline with query expansion
def _question_text(payload):
    """Return the user's question from the pipeline input, unchanged."""
    return payload["input"]


def _expanded_context(payload):
    """Expand the question with the LLM, then retrieve documents for the expanded text."""
    expanded_query = query_expansion_chain.invoke({"query": payload["input"]})
    return retriever.invoke(expanded_query)


# Build the {"input", "context"} mapping the document chain expects,
# then pipe it into the answering chain.
_rag_inputs = RunnableMap({
    "input": _question_text,
    "context": _expanded_context,
})
rag_pipeline = _rag_inputs | document_chain

In [11]:
# Step 6: Run query
query = {"input": "What types of memory does LangChain support?"}

# Preview the expansion. Pass the question *string*, not the whole dict:
# the prompt interpolates "query" as-is, so passing the dict would send its
# repr ("{'input': ...}") to the LLM.
# Note: rag_pipeline runs its own expansion call, so the text printed here
# is a preview and may differ from the one actually used for retrieval.
print(query_expansion_chain.invoke({"query": query["input"]}))

response = rag_pipeline.invoke(query)
print("✅ Answer:\n", response)

Expanded query:  
“What types of memory (e.g. short-term/session memory, long-term/episodic memory, conversation state, chat history storage) does the LangChain framework support? Please include built-in memory modules and connectors such as in-memory buffers, vector stores (FAISS, Chroma, Pinecone, Weaviate, Milvus), database-backed options (Redis, SQLite, PostgreSQL, MongoDB), file-based persistence, custom memory classes, and any plug-and-play or technical terms used in the LangChain docs for memory management and retrieval.”
✅ Answer:
 LangChain today ships with two out-of-the-box memory abstractions:

1. ConversationBufferMemory  
   – Simply keeps the raw chat history in memory.

2. ConversationSummaryMemory  
   – Keeps a running, compressed summary of the dialogue so far (useful once token budgets get tight).


In [12]:
# Step 6 (second example): Run another query
query = {"input": "CrewAI agents?"}

# Preview the expansion. Pass the question *string*, not the whole dict:
# the prompt interpolates "query" as-is, so passing the dict would send its
# repr ("{'input': ...}") to the LLM.
# Note: rag_pipeline runs its own expansion call, so the text printed here
# is a preview and may differ from the one actually used for retrieval.
print(query_expansion_chain.invoke({"query": query["input"]}))

response = rag_pipeline.invoke(query)
print("✅ Answer:\n", response)

Expanded query:

(“CrewAI agents” OR “Crew AI agents” OR “AI crew assistants” OR “autonomous crew agents” OR “intelligent crew management agents” OR “AI‐driven crew coordination” OR “crew scheduling AI” OR “autonomous crew support” OR “crew planning AI platforms”)  
AND  
(“multi‐agent systems” OR “autonomous agents” OR “agent‐based modeling” OR “distributed AI” OR “reinforcement learning agents” OR “deep learning” OR “natural language processing” OR “knowledge‐based systems” OR “human–AI collaboration”)  
AND  
(“aviation” OR “maritime” OR “space mission” OR “healthcare” OR “gaming” OR “manufacturing”)  
AND  
(“features” OR “architecture” OR “use cases” OR “deployment” OR “performance metrics” OR “evaluation” OR “benefits” OR “limitations”)
✅ Answer:
 CrewAI agents are individual, LLM-powered “workers” in a multi-agent orchestration framework. Each agent is given a specific role or responsibility—examples include:  
• Researcher: gathers information or data relevant to the task  
• P