In [41]:
from dotenv import load_dotenv
from langchain_perplexity import ChatPerplexity

# Load variables from a local .env file into the process environment before
# the chat model is created (presumably this is where the Perplexity API key
# comes from — confirm against your .env).
load_dotenv()
# Chat model instance shared by the chains built later in the notebook.
llm = ChatPerplexity()

In [42]:
import bs4
from langchain_community.document_loaders import WebBaseLoader #, PyPDFLoader, TextLoader

# Page that serves as the knowledge source for this RAG walkthrough.
SOURCE_URL = "https://python.langchain.com/docs/introduction/"

# Download the page and turn it into LangChain Document objects.
doc_loader = WebBaseLoader(web_path=SOURCE_URL)
docs = doc_loader.load()

In [43]:
from langchain.text_splitter import RecursiveCharacterTextSplitter #, TextSplitter, CharacterTextSplitter

# Chunking parameters, in characters: each chunk holds up to CHUNK_SIZE
# characters and shares CHUNK_OVERLAP characters with its neighbour.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 50

splitter_object = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
# Break the loaded documents into smaller pieces for embedding.
chunked_docs = splitter_object.split_documents(docs)


In [44]:
from langchain_community.embeddings import OllamaEmbeddings
# Name of the embedding model requested from Ollama.
EMBEDDING_MODEL_NAME = "nomic-embed-text"
embedding_model = OllamaEmbeddings(model=EMBEDDING_MODEL_NAME)

In [45]:
from langchain_community.vectorstores import Chroma #, Cassandra , FAISS

import hashlib

# Give every chunk a stable id built from its position and a hash of its text.
# Without explicit ids each run of this cell stores the chunks under fresh
# random ids, and because the default in-memory Chroma collection outlives a
# single cell run, re-running the cell piles up duplicate entries that then
# show up as repeated retrieval results. With stable ids a re-run writes to
# the same entries instead of adding new ones.
chunk_ids = [
    f"{position}-{hashlib.sha256(chunk.page_content.encode('utf-8')).hexdigest()}"
    for position, chunk in enumerate(chunked_docs)
]

# Embed the chunks and index them in the vector store.
vector_store = Chroma.from_documents(
    documents=chunked_docs,
    embedding=embedding_model,
    ids=chunk_ids,
)

# Retriever view of the store, used by the retrieval chain later on.
doc_retriever = vector_store.as_retriever()

### Query with similarity search
query = "Tell me about Python"

# Sanity check: fetch the single closest chunk and display it as cell output.
results = vector_store.similarity_search(query, k=1)
results

In [46]:
# Instruction text for the answering step. {context} and {input} are template
# placeholders that get filled in when the chain runs. The literal is not a
# raw string, so the written-out \n sequences become real newlines in addition
# to the physical line breaks.
prompt_template = """Answer to the user question from the document context only.
In case of question being out of context reply with: Sorry, cannot answer irrelevant questions that are out of context.

Make sure the answer is helpful to user's query.
\n\n Context: {context}\n\n

\n\n Question: {input}
"""

from langchain_core.prompts import ChatPromptTemplate
# Turn the raw template text into a chat prompt object for the document chain.
prompt = ChatPromptTemplate.from_template(prompt_template)

# user_input = "what is langchain and what do you mean by langchain-ecosystem"

In [47]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains.retrieval import create_retrieval_chain

# Stage 1: chain that places the supplied documents into the prompt and sends
# the filled prompt to the LLM.
document_chain = create_stuff_documents_chain(llm, prompt)

# Stage 2: put the retriever in front of stage 1 so a single call both fetches
# the relevant chunks and answers from them.
retrieval_chain = create_retrieval_chain(doc_retriever, document_chain)

In [48]:
# Only the question is supplied here, under the "input" key.
user_input = "what is langchain and what do you mean by langchain-ecosystem"
chain_inputs = {"input": user_input}

retrieved_response = retrieval_chain.invoke(chain_inputs)

In [49]:
# Show only the generated answer text from the chain's response.
retrieved_response["answer"]

"**LangChain** is a framework that provides tools for building workflows powered by large language models (LLMs). It facilitates tasks such as integrating chat models, managing vector stores (databases that store vector embeddings), and handling documents in an efficient way to create applications leveraging LLMs[1][2][4].\n\nThe **LangChain ecosystem** refers to the collection of components, guides, integrations, and tools around LangChain that help users quickly accomplish common tasks. This includes conceptual guides explaining LangChain's key parts, how-tos on using LangChain-specific features like LangGraph, and integrations with various vector databases and embedding models to build end-to-end LLM-powered applications[Context].\n\nIn summary:\n\n| Term              | Description                                                                                         |\n|-------------------|--------------------------------------------------------------------------------------------

KEY LEARNING NOTES FROM THIS SESSION:

### LangChain — create_stuff_documents_chain

Purpose: ek document-combining chain banata hai jo multiple documents ko ek saath "stuff" (concatenate) karke LLM ko pass karta hai.

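Prompt requirement (hard rule):
create_stuff_documents_chain ke liye prompt me {context} placeholder hona compulsory hai, kyunki documents isi variable me inject hote hain (naam document_variable_name se badla ja sakta hai). {input} is chain ke liye strictly required nahi hai, lekin create_retrieval_chain user ka question "input" key me pass karta hai — isliye retrieval chain ke saath use karte waqt prompt me {context} aur {input} dono rakho.

Placeholders:
1. input → user ka question/sawal (create_retrieval_chain ke saath zaroori)
2. context → retrieved documents ka text (LLM ke liye source of truth; hamesha zaroori)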



Example:

llm = ChatOpenAI()       # LLM instance


prompt = ChatPromptTemplate.from_template(       # Prompt with BOTH placeholders
    "You are a helpful assistant.\n"
    "Context: {context}\n\n"
    "Question: {input}\n"
    "Answer:"
)


doc_chain = create_stuff_documents_chain(llm, prompt)      # Create chain

### LangChain - Glue Logic: create_retrieval_chain

Jab aap create_retrieval_chain(retriever, doc_chain) banate ho → LangChain internally ek mapping function banata hai jo retriever output ko "context" key me daalta hai.

Invoke karte waqt aapko {context} kabhi manually pass nahi karna padta; sirf {input} pass karna padta hai, jaise retrieval_chain.invoke({"input": user_input}).

retrieval_chain internally retriever ke docs ko "context" key me convert karke doc_chain ko feed karta hai.

### 