In [1]:
from langchain_community.document_loaders import WebBaseLoader
import bs4

loader = WebBaseLoader(web_paths = ('https://en.wikipedia.org/wiki/LangChain',),
                       bs_kwargs= dict(parse_only=bs4.SoupStrainer(
                           class_ =("mw-body-content", "mw-first-heading")
                       )))

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:

webcontent = loader.load()


In [3]:

from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap= 200)

doc = text_splitter.split_documents(webcontent)



In [4]:
from langchain_community.embeddings import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2"
)


  embeddings = HuggingFaceEmbeddings(


In [5]:
from langchain_community.vectorstores import FAISS
db1 = FAISS.from_documents(
    documents=doc[:50],
    embedding=embeddings
)

In [8]:
from langchain_community.vectorstores import Chroma

db2 = Chroma.from_documents(
    documents=doc,
    embedding=embeddings,
    persist_directory="./chroma_db"
)
db2.persist()

In [9]:
retriever1 = db1.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4}
)


In [10]:
retriever2 = db2.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 4}
)


In [13]:
query = "What is langchain?"

retrieved_docs = retriever1.invoke(query)

context1 = "\n\n".join(doc.page_content for doc in retrieved_docs)

context1

'History[edit]\nLangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence. In April 2023, LangChain had incorporated and the new startup raised over $20 million in funding at a valuation of at least $200 million from venture firm Sequoia Capital, a week after announcing a $10 million seed investment from Benchmark.[3][4]\nIn the third quarter of 2023, the LangChain Expression Language (LCEL) was introduced, which provides a declarative way to define chains of actions.[5][6]\nIn October 2023 LangChain introduced LangServe, a deployment tool to host LCEL code as a production-ready API.[7]\n\nLanguage model application development framework\nLangChainDeveloperHarrison ChaseInitial releaseOctober 2022Stable release0.1.16[1]\n   / 11 April 2024; 21 months ago\xa0(11 April 2024)\nRepositorygithub.com/langchain-ai/langchainWritten inPython and JavaScriptTypeSoftware framework for large language model appli

In [14]:
query = "What is langchain?"

retrieved_docs = retriever2.invoke(query)

context2 = "\n\n".join(doc.page_content for doc in retrieved_docs)

context2


'History[edit]\nLangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence. In April 2023, LangChain had incorporated and the new startup raised over $20 million in funding at a valuation of at least $200 million from venture firm Sequoia Capital, a week after announcing a $10 million seed investment from Benchmark.[3][4]\nIn the third quarter of 2023, the LangChain Expression Language (LCEL) was introduced, which provides a declarative way to define chains of actions.[5][6]\nIn October 2023 LangChain introduced LangServe, a deployment tool to host LCEL code as a production-ready API.[7]\n\nHistory[edit]\nLangChain was launched in October 2022 as an open source project by Harrison Chase, while working at machine learning startup Robust Intelligence. In April 2023, LangChain had incorporated and the new startup raised over $20 million in funding at a valuation of at least $200 million from venture fir

In [None]:
# Using Azure OpenAI LLM

In [None]:
from langchain_openai import AzureChatOpenAI

llm = AzureChatOpenAI(
    prompt="",
    azure_deployment="gpt-4.1",  # your Azure deployment name
    api_version="",
    temperature=0,
    azure_endpoint="",
    api_key=""
)


In [34]:
from langchain.prompts import PromptTemplate

rag_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are an assistant answering questions strictly using the provided context.

If the answer is not present in the context, say:
"I don't know based on the provided context."

Context:
{context}

Question:
{question}

Answer:
"""
)


In [None]:
from langchain.chains import RetrievalQA

qa_chain1 = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever1,
    chain_type="stuff",# this is for multidocuments
    chain_type_kwargs={
        "prompt": rag_prompt
    },
    return_source_documents=True
)
qa_chain2 = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever2,
    chain_type="stuff",
    chain_type_kwargs={
        "prompt": rag_prompt
    },
    return_source_documents=True
)


In [38]:
query = "Who won the IPL 2025?"
result = qa_chain1(query)

print("Answer:\n", result["result"])


query = "What is Langchain?"
result = qa_chain1(query)

print("Answer:\n", result["result"])


Answer:
 I don't know based on the provided context.
Answer:
 LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.


In [39]:
query = "Who won the IPL 2025?"
result = qa_chain1(query)

print("Answer:\n", result["result"])


query = "What is Langchain?"
result = qa_chain1(query)

print("Answer:\n", result["result"])


Answer:
 I don't know based on the provided context.
Answer:
 LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications. As a language model integration framework, LangChain's use-cases largely overlap with those of language models in general, including document analysis and summarization, chatbots, and code analysis.


In [40]:
# Using Ollama LLM

In [41]:
from langchain_community.chat_models import ChatOllama

llm = ChatOllama(
    model="llama3",
    temperature=0
)


  llm = ChatOllama(


In [43]:
from langchain.chains import RetrievalQA

qa_chain1 = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever1,
    chain_type="stuff",# this is for multidocuments
    chain_type_kwargs={
        "prompt": rag_prompt
    },
    return_source_documents=True
)
qa_chain2 = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever2,
    chain_type="stuff",
    chain_type_kwargs={
        "prompt": rag_prompt
    },
    return_source_documents=True
)


In [44]:
query = "Who won the IPL 2025?"
result = qa_chain1(query)

print("Answer:\n", result["result"])


query = "What is Langchain?"
result = qa_chain1(query)

print("Answer:\n", result["result"])


Answer:
 I don't know based on the provided context. The context only talks about Langchain-ai, OpenAI, and related topics, but does not mention the Indian Premier League (IPL) or any information about its winners in 2025.
Answer:
 According to the provided context, LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications.


In [46]:
query = "Who won the IPL 2025?"
result = qa_chain1(query)

print("Answer:\n", result["result"])


query = "What is Langchain?" 
result = qa_chain1(query)

print("Answer:\n", result["result"])


Answer:
 I don't know based on the provided context. The context only talks about Langchain-ai, OpenAI, and related topics, but does not mention the Indian Premier League (IPL) or any information about its winners in 2025.
Answer:
 According to the provided context, LangChain is a software framework that helps facilitate the integration of large language models (LLMs) into applications.
