# Workflow 1: Load Website Content into a Vector DB

1. Use LangChain LCEL
2. Prompting + LCEL + Output Parser
3. RAG (build once, re-use) with sources




In [None]:
# Mount Google Drive at /content/drive.
# `google.colab` is imported here, so this cell needs the Colab runtime.
# NOTE(review): no later cell visible in this notebook reads from the mounted path — confirm it is still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import warnings
# Suppress every warning for the rest of the session.
# Note: this also hides deprecation notices from the libraries used below.
warnings.filterwarnings("ignore")

In [None]:
%pip install --upgrade jupyter-client

In [None]:
%pip install -qU \
    "requests" \
    "langchain" \
    "langchain-openai" \
    "langchain-community" \
    "langchain-text-splitters" \
    beautifulsoup4 lxml faiss-cpu langchainhub tavily-python "gradio"

In [None]:
import importlib
def _ver(name):
    try:
        m = importlib.import_module(name)
        return getattr(m, "__version__", "n/a")
    except Exception as e:
        return f"not installed ({e})"
# Print one "<label> <version>" line per package the notebook depends on.
# Each entry pairs the display label with the importable module name.
for _label, _module in (
    ("langchain           :", "langchain"),
    ("langgraph           :", "langgraph"),
    ("langchain-core      :", "langchain_core"),
    ("langchain-community :", "langchain_community"),
    ("langchain-openai    :", "langchain_openai"),
    ("langchainhub        :", "langchainhub"),
    ("langchain-text-splitters:", "langchain_text_splitters"),
    ("faiss-cpu           :", "faiss"),
    ("tavily-python       :", "tavily"),
):
    print(_label, _ver(_module))

In [None]:
import os
from dotenv import load_dotenv
# Load variables from a .env file into the process environment, then read the OpenAI key.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    # Report the missing key here instead of letting it surface later as an
    # error from the OpenAI embedding/chat cells. The key itself is never printed.
    print("OPENAI_API_KEY is not set; add it to a .env file or the environment before running the OpenAI cells.")

# Load URL content

In [None]:
# RAG (v1): Web loader → splitter → FAISS → retriever → LCEL chain
#import os
#os.environ.setdefault("USER_AGENT", "IK-LangChain-RAG/1.0 (contact: ops@your-org)")  # fixes the warning

from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings, ChatOpenAI
from langchain_community.vectorstores import FAISS

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser

# 1) Load docs (pick any public pages you want indexed)
urls = [
    "https://python.langchain.com/docs/get_started/introduction/",
    "https://docs.smith.langchain.com/"
]
loader = WebBaseLoader(urls)
docs = loader.load()

# Show a short summary rather than dumping every page's full text into the cell output.
print(f"Loaded {len(docs)} document(s):")
print("\n".join(
    f"  {d.metadata.get('source', 'unknown source')} ({len(d.page_content):,} chars)"
    for d in docs
))

# Chunking
Split the loaded documents into overlapping chunks with LangChain's `RecursiveCharacterTextSplitter`.

In [None]:
# 2) Chunk using LangChain recursive splitter
#    chunk_size / chunk_overlap are measured in characters (the splitter's default length function).
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=200)
chunks = splitter.split_documents(docs)

# Report the chunk count and a short preview instead of printing every chunk in full.
print(f"Split {len(docs)} document(s) into {len(chunks)} chunk(s)")
if chunks:
    print("First chunk preview:")
    print(chunks[0].page_content[:300])


# Embeddings
Generate OpenAI embeddings, store them in an in-memory FAISS index, and create a retriever object for similarity search.

In [None]:
# 3) Embed & index
emb = OpenAIEmbeddings()  # uses OPENAI_API_KEY from env
# Other vector databases include ChromaDB and OpenSearch; this notebook uses FAISS.
vs = FAISS.from_documents(chunks, emb)
# The retriever wraps the vector store; k=4 asks for the 4 closest chunks per query.
retriever = vs.as_retriever(search_kwargs={"k": 4})
print(retriever)

In [None]:
from langchain_core.prompts import ChatPromptTemplate

# 4) Prompt: the system message carries the retrieved text in {context};
#    the human message carries the user's {question}.
prompt = ChatPromptTemplate.from_messages([
    ("system",
     "You are a precise assistant. Use the provided CONTEXT to answer.\n"
     "If the answer isn't in the context, say you don't know.\n\nCONTEXT:\n{context}"),
    ("human", "{question}")
])

# LLM Call
Query the LLM using an LCEL pipeline.

In [None]:
# Chat model that generates the final answer; temperature=0 requests the least random sampling.
llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)

def format_docs(docs):
    """Join the ``page_content`` of each document into one string.

    Documents are separated by a blank line; an empty input yields ``""``.
    """
    texts = [doc.page_content for doc in docs]
    return "\n\n".join(texts)

# 5) LCEL pipeline: {question} flows through; {context} is produced by retriever
#    The leading dict receives the raw question string:
#      - "context": the question goes through the retriever, and the results are joined by format_docs
#      - "question": RunnablePassthrough() forwards the question unchanged
#    The resulting mapping fills the prompt, the prompt goes to the LLM, and
#    StrOutputParser turns the model's reply into a plain string.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

# 6) Try it (as the cell's last expression, the returned answer is displayed as the cell output)
rag_chain.invoke("What is LangSmith and how does it relate to LangChain?")

In [None]:
# @title
import gradio as gr
from langchain_core.messages import AIMessage

def _final_text(res):
    if isinstance(res, AIMessage):
        return res.content or ""
    if isinstance(res, dict) and "messages" in res:
        for m in reversed(res["messages"]):
            if isinstance(m, AIMessage) or getattr(m, "type", "") == "ai":
                return getattr(m, "content", "") or ""
    return str(res)

def _to_messages(history, message):
    msgs = []
    for u, a in history:
        if u: msgs.append({"role": "user", "content": u})
        if a: msgs.append({"role": "assistant", "content": a})
    msgs.append({"role": "user", "content": message})
    return msgs

def _ensure_agent():
    global agent
    try:
        agent
        return agent
    except NameError:
        from langchain_openai import ChatOpenAI
        from langchain.agents import create_agent  # Changed import
        from langchain_core.tools import tool

        @tool
        def add(a: float, b: float) -> float:
            "Add two numbers."
            return a + b

        llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
        agent = create_agent(llm, [add], system_prompt="You are helpful.")  # Changed parameter
        return agent

def chat_fn(message, history):
    """Gradio chat callback: run the agent on the conversation and return its reply text.

    Any exception raised while obtaining the agent, building the messages,
    invoking the agent, or extracting the text is returned to the UI as
    ``"Error: <exception>"`` instead of propagating.
    """
    try:
        runner = _ensure_agent()
        payload = {"messages": _to_messages(history, message)}
        result = runner.invoke(payload)
        return _final_text(result)
    except Exception as e:
        return f"Error: {e}"

# Close a demo left over from an earlier run of this cell before building a new one.
# On the first run `demo` does not exist yet; that error (and any other) is deliberately ignored.
try:
    demo.close()
except Exception:
    pass

with gr.Blocks() as demo:
    gr.Markdown("# LangChain Agent Chat")
    # NOTE(review): this blurb mentions kb_search and web search, and the tip below suggests
    # multiplication, but the fallback agent built in _ensure_agent only registers an `add`
    # tool — confirm the text matches the agent actually in use.
    gr.Markdown("Ask about your KB (kb_search) or general queries. Web search only if TAVILY_API_KEY is set.")
    gr.ChatInterface(chat_fn)  # chat_fn is the callback that produces each reply
    gr.Markdown('Tip: Try "Where are tracing docs?" or "Multiply 3.5 and 4."')

# share=False: do not request a public share link for the app.
demo.launch(share=False)