In [1]:
%pip install -qU "langchain>=0.2" "langchain-core>=0.2" "langchain-community>=0.2" \
               "langchain-openai>=0.1" langchainhub faiss-cpu langchain-text-splitters \
               tavily-python gradio

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:

import os, getpass, warnings, sys

warnings.filterwarnings("ignore")

def ensure_env(key: str, prompt: str):
    if not os.getenv(key):
        try:
            val = getpass.getpass(prompt)
        except Exception:
            # Fallback for environments without stdin (e.g. some hosted notebooks)
            val = ""
        if val:
            os.environ[key] = val

# --- Required for LLM & embeddings ---
ensure_env("OPENAI_API_KEY", "Enter your OpenAI API Key (skipped if already set): ")


LangSmith tracing not enabled (no LANGCHAIN_API_KEY). Proceeding without tracing.
No TAVILY_API_KEY found. Agent will run without web search tool (retriever-only).


In [7]:

from typing import List
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains import create_retrieval_chain
from langchain_classic.prompts import ChatPromptTemplate
from langchain_classic.schema import Document

def build_docs() -> List[Document]:
    urls = [
        "https://www.nytimes.com/",
        "https://www.washingtonpost.com/",
        "https://www.usatoday.com/"
    ]
    try:
        loader = WebBaseLoader(urls)
        docs = loader.load()
        if not docs:
            raise ValueError("Web loader returned no docs.")
        print(f"Loaded {len(docs)} documents from the web.")
        return docs
    except Exception as e:
        print("Web load failed, using fallback docs. Reason:", e)
        text = (
            "LangSmith is a platform for observing, evaluating, and debugging LLM applications. "
            "It provides tracing, dataset management, evaluations, and experiment tracking to help teams ship reliable LLM features."
        )
        return [
            Document(page_content=text, metadata={"source": "local_fallback", "title": "LangSmith Overview (Local)"})
        ]

raw_docs = build_docs()

splitter = RecursiveCharacterTextSplitter(chunk_size=1200, chunk_overlap=150)
chunks = splitter.split_documents(raw_docs)
print(f"Split into {len(chunks)} chunks.")

embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(chunks, embeddings)
retriever = vectorstore.as_retriever(search_kwargs={"k": 4})

rag_prompt = ChatPromptTemplate.from_template(
    "You are a precise assistant. Use the provided context to answer the question.\n"
    "If the answer is not in the context, say you don't know.\n\n"
    "Question: {input}\n\n"
    "Context:\n{context}\n\n"
    "Answer in 3 concise bullet points and include citations when possible."
)

document_chain = create_stuff_documents_chain(llm, rag_prompt)
retrieval_chain = create_retrieval_chain(retriever, document_chain)

q = "Name the most recent happening in USA, and explain briefly about the issue."
result = retrieval_chain.invoke({"input": q})

print("Q:", q)
print("\nRAG Answer:\n", result["answer"])

# Show top sources explicitly
print("\nTop sources:")
for d in retriever.invoke(q)[:4]:
    print("-", d.metadata.get("source", ""), d.metadata.get("title", ""))


Web load failed, using fallback docs. Reason: No module named 'bs4'
Split into 1 chunks.
Q: Name the most recent happening in USA, and explain briefly about the issue.

RAG Answer:
 - The most recent happening in the USA is the ongoing debate over voting rights legislation.
- The issue revolves around proposed voting restrictions in various states, with Democrats pushing for federal legislation to expand voting access and Republicans advocating for stricter voting laws.
- This debate has intensified following the 2020 presidential election and continues to be a major point of contention in American politics. (Source: NPR - https://www.npr.org/2021/06/22/1009146551/what-is-the-for-the-people-act-and-why-is-it-so-divisive)

Top sources:
- local_fallback LangSmith Overview (Local)
