## **LLM-Powered Multi-URL Information Retrieval Chatbot**

In [None]:

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_groq import ChatGroq

from langchain_community.document_loaders import UnstructuredURLLoader
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.runnables import RunnableWithMessageHistory

from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.chat_history import BaseChatMessageHistory

from pydantic import BaseModel
from typing import List, Optional
import joblib


In [2]:
# Load Environment Variables and Set API Keys

import os
from dotenv import load_dotenv

# Read variables from a local .env file (if any) into the process environment.
load_dotenv()

# os.environ only accepts string values: assigning None (token not configured)
# raises TypeError, so re-export the token only when it is actually present.
_hf_token = os.getenv("HF_TOKEN")
if _hf_token is not None:
    os.environ["HF_TOKEN"] = _hf_token

# May be None when GROQ_API_KEY is not configured; it is passed to ChatGroq later.
groq_api_key = os.getenv("GROQ_API_KEY")


In [None]:
def create_vectorstore(urls: List[Optional[str]]) -> bool:
    """
    Build a FAISS vector store from the given URLs and persist it to disk.

    The pages are loaded with UnstructuredURLLoader, split into overlapping
    chunks, embedded with the "all-MiniLM-L6-v2" HuggingFace model, indexed
    with FAISS and dumped with joblib to
    "VectorStoreDB/faiss_vectorstore.joblib".

    Args:
        urls: URLs to ingest. ``None`` and blank entries are ignored.

    Returns:
        True when the vector store was created and saved; False when no usable
        URL was supplied or any step failed (the error is printed).
    """
    store_path = "VectorStoreDB/faiss_vectorstore.joblib"

    # The request model allows null entries, so drop them (and blank strings)
    # before handing the list to the loader.
    valid_urls = [url.strip() for url in (urls or []) if url and url.strip()]
    if not valid_urls:
        print("Error in creating vectorstore: no valid URLs were provided")
        return False

    try:
        loader = UnstructuredURLLoader(urls=valid_urls)
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(
            separators=[
                "\n### ",     # Markdown-style or section headers
                "\n## ",
                "\n# ",
                "\n\n",       # Paragraphs
                "\n",         # Newlines
            ],
            chunk_size=1000,
            chunk_overlap=200,
        )
        doc_splits = text_splitter.split_documents(docs)
        if not doc_splits:
            # Fail with a readable message instead of an opaque indexing error
            # from building an index over zero documents.
            raise ValueError("no content could be loaded from the provided URLs")

        hf_embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vector_store = FAISS.from_documents(
            documents=doc_splits,
            embedding=hf_embeddings,
        )

        # The target directory may not exist yet on a first run.
        os.makedirs(os.path.dirname(store_path), exist_ok=True)
        with open(store_path, "wb") as index_file:
            joblib.dump(vector_store, index_file)

    except Exception as e:
        # Boundary handler: callers only need a success flag.
        print(f"Error in creating vectorstore: {e}")
        return False

    return True

In [None]:
def create_rag_pipeline(get_session_history: callable):
    """
    Build the conversational RAG chain on top of the persisted vector store.

    The vector store is loaded from "VectorStoreDB/faiss_vectorstore.joblib",
    wrapped in a history-aware retriever (the LLM first rewrites the latest
    question into a standalone one), combined with a question-answering prompt,
    and finally wrapped in RunnableWithMessageHistory.

    Args:
        get_session_history: Callable that takes a session id and returns the
            chat message history object for that session.

    Returns:
        A RunnableWithMessageHistory around the retrieval chain, configured
        with "input" as the input key, "chat_history" as the history key and
        "answer" as the output key.

    Raises:
        FileNotFoundError: If the vector store file has not been created yet.
    """
    store_path = "VectorStoreDB/faiss_vectorstore.joblib"

    # Previously a missing file left `vector_store` unbound and the function
    # failed later with UnboundLocalError; fail early with a clear message.
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only load a store file this application wrote itself.
    try:
        with open(store_path, "rb") as file:
            vector_store = joblib.load(file)
    except FileNotFoundError as err:
        raise FileNotFoundError(
            f"Vector store not found at '{store_path}'. "
            "Create it first by processing URLs."
        ) from err

    retriever = vector_store.as_retriever()

    ## LLM-model
    llm = ChatGroq(model="llama-3.3-70b-versatile", groq_api_key=groq_api_key)

    # Prompt that turns a follow-up question into a standalone question.
    contextualize_q_prompt = """
            Given the chat history and the latest user question, reformulate the current question into a standalone version that can be understood without the previous context.

            Use the chat history only to clarify references (like “it”, “this function”, or “that library”).
            Do not answer the question.

            ---
            ### Chat History:
            {chat_history}
            ---
            ### Latest User Question:
            {input}
            ---

            ### Reformulated Standalone Question

    """
    contextualize_q_with_history_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_prompt),
            MessagesPlaceholder("chat_history"),
            ("human", "{input}")
        ]
    )

    history_aware_retriever = create_history_aware_retriever(
        llm, retriever, contextualize_q_with_history_prompt
    )

    # Prompt that answers the question from the retrieved context.
    qa_system_prompt = """
            You are an advanced AI documentation assistant designed to help users by answering queries using 
            the provided context from multiple documentation sources.

            Your primary goal is to deliver technically accurate, concise, and complete answers **strictly grounded in the retrieved context** and chat history.
            Do not hallucinate, assume, or fabricate information that is not supported by the given documentation.

            ---
            ### Context Information:
            {context}
            ---
            ### Chat History:
            {chat_history}
            ---
            ### User Query:
            {input}
            ---

            ### Response Guidelines:
            1. **Grounding:** Use only the information provided in the context to answer the question.  
            2. **Missing Info:** If the answer cannot be fully determined from the context, respond exactly with:  
            > "The provided documentation does not contain enough information to answer that precisely."
            3. **Clarity:** Write in a clear, developer-friendly tone. Avoid unnecessary repetition or overly generic statements.  
            4. **Structure:**  
                - Use Markdown formatting.  
                - Include **headings**, **bullet points**, and **code blocks** where appropriate.  
                - When multiple documents support the answer, synthesize them into a cohesive explanation.  
            5. **Source Attribution (if available):** At the end, list document names or URLs from which the information was derived.  
            6. **Consistency:** Maintain conversation context and continuity with previous answers (from chat history).  
            7. **Style:** Be factual, concise, and instructional — as if you were a senior developer or API mentor.

            ---

            ### Response Format:
            **Answer:**
            (Provide a clear explanation or step-by-step guide.)

            **Example (if applicable):**
            ```python
            # relevant code sample or command

    """

    qa_system_prompt_template = ChatPromptTemplate.from_messages(
        [
            ("system", qa_system_prompt),
            ("human", "{input}")
        ]
    )

    parser = StrOutputParser()

    # Prompt -> LLM -> plain string answer.
    qa_chain = qa_system_prompt_template | llm | parser

    rag_chain = create_retrieval_chain(
        history_aware_retriever,
        qa_chain
    )

    # Wrap the chain with per-session message history handling.
    conversational_rag_chain = RunnableWithMessageHistory(
        rag_chain,
        get_session_history,
        input_messages_key="input",
        history_messages_key="chat_history",
        output_messages_key="answer"
    )

    return conversational_rag_chain


In [5]:
## FastAPI app

from fastapi import FastAPI
import uvicorn

# Application object that the route decorators in the following cells attach to.
app = FastAPI(
    description="A RAG chatbot using multiple URLs as context",
    version="1.0",
    title="LLM-Powered Multi-URL RAG Chatbot",
)

In [6]:
# Basic endpoint
@app.get("/")
async def root():
    """Return a fixed message confirming that the backend is up."""
    payload = {"message": "RAG Chatbot Backend is running successfully!"}
    return payload


In [None]:

class URLsRequest(BaseModel):
    """Request body for the /process_urls endpoint."""

    # URLs to ingest; individual entries may be null.
    urls: List[Optional[str]]


# API Endpoint to process URLs and create vectorstore
@app.post("/process_urls")
async def process_urls(request: URLsRequest):
    """
    API endpoint to process a list of URLs and create a vectorstore.

    Args:
        request: Request body carrying the list of URLs to ingest.

    Returns:
        A dict with a boolean "status" and a human-readable "message"
        describing whether the vectorstore was created.
    """
    import asyncio  # local import keeps this cell self-contained

    # create_vectorstore downloads pages and computes embeddings synchronously;
    # calling it directly inside this coroutine would block the event loop (and
    # every other request) until it finishes, so run it in a worker thread.
    is_created = await asyncio.to_thread(create_vectorstore, request.urls)

    if is_created:
        return {"status": True,
                "message": "Vectorstore created successfully from the provided URLs."}
    return {"status": False,
            "message": "Failed to create vectorstore. Please check the URLs and try again."}

In [None]:
class ChatResponseRequest(BaseModel):
    """Request body for the /chat_response endpoint."""

    # Key under which the caller's chat history is stored.
    session_id: str
    # Question text sent by the user.
    user_query: str


# API Endpoint to get chat response
@app.post("/chat_response")
async def chat_response(request: ChatResponseRequest):
    """
    API endpoint to get a chat response for a given user query and session ID.

    Args:
        request: Request body carrying the session id and the user query.

    Returns:
        The result of invoking the conversational RAG chain for this query.
    """
    import asyncio  # local import keeps this cell self-contained

    # Create the shared session store here, on the event-loop thread, so the
    # worker thread below never races on initialising it.
    if not hasattr(app.state, "store"):
        app.state.store = {}

    def get_session_history(session_id: str) -> BaseChatMessageHistory:
        """Get the chat message history for a given session ID."""
        store = app.state.store
        if session_id not in store:
            # setdefault keeps the first history if two requests for the same
            # new session arrive at the same time.
            store.setdefault(session_id, ChatMessageHistory())
        return store[session_id]

    # Extracting session_id and user_query from the JSON-request
    session_id = request.session_id
    user_query = request.user_query

    def answer_query():
        """Build the pipeline and run the query (blocking)."""
        rag_chain = create_rag_pipeline(
            get_session_history=get_session_history
        )
        return rag_chain.invoke(
            {"input": user_query},
            config={
                "configurable": {"session_id": session_id}
            }
        )

    # Loading the vector store and calling the LLM are blocking operations;
    # running them in a worker thread keeps the event loop free to serve other
    # requests in the meantime.
    response = await asyncio.to_thread(answer_query)

    return response


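#### Running the FastAPI server inside the notebook
We start uvicorn in a background daemon `Thread`, so the cell returns immediately and the notebook stays usable while the server runs.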

In [None]:
import nest_asyncio
from threading import Thread

# Allow nested event loops (Jupyter already runs one).
# This must run before the server is started below.
nest_asyncio.apply()

def run_app(host: str = "127.0.0.1", port: int = 8000):
    """
    Run the FastAPI app with uvicorn; this call blocks until the server stops.

    Args:
        host: Interface to bind to. Defaults to the loopback address.
        port: TCP port to listen on. Defaults to 8000.
    """
    uvicorn.run(app, host=host, port=port)

# Launch the FastAPI server on a daemon thread; start() returns immediately,
# so this cell does not block.
thread = Thread(daemon=True, target=run_app)
thread.start()

INFO:     Started server process [4560]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)


INFO:     127.0.0.1:54288 - "GET / HTTP/1.1" 200 OK
INFO:     127.0.0.1:54288 - "GET /favicon.ico HTTP/1.1" 404 Not Found
INFO:     127.0.0.1:64418 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:64418 - "GET /openapi.json HTTP/1.1" 200 OK


  from .autonotebook import tqdm as notebook_tqdm


INFO:     127.0.0.1:60956 - "POST /process_urls HTTP/1.1" 200 OK
INFO:     127.0.0.1:60987 - "POST /chat_response?session_id=abc123&user_query=What%20is%20an%20Array%20in%20Numpy%20%3F HTTP/1.1" 200 OK
INFO:     127.0.0.1:56516 - "POST /chat_response?session_id=abc123&user_query=Why%20to%20use%20Numpy%20and%20give%20me%20its%20applications%20%3F%3F HTTP/1.1" 200 OK
INFO:     127.0.0.1:55028 - "POST /chat_response?session_id=abc123&user_query=What%20are%20the%20previous%20questions%20i%20asked%20about%20numpy%20%3F HTTP/1.1" 200 OK
INFO:     127.0.0.1:53526 - "POST /chat_response?session_id=abc123&user_query=Explain%20the%20Series%20Data%20structure%20in%20pandas HTTP/1.1" 200 OK
INFO:     127.0.0.1:51537 - "GET /docs HTTP/1.1" 200 OK
INFO:     127.0.0.1:51537 - "GET /openapi.json HTTP/1.1" 200 OK
