In [2]:
import os
import pickle
import gradio as gr
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredURLLoader
from langchain.vectorstores import FAISS
from langchain.prompts import PromptTemplate
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from transformers import pipeline
import torch

FAISS_FILE = "faiss_store.pkl"

# Load model pipeline
device = 0 if torch.cuda.is_available() else -1
hf_pipeline = pipeline(
    "text2text-generation",
    model="MBZUAI/LaMini-Flan-T5-783M",
    device=device,
    max_new_tokens=256,
    do_sample=False,
    temperature=0
)
llm = HuggingFacePipeline(pipeline=hf_pipeline)

# Prompt template
custom_prompt = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a helpful assistant. Answer the question strictly using the context. If the answer is not about the exact car model mentioned in the question, say "I don't know".

Context: {context}

Question: {question}
Answer:
"""
)

# Function with progress bar
def process_urls(url1, url2, url3, progress=gr.Progress()):
    urls = [url1, url2, url3]
    loader = UnstructuredURLLoader(urls=urls)

    progress(0.1, desc="📥 Loading articles...")
    docs = loader.load()

    def clean_text(text):
        import re
        return re.sub(r"(Remove Ad|Story continues below.*?|Reuters|Advertisement)", "", text, flags=re.IGNORECASE)

    for doc in docs:
        doc.page_content = clean_text(doc.page_content)

    progress(0.4, desc="🔍 Splitting documents...")
    splitter = RecursiveCharacterTextSplitter(chunk_size=400, chunk_overlap=80)
    splits = splitter.split_documents(docs)

    progress(0.6, desc="📐 Generating embeddings...")
    embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vectorstore = FAISS.from_documents(splits, embedding)

    progress(0.9, desc="💾 Saving vector store...")
    with open(FAISS_FILE, "wb") as f:
        pickle.dump(vectorstore, f)

    progress(1.0, desc="✅ Done!")
    return "✅ URLs processed and saved successfully!"

# QA function
def answer_query(question):
    if not os.path.exists(FAISS_FILE):
        return "❌ Please process URLs first.", ""

    with open(FAISS_FILE, "rb") as f:
        vectorstore = pickle.load(f)

    retriever = vectorstore.as_retriever(search_type="similarity", k=3)
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        retriever=retriever,
        return_source_documents=True,
        chain_type_kwargs={"prompt": custom_prompt}
    )

    result = qa_chain.invoke({"query": question})
    answer = result["result"]
    sources = "\n".join(set(doc.metadata["source"] for doc in result["source_documents"]))
    return answer, sources

# Gradio UI
with gr.Blocks(theme=gr.themes.Default(primary_hue="blue")) as demo:
    gr.Markdown("""
        <div style='
        text-align: center;
        padding: 2rem 1rem;
        background: linear-gradient(135deg, #eaf4ff, #fdfdff);
        border-radius: 16px;
        margin-bottom: 20px;
        box-shadow: 0 8px 16px rgba(0,0,0,0.05);
        '>
        <h1 style='
            font-size: 2.5rem;
            margin-bottom: 0.5rem;
            color: #1f3b4d;
        '>📚 SmortBot</h1>
        
        <p style='
            font-size: 1.1rem;
            max-width: 800px;
            margin: 0 auto;
            color: #333;
        '>
            Instantly understand news articles by asking questions. Paste URLs and ask away — your AI analyst has got you covered!
        </p>
        </div>
        """)


    with gr.Tab("🌐 Step 1: Process Article URLs"):
        with gr.Row():
            with gr.Column():
                url1 = gr.Textbox(label="News URL 1", placeholder="Paste article link...", lines=1)
                url2 = gr.Textbox(label="News URL 2", placeholder="Paste article link...", lines=1)
                url3 = gr.Textbox(label="News URL 3", placeholder="Paste article link...", lines=1)
                process_btn = gr.Button("🚀 Process Articles")
                process_status = gr.Textbox(label="Status", interactive=False)

        process_btn.click(process_urls, inputs=[url1, url2, url3], outputs=process_status)

    with gr.Tab("🤖 Step 2: Ask Your Question"):
        with gr.Row():
            question = gr.Textbox(label="Ask something from the articles", placeholder="Any question related to the article", lines=2)
        with gr.Row():
            answer = gr.Textbox(label="SmortBot's Answer", lines=4, interactive=False)
        with gr.Row():
            sources = gr.Textbox(label="Sources", lines=3, interactive=False)
        ask_btn = gr.Button("💬 Get Answer")
        ask_btn.click(answer_query, inputs=question, outputs=[answer, sources])

demo.launch()




* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.




Error fetching or processing , exception: Exactly one of file, filename and url must be specified.
Error fetching or processing , exception: Exactly one of file, filename and url must be specified.
