In [None]:
import os

import gradio as gr
import requests
import google.generativeai as genai
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OllamaEmbeddings
from langchain.text_splitter import MarkdownHeaderTextSplitter, RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.document_loaders import PyPDFLoader
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

# The Gemini API key is read from the environment instead of being embedded in
# the source, where anyone with read access to the file could copy it.
# NOTE(review): the key that used to be hard-coded here should be treated as
# compromised and rotated.
_GEMINI_API_KEY = os.environ.get("GOOGLE_API_KEY")
if not _GEMINI_API_KEY:
    raise RuntimeError(
        "Set the GOOGLE_API_KEY environment variable to your Gemini API key."
    )
genai.configure(api_key=_GEMINI_API_KEY)

# Shared Gemini model handle used for both summarisation and chat answers.
model = genai.GenerativeModel("gemini-2.0-flash")

# Module-level log of chat turns; chat_with_rag appends one entry per human
# query and one per model answer.
chat_history = []

def fetch_and_process_url(user_url):
    """Fetch a page, summarise it with the model, and store the summary in ChromaDB.

    The page is requested through the ``https://r.jina.ai/`` proxy and the
    response body is treated as markdown. The model is asked for a summary with
    ``#`` headers, and that summary is split on those headers before being
    handed to ``store_in_chromadb``.

    Args:
        user_url: Address of the page to ingest. Surrounding whitespace is
            ignored.

    Returns:
        A status string: the success message from ``store_in_chromadb``, or a
        human-readable error when the URL is blank or the fetch fails.
    """
    # Nothing to fetch: avoid hitting the proxy with an empty path.
    if not user_url or not user_url.strip():
        return "Please enter a URL."

    reader_url = f"https://r.jina.ai/{user_url.strip()}"
    try:
        # Without a timeout a stalled server would hang the UI callback forever.
        page = requests.get(reader_url, timeout=30)
    except requests.RequestException as exc:
        # DNS failures, refused connections, timeouts, etc. become a status
        # message rather than an unhandled exception in the UI.
        return f"Error fetching URL: {exc}"
    if page.status_code != 200:
        return "Error fetching URL: " + page.text

    markdown_content = page.text
    summary_prompt = f"""Text:
    {markdown_content}
    Generate a detailed summary with headers (using # for each section)."""
    summary_text = model.generate_content(summary_prompt).text

    # Split the summary on its top-level headers and store the sections.
    splitter = MarkdownHeaderTextSplitter(headers_to_split_on=[("#", "Section")])
    documents = splitter.split_text(summary_text)
    return store_in_chromadb(documents, "URL content processed and stored.")

def extract_text_from_pdf(pdf_file):
    """Read an uploaded PDF, chunk its text, and hand the chunks to ChromaDB.

    Args:
        pdf_file: Upload object whose ``name`` attribute is the path of the
            PDF on disk, or ``None`` when nothing was uploaded.

    Returns:
        ``"No file uploaded."`` when ``pdf_file`` is ``None``; otherwise the
        status string produced by ``store_in_chromadb``.
    """
    if pdf_file is None:
        return "No file uploaded."

    # Join every page into one string so chunk boundaries are not tied to
    # page boundaries.
    loaded_pages = PyPDFLoader(pdf_file.name).load()
    combined_text = "\n".join(page.page_content for page in loaded_pages)

    chunker = RecursiveCharacterTextSplitter(chunk_size=4000, chunk_overlap=50)
    pieces = chunker.split_text(combined_text)

    return store_in_chromadb(
        [Document(page_content=piece) for piece in pieces],
        "PDF content processed and stored.",
    )

def store_in_chromadb(documents, message):
    """Replace the contents of the persisted Chroma collection with ``documents``.

    The collection ``my_collection`` under ``./chroma_db`` is deleted and then
    rebuilt from ``documents`` using Ollama ``llama3.2:1b`` embeddings, so each
    successful call discards whatever was stored before.

    Args:
        documents: Documents to embed and store.
        message: Text to report back on success.

    Returns:
        ``"✅ <message>"`` after the collection has been rebuilt, or a failure
        notice when ``documents`` is empty.
    """
    # With nothing to store, bail out before the delete below so the existing
    # collection is not wiped and left with no replacement.
    if not documents:
        return "❌ No content to store."

    embedding_function = OllamaEmbeddings(model="llama3.2:1b")

    # Open the existing collection only to drop it; the handle is not reused.
    Chroma(
        persist_directory="./chroma_db",
        collection_name="my_collection",
        embedding_function=embedding_function,
    ).delete_collection()

    # Rebuild the collection from the new documents.
    Chroma.from_documents(
        documents,
        embedding_function,
        persist_directory="./chroma_db",
        collection_name="my_collection",
    )
    return f"✅ {message}"

def chat_with_rag(query, chat_history_ui=None):
    """Answer ``query`` using context retrieved from the persisted Chroma collection.

    The five most similar stored chunks are retrieved, placed into a prompt
    together with the question, and sent to the model. Both the query and the
    answer are appended to the module-level ``chat_history``.

    Args:
        query: The user's question. Blank or ``None`` queries are ignored.
        chat_history_ui: Existing list of ``(user, bot)`` pairs shown in the
            chat widget, or ``None``/empty when the conversation is new.

    Returns:
        The chat transcript as a list of ``(user, bot)`` pairs, with the new
        exchange appended. The bot text contains both the retrieved context and
        the model's answer. For a blank query the transcript is returned
        unchanged.
    """
    transcript = list(chat_history_ui) if chat_history_ui else []

    # A blank message carries no question: skip the embedding and model calls
    # and leave both the UI transcript and chat_history untouched.
    if not query or not query.strip():
        return transcript

    vectorstore = Chroma(
        persist_directory="./chroma_db",
        collection_name="my_collection",
        embedding_function=OllamaEmbeddings(model="llama3.2:1b"),
    )
    retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 5})
    retrieved_docs = retriever.get_relevant_documents(query)

    if retrieved_docs:
        retrieved_content = "\n\n".join(doc.page_content for doc in retrieved_docs)
    else:
        retrieved_content = "No relevant content found."
    print("Retrieved Context:\n", retrieved_content)  # Print retrieved context

    response_prompt = f"""
    Use the following information to answer the question:
    Context:
    {retrieved_content}
    Question: {query}
    Answer:
    """
    bot_response = model.generate_content(response_prompt).text

    chat_history.append({"role": "human", "human": query})
    chat_history.append({"role": "ai", "ai": bot_response})

    display_text = f"Retrieved Content:\n{retrieved_content}\n\nResponse:\n{bot_response}"
    return transcript + [(query, display_text)]

# Build the Gradio page: an ingestion area (URL or PDF) with a status box,
# followed by a chat area wired to chat_with_rag.
with gr.Blocks() as app:
    gr.Markdown(value="# 🧠 RAG Chatbot with URL & PDF Support")

    # --- Ingestion controls -------------------------------------------------
    with gr.Row():
        url_input = gr.Textbox(label="Enter URL")
        fetch_button = gr.Button(value="Fetch & Process URL")
    pdf_input = gr.File(label="Upload a PDF")
    process_pdf_button = gr.Button(value="Process PDF")
    output = gr.Textbox(label="Processing Status")

    # Both ingestion paths report into the same status textbox.
    fetch_button.click(
        fn=fetch_and_process_url,
        inputs=[url_input],
        outputs=[output],
    )
    process_pdf_button.click(
        fn=extract_text_from_pdf,
        inputs=[pdf_input],
        outputs=[output],
    )

    # --- Chat controls ------------------------------------------------------
    gr.Markdown(value="## 💬 Chat Interface")
    chatbot_ui = gr.Chatbot()
    chat_input = gr.Textbox(placeholder="Type your message...")
    chat_button = gr.Button(value="Send")

    # Pressing Enter in the textbox and clicking "Send" trigger the same handler.
    chat_input.submit(
        fn=chat_with_rag,
        inputs=[chat_input, chatbot_ui],
        outputs=[chatbot_ui],
    )
    chat_button.click(
        fn=chat_with_rag,
        inputs=[chat_input, chatbot_ui],
        outputs=[chatbot_ui],
    )

# Start the web server for the assembled interface.
app.launch()