# Import modules

In [None]:
import os
import getpass
from typing import List
from langchain.chat_models import init_chat_model
from langchain_huggingface import HuggingFaceEmbeddings

# File-type handlers
import fitz  # PyMuPDF
from docx import Document as DocxDocument
from langchain.docstore.document import Document
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS



In [None]:
# Ask for the Groq key interactively only when the environment has no
# (non-empty) value for it, so an already-exported key is never overwritten.
if not os.getenv("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")

In [6]:
# Shared chat model; query_rag reads this module-level name.
# The model id and provider must match what the Groq account can access.
# NOTE(review): confirm "llama3-8b-8192" is still a model id Groq serves.
llm = init_chat_model(model="llama3-8b-8192", model_provider="groq")

In [17]:
# Lower-case file extensions (leading dot included) that the folder scan
# will pass on to the loaders; every other file is skipped.
SUPPORTED_EXTENSIONS = {
    '.txt',
    '.docx',
    '.pdf',
}

In [16]:
def load_pdf(path: str) -> List[Document]:
    """Extract the text of a PDF, producing one Document per page.

    Args:
        path: Filesystem path of the PDF to read.

    Returns:
        One ``Document`` per page, in page order. Each carries the page's
        extracted text as ``page_content`` and ``{"source": path}`` as
        metadata.
    """
    # Open through a context manager so the PyMuPDF handle is released even
    # when text extraction raises part-way through the file.
    with fitz.open(path) as pdf:
        page_texts = [page.get_text() for page in pdf]
    return [
        Document(page_content=text, metadata={"source": path})
        for text in page_texts
    ]

def load_docx(path: str) -> List[Document]:
    """Read a Word file and return its paragraph text as a single Document.

    Args:
        path: Filesystem path of the .docx file.

    Returns:
        A one-element list whose ``Document`` holds the text of every
        paragraph joined by newlines, with ``{"source": path}`` as metadata.
    """
    word_file = DocxDocument(path)
    paragraph_texts = [paragraph.text for paragraph in word_file.paragraphs]
    combined = "\n".join(paragraph_texts)
    return [Document(page_content=combined, metadata={"source": path})]

def load_txt(path: str) -> List[Document]:
    """Read a UTF-8 text file into a single Document.

    Args:
        path: Filesystem path of the text file.

    Returns:
        A one-element list whose ``Document`` holds the whole file content,
        with ``{"source": path}`` as metadata.

    Raises:
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    # "utf-8-sig" reads plain UTF-8 unchanged but strips a leading byte-order
    # mark, so the BOM does not end up as a stray U+FEFF in the indexed text.
    with open(path, encoding="utf-8-sig") as handle:
        content = handle.read()
    return [Document(page_content=content, metadata={"source": path})]

def load_file(path: str) -> List[Document]:
    """Load one file with the loader that matches its extension.

    The extension is compared case-insensitively.

    Args:
        path: Filesystem path of a .pdf, .docx or .txt file.

    Returns:
        The Documents produced by ``load_pdf``, ``load_docx`` or ``load_txt``.

    Raises:
        ValueError: If the extension is none of .pdf, .docx or .txt.
    """
    _, suffix = os.path.splitext(path)
    suffix = suffix.lower()

    # Reject unknown types up front; everything below is a known extension.
    if suffix not in ('.pdf', '.docx', '.txt'):
        raise ValueError(f"Unsupported file type: {suffix}")

    if suffix == '.pdf':
        return load_pdf(path)
    if suffix == '.docx':
        return load_docx(path)
    return load_txt(path)

In [None]:
def load_documents_from_folder(folder_path: str) -> List[Document]:
    """Recursively load every supported file found under a folder.

    Files whose extension is not in ``SUPPORTED_EXTENSIONS`` are skipped.
    A file that fails to load is reported on stdout and does not stop the
    scan.

    Args:
        folder_path: Root directory to walk.

    Returns:
        All Documents from the files that loaded successfully; an empty list
        when nothing was found.
    """
    collected: List[Document] = []
    for dirpath, _subdirs, filenames in os.walk(folder_path):
        for filename in filenames:
            candidate = os.path.join(dirpath, filename)
            suffix = os.path.splitext(filename)[-1].lower()
            if suffix not in SUPPORTED_EXTENSIONS:
                continue
            # Best effort: one unreadable file must not abort the whole scan.
            try:
                collected.extend(load_file(candidate))
            except Exception as err:
                print(f"Error loading {filename}: {err}")
    return collected

In [None]:
# import bs4
from langchain import hub
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from langchain.memory import ConversationBufferMemory
from typing_extensions import List, TypedDict
from langchain.chains import RetrievalQA
# from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
import gradio as gr
import tempfile

In [None]:

# --- RAG Build ---
def build_vectorstore(documents: List[Document]):
    """Split documents into chunks, embed them and index them in FAISS.

    Documents are split into chunks of at most 1000 characters with a
    100-character overlap, then embedded with the
    ``sentence-transformers/all-mpnet-base-v2`` model.

    Args:
        documents: Documents to index.

    Returns:
        The FAISS vector store built from the chunks.

    Raises:
        ValueError: If ``documents`` is empty or yields no chunks to index.
    """
    # Fail early with a clear message: an empty input would otherwise only
    # surface as an unhelpful error inside the FAISS index construction,
    # after the embedding model has already been loaded.
    if not documents:
        raise ValueError("Cannot build a vector store: no documents were provided.")

    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    chunks = splitter.split_documents(documents)
    if not chunks:
        raise ValueError(
            "Cannot build a vector store: the documents contain no text to index."
        )

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    return FAISS.from_documents(chunks, embeddings)

In [None]:
def query_rag(db, query: str) -> str:
    """Answer a question with a RetrievalQA chain over a vector store.

    The chain retrieves the 3 most similar chunks from ``db`` and passes them
    to the module-level ``llm``.

    Args:
        db: Vector store exposing ``as_retriever``.
        query: The question to answer.

    Returns:
        The answer text produced by the chain.
    """
    retriever = db.as_retriever(search_kwargs={"k": 3})
    qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)
    # Chain.run is deprecated; invoke takes the chain's "query" input and
    # returns a dict whose "result" entry is the answer text.
    return qa_chain.invoke({"query": query})["result"]

In [None]:
# ----------- Global Variables -----------
# Vector store queried by ques_responses; None means nothing is loaded yet.
vectorstore = None

# Conversation buffer attached to the chain in ques_responses: it stores the
# "question" input of each turn and exposes past turns to the prompt as
# "history" (as message objects rather than one flattened string).
memory = ConversationBufferMemory(
    return_messages=True,
    input_key="question",
    memory_key="history",
)


* Running on local URL:  http://127.0.0.1:7860
* To create a public link, set `share=True` in `launch()`.




Traceback (most recent call last):
  File "c:\Users\faiza\anaconda3\envs\ai_env\Lib\site-packages\gradio\queueing.py", line 626, in process_events
    response = await route_utils.call_process_api(
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<5 lines>...
    )
    ^
  File "c:\Users\faiza\anaconda3\envs\ai_env\Lib\site-packages\gradio\route_utils.py", line 350, in call_process_api
    output = await app.get_blocks().process_api(
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<11 lines>...
    )
    ^
  File "c:\Users\faiza\anaconda3\envs\ai_env\Lib\site-packages\gradio\blocks.py", line 2240, in process_api
    result = await self.call_function(
             ^^^^^^^^^^^^^^^^^^^^^^^^^
    ...<8 lines>...
    )
    ^
  File "c:\Users\faiza\anaconda3\envs\ai_env\Lib\site-packages\gradio\blocks.py", line 1747, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
        fn, *

In [None]:

# ----------- Question Handler -----------
def ques_responses(question, history, system_prompt):
    """Answer a question from the loaded documents with the shared chat model.

    The 3 chunks most similar to ``question`` are fetched from the
    module-level ``vectorstore`` and placed in the prompt together with the
    system prompt, a fixed instruction block and the conversation history
    kept by the module-level ``memory``.

    Args:
        question: The user's latest message.
        history: Not read here; the history shown to the model comes from
            ``memory``. (Presumably supplied by the chat UI callback —
            confirm against the caller.)
        system_prompt: Text placed at the very top of the prompt.

    Returns:
        The model's reply, or a fixed notice when no vector store is loaded.
    """
    if vectorstore is None:
        return "No documents loaded. Please restart with a valid folder."

    # Imported here because the file-level LLMChain import is commented out.
    from langchain.chains import LLMChain

    retriever_docs = vectorstore.similarity_search(question, k=3)
    context = "\n".join(doc.page_content for doc in retriever_docs)

    instruction = f"""
        1. If the user is greeting you then greet the user and tell your name.

        2. If user is asking you a question then answer the question = {question}.

        3. If you do not find any answer for any {question}, return "Sorry, I don't know."
        """

    template = """
            {system_prompt}

            {context}

            {instruction}

            Conversation history:
            {history}

            Question: {question}

            Answer:
            """

    prompt = PromptTemplate(
        input_variables=["system_prompt", "context", "instruction", "question", "history"],
        template=template,
    )

    # Use the module-level chat model: the names this block used to rely on
    # (ChatOpenAI, token_limit) are not defined anywhere in this file, so the
    # call failed with NameError as soon as documents were loaded.
    # "history" is not passed to predict(); the attached memory fills it in.
    chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

    response = chain.predict(
        question=question,
        context=context,
        instruction=instruction,
        system_prompt=system_prompt,
    )
    return response


In [None]:
# --- Gradio UI ---
# with gr.Blocks() as demo:
#     gr.Markdown("## 🔍 RAG File QA (PDF/DOCX/TXT)")

#     file_input = gr.File(file_types=['.pdf', '.docx', '.txt'], file_count="multiple", label="Upload Files")
#     upload_btn = gr.Button("Load Documents")

#     status = gr.Textbox(label="Status")
#     upload_btn.click(fn=handle_file_upload, inputs=[file_input], outputs=[status])

#     question = gr.Textbox(label="Ask a Question")
#     answer = gr.Textbox(label="Answer", lines=6)
#     ask_btn = gr.Button("Get Answer")

#     ask_btn.click(fn=handle_query, inputs=[question], outputs=[answer])

# demo.launch()