In [None]:
!pip install -U langchain langchain-community




In [9]:
from langchain_text_splitters import RecursiveCharacterTextSplitter


In [10]:
!pip install langchain-text-splitters




In [11]:
# Read the Groq API key from the Colab secret store (secret name 'RAG_GROQ')
# so that the key is never hard-coded in the notebook.
from google.colab import userdata
GROQ_API_KEY = userdata.get('RAG_GROQ')

In [13]:
!pip install groq
!pip install pypdf
!pip install langchain-text-splitters
!pip install faiss-cpu
from groq import Groq
from pypdf import PdfReader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import gradio as gr
import os


# ---------------------------
# Initialize Groq Client
# ---------------------------
# The API key is read from the Colab secret 'RAG_GROQ'.
# NOTE(review): `userdata` is imported in an earlier cell, so that cell must
# have been run first; the GROQ_API_KEY defined there holds this same secret.
client = Groq(api_key= userdata.get('RAG_GROQ'))

# ---------------------------
# Load Embedding Model
# ---------------------------
# A single SentenceTransformer instance shared by process_pdf() (to embed the
# PDF chunks) and answer_question() (to embed the query), so that both sets of
# vectors come from the same model.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# ---------------------------
# Global FAISS variables
# ---------------------------
# vector_store : FAISS index built by process_pdf(); None until a PDF has been
#                processed (answer_question() checks for this).
# stored_chunks: the text chunks in the order they were added to the index, so
#                a row id returned by a search can be used to look up its text.
vector_store = None
stored_chunks = []

# ---------------------------
# PDF Processing
# ---------------------------
def process_pdf(pdf_file):
    """Index an uploaded PDF so that questions can be answered against it.

    The text of every page is extracted, split into overlapping chunks,
    embedded with ``embedding_model`` and stored in a fresh FAISS
    ``IndexFlatL2``. On success the module-level ``vector_store`` and
    ``stored_chunks`` are replaced together; on any early return they are
    left untouched.

    Args:
        pdf_file: The value supplied by the Gradio ``File`` component. It is
            passed to ``PdfReader`` unchanged. ``None`` (e.g. the button was
            clicked before a file was uploaded) is reported to the user
            instead of raising.

    Returns:
        str: A human-readable status message for the "Status" textbox.
    """
    global vector_store, stored_chunks

    if pdf_file is None:
        return "Please upload a PDF first."

    reader = PdfReader(pdf_file)

    # `or ""` guards against extractors that yield None/empty for pages with
    # no text layer. Pages are joined with a newline so the last word of one
    # page is not fused with the first word of the next.
    text = "\n".join((page.extract_text() or "") for page in reader.pages)

    splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100
    )
    chunks = splitter.split_text(text)

    # A scanned / image-only PDF produces no chunks; embedding an empty list
    # would make `embeddings.shape[1]` below fail.
    if not chunks:
        return "No extractable text was found in this PDF."

    # FAISS indexes operate on float32 matrices.
    embeddings = np.asarray(embedding_model.encode(chunks), dtype=np.float32)

    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    index.add(embeddings)

    # Publish the index and its chunks only once both are ready, so the two
    # globals can never describe different documents.
    vector_store = index
    stored_chunks = chunks

    return "PDF processed successfully. You can now ask questions."

# ---------------------------
# Question Answering
# ---------------------------
def answer_question(question):
    """Answer a question using the most relevant chunks of the processed PDF.

    The question is embedded with ``embedding_model``, the nearest chunks are
    retrieved from ``vector_store`` (at most three), and the retrieved text is
    sent together with the question to the Groq chat completion API.

    Args:
        question: The text typed into the "Ask a Question" box.

    Returns:
        str: The model's answer, or a short instruction when the question is
        blank or no PDF has been processed yet.
    """
    # Checked first so a blank submit never costs an LLM call.
    if not question or not question.strip():
        return "Please enter a question."

    if vector_store is None or not stored_chunks:
        return "Please upload and process a PDF first."

    question_embedding = np.asarray(
        embedding_model.encode([question]), dtype=np.float32
    )

    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, and stored_chunks[-1] would silently repeat the last
    # chunk in the context.
    top_k = min(3, len(stored_chunks))
    _, indices = vector_store.search(question_embedding, k=top_k)

    context = "".join(
        stored_chunks[idx] + "\n"
        for idx in indices[0]
        if 0 <= idx < len(stored_chunks)  # defensive: skip any -1 padding
    )

    prompt = f"""
Use the context below to answer the question.

Context:
{context}

Question:
{question}
"""

    response = client.chat.completions.create(
        model="llama-3.3-70b-versatile",
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    return response.choices[0].message.content

# ---------------------------
# Gradio UI
# ---------------------------
# Two-step interface: upload + process a PDF, then ask questions about it.
# Components are rendered in the order they are created inside the Blocks
# context, so the statement order below is also the on-screen order.
with gr.Blocks() as app:
    gr.Markdown("## 📄 RAG-based PDF Question Answering using Groq")

    # Step 1: upload a PDF and build the index.
    pdf_input = gr.File(label="Upload PDF")
    process_btn = gr.Button("Process PDF")
    status = gr.Textbox(label="Status")

    # Step 2: ask a question about the processed PDF.
    question = gr.Textbox(label="Ask a Question")
    answer = gr.Textbox(label="Answer")

    # Clicking the button runs process_pdf on the uploaded file and shows its
    # status message; submitting the question box runs answer_question and
    # shows the returned answer.
    process_btn.click(process_pdf, inputs=pdf_input, outputs=status)
    question.submit(answer_question, inputs=question, outputs=answer)

# Start the app.
# NOTE(review): the recorded cell output shows Gradio automatically enabling
# share=True on Colab (a public URL) and suggesting launch(debug=True) to see
# callback errors in the notebook.
app.launch()

Collecting faiss-cpu
  Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (7.6 kB)
Downloading faiss_cpu-1.13.2-cp310-abi3-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (23.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m23.8/23.8 MB[0m [31m71.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.13.2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

BertModel LOAD REPORT from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

Notes:
- UNEXPECTED	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.


tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://278644aa96b9500446.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


