Create a RAG application that acts as a study supervisor, helping students study from the papers they upload.
- The LLM is Llama 3 Instruct served through Groq (with clearly separated system and user prompt templates).
- The GUI is built with Gradio.
- Sentence Transformers is used for chunking.
- A vector database stores and retrieves the information.
The GUI needs to let the user upload a PDF document and then ask questions about that document.
I want to test it on Google Colab, so make sure everything works in that environment.

In [1]:
!pip install -q gradio sentence-transformers faiss-cpu pdfplumber groq


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.8/42.8 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.3/31.3 MB[0m [31m28.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.0/60.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.6/5.6 MB[0m [31m60.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m131.1/131.1 kB[0m [31m6.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.9/2.9 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [4]:
import os

# ✅ Provide your own Groq API key without hard-coding it in the notebook.
# The key is requested interactively (input is hidden) only when the
# environment does not already hold a non-blank GROQ_API_KEY, so a shared or
# committed copy of this notebook never contains the secret.
if not os.environ.get("GROQ_API_KEY", "").strip():
    from getpass import getpass

    os.environ["GROQ_API_KEY"] = getpass("Enter your Groq API key: ").strip()


In [3]:
# ✅ INSTALL DEPENDENCIES
!pip install -q gradio sentence-transformers faiss-cpu pdfplumber groq

# ✅ IMPORTS
import os
import pdfplumber
import faiss
import numpy as np
import gradio as gr
from sentence_transformers import SentenceTransformer
from groq import Groq

# ✅ EMBEDDING MODEL & GROQ CLIENT
# Sentence-transformer model used to embed both document chunks and queries.
model = SentenceTransformer("all-MiniLM-L6-v2")
# API key is read from the environment; it is None when the variable is unset.
api_key = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=api_key)

# ✅ SHARED STATE
# chunks_global: text chunks of the processed PDF (empty until one is loaded).
# index_global: FAISS index built over those chunks (None until one is loaded).
chunks_global, index_global = [], None

# ✅ CHUNKING FUNCTION
def extract_chunks_from_pdf(pdf_file, chunk_size=500, overlap=0):
    """Extract the text of a PDF and split it into fixed-size character chunks.

    Args:
        pdf_file: Path or file object passed to ``pdfplumber.open``.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared between consecutive chunks.
            ``0`` (the default) produces back-to-back chunks.

    Returns:
        A list of chunk strings in document order. The list is empty when no
        page yields any text.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or if ``overlap`` is
            negative or not smaller than ``chunk_size``.
    """
    # Validate up front: a negative chunk_size would otherwise silently
    # produce an empty list, and a zero step would fail deep inside range().
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, got {overlap}"
        )

    page_texts = []
    with pdfplumber.open(pdf_file) as pdf:
        for page in pdf.pages:
            content = page.extract_text()
            if content:  # skip pages for which no text was extracted
                page_texts.append(content + "\n")

    # Join once instead of growing a string with += on every page.
    text = "".join(page_texts)
    step = chunk_size - overlap
    return [text[start:start + chunk_size] for start in range(0, len(text), step)]

# ✅ FAISS INDEX
def create_faiss_index(chunks):
    """Embed ``chunks`` and store the vectors in a flat L2 FAISS index.

    Args:
        chunks: Non-empty sequence of text strings to embed.

    Returns:
        A ``(index, embeddings)`` tuple: the populated ``faiss.IndexFlatL2``
        and the float32 embedding matrix that was added to it (one row per
        chunk).

    Raises:
        ValueError: If ``chunks`` is empty, since the embedding dimension
            cannot be determined and there is nothing to index.
    """
    if len(chunks) == 0:
        raise ValueError("Cannot build a FAISS index from an empty list of chunks.")

    # FAISS expects a C-contiguous float32 matrix; convert once (no copy is
    # made when the encoder output already has that layout).
    embeddings = np.ascontiguousarray(model.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return index, embeddings

# ✅ TOP CHUNKS
def get_top_chunks(query, chunks, index, top_k=5):
    """Return the chunks whose embeddings are nearest to the query embedding.

    Args:
        query: Question text to embed and search with.
        chunks: List of chunk strings, in the same order they were indexed.
        index: FAISS index holding one vector per entry in ``chunks``.
        top_k: Maximum number of chunks to return.

    Returns:
        Up to ``top_k`` chunk strings, in the order the index returns them.
        Empty when ``chunks`` is empty or ``top_k`` is not positive.
    """
    if not chunks or top_k <= 0:
        return []

    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with -1, and chunks[-1] would silently return the last chunk.
    k = min(top_k, len(chunks))
    query_embedding = np.ascontiguousarray(model.encode([query]), dtype=np.float32)
    _, indices = index.search(query_embedding, k)

    # Defensive filter in case the index and chunk list are out of sync.
    return [chunks[i] for i in indices[0] if 0 <= i < len(chunks)]

# ✅ ASK LLaMA3
def ask_llama3(context, question, model_name="llama3-8b-8192"):
    """Ask the Groq-hosted chat model a question grounded in ``context``.

    Args:
        context: Study material text placed in the user prompt.
        question: The student's question.
        model_name: Groq model identifier to query. Defaults to the model the
            notebook originally hard-coded, so existing callers are unchanged.
            NOTE(review): hosted model IDs get retired over time — confirm the
            default is still served before relying on it.

    Returns:
        The content of the first completion choice returned by the API.
    """
    # System message sets the supervisor persona; the user message carries the
    # retrieved material followed by the question.
    system_prompt = "You are a helpful study supervisor who answers questions based on provided study material."
    user_prompt = f"Study Material:\n{context}\n\nQuestion: {question}\nAnswer:"

    chat_completion = client.chat.completions.create(
        model=model_name,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
    )
    return chat_completion.choices[0].message.content

# ✅ PROCESS PDF
def upload_pdf_and_process(file):
    """Chunk and index the uploaded PDF, then publish it as the active document.

    Args:
        file: Value delivered by the upload component: an object exposing the
            file path as ``.name``, a plain path string, or ``None`` when
            nothing has been uploaded.

    Returns:
        A status message for the UI describing success or the reason for
        failure.
    """
    global chunks_global, index_global

    if file is None:
        return "⚠️ Please upload a PDF file first."

    # Accept both an object with a .name path and a bare path string.
    pdf_path = getattr(file, "name", file)

    try:
        new_chunks = extract_chunks_from_pdf(pdf_path)
        if not new_chunks:
            return "⚠️ No extractable text was found in this PDF."
        new_index, _ = create_faiss_index(new_chunks)
    except Exception as exc:  # UI boundary: report the failure in the status box
        return f"❌ Could not process the PDF: {exc}"

    # Swap both globals together only after everything succeeded, so a failed
    # upload never leaves new chunks paired with the previous index.
    chunks_global, index_global = new_chunks, new_index
    return "✅ PDF uploaded and processed. You can now ask questions."

# ✅ HANDLE USER QUESTION
def handle_question(question):
    """Answer a question using the chunks of the currently processed PDF.

    Args:
        question: Text typed by the user; may be empty or ``None``.

    Returns:
        The model's answer, or a warning message when the question is blank or
        no PDF has been processed yet.
    """
    # Reject blank input before spending an embedding pass and an API call.
    if not question or not question.strip():
        return "⚠️ Please enter a question."
    # Compare the index against None explicitly rather than relying on the
    # truthiness of the index object.
    if not chunks_global or index_global is None:
        return "⚠️ Please upload and process a PDF first."

    relevant_chunks = get_top_chunks(question, chunks_global, index_global)
    context = "\n".join(relevant_chunks)
    return ask_llama3(context, question)

# ✅ GRADIO UI — VERTICAL ONE VIEW
# Builds the whole interface inside one Blocks context. No Row/Column
# containers are used; components are declared top to bottom: header, upload
# section, question section, footer.
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as app:
    # Page header (title and short usage hint).
    gr.HTML("""
    <div style='text-align: center; padding: 20px;'>
        <h1 style="color: #2c3e50; font-size: 36px;">📚 AI Study Supervisor</h1>
        <p style="color: #34495e; font-size: 18px;">
            Upload your study PDF and ask questions based on its content.
        </p>
    </div>
    """)

    # Upload section: PDF picker, a button that triggers processing, and a
    # non-interactive status box that shows the processing result.
    file_upload = gr.File(label="📄 Upload PDF", file_types=[".pdf"])
    upload_btn = gr.Button("📥 Process PDF")
    status_output = gr.Textbox(label="📌 Status", interactive=False)

    # Question section: free-text question, a button that requests the answer,
    # and a multi-line answer box with a copy button.
    question_input = gr.Textbox(label="💬 Ask a Question", placeholder="e.g., What is the summary of Chapter 1?")
    ask_btn = gr.Button("🧠 Get Answer")
    answer_output = gr.Textbox(label="📘 Answer", lines=8, show_copy_button=True)

    # Page footer.
    gr.HTML("""
    <div style="text-align: center; padding: 10px; font-size: 14px; color: #7f8c8d;">
        Made with 💡 LLaMA3 + FAISS + Gradio
    </div>
    """)

    # Event wiring: each button passes its input component's value to the
    # handler and writes the handler's return value to the output component.
    upload_btn.click(upload_pdf_and_process, inputs=file_upload, outputs=status_output)
    ask_btn.click(handle_question, inputs=question_input, outputs=answer_output)

# Start the Gradio server with default launch options.
app.launch()


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

It looks like you are running Gradio on a hosted a Jupyter notebook. For the Gradio app to work, sharing must be enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://c26e6cf43b28f36016.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


