<a href="https://colab.research.google.com/github/ProjectProRepo/How-to-Build-an-FAQ-ChatBot-using-Open-source-LLM-/blob/main/QuizGen_Chatbot.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install gradio transformers accelerate pypdf PyPDF2 bitsandbytes

In [1]:
import re

import gradio as gr
from PyPDF2 import PdfReader
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

In [None]:
from huggingface_hub import notebook_login
# Authenticate this session with the Hugging Face Hub before the model is loaded.
# NOTE(review): presumably needed because the checkpoint download requires a token — confirm.
notebook_login()  # Log in with your Hugging Face access token

In [None]:
# Load LLM
# Identifier of the checkpoint that the tokenizer and model are loaded from.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    # Request 8-bit weights through an explicit BitsAndBytesConfig; passing
    # load_in_8bit=True directly to from_pretrained is the deprecated form.
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
)

# Sampling text-generation pipeline used by the quiz generator: up to 512 new
# tokens per call, nucleus sampling (top_p=0.9) at temperature 0.7.
llm = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.7,
    top_p=0.9,
)


In [None]:
# Extract chunks from PDF
def extract_text_chunks(pdf_file, chunk_size=1500, overlap=200):
    """Read all text from a PDF and split it into overlapping character chunks.

    Args:
        pdf_file: The PDF source, passed unchanged to ``PdfReader``.
        chunk_size: Maximum number of characters per chunk; must be positive.
        overlap: Number of characters shared by consecutive chunks; must be
            non-negative and strictly smaller than ``chunk_size``.

    Returns:
        A list of strings. Consecutive chunks start ``chunk_size - overlap``
        characters apart; the last chunk may be shorter than ``chunk_size``.
        The list is empty when no text could be extracted.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``overlap`` is not in
            the range ``[0, chunk_size)``.
    """
    # Validate before touching the file: with overlap >= chunk_size the window
    # would never advance and the chunking loop would not terminate.
    if chunk_size <= 0:
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    if not 0 <= overlap < chunk_size:
        raise ValueError(
            f"overlap must satisfy 0 <= overlap < chunk_size, "
            f"got overlap={overlap}, chunk_size={chunk_size}"
        )

    reader = PdfReader(pdf_file)
    # extract_text() can return a falsy value for a page; treat that as empty text.
    full_text = "".join(page.extract_text() or "" for page in reader.pages)

    step = chunk_size - overlap
    return [
        full_text[start:start + chunk_size]
        for start in range(0, len(full_text), step)
    ]

In [None]:
# Find relevant chunk using simple keyword match
def find_relevant_chunk(chunks, topic):
    """Return the chunk that shares the most distinct words with ``topic``.

    Words are compared case-insensitively and are extracted as runs of word
    characters, so surrounding punctuation does not prevent a match
    (``"Recursion,"`` and ``"recursion."`` both count as ``recursion``).

    Args:
        chunks: Iterable of text chunks to search.
        topic: Free-text topic whose words are looked up in each chunk.

    Returns:
        The first chunk with the highest number of distinct topic words, or
        ``""`` when ``chunks`` is empty, ``topic`` contains no words, or no
        chunk contains any topic word.
    """
    topic_words = set(re.findall(r"\w+", topic.lower()))
    if not topic_words:
        # Nothing to match against; skip tokenising every chunk.
        return ""

    best_score = 0
    best_chunk = ""
    for chunk in chunks:
        chunk_words = set(re.findall(r"\w+", chunk.lower()))
        score = len(topic_words & chunk_words)
        # Strict comparison keeps the earliest chunk on ties.
        if score > best_score:
            best_score, best_chunk = score, chunk
    return best_chunk


In [None]:
# Generate quiz
def generate_quiz_from_pdf(pdf_file, topic):
    """Generate a three-question multiple-choice quiz on ``topic`` from a PDF.

    The PDF text is chunked with ``extract_text_chunks``, the chunk that best
    matches the topic is chosen with ``find_relevant_chunk``, and the
    module-level ``llm`` pipeline is prompted with that chunk.

    Args:
        pdf_file: The uploaded PDF, passed through to ``extract_text_chunks``.
            ``None`` means nothing was uploaded.
        topic: The quiz topic entered by the user.

    Returns:
        The generated quiz text, or a short explanatory message when the PDF
        or the topic is missing or the PDF yields no text.
    """
    # Guard the inputs first so a missing upload or blank topic yields a
    # readable message instead of an exception further down.
    if pdf_file is None:
        return "Please upload a PDF file."
    topic = (topic or "").strip()
    if not topic:
        return "Please enter a quiz topic."

    chunks = extract_text_chunks(pdf_file)
    if not chunks:
        return "No extractable text was found in the uploaded PDF."

    # When no chunk mentions the topic, fall back to the opening chunk so the
    # prompt is never built around empty content.
    relevant_chunk = find_relevant_chunk(chunks, topic) or chunks[0]

    prompt = (
        f"You're an AI tutor. Based on the following content:\n\n"
        f"{relevant_chunk}\n\n"
        f"Create a quiz on the topic '{topic}' with 3 multiple-choice questions. "
        f"Each question must have 4 options and indicate the correct answer."
    )

    # Ask the pipeline for the completion only, rather than stripping the
    # prompt afterwards with str.replace, which depends on the prompt being
    # echoed back character-for-character.
    outputs = llm(prompt, return_full_text=False)
    return outputs[0]["generated_text"].strip()

In [None]:
# Gradio UI
# Input widgets: the PDF to build the quiz from and the topic to focus it on.
pdf_input = gr.File(file_types=[".pdf"], label="Upload a PDF (up to 22MB)")
topic_input = gr.Textbox(
    lines=1,
    placeholder="Enter topic (e.g., Recursion, Sorting, etc.)",
    label="Quiz Topic",
)

# Wire the two inputs to the quiz generator and show its result as plain text.
iface = gr.Interface(
    fn=generate_quiz_from_pdf,
    inputs=[pdf_input, topic_input],
    outputs="text",
    title="🧠 PDF-to-Quiz Generator",
    description="Upload a large PDF (like a textbook or research paper) and get a quiz generated from a topic of your choice.",
)

# Start the app only when this code runs as the main program.
if __name__ == "__main__":
    iface.launch()
