<a href="https://colab.research.google.com/github/Dheerajtripathi21/node.js/blob/main/Auto_MCQ_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

import random
import re
import PyPDF2
import spacy
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# ==============================
# Load SpaCy model
# ==============================
# English pipeline used by extract_keywords() for noun chunks and POS tags.
# The "en_core_web_sm" package has to be installed beforehand (the notebook
# does this with `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

# ==============================
# Load Question Generation Model
# ==============================
# Seq2seq checkpoint wrapped in a text2text-generation pipeline. generate_mcqs()
# feeds it the prompts built by safe_highlight_context(): the text
# "generate question: " followed by a context whose answer is wrapped in <hl>
# markers.
QG_MODEL_NAME = "valhalla/t5-small-qg-hl"
tokenizer = AutoTokenizer.from_pretrained(QG_MODEL_NAME)
qg_model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL_NAME)
qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=tokenizer)

# ==============================
# Utility: Read PDF
# ==============================
def read_pdf(pdf_path: str) -> str:
    """Return the text of the PDF at ``pdf_path`` as a single line.

    Pages whose text extraction fails or yields nothing are skipped. All runs
    of whitespace are collapsed to one space and the result is stripped. A
    falsy ``pdf_path`` gives an empty string without touching the filesystem.
    """
    if not pdf_path:
        return ""

    page_texts = []
    with open(pdf_path, "rb") as handle:
        for page in PyPDF2.PdfReader(handle).pages:
            try:
                extracted = page.extract_text()
            except Exception:
                # Best-effort: one unreadable page must not discard the rest.
                continue
            if extracted:
                page_texts.append(extracted)

    return re.sub(r"\s+", " ", " ".join(page_texts)).strip()

# ==============================
# Extract keywords
# ==============================
def extract_keywords(text: str, top_n: int = 5):
    """Return up to ``top_n`` key phrases found in ``text``.

    Multi-word noun chunks are preferred, in document order. If they do not
    fill the quota, single alphabetic NOUN/PROPN tokens are added. Duplicates
    are dropped case-insensitively; the first spelling seen is kept.
    """
    doc = nlp(text)
    seen = set()
    keywords = []

    def _collect(candidates):
        # Append unseen candidates until the quota is reached.
        for candidate in candidates:
            lowered = candidate.lower()
            if lowered not in seen:
                seen.add(lowered)
                keywords.append(candidate)
            if len(keywords) >= top_n:
                break

    # Pass 1: noun chunks made of more than one word.
    _collect(
        chunk.text.strip()
        for chunk in doc.noun_chunks
        if len(chunk.text.split()) > 1
    )

    # Pass 2: single nouns / proper nouns, only when still short of the quota.
    if len(keywords) < top_n:
        _collect(
            token.text
            for token in doc
            if token.pos_ in {"PROPN", "NOUN"} and token.is_alpha
        )

    return keywords[:top_n]

# ==============================
# Build context for QG
# ==============================
def safe_highlight_context(text: str, answer: str, max_len: int = 600):
    """Build the question-generation prompt for ``answer`` within ``text``.

    The first case-insensitive occurrence of ``answer`` is wrapped as
    ``<hl> answer <hl>``; when ``answer`` does not occur, it is appended to the
    end of ``text`` in that form. The context is then trimmed to a window of
    roughly ``max_len`` characters centred on the opening marker.

    Args:
        text: Source passage.
        answer: Phrase to highlight; matched literally, ignoring case.
        max_len: Target window size in characters. The window is widened when
            needed so the closing ``<hl>`` marker is never cut off.

    Returns:
        ``"generate question: "`` followed by the trimmed, highlighted context.
    """
    open_tag, close_tag = "<hl> ", " <hl>"
    match = re.search(re.escape(answer), text, re.IGNORECASE)
    if match:
        start, end = match.span()
        context = text[:start] + open_tag + text[start:end] + close_tag + text[end:]
        hl_start = start
        hl_end = end + len(open_tag) + len(close_tag)
    else:
        context = text + f" <hl> {answer} <hl>"
        hl_start = len(text) + 1  # skip the separating space
        hl_end = len(context)

    # Centre on the marker we inserted ourselves rather than searching for
    # "<hl>" again: the passage may already contain that literal text.
    half = max_len // 2
    left = max(0, hl_start - half)
    # Keep the whole highlighted span even when the answer is longer than half
    # the window; a prompt with a single <hl> marker is malformed.
    right = min(len(context), max(hl_start + half, hl_end))
    return "generate question: " + context[left:right]

# ==============================
# Generate MCQs
# ==============================
def generate_mcqs(text: str, num_questions: int = 5, num_options: int = 4):
    """Build multiple-choice questions from ``text``.

    Answers are the key phrases returned by ``extract_keywords``. Each question
    is generated by ``qg_pipeline`` from the prompt built by
    ``safe_highlight_context``. Distractors are the other answers, padded from
    a small pool of generic words when there are too few.

    Args:
        text: Source passage. Only the first 5000 characters are used.
        num_questions: Maximum number of questions to produce.
        num_options: Desired number of options per question, answer included.
            A question gets fewer options when not enough distinct distractors
            are available.

    Returns:
        A list of dicts with keys ``"question"``, ``"options"`` (shuffled) and
        ``"answer"``. Empty when ``text`` is blank or yields no key phrases.
    """
    text = (text or "").strip()
    if not text:
        return []
    text = text[:5000]
    answers = extract_keywords(text, top_n=num_questions)
    if not answers:
        return []
    fallback_pool = ["Technology", "Science", "Education", "History", "Geography", "Mathematics", "Biology"]
    wanted = max(0, num_options - 1)
    mcqs = []
    for ans in answers:
        try:
            prompt = safe_highlight_context(text, ans)
            out = qg_pipeline(prompt, max_length=72, do_sample=False, num_return_sequences=1)
            question = out[0]["generated_text"].strip()
        except Exception:
            # Best-effort: a model failure degrades to the generic question
            # below instead of aborting the whole quiz.
            question = ""
        if not question:
            question = f"What is '{ans}'?"
        elif not question.endswith("?"):
            question += "?"

        distractors = [a for a in answers if a != ans][:wanted]
        if len(distractors) < wanted:
            # Pad from the pool in one bounded step. Retrying random picks in a
            # loop never terminates once the pool has no unused word left.
            taken = {d.lower() for d in distractors} | {ans.lower()}
            spare = [word for word in fallback_pool if word.lower() not in taken]
            random.shuffle(spare)
            distractors += spare[: wanted - len(distractors)]

        options = distractors + [ans]
        random.shuffle(options)
        mcqs.append({
            "question": question,
            "options": options,
            "answer": ans
        })
    return mcqs

# ==============================
# Gradio UI
# ==============================
# Upper bound for the question slider and the number of question slots built below.
MAX_Q = 10
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("## 📝 Automatic MCQ Generator (Paste Text OR Upload PDF)")

    # Input row: pasted text on the left, PDF upload on the right.
    with gr.Row():
        text_input = gr.Textbox(
            label="✍️ Paste your text here",
            lines=8,
            placeholder="Paste text here... (or upload a PDF on the right)"
        )
        file_input = gr.File(
            label="📂 Or upload a PDF",
            type="filepath",
            # Only PDFs can be parsed by read_pdf(), so other file types are
            # not offered in the upload dialog.
            file_types=[".pdf"]
        )

    num_q = gr.Slider(1, MAX_Q, step=1, value=5, label="How many questions?")
    generate_btn = gr.Button("🚀 Generate Quiz", variant="primary")

    status_md = gr.Markdown()
    # Holds the generated quiz (list of question dicts) between events.
    quiz_data_state = gr.State([])

    # All MAX_Q question slots are created up front and start hidden; the
    # generate handler fills and reveals as many as it needs.
    radios, results = [], []
    with gr.Column():
        for i in range(MAX_Q):
            q_radio = gr.Radio(choices=[], label=f"Q{i+1}.", interactive=True, visible=False)
            q_result = gr.Markdown(visible=False)
            radios.append(q_radio)
            results.append(q_result)

    submit_btn = gr.Button("✅ Submit All")
    score_md = gr.Markdown()

    # ==============================
    # Handlers
    # ==============================
    def on_generate(input_text, pdf_path, n_questions):
        """Generate a quiz and return updates for every output component.

        An uploaded PDF takes precedence over pasted text. The returned list
        holds, in order: the status message update, a (radio, result) update
        pair for each of the MAX_Q slots, and the quiz data for the state.

        Args:
            input_text: Text pasted by the user; may be empty or None.
            pdf_path: Path of the uploaded file, or a falsy value.
            n_questions: Number of questions requested (slider value).
        """
        if pdf_path:
            try:
                text = read_pdf(pdf_path)
            except Exception:
                # An unreadable or corrupt upload is treated as "no text" so
                # the user sees the status message below, not a stack trace.
                text = ""
        else:
            text = input_text or ""
        mcqs = generate_mcqs(text, num_questions=int(n_questions)) or []

        if mcqs:
            status = f"✅ Generated {len(mcqs)} questions. Select answers below ⬇️"
        else:
            status = "❌ Please paste text or upload a readable PDF."

        updates = [gr.update(value=status, visible=True)]
        for i in range(MAX_Q):
            if i < len(mcqs):
                q = mcqs[i]
                updates += [
                    gr.update(choices=q["options"], label=f"Q{i+1}. {q['question']}", value=None, visible=True),
                    gr.update(value="", visible=True)
                ]
            else:
                # Unused slot: reset to its initial, hidden appearance.
                updates += [gr.update(choices=[], label=f"Q{i+1}.", value=None, visible=False),
                            gr.update(value="", visible=False)]
        updates.append(mcqs)
        return updates

    # Output order must mirror on_generate(): status, then radio/result for
    # each slot, then the quiz state.
    slot_outputs = [component for pair in zip(radios, results) for component in pair]
    generate_btn.click(
        on_generate,
        inputs=[text_input, file_input, num_q],
        outputs=[status_md, *slot_outputs, quiz_data_state]
    )

    def check_single(choice, quiz_data, idx):
        """Return feedback text for one question.

        Gives an empty string when there is no quiz, ``idx`` is past the last
        question, or nothing is selected. Otherwise reports whether ``choice``
        equals the stored answer of question ``idx``.
        """
        has_question = bool(quiz_data) and idx < len(quiz_data)
        if not has_question or choice is None:
            return ""
        expected = quiz_data[idx]["answer"]
        if choice == expected:
            return "✅ Correct!"
        return f"❌ Wrong! **Correct:** {expected}"

    # Give instant feedback whenever the selection of a question changes.
    for i, (radio, feedback) in enumerate(zip(radios, results)):
        radio.change(
            # idx=i freezes the slot number for this handler; a plain closure
            # would see only the final loop value.
            lambda choice, data, idx=i: check_single(choice, data, idx),
            inputs=[radio, quiz_data_state],
            outputs=feedback
        )

    def score_all(*args):
        """Return a Markdown report scoring the whole quiz.

        ``args`` is the selected value of every radio followed by the quiz
        data as the last item. Questions without a matching selection count as
        skipped. The report has one line per question, then the final score.
        """
        *selections, quiz = args
        if not quiz:
            return "ℹ️ Generate a quiz first."

        hits = 0
        report = []
        for number, item in enumerate(quiz, start=1):
            picked = selections[number - 1] if number <= len(selections) else None
            expected = item["answer"]
            if picked == expected:
                hits += 1
                report.append(f"**Q{number}.** ✅ Correct")
            elif picked is None:
                report.append(f"**Q{number}.** ⏭️ Skipped (Correct: **{expected}**)")
            else:
                report.append(f"**Q{number}.** ❌ Wrong (Your: _{picked}_ | Correct: **{expected}**)")
        report.append(f"\n### 🎯 Final Score: **{hits}/{len(quiz)}**")
        return "\n".join(report)

    # score_all() receives every radio's current value, then the quiz data last.
    submit_btn.click(
        score_all,
        inputs=[*radios, quiz_data_state],
        outputs=score_md
    )


Device set to use cpu


In [None]:
!pip install spacy transformers sentencepiece PyPDF2 gradio
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m56.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [None]:
# Standalone banner: created outside any `with gr.Blocks()` context, so running
# this cell only yields the component object (displayed as the cell's repr).
gr.Markdown("""
# 🌟 AI-Powered MCQ Generator
Upload a PDF or Paste Text → Get **Instant Quiz Questions**!
""")

<gradio.components.markdown.Markdown at 0x7b45a175f710>

In [None]:
import random
import re
import PyPDF2
import spacy
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# ==============================
# Load SpaCy model
# ==============================
# English pipeline used by extract_keywords() for noun chunks and POS tags.
# The "en_core_web_sm" package has to be installed beforehand (the notebook
# does this with `python -m spacy download en_core_web_sm`).
nlp = spacy.load("en_core_web_sm")

# ==============================
# Load Question Generation Model
# ==============================
# Seq2seq checkpoint wrapped in a text2text-generation pipeline. generate_mcqs()
# feeds it the prompts built by safe_highlight_context(): the text
# "generate question: " followed by a context whose answer is wrapped in <hl>
# markers.
QG_MODEL_NAME = "valhalla/t5-small-qg-hl"
tokenizer = AutoTokenizer.from_pretrained(QG_MODEL_NAME)
qg_model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL_NAME)
qg_pipeline = pipeline("text2text-generation", model=qg_model, tokenizer=tokenizer)

# ==============================
# Utility: Read PDF
# ==============================
def read_pdf(pdf_path: str) -> str:
    """Return the text of the PDF at ``pdf_path`` as a single line.

    Pages whose text extraction fails or yields nothing are skipped. All runs
    of whitespace are collapsed to one space and the result is stripped. A
    falsy ``pdf_path`` gives an empty string without touching the filesystem.
    """
    if not pdf_path:
        return ""

    page_texts = []
    with open(pdf_path, "rb") as handle:
        for page in PyPDF2.PdfReader(handle).pages:
            try:
                extracted = page.extract_text()
            except Exception:
                # Best-effort: one unreadable page must not discard the rest.
                continue
            if extracted:
                page_texts.append(extracted)

    return re.sub(r"\s+", " ", " ".join(page_texts)).strip()

# ==============================
# Extract keywords
# ==============================
def extract_keywords(text: str, top_n: int = 5):
    """Return up to ``top_n`` key phrases found in ``text``.

    Multi-word noun chunks are preferred, in document order. If they do not
    fill the quota, single alphabetic NOUN/PROPN tokens are added. Duplicates
    are dropped case-insensitively; the first spelling seen is kept.
    """
    doc = nlp(text)
    seen = set()
    keywords = []

    def _collect(candidates):
        # Append unseen candidates until the quota is reached.
        for candidate in candidates:
            lowered = candidate.lower()
            if lowered not in seen:
                seen.add(lowered)
                keywords.append(candidate)
            if len(keywords) >= top_n:
                break

    # Pass 1: noun chunks made of more than one word.
    _collect(
        chunk.text.strip()
        for chunk in doc.noun_chunks
        if len(chunk.text.split()) > 1
    )

    # Pass 2: single nouns / proper nouns, only when still short of the quota.
    if len(keywords) < top_n:
        _collect(
            token.text
            for token in doc
            if token.pos_ in {"PROPN", "NOUN"} and token.is_alpha
        )

    return keywords[:top_n]

# ==============================
# Build context for QG
# ==============================
def safe_highlight_context(text: str, answer: str, max_len: int = 600):
    """Build the question-generation prompt for ``answer`` within ``text``.

    The first case-insensitive occurrence of ``answer`` is wrapped as
    ``<hl> answer <hl>``; when ``answer`` does not occur, it is appended to the
    end of ``text`` in that form. The context is then trimmed to a window of
    roughly ``max_len`` characters centred on the opening marker.

    Args:
        text: Source passage.
        answer: Phrase to highlight; matched literally, ignoring case.
        max_len: Target window size in characters. The window is widened when
            needed so the closing ``<hl>`` marker is never cut off.

    Returns:
        ``"generate question: "`` followed by the trimmed, highlighted context.
    """
    open_tag, close_tag = "<hl> ", " <hl>"
    match = re.search(re.escape(answer), text, re.IGNORECASE)
    if match:
        start, end = match.span()
        context = text[:start] + open_tag + text[start:end] + close_tag + text[end:]
        hl_start = start
        hl_end = end + len(open_tag) + len(close_tag)
    else:
        context = text + f" <hl> {answer} <hl>"
        hl_start = len(text) + 1  # skip the separating space
        hl_end = len(context)

    # Centre on the marker we inserted ourselves rather than searching for
    # "<hl>" again: the passage may already contain that literal text.
    half = max_len // 2
    left = max(0, hl_start - half)
    # Keep the whole highlighted span even when the answer is longer than half
    # the window; a prompt with a single <hl> marker is malformed.
    right = min(len(context), max(hl_start + half, hl_end))
    return "generate question: " + context[left:right]

# ==============================
# Generate MCQs
# ==============================
def generate_mcqs(text: str, num_questions: int = 5, num_options: int = 4):
    """Build multiple-choice questions from ``text``.

    Answers are the key phrases returned by ``extract_keywords``. Each question
    is generated by ``qg_pipeline`` from the prompt built by
    ``safe_highlight_context``. Distractors are the other answers, padded from
    a small pool of generic words when there are too few.

    Args:
        text: Source passage. Only the first 5000 characters are used.
        num_questions: Maximum number of questions to produce.
        num_options: Desired number of options per question, answer included.
            A question gets fewer options when not enough distinct distractors
            are available.

    Returns:
        A list of dicts with keys ``"question"``, ``"options"`` (shuffled) and
        ``"answer"``. Empty when ``text`` is blank or yields no key phrases.
    """
    text = (text or "").strip()
    if not text:
        return []
    text = text[:5000]
    answers = extract_keywords(text, top_n=num_questions)
    if not answers:
        return []
    fallback_pool = ["Technology", "Science", "Education", "History", "Geography", "Mathematics", "Biology"]
    wanted = max(0, num_options - 1)
    mcqs = []
    for ans in answers:
        try:
            prompt = safe_highlight_context(text, ans)
            out = qg_pipeline(prompt, max_length=72, do_sample=False, num_return_sequences=1)
            question = out[0]["generated_text"].strip()
        except Exception:
            # Best-effort: a model failure degrades to the generic question
            # below instead of aborting the whole quiz.
            question = ""
        if not question:
            question = f"What is '{ans}'?"
        elif not question.endswith("?"):
            question += "?"

        distractors = [a for a in answers if a != ans][:wanted]
        if len(distractors) < wanted:
            # Pad from the pool in one bounded step. Retrying random picks in a
            # loop never terminates once the pool has no unused word left.
            taken = {d.lower() for d in distractors} | {ans.lower()}
            spare = [word for word in fallback_pool if word.lower() not in taken]
            random.shuffle(spare)
            distractors += spare[: wanted - len(distractors)]

        options = distractors + [ans]
        random.shuffle(options)
        mcqs.append({
            "question": question,
            "options": options,
            "answer": ans
        })
    return mcqs

# ==============================
# Gradio UI
# ==============================
# Upper bound for the question slider and the number of question slots built below.
MAX_Q = 10
with gr.Blocks(fill_height=True) as demo:
    gr.Markdown("## 📝 Automatic MCQ Generator (Paste Text OR Upload PDF)")

    # Input row: pasted text on the left, PDF upload on the right.
    with gr.Row():
        text_input = gr.Textbox(
            label="✍️ Paste your text here",
            lines=8,
            placeholder="Paste text here... (or upload a PDF on the right)"
        )
        file_input = gr.File(
            label="📂 Or upload a PDF",
            type="filepath",
            # Only PDFs can be parsed by read_pdf(), so other file types are
            # not offered in the upload dialog.
            file_types=[".pdf"]
        )

    num_q = gr.Slider(1, MAX_Q, step=1, value=5, label="How many questions?")
    generate_btn = gr.Button("🚀 Generate Quiz", variant="primary")

    status_md = gr.Markdown()
    # Holds the generated quiz (list of question dicts) between events.
    quiz_data_state = gr.State([])

    # All MAX_Q question slots are created up front and start hidden; the
    # generate handler fills and reveals as many as it needs.
    radios, results = [], []
    with gr.Column():
        for i in range(MAX_Q):
            q_radio = gr.Radio(choices=[], label=f"Q{i+1}.", interactive=True, visible=False)
            q_result = gr.Markdown(visible=False)
            radios.append(q_radio)
            results.append(q_result)

    submit_btn = gr.Button("✅ Submit All")
    score_md = gr.Markdown()

    # ==============================
    # Handlers
    # ==============================
    def on_generate(input_text, pdf_path, n_questions):
        """Generate a quiz and return updates for every output component.

        An uploaded PDF takes precedence over pasted text. The returned list
        holds, in order: the status message update, a (radio, result) update
        pair for each of the MAX_Q slots, and the quiz data for the state.

        Args:
            input_text: Text pasted by the user; may be empty or None.
            pdf_path: Path of the uploaded file, or a falsy value.
            n_questions: Number of questions requested (slider value).
        """
        if pdf_path:
            try:
                text = read_pdf(pdf_path)
            except Exception:
                # An unreadable or corrupt upload is treated as "no text" so
                # the user sees the status message below, not a stack trace.
                text = ""
        else:
            text = input_text or ""
        mcqs = generate_mcqs(text, num_questions=int(n_questions)) or []

        if mcqs:
            status = f"✅ Generated {len(mcqs)} questions. Select answers below ⬇️"
        else:
            status = "❌ Please paste text or upload a readable PDF."

        updates = [gr.update(value=status, visible=True)]
        for i in range(MAX_Q):
            if i < len(mcqs):
                q = mcqs[i]
                updates += [
                    gr.update(choices=q["options"], label=f"Q{i+1}. {q['question']}", value=None, visible=True),
                    gr.update(value="", visible=True)
                ]
            else:
                # Unused slot: reset to its initial, hidden appearance.
                updates += [gr.update(choices=[], label=f"Q{i+1}.", value=None, visible=False),
                            gr.update(value="", visible=False)]
        updates.append(mcqs)
        return updates

    # Output order must mirror on_generate(): status, then radio/result for
    # each slot, then the quiz state.
    slot_outputs = [component for pair in zip(radios, results) for component in pair]
    generate_btn.click(
        on_generate,
        inputs=[text_input, file_input, num_q],
        outputs=[status_md, *slot_outputs, quiz_data_state]
    )

    def check_single(choice, quiz_data, idx):
        """Return feedback text for one question.

        Gives an empty string when there is no quiz, ``idx`` is past the last
        question, or nothing is selected. Otherwise reports whether ``choice``
        equals the stored answer of question ``idx``.
        """
        has_question = bool(quiz_data) and idx < len(quiz_data)
        if not has_question or choice is None:
            return ""
        expected = quiz_data[idx]["answer"]
        if choice == expected:
            return "✅ Correct!"
        return f"❌ Wrong! **Correct:** {expected}"

    # Give instant feedback whenever the selection of a question changes.
    for i, (radio, feedback) in enumerate(zip(radios, results)):
        radio.change(
            # idx=i freezes the slot number for this handler; a plain closure
            # would see only the final loop value.
            lambda choice, data, idx=i: check_single(choice, data, idx),
            inputs=[radio, quiz_data_state],
            outputs=feedback
        )

    def score_all(*args):
        """Return a Markdown report scoring the whole quiz.

        ``args`` is the selected value of every radio followed by the quiz
        data as the last item. Questions without a matching selection count as
        skipped. The report has one line per question, then the final score.
        """
        *selections, quiz = args
        if not quiz:
            return "ℹ️ Generate a quiz first."

        hits = 0
        report = []
        for number, item in enumerate(quiz, start=1):
            picked = selections[number - 1] if number <= len(selections) else None
            expected = item["answer"]
            if picked == expected:
                hits += 1
                report.append(f"**Q{number}.** ✅ Correct")
            elif picked is None:
                report.append(f"**Q{number}.** ⏭️ Skipped (Correct: **{expected}**)")
            else:
                report.append(f"**Q{number}.** ❌ Wrong (Your: _{picked}_ | Correct: **{expected}**)")
        report.append(f"\n### 🎯 Final Score: **{hits}/{len(quiz)}**")
        return "\n".join(report)

    submit_btn.click(
        score_all,
        inputs=radios + [quiz_data_state],
        outputs=score_md
    )
    if __name__ == "__main__":
        demo.launch(server_name="0.0.0.0")

        gr.Markdown("""
# 🌟 AI-Powered MCQ Generator
Upload a PDF or Paste Text → Get **Instant Quiz Questions**!
""")

Device set to use cpu


It looks like you are running Gradio on a hosted Jupyter notebook, which requires `share=True`. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://f66ac5358c5f496322.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


In [None]:
# Standalone banner: created outside any `with gr.Blocks()` context, so running
# this cell only yields the component object (displayed as the cell's repr).
gr.Markdown("""
# 🌟 AI-Powered MCQ Generator
Upload a PDF or Paste Text → Get **Instant Quiz Questions**!
""")


<gradio.components.markdown.Markdown at 0x7b45a140cc50>