In [None]:
!pip install gradio transformers sentence-transformers PyPDF2 python-docx torch --quiet

In [None]:
!pip install python-docx python-pptx PyPDF2

In [None]:
# ==========================================
# 🧠 AI MCQ + Summarizer + Copilot (CPU Safe Mode) — FIXED
# ==========================================

# Install dependencies only if they are missing: uncomment the line below and run it in a notebook cell.
#!pip install -q gradio python-docx PyPDF2 transformers sentence-transformers torch nest_asyncio scikit-learn

import gradio as gr
import os, re, random, torch, docx, PyPDF2, warnings, asyncio, nest_asyncio
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
from sentence_transformers import SentenceTransformer, util
from sklearn.feature_extraction.text import TfidfVectorizer

# ----------------------- FIX EVENT LOOP -----------------------
# Silence every warning category so library noise does not flood the output.
warnings.filterwarnings("ignore")
# Patch asyncio so an already-running loop (as in notebooks) can be re-entered.
nest_asyncio.apply()
# Install a brand-new event loop as the current one for this thread.
asyncio.set_event_loop(asyncio.new_event_loop())

# ----------------------- FORCE CPU -----------------------
# The plain string below is what gets passed to the models further down.
device = "cpu"
# NOTE(review): the torch.device object created here is discarded, so this
# statement has no lasting effect on its own.
torch.device(device)
print("‚úÖ Running in CPU Safe Mode")

# ----------------------- LOAD MODELS ------------------------
# Question-generation seq2seq checkpoint; tokenizer and model are module-level
# globals used by gen_mcqs_from_text.
QG_MODEL = "iarfmoose/t5-base-question-generator"
qg_tokenizer = AutoTokenizer.from_pretrained(QG_MODEL)
qg_model = AutoModelForSeq2SeqLM.from_pretrained(QG_MODEL).to(device)

# Sentence-embedding model used to rank document sentences against questions.
emb_model = SentenceTransformer("all-MiniLM-L6-v2", device=device)
# Abstractive summarization pipeline used by summarize_text_safe.
# NOTE(review): device=-1 is presumably the pipeline's "run on CPU" value — confirm
# against the installed transformers version.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn", device=-1)

# ----------------------- HELPERS ------------------------
def clean_text(txt):
    """Return *txt* with whitespace and repeated question marks normalized.

    Every run of whitespace becomes a single space, every run of two or more
    ``?`` becomes a single ``?``, and leading/trailing whitespace is removed.
    Falsy input (``None``, ``""``) yields an empty string.
    """
    source = txt if txt else ""
    single_spaced = re.sub(r"\s+", " ", source)
    return re.sub(r"[?]{2,}", "?", single_spaced).strip()

def extract_sentences(text):
    """Split *text* into sentences.

    A sentence boundary is any run of whitespace (spaces, tabs, newlines) that
    directly follows ``.``, ``!`` or ``?``. This matches the splitting used by
    the summarizer helpers in this file, so text pasted with one sentence per
    line is split correctly instead of being treated as a single sentence.

    Args:
        text: The text to split; ``None`` is treated as an empty string.

    Returns:
        A list of stripped, non-empty sentences. When no sentence can be
        extracted (empty or whitespace-only input) the list contains the
        input text itself, so callers always receive at least one element.
    """
    text = text or ""
    sentences = []
    for part in re.split(r"(?<=[.!?])\s+", text):
        part = part.strip()
        if part:
            sentences.append(part)
    return sentences or [text]

def read_file(file):
    """Read an uploaded TXT, DOCX, or PDF file and return its cleaned text.

    The upload is resolved to a filesystem path first and then opened from
    disk. This works both when the caller passes a path (``str`` or
    ``os.PathLike``, including ``str`` subclasses that also carry ``.name``)
    and when it passes a temp-file style object exposing the path as
    ``.name``. Relying on ``file.read()`` would fail for path-like uploads.

    Args:
        file: The uploaded file (path or object with a ``name`` attribute).
            Falsy values mean that nothing was uploaded.

    Returns:
        The extracted text, whitespace-normalized via ``clean_text`` and
        truncated to 8000 characters; ``""`` when no file is given; or a
        warning message when the extension is unsupported or reading fails.
    """
    if not file:
        return ""

    # Resolve the upload to a path on disk, whatever shape it arrived in.
    if isinstance(file, (str, os.PathLike)):
        path = os.fspath(file)
    else:
        path = getattr(file, "name", None)
    ext = os.path.splitext(path)[1].lower() if isinstance(path, str) else ""

    try:
        if ext == ".txt":
            # Read raw bytes and decode leniently so odd encodings never crash the UI.
            with open(path, "rb") as handle:
                text = handle.read().decode("utf-8", errors="ignore")
        elif ext == ".docx":
            document = docx.Document(path)
            text = "\n".join(p.text for p in document.paragraphs)
        elif ext == ".pdf":
            reader = PyPDF2.PdfReader(path)
            # extract_text() may return None for pages without a text layer.
            text = " ".join(page.extract_text() or "" for page in reader.pages)
        else:
            return "‚ö†Ô∏è Unsupported file type. Use .txt, .docx, or .pdf"
    except Exception as e:
        # UI boundary: report the failure in the text box instead of raising.
        return f"‚ö†Ô∏è Error reading file: {e}"
    return clean_text(text)[:8000]

# ------------------ Extractive fallback (TF-IDF) ------------------
def extractive_summary(text, n_sentences=3):
    """Pick up to *n_sentences* sentences from *text* as an extractive summary.

    Sentences are split at whitespace following ``.``, ``!`` or ``?``. Texts
    whose joined sentences are shorter than 200 characters simply return their
    leading sentences. Longer texts are scored by the sum of each sentence's
    TF-IDF weights and the top-scoring sentences are returned in their
    original order. If scoring fails for any reason, the leading sentences are
    returned instead.

    Returns an empty string when *text* contains no sentences.
    """
    candidates = []
    for piece in re.split(r'(?<=[.!?])\s+', text):
        piece = piece.strip()
        if piece:
            candidates.append(piece)
    if not candidates:
        return ""

    # Clamp the requested count to the range [1, number of sentences].
    keep = min(len(candidates), max(1, int(n_sentences)))
    leading = " ".join(candidates[:keep])
    if len(" ".join(candidates)) < 200:
        return leading

    try:
        tfidf = TfidfVectorizer(stop_words="english").fit_transform(candidates)
        weights = tfidf.sum(axis=1).A1
        best_first = weights.argsort()[::-1]
        # Re-sort the winning indices so the summary reads in document order.
        chosen = sorted(best_first[:keep])
        return " ".join(candidates[i] for i in chosen)
    except Exception:
        return leading

# ------------------ Safe abstractive summarizer with chunking ------------------
def summarize_text_safe(text, mode="Brief"):
    """Summarize *text* abstractively, degrading gracefully on failure.

    Steps, in order:
      1. Reject empty input and input shorter than 20 words with a warning.
      2. For text below roughly four characters per model token, summarize it
         in one call; if that call raises, retry on a shorter prefix.
      3. Otherwise (or if both attempts failed) group sentences into
         character-limited chunks, summarize each chunk, and fall back to an
         extractive summary for any chunk the model cannot handle.
      4. Join the chunk summaries; condense extractively when the result
         exceeds 400 words, or summarize the original text extractively when
         nothing was produced.

    ``mode="Detailed"`` requests longer summaries; any other value uses the
    brief settings.
    """
    text = (text or "").strip()
    if not text:
        return "‚ö†Ô∏è Please enter or upload text."
    if len(text.split()) < 20:
        return "‚ö†Ô∏è Please enter longer text (at least 2‚Äì3 sentences)."

    # Token budget reported by the tokenizer, with a sane default when the
    # value is missing or implausible.
    try:
        token_limit = getattr(summarizer.tokenizer, "model_max_length", 1024)
        if token_limit is None or token_limit > 4096 or token_limit <= 0:
            token_limit = 1024
    except Exception:
        token_limit = 1024

    detailed = mode == "Detailed"
    gen_kwargs = dict(
        max_length=300 if detailed else 150,
        min_length=80 if detailed else 30,
        do_sample=False,
        truncation=True,
    )

    cleaned = clean_text(text)

    # Fast path: one call for reasonably short text, one retry on a prefix.
    try:
        if len(cleaned) < token_limit * 4:
            return summarizer(cleaned, **gen_kwargs)[0]["summary_text"]
    except Exception:
        try:
            prefix = cleaned[: token_limit * 3]
            return summarizer(prefix, **gen_kwargs)[0]["summary_text"]
        except Exception:
            # Deliberate best effort: fall through to chunked summarization.
            pass

    # Group whole sentences into chunks of roughly `char_budget` characters.
    char_budget = token_limit * 3  # heuristic
    chunks, pending, pending_chars = [], [], 0
    for sentence in re.split(r'(?<=[.!?])\s+', cleaned):
        if not sentence.strip():
            continue
        pending.append(sentence)
        pending_chars += len(sentence)
        if pending_chars >= char_budget:
            chunks.append(" ".join(pending))
            pending, pending_chars = [], 0
    if pending:
        chunks.append(" ".join(pending))

    # Summarize chunk by chunk; a failing chunk gets an extractive summary.
    partials = []
    for chunk in chunks:
        try:
            partial = summarizer(chunk, **gen_kwargs)[0]["summary_text"]
        except Exception:
            partial = extractive_summary(chunk, n_sentences=2)
        partials.append(partial)

    merged = " ".join(partials)
    if len(merged.split()) > 400:
        return extractive_summary(merged, n_sentences=5)
    if not merged.strip():
        return extractive_summary(text, n_sentences=4)
    return merged

# ----------------------- MCQ GENERATOR ------------------------
def gen_mcqs_from_text(text, n_q=5):
    """Generate multiple-choice questions from *text* as a Markdown string.

    Questions are sampled from the question-generation model using the first
    15 sentences of the text as the passage. For each question the document
    sentences are ranked by embedding similarity: the best match is presented
    as the answer and the next matches as distractors, in shuffled order.

    Args:
        text: Source document text; needs at least 20 words.
        n_q: Number of questions to sample (converted with ``int``).

    Returns:
        Markdown with one block per question (question, numbered options,
        answer), or a warning message when the input is too short, nothing
        could be generated, or an error occurred. Each question lists up to
        four distinct options; texts with fewer than four distinct sentences
        yield fewer options rather than repeated ones.
    """
    text = (text or "").strip()
    if not text or len(text.split()) < 20:
        return "‚ö†Ô∏è Please provide more text (upload or paste document)."

    try:
        sentences = extract_sentences(text)
        passage = " ".join(sentences[:15])
        inputs = qg_tokenizer.encode(f"generate questions from: {passage}",
                                     return_tensors="pt", truncation=True)
        outputs = qg_model.generate(inputs, max_length=128, num_return_sequences=int(n_q),
                                    do_sample=True, top_p=0.92, temperature=0.8)
        decoded = (clean_text(qg_tokenizer.decode(o, skip_special_tokens=True)) for o in outputs)
        # Sampling may return the same question twice or an empty string;
        # keep the first occurrence of each non-empty question.
        questions = [q for q in dict.fromkeys(decoded) if q]
        if not questions:
            return "‚ö†Ô∏è No questions generated from the given text."

        # The sentence embeddings are identical for every question, so encode
        # the corpus once and search all questions in a single batch.
        corpus_emb = emb_model.encode(sentences, convert_to_tensor=True)
        question_emb = emb_model.encode(questions, convert_to_tensor=True)
        hits_per_question = util.semantic_search(question_emb, corpus_emb, top_k=4)

        blocks = []
        for question, hits in zip(questions, hits_per_question):
            # Hits arrive best-first. De-duplicate while keeping that order so
            # the answer never reappears as one of its own distractors.
            ranked = (clean_text(sentences[h["corpus_id"]]) for h in hits)
            options = [opt for opt in dict.fromkeys(ranked) if opt]
            if not options:
                continue
            correct = options[0]
            random.shuffle(options)

            lines = [f"**Q:** {question}"]
            lines.extend(f"{i}. {opt}" for i, opt in enumerate(options, 1))
            lines.append(f"‚úÖ **Answer:** {correct}\n\n")
            blocks.append("\n".join(lines))
        return "".join(blocks) or "‚ö†Ô∏è No questions generated from the given text."
    except Exception as e:
        # UI boundary: surface the failure as text instead of raising.
        return f"‚ö†Ô∏è Error generating MCQs: {e}"

# ----------------------- COPILOT CHATBOT ------------------------
def copilot_reply(message, history):
    """Return a canned chat reply chosen by keywords in *message*.

    A blank message gets a greeting. Otherwise the lower-cased message is
    checked for ``"mcq"`` first and ``"summary"`` second; the first keyword
    found decides the reply, and a generic reply is used when neither occurs.
    *history* is accepted for the chat interface but not used.
    """
    if not message.strip():
        return "Hi üëã Ask me anything!"

    lowered = message.lower()
    # Order matters: "mcq" takes precedence over "summary".
    keyword_replies = (
        ("mcq", "üß© I can generate MCQs! Go to the MCQ Generator tab and upload your document."),
        ("summary", "üìù I can summarize text ‚Äî go to the Summarizer tab or paste text here."),
    )
    for keyword, reply in keyword_replies:
        if keyword in lowered:
            return reply
    return "ü§ñ Copilot: I can summarize, generate MCQs, or explain your content!"

# ----------------------- UI ------------------------
def build_app():
    """Build and return the Gradio Blocks app (the app is not launched here).

    The layout has three tabs: an MCQ generator (file upload, extracted text,
    question count), a summarizer (text, Brief/Detailed choice), and a chat
    tab backed by ``copilot_reply``. A floating button is appended that
    switches to the chat tab when clicked.
    """
    # Soft theme with violet accents; the overrides give a dark background
    # with light text.
    theme = gr.themes.Soft(primary_hue="violet", neutral_hue="gray").set(
        body_background_fill="#0f172a",
        body_text_color="#e2e8f0",
        block_background_fill="#1e293b"
    )

    with gr.Blocks(theme=theme, title="AI MCQ Generator APP") as demo:
        gr.Markdown("<h1 style='text-align:center;'>AI MCQ Generator & Summarizer</h1>")

        # ---------- MCQ GENERATOR TAB ----------
        with gr.Tab("MCQ Generator"):
            with gr.Row():
                file_input = gr.File(label="Upload File (.txt, .docx, .pdf)")
                n_questions = gr.Slider(1, 100, value=5, step=1, label="Number of Questions")
            # Filled from the uploaded file; the same box feeds the generator.
            text_box = gr.Textbox(label="Extracted Text", lines=10, placeholder="File text will appear here...")
            gen_btn = gr.Button("üöÄ Generate MCQs", variant="primary")
            mcq_output = gr.Markdown(label="Generated Questions")

            # Uploading (or clearing) a file re-runs read_file into the text box;
            # the button sends the text box content and slider value to the generator.
            file_input.change(fn=read_file, inputs=file_input, outputs=text_box)
            gen_btn.click(fn=gen_mcqs_from_text, inputs=[text_box, n_questions], outputs=mcq_output, api_name="generate_mcqs")

        # ---------- SUMMARIZER TAB ----------
        with gr.Tab("üìù Summarizer"):
            input_text = gr.Textbox(lines=8, label="Enter Text")
            mode_radio = gr.Radio(["Brief", "Detailed"], value="Brief", label="Summary Type")
            summarize_btn = gr.Button("‚ú® Summarize", variant="primary")
            summary_output = gr.Textbox(lines=8, label="Summary")
            # The lambda forwards the radio value as the `mode` keyword argument.
            summarize_btn.click(fn=lambda t, m: summarize_text_safe(t, mode=m), inputs=[input_text, mode_radio], outputs=summary_output, api_name="summarize")

        # ---------- COPILOT TAB ----------
        with gr.Tab("ü§ñ Copilot"):
            # NOTE(review): type="messages" presumably selects the message-dict
            # history format — confirm against the installed Gradio version.
            gr.ChatInterface(
                fn=copilot_reply,
                type="messages",
                title="ü§ñ Copilot Chat",
                description="Ask me anything or get help summarizing or generating MCQs!"
            )

        # ---------- FLOATING CHAT BUTTON ----------
        # Fixed-position round button; its inline onclick handler clicks every
        # <button> on the page whose text contains 'Copilot'.
        gr.HTML("""
        <style>
        .copilot-btn {
            position: fixed; bottom: 25px; right: 25px;
            background: linear-gradient(135deg,#4f46e5,#7c3aed);
            color: white; border-radius: 50%; width: 60px; height: 60px;
            display:flex; align-items:center; justify-content:center;
            font-size:28px; cursor:pointer; box-shadow:0 4px 8px rgba(0,0,0,0.3);
            transition:all 0.3s;
        }
        .copilot-btn:hover {transform:scale(1.1);}
        </style>
        <div class='copilot-btn' onclick="
        const tabs=document.querySelectorAll('button');
        tabs.forEach(btn=>{
            if(btn.innerText.includes('Copilot')) btn.click();
        });
        ">üí¨</div>
        """)

    return demo

# ----------------------- LAUNCH ------------------------
# Build the UI once when this cell/script runs.
app = build_app()
# NOTE(review): share=True presumably requests a publicly reachable share link in
# addition to the local server — confirm this exposure is intended before deploying.
app.launch(share=True, debug=False)
