In [1]:
# Authenticate this runtime with the Hugging Face Hub before any model download.
# NOTE(review): presumably required because the checkpoint loaded later needs an
# authenticated account — confirm against the model's access settings.
from huggingface_hub import login
login()  # paste your HF token when prompted


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [3]:
# =========================================
# ‚ö°Ô∏è CBC Quiz Generator API (Colab + Ngrok)
# Using Mistral-7B-Instruct for faster generation
# =========================================
!pip install flask flask-cors pyngrok transformers accelerate torch peft bitsandbytes --quiet

import getpass
import os

from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import torch, time, re, json
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# --------------------------
# 1. Ngrok Authentication
# --------------------------
# Never store the tunnel credential in the notebook: anyone who can read the
# file (or its git history) could open tunnels on the account. The token is
# taken from the NGROK_AUTHTOKEN environment variable, falling back to a hidden
# interactive prompt. Any token previously committed here should be revoked.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTHTOKEN") or getpass.getpass("Ngrok auth token: ")
if not NGROK_AUTH_TOKEN.strip():
    raise RuntimeError("An ngrok auth token is required to expose the API.")
ngrok.set_auth_token(NGROK_AUTH_TOKEN.strip())

# --------------------------
# 2. Load Model & Tokenizer
# --------------------------
MODEL_NAME = "mistralai/Mistral-7B-Instruct-v0.2"

# Quantization settings: 4-bit NF4 weights, double quantization enabled,
# bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

print("‚è≥ Loading tokenizer...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)
# generate() is later called with pad_token_id, so make sure one exists by
# reusing the end-of-sequence token when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

print("‚è≥ Loading 4-bit Mistral model...")
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    device_map="auto",
    quantization_config=bnb_config,
)
# Switch to evaluation mode; this server only runs inference.
model.eval()
print("‚úÖ Mistral model loaded successfully!")

# =========================================
# ⚙️ CBC Quiz Generator Logic
# =========================================

# System instructions sent with every generation request. They define the JSON
# schema each question object must follow; validate_item() enforces the same
# keys after generation. The range dashes and "≤" signs are real Unicode
# characters so the model is not fed mis-encoded byte sequences.
SYSTEM_PROMPT_BATCH = """You generate a SET of CBC-aligned MCQs for Grades 4–6 in Kenya.
Return ONLY a JSON array of **10–15 valid question objects**, no prose, no code fences.
The array MUST end with a closing square bracket ].

Each element in the array must strictly follow this schema:
{
  "subject": "<string>",
  "grade": <int>,
  "topic": "<string>",
  "bloom_level": <int 1..6>,
  "question": "<string>",
  "options": {"A":"<string>","B":"<string>","C":"<string>","D":"<string>"},
  "answer": "<A|B|C|D>",
  "rationale": "<string>"
}

Rules:
- Use realistic Kenyan everyday contexts and SI units.
- Each question must have only one correct answer and 3 plausible distractors.
- Keep each question ≤25 words, each option ≤12 words, rationale ≤18 words.
- Mix bloom levels 1–6 across items.
- Ensure variety; avoid duplicate or near-identical questions.
- Do not include explanations, commentary, or markdown — just pure JSON array.
End your output with: ]
"""

# Per-request user message; filled with str.format(subject=..., grade=..., topic=...).
USER_TEMPLATE_BATCH = """Create a quiz of 10–15 multiple-choice questions for:
Subject: {subject}
Grade: {grade}
Topic: {topic}

If topic is out of CBC Grades 4–6 scope, return []."""

def extract_json_array(text: str):
    """Return the first JSON array of objects embedded in ``text``, or ``None``.

    Model output may wrap the array in prose, so every position where ``[`` is
    followed (after optional whitespace) by ``{`` is tried as the start of a
    JSON document. Parsing is delegated to the JSON decoder rather than to
    manual bracket counting, so ``[`` / ``]`` characters inside string values
    (e.g. a question mentioning "[A]") no longer cut the array short.

    Args:
        text: Raw decoded model output.

    Returns:
        The parsed list, or ``None`` when ``text`` is empty or contains no
        complete, valid JSON array of objects.
    """
    if not text:
        return None
    decoder = json.JSONDecoder()
    for candidate in re.finditer(r"\[\s*{", text):
        try:
            # raw_decode stops at the end of the first complete JSON value and
            # ignores whatever trails it (closing prose, stray tokens, ...).
            parsed, _ = decoder.raw_decode(text[candidate.start():])
        except ValueError:
            # Not valid JSON from this position (or truncated); try the next one.
            continue
        if isinstance(parsed, list):
            return parsed
    return None

# Keys every generated question object must contain.
REQ_KEYS = {"subject","grade","topic","bloom_level","question","options","answer","rationale"}

# Labels the four answer options must use.
_OPTION_LABELS = {"A", "B", "C", "D"}

def validate_item(it: dict, subject: str, grade: int, topic: str):
    """Check one generated question against the schema and the request.

    Args:
        it: Candidate question object parsed from the model output.
        subject: Subject the quiz was requested for; must match exactly.
        grade: Grade the quiz was requested for; compared as an integer.
        topic: Topic the quiz was requested for; must match exactly.

    Returns:
        ``(True, "ok")`` when the item is acceptable, otherwise
        ``(False, reason)``. Malformed field values (a non-numeric grade, a
        boolean bloom level, a non-string answer) are reported as failures
        instead of raising, so one bad item cannot abort a whole batch.
    """
    if not isinstance(it, dict):
        return False, "Not a JSON object"
    missing = REQ_KEYS - set(it.keys())
    if missing:
        return False, f"Missing keys: {missing}"

    # The model may emit the grade as "5", null or free text; anything that
    # cannot be read as an integer simply does not match the request.
    try:
        grade_matches = int(it["grade"]) == int(grade)
    except (TypeError, ValueError):
        grade_matches = False
    if it["subject"] != subject or not grade_matches or it["topic"] != topic:
        return False, "Mismatched subject/grade/topic"

    bl = it["bloom_level"]
    # bool is a subclass of int, so JSON true/false must be rejected explicitly.
    if isinstance(bl, bool) or not isinstance(bl, int) or not (1 <= bl <= 6):
        return False, "Invalid bloom_level"

    opts = it["options"]
    if not isinstance(opts, dict) or set(opts.keys()) != _OPTION_LABELS:
        return False, "Options must be A,B,C,D"
    if any((not isinstance(v, str) or not v.strip()) for v in opts.values()):
        return False, "Empty option text"

    answer = it["answer"]
    # The isinstance check keeps unhashable values (lists, dicts) from raising
    # TypeError in the set membership test.
    if not isinstance(answer, str) or answer not in _OPTION_LABELS:
        return False, "Answer must be A/B/C/D"

    question = it["question"]
    if not isinstance(question, str) or not question.strip():
        return False, "Empty question"
    return True, "ok"

@torch.inference_mode()
def generate_quiz(subject: str, grade: int, topic: str,
                  max_new_tokens: int = 1200, tries: int = 2) -> list[dict]:
    """Generate a batch of CBC-aligned MCQs with a single prompt.

    Args:
        subject: Subject name inserted into the prompt and used for validation.
        grade: Grade inserted into the prompt and used for validation.
        topic: Topic inserted into the prompt and used for validation.
        max_new_tokens: Upper bound on generated tokens per attempt.
        tries: Number of generation attempts before giving up.

    Returns:
        The items of the first attempt that parses to a JSON array of at least
        5 elements of which at least 5 pass ``validate_item``; only the valid
        items are returned.

    Raises:
        ValueError: If no attempt produces at least 5 valid items.
    """
    # The prompt does not change between attempts, so build and tokenize it once.
    user_prompt = USER_TEMPLATE_BATCH.format(subject=subject, grade=grade, topic=topic)
    if hasattr(tokenizer, "apply_chat_template"):
        prompt_text = tokenizer.apply_chat_template(
            [
                {"role": "system", "content": SYSTEM_PROMPT_BATCH},
                {"role": "user", "content": user_prompt},
            ],
            tokenize=False,
            add_generation_prompt=True,
        )
    else:
        prompt_text = f"<s>[INST] {SYSTEM_PROMPT_BATCH}\n\n{user_prompt} [/INST]"

    # The fallback prompt starts with a literal "<s>", and chat templates are
    # expected to emit their own special tokens, so the tokenizer must not add
    # another BOS on top of it.
    inputs = tokenizer(
        prompt_text, return_tensors="pt", add_special_tokens=False
    ).to(model.device)
    prompt_len = inputs["input_ids"].shape[1]

    for _ in range(tries):
        out_ids = model.generate(
            **inputs,
            max_new_tokens=max_new_tokens,
            # Sampling must be switched on explicitly: without do_sample=True
            # generate() decodes greedily, temperature/top_p have no effect and
            # every retry would reproduce exactly the same (failed) output.
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            pad_token_id=tokenizer.pad_token_id,
            eos_token_id=tokenizer.eos_token_id,
        )

        # Decode only the newly generated tokens, not the echoed prompt.
        gen = tokenizer.decode(out_ids[0][prompt_len:], skip_special_tokens=True)
        arr = extract_json_array(gen)
        if arr and isinstance(arr, list) and len(arr) >= 5:
            valid_items = [it for it in arr if validate_item(it, subject, grade, topic)[0]]
            if len(valid_items) >= 5:
                return valid_items
    raise ValueError("Failed to parse a valid JSON array for quiz.")

# --------------------------
# 3. Flask App Routes
# --------------------------
app = Flask(__name__)
CORS(app)

@app.route("/generate", methods=["POST"])
def generate_endpoint():
    """POST /generate: build a quiz for the subject, grade and topic in the JSON body.

    Expected body keys: ``subject`` and ``topic`` (non-empty strings),
    ``grade`` (any value whose text contains a number, e.g. ``5`` or
    ``"Grade 5"``) and optional ``bloom_level``, which is only echoed back in
    the response metadata.

    Returns:
        200 with ``metadata``, ``questions`` and ``elapsed`` (seconds) on
        success; 400 with an ``error`` message for a malformed request;
        500 with an ``error`` message when generation fails.
    """
    # silent=True yields None instead of raising for a missing or invalid JSON
    # body, so the client always receives a JSON error payload.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    subject = data.get("subject")
    topic = data.get("topic")
    bloom_level = data.get("bloom_level")
    for field, value in (("subject", subject), ("topic", topic)):
        # Reject early: generated items are validated against these values, so
        # a missing one could only end in a failure after costly generation.
        if not isinstance(value, str) or not value.strip():
            return jsonify({"error": f"'{field}' must be a non-empty string"}), 400

    grade_match = re.search(r'\d+', str(data.get("grade")))
    if grade_match is None:
        return jsonify({"error": "'grade' must contain a number"}), 400
    grade = int(grade_match.group())

    print(f"üß† Generating quiz for {subject=} {grade=} {topic=} ...")

    start = time.time()
    try:
        quiz_items = generate_quiz(subject, grade, topic)
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    elapsed = time.time() - start

    return jsonify({
        "metadata": {
            "subject": subject,
            "grade": grade,
            "topic": topic,
            "bloom_level": bloom_level
        },
        "questions": quiz_items,
        "elapsed": elapsed
    })

# --------------------------
# 4. Expose public URL
# --------------------------
# One port shared by the tunnel and the Flask server it forwards to.
SERVER_PORT = 5000

# Open the ngrok tunnel first so the public URL is printed before app.run() blocks.
public_url = ngrok.connect(SERVER_PORT, bind_tls=True)
print("‚úÖ Your public Colab URL:", public_url)

if __name__ == "__main__":
    print("üöÄ Starting Flask server (threaded mode)...")
    app.run(threaded=True, port=SERVER_PORT, host="0.0.0.0")


⏳ Loading tokenizer...
⏳ Loading 4-bit Mistral model...


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Mistral model loaded successfully!
✅ Your public Colab URL: NgrokTunnel: "https://unreciprocally-stenographic-harley.ngrok-free.dev" -> "http://localhost:5000"
🚀 Starting Flask server (threaded mode)...
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:Press CTRL+C to quit


🧠 Generating quiz for subject='Science' grade=5 topic='Living Things' ...


INFO:werkzeug:127.0.0.1 - - [10/Nov/2025 10:42:41] "POST /generate HTTP/1.1" 200 -


🧠 Generating quiz for subject='Science' grade=5 topic='Force and Energy' ...


INFO:werkzeug:127.0.0.1 - - [10/Nov/2025 10:44:36] "POST /generate HTTP/1.1" 200 -


🧠 Generating quiz for subject='Science' grade=5 topic='Force and Energy' ...


INFO:werkzeug:127.0.0.1 - - [10/Nov/2025 10:47:26] "POST /generate HTTP/1.1" 200 -
