In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Checkpoint identifier shared by the tokenizer and the model
model_id = "Qwen/Qwen3-8B"

# Tokenizer for the checkpoint above
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

# Causal-LM weights in half precision; device placement is left to device_map="auto"
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype=torch.float16,
    trust_remote_code=True,
)

# Text-generation pipeline wrapping the loaded model and tokenizer
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)

# Prompt template
def generate_prompt(document_text: str) -> str:
    """Build the MCQ-generation prompt for a single passage.

    Args:
        document_text: The passage the question should be based on. It is
            inserted verbatim between triple double quotes.

    Returns:
        The instruction, the quoted passage and a trailing ``Question:`` line
        (ending in a newline) for the model to continue from.
    """
    # The en dash in "A–D" is written as \u2013 so the prompt stays intact even
    # if this file is re-saved with the wrong encoding (the previous literal
    # had degraded into mojibake that was sent to the model).
    return f"""Based on the following passage, generate one multiple-choice question with exactly 4 answer options (A\u2013D). Indicate the correct answer clearly.

Passage:
\"\"\"{document_text}\"\"\"

Question:
"""

# Function to generate MCQs
def generate_mcqs(documents, max_tokens: int = 512) -> list:
    """Generate one multiple-choice question per document.

    Args:
        documents: Iterable of passage strings; each is turned into a prompt
            with ``generate_prompt`` and sent to the module-level ``generator``.
        max_tokens: Passed to the pipeline as ``max_new_tokens``.

    Returns:
        A list with one whitespace-stripped completion per input document,
        in input order. An empty input yields an empty list.
    """
    questions = []
    for doc in documents:
        prompt = generate_prompt(doc)
        # Ask the pipeline for the completion only. Slicing the full text by
        # len(prompt) breaks silently whenever the decoded text does not
        # reproduce the prompt character-for-character.
        result = generator(
            prompt,
            max_new_tokens=max_tokens,
            do_sample=False,
            return_full_text=False,
        )
        questions.append(result[0]["generated_text"].strip())
    return questions

# Sample passages to turn into questions
documents = [
    "Photosynthesis is the process by which green plants use sunlight to synthesize foods from carbon dioxide and water.",
    "The capital of France is Paris, which is known for its art, fashion, and the iconic Eiffel Tower.",
]

# One generated question per passage, in the same order
mcqs = generate_mcqs(documents)

# Show each question under a numbered header, followed by an 80-character rule
for number, question in enumerate(mcqs, start=1):
    print(f"MCQ #{number}:", question, "=" * 80, sep="\n")


Loading checkpoint shards:   0%|          | 0/5 [00:00<?, ?it/s]

Some parameters are on the meta device because they were offloaded to the cpu.
Device set to use cuda:0
