In [None]:
from collections import Counter
import random
import spacy
import nltk
from nltk.corpus import brown
import string

In [None]:
# Load spaCy's small English pipeline once; generate_mcqs uses it for sentence
# splitting and part-of-speech tagging.
nlp = spacy.load('en_core_web_sm')

In [None]:
# Word-frequency table over the Brown corpus, consulted by is_common.
# Words are lower-cased while counting because is_common looks words up by
# their lower-cased form; counting the raw tokens would split e.g. "The" and
# "the" into separate entries and under-count words that start sentences.
# Requires the Brown corpus data to be installed (nltk.download('brown')).
freq_dist = nltk.FreqDist(word.lower() for word in brown.words())

In [None]:
def is_common(word, threshold=1000):
    """Return True if ``word`` is too ordinary to make a useful quiz answer.

    A word is considered common when it consists solely of punctuation
    characters, or when its lower-cased form occurs more than ``threshold``
    times in the module-level ``freq_dist`` table.

    Args:
        word: Token text to classify.
        threshold: Frequency above which a word counts as common.
            Defaults to 1000.

    Returns:
        bool: True for punctuation-only tokens (including the empty string)
        and for words whose frequency exceeds ``threshold``.
    """
    lowered = word.lower()

    # Test character by character: `lowered in string.punctuation` is a
    # substring test and misses multi-character tokens such as "..." or "--".
    if all(char in string.punctuation for char in lowered):
        return True

    return freq_dist[lowered] > threshold

def is_significant(word):
    """Return True when ``word`` is longer than three characters."""
    minimum_length = 4
    return len(word) >= minimum_length

In [None]:
def generate_mcqs(text, num_ques=5):
    """Build fill-in-the-blank multiple-choice questions from ``text``.

    The text is split into sentences with spaCy and the sentences are visited
    in random order. For each sentence, the most frequent noun that passes
    ``is_significant`` and is not ``is_common`` becomes the answer; it is
    blanked out of the (lower-cased) sentence and offered together with up to
    three other qualifying nouns from the same sentence as distractors.

    Args:
        text: Source passage. ``None``, empty, or whitespace-only text yields
            no questions.
        num_ques: Maximum number of questions to return. Values <= 0 yield an
            empty list.

    Returns:
        A list of ``(question, choices, correct_letter)`` tuples:
        ``question`` is the lower-cased sentence with the answer replaced by
        ``"______"``; ``choices`` is a shuffled list of four strings (padded
        with ``"[Distractor]"`` when the sentence has fewer than three other
        nouns); ``correct_letter`` is ``"A"``-``"D"``, the position of the
        answer in ``choices``. Fewer than ``num_ques`` items are returned when
        not enough sentences contain at least two qualifying nouns.
    """
    if not text or not text.strip() or num_ques <= 0:
        return []

    doc = nlp(text)
    sentences = [sent.text.strip() for sent in doc.sents if sent.text.strip()]
    if not sentences:
        return []

    # Visit every sentence in random order instead of pre-sampling num_ques of
    # them, so that sentences rejected below do not shrink the result while
    # other eligible sentences are still available.
    candidates = random.sample(sentences, len(sentences))
    mcqs = []

    for sentence in candidates:
        if len(mcqs) >= num_ques:
            break

        sent_doc = nlp(sentence.lower())
        nouns = [
            token.text
            for token in sent_doc
            if token.pos_ == 'NOUN'
            and is_significant(token.text)
            and not is_common(token.text)
        ]
        if len(nouns) < 2:
            continue

        noun_counts = Counter(nouns)
        # Ties resolve to the noun seen first in the sentence.
        subject = noun_counts.most_common(1)[0][0]

        # Blank out whole tokens equal to the answer. A plain str.replace would
        # also hit substrings of longer words (e.g. "cell" inside "cells").
        ques = "".join(
            "______" + token.whitespace_ if token.text == subject else token.text_with_ws
            for token in sent_doc
        )

        # Counter keeps insertion order, so the candidate list is stable under
        # a seeded `random`; iterating a set of strings is not.
        distractors = [noun for noun in noun_counts if noun != subject]
        if len(distractors) < 3:
            distractors += ["[Distractor]"] * (3 - len(distractors))

        # Shuffle first so that a random three are kept when more exist.
        random.shuffle(distractors)
        answer_choices = [subject] + distractors[:3]
        random.shuffle(answer_choices)

        # 65 is ord("A"): map the answer's index 0-3 to a letter A-D.
        correct_ans = chr(65 + answer_choices.index(subject))
        mcqs.append((ques, answer_choices, correct_ans))

    return mcqs