In [None]:
import nltk
import random
# Fetch the tokenizer and POS-tagger resources under the "_tab" / "_eng" names.
# NOTE(review): presumably these are the names newer NLTK releases look up,
# while the plain names downloaded further down serve older releases -- confirm.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.corpus import stopwords

# Fetch the plain-named tokenizer/tagger resources plus the stop-word corpus
# read by stopwords.words('english'); quiet=True is passed so these calls do
# not print download progress.
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('stopwords', quiet=True)

def generate_fill_in_the_blanks(text, num_questions=5):
    """Generate fill-in-the-blank questions from ``text``.

    The text is split into sentences. Within each sentence, tokens tagged as
    nouns, adjectives, verbs or adverbs that are not English stop words and
    are longer than two characters are candidates for blanking. Up to
    ``num_questions`` distinct candidates are drawn at random from *each*
    sentence, and one question is produced per drawn word.

    Args:
        text: The passage to build questions from. Empty or ``None`` yields
            an empty result.
        num_questions: Maximum number of questions generated per sentence
            (not for the whole text). Values <= 0 yield an empty result.

    Returns:
        A list of ``(question, answer)`` tuples, where ``question`` is the
        sentence with every whole-word occurrence of ``answer`` replaced by
        ``"______"``.
    """
    # Local import keeps this function self-contained; the file's import
    # header does not bring ``re`` into scope.
    import re

    # Nothing to do for empty input; also avoids random.sample raising
    # ValueError on a negative sample size.
    if not text or num_questions <= 0:
        return []

    blank = "______"
    # Penn Treebank tag prefixes: nouns, adjectives, verbs, adverbs.
    content_tag_prefixes = ('NN', 'JJ', 'VB', 'RB')
    stop_words = set(stopwords.words('english'))
    questions = []

    for sentence in nltk.sent_tokenize(text):
        tagged_words = pos_tag(word_tokenize(sentence))

        # dict.fromkeys de-duplicates while keeping first-seen order, so a
        # word repeated in the sentence cannot be sampled twice and produce
        # the same question twice.
        eligible_words = list(dict.fromkeys(
            word
            for word, tag in tagged_words
            if tag.startswith(content_tag_prefixes)
            and word.lower() not in stop_words
            and len(word) > 2  # avoid short, common words
        ))
        if not eligible_words:
            continue

        num_to_generate = min(len(eligible_words), num_questions)
        for word in random.sample(eligible_words, num_to_generate):
            # Match the answer only as a whole word: a plain str.replace
            # would also blank it inside longer words (e.g. "cat" inside
            # "category"), corrupting the question.
            pattern = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
            question, replaced = re.subn(pattern, blank, sentence)
            if not replaced:
                # The token does not occur as a standalone word in the raw
                # sentence (e.g. a tokenizer-split fragment such as "n't");
                # a question without a blank would be useless, so skip it.
                continue
            questions.append((question, word))

    return questions


# Demonstration: build questions from two sample passages and print each
# question/answer pair followed by a 20-dash separator line.
text = """The quick brown fox jumps over the lazy dog. This is a simple sentence. It contains various parts of speech. The cat sat on the mat.  Artificial intelligence is a rapidly evolving field."""

# First passage, calling with num_questions=4.
questions_with_answers = generate_fill_in_the_blanks(text, 4)

for question, answer in questions_with_answers:
    # One print call with a newline separator emits the same three lines.
    print(f"Question: {question}", f"Answer: {answer}", "-" * 20, sep="\n")


# Second passage, calling with num_questions=3.
text2 = """Natural Language Processing is a subfield of Artificial Intelligence. It deals with the interaction between computers and human language."""
questions_with_answers2 = generate_fill_in_the_blanks(text2, 3)

for question, answer in questions_with_answers2:
    print(f"Question: {question}", f"Answer: {answer}", "-" * 20, sep="\n")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


Question: The quick brown ______ jumps over the lazy dog.
Answer: fox
--------------------
Question: The quick ______ fox jumps over the lazy dog.
Answer: brown
--------------------
Question: The ______ brown fox jumps over the lazy dog.
Answer: quick
--------------------
Question: The quick brown fox ______ over the lazy dog.
Answer: jumps
--------------------
Question: This is a ______ sentence.
Answer: simple
--------------------
Question: This is a simple ______.
Answer: sentence
--------------------
Question: It contains ______ parts of speech.
Answer: various
--------------------
Question: It ______ various parts of speech.
Answer: contains
--------------------
Question: It contains various parts of ______.
Answer: speech
--------------------
Question: It contains various ______ of speech.
Answer: parts
--------------------
Question: The cat ______ on the mat.
Answer: sat
--------------------
Question: The cat sat on the ______.
Answer: mat
--------------------
Question: The ____