In [1]:
import nltk
import random
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.corpus import stopwords

# Fetch the NLTK resources used below: the sentence/word tokenizer models,
# the English POS tagger, and the stop-word lists. Only the stop-word
# download is silenced; the other two report their progress.
for _package, _quiet in (
    ('punkt_tab', False),
    ('averaged_perceptron_tagger_eng', False),
    ('stopwords', True),
):
    nltk.download(_package, quiet=_quiet)

def generate_match_questions(text, num_questions=3):
    """Generate "match the following" fill-in-the-blank questions from text.

    The text is split into sentences. For each sentence, up to
    ``num_questions`` distinct content words are picked at random, and one
    question is produced per picked word by blanking that word out of the
    sentence. A content word is a token whose POS tag starts with ``NN``,
    ``JJ``, ``VB`` or ``RB``, that is not an English stop word, and that is
    longer than two characters.

    Args:
        text: Passage to build questions from.
        num_questions: Maximum number of questions per sentence. Values
            below 1 produce no questions at all.

    Returns:
        A list with one ``(question_set, shuffled_words)`` tuple per sentence
        that contains at least one eligible word. ``question_set`` is a list
        of ``(question, answer)`` tuples; ``shuffled_words`` holds the same
        answers in a shuffled order, to serve as the answer column.
    """
    import re  # local import: keeps this block self-contained

    # random.sample() rejects negative sizes, and a size of 0 would only
    # yield empty question sets, so there is nothing useful to build.
    if num_questions < 1:
        return []

    sentences = nltk.sent_tokenize(text)
    stop_words = set(stopwords.words('english'))
    content_tag_prefixes = ('NN', 'JJ', 'VB', 'RB')
    questions = []

    for sentence in sentences:
        tagged_words = pos_tag(word_tokenize(sentence))

        # dict.fromkeys() drops repeated tokens while keeping first-seen
        # order, so the same answer can never be sampled twice for one
        # sentence (which would produce two identical questions).
        eligible_words = list(dict.fromkeys(
            word for word, tag in tagged_words
            if tag.startswith(content_tag_prefixes)
            and word.lower() not in stop_words
            and len(word) > 2
        ))
        if not eligible_words:
            continue

        num_to_generate = min(len(eligible_words), num_questions)
        chosen_words = random.sample(eligible_words, num_to_generate)

        # Shuffle a copy so the answer column is decoupled from the
        # question order.
        shuffled_words = chosen_words[:]
        random.shuffle(shuffled_words)

        question_set = []
        for word in chosen_words:
            # Blank only whole-word occurrences; a plain str.replace() would
            # also hit the word inside longer words (e.g. "art" inside
            # "artificial") and corrupt the rest of the sentence.
            whole_word = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
            question, hits = re.subn(whole_word, "______", sentence)
            if not hits:
                # The tokenizer can emit pieces that are not free-standing
                # words in the raw sentence; fall back to plain replacement
                # so the question still contains a blank.
                question = sentence.replace(word, "______")
            question_set.append((question, word))

        questions.append((question_set, shuffled_words))

    return questions


# Demo: build matching exercises from a short sample passage (at most two
# questions per sentence) and print each exercise as a numbered question
# list followed by a numbered, shuffled answer list.
text = """The quick brown fox jumps over the lazy dog. This is a simple sentence. It contains various parts of speech. The cat sat on the mat. Artificial intelligence is a rapidly evolving field. Natural Language Processing is a subfield of Artificial Intelligence. It deals with the interaction between computers and human language."""

match_questions = generate_match_questions(text, 2)

divider = "-" * 40

for question_set, shuffled_words in match_questions:
    print("Match the following questions with the corresponding answers (using numbers):")
    print(divider)

    # Question column: only the blanked sentence is shown, not its answer.
    print("Questions:")
    for number, (question, _) in enumerate(question_set, start=1):
        print(f"{number}. {question}")

    # Answer column: the same answers, in shuffled order.
    print("\nAnswers:")
    for number, word in enumerate(shuffled_words, start=1):
        print(f"{number}. {word}")

    print(divider)
    print()  # blank line between exercises

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


Match the following questions with the corresponding answers (using numbers):
----------------------------------------
Questions:
1. The quick brown fox jumps over the lazy ______.
2. The ______ brown fox jumps over the lazy dog.

Answers:
1. dog
2. quick
----------------------------------------

Match the following questions with the corresponding answers (using numbers):
----------------------------------------
Questions:
1. This is a simple ______.
2. This is a ______ sentence.

Answers:
1. sentence
2. simple
----------------------------------------

Match the following questions with the corresponding answers (using numbers):
----------------------------------------
Questions:
1. It contains ______ parts of speech.
2. It contains various parts of ______.

Answers:
1. various
2. speech
----------------------------------------

Match the following questions with the corresponding answers (using numbers):
----------------------------------------
Questions:
1. The ______ sat on the mat