In [None]:
# NLTK supplies tokenization, POS tagging and stop words; `random` picks the
# words to blank out and the distractors.
import nltk
import random
# Fetch tokenizer and POS-tagger data under the 'punkt_tab' / '..._eng' names.
# NOTE(review): presumably these are the resource names newer NLTK releases
# look up, while the un-suffixed names below serve older ones — confirm
# against the installed NLTK version.
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')
from nltk.tokenize import word_tokenize
from nltk.tag import pos_tag
from nltk.corpus import stopwords

# Same kinds of data under the 'punkt' / 'averaged_perceptron_tagger' names,
# plus the stop-word lists; quiet=True keeps these downloads from logging.
nltk.download('punkt', quiet=True)
nltk.download('averaged_perceptron_tagger', quiet=True)
nltk.download('stopwords', quiet=True)

def _content_words(sentence, stop_words):
    """Return the words of *sentence* usable as answers or distractors.

    A word qualifies when its POS tag starts with NN, JJ, VB or RB, it is not
    in *stop_words* (compared lower-cased) and it is longer than two
    characters. Words are returned in sentence order, duplicates included.
    """
    content_tags = ('NN', 'JJ', 'VB', 'RB')
    return [
        word
        for word, tag in pos_tag(word_tokenize(sentence))
        if tag.startswith(content_tags)
        and word.lower() not in stop_words
        and len(word) > 2
    ]


def _blank_out(sentence, word):
    """Return *sentence* with every whole-word occurrence of *word* blanked."""
    import re  # local import so this block does not rely on a file-level one

    # Look-arounds instead of a plain str.replace so that blanking "cat" does
    # not also punch a hole in "concatenate".
    whole_word = r'(?<!\w)' + re.escape(word) + r'(?!\w)'
    blanked, hits = re.subn(whole_word, "______", sentence)
    if hits:
        return blanked
    # Tokens such as "n't" never stand alone in the raw sentence; fall back to
    # plain substring replacement for them.
    return sentence.replace(word, "______")


def generate_match_questions(text, num_questions=3):  # Reduced default for matching
    """Generate "match the following" question sets from *text*.

    The text is split into sentences. For each sentence that contains at
    least one content word (see ``_content_words``), up to *num_questions*
    distinct words are picked at random and blanked out to form questions,
    and the same number of distractors is drawn from the content words of the
    whole text.

    Args:
        text: The source text.
        num_questions: Maximum number of questions per sentence. A value of
            zero or less yields no question sets.

    Returns:
        A list with one ``(question_set, distractor_set)`` tuple per usable
        sentence. ``question_set`` is a list of ``(question, answer)`` pairs,
        where ``question`` is the sentence with the answer replaced by
        ``"______"``. ``distractor_set`` is a list of strings of the same
        length; it holds ``"---"`` placeholders only when the text does not
        contain enough distinct words to serve as distractors.

    Selection uses the module-level ``random`` generator, so call
    ``random.seed`` beforehand for reproducible output.
    """
    if num_questions <= 0:
        return []

    sentences = nltk.sent_tokenize(text)
    stop_words = set(stopwords.words('english'))

    # Tokenize and tag every sentence exactly once; the result serves both as
    # the answer candidates and as the distractor vocabulary.
    per_sentence = [_content_words(s, stop_words) for s in sentences]

    # Distractor vocabulary keyed by lower-cased word so that "Artificial" and
    # "artificial" count as the same distractor (first spelling wins).
    vocabulary = {}
    for words in per_sentence:
        for word in words:
            vocabulary.setdefault(word.lower(), word)

    questions = []
    for sentence, words in zip(sentences, per_sentence):
        # Drop repeats so one word cannot be sampled twice for one sentence.
        eligible_words = list(dict.fromkeys(words))
        if not eligible_words:
            continue

        num_to_generate = min(len(eligible_words), num_questions)
        chosen_words = random.sample(eligible_words, num_to_generate)

        # Distractors: distinct words from anywhere in the text that are not
        # among the correct answers (case-insensitively).
        chosen_keys = {w.lower() for w in chosen_words}
        pool = [w for key, w in vocabulary.items() if key not in chosen_keys]
        distractors = random.sample(pool, min(num_to_generate, len(pool)))
        # Pad with the placeholder only when the text truly runs out of words.
        distractors.extend(["---"] * (num_to_generate - len(distractors)))

        question_set = [(_blank_out(sentence, w), w) for w in chosen_words]
        questions.append((question_set, distractors))

    return questions




# Demo: build question sets from a short sample passage and print them.
text = """The quick brown fox jumps over the lazy dog. This is a simple sentence. It contains various parts of speech. The cat sat on the mat. Artificial intelligence is a rapidly evolving field. Natural Language Processing is a subfield of Artificial Intelligence. It deals with the interaction between computers and human language."""

match_questions = generate_match_questions(text, 2)

for question_set, distractor_set in match_questions:
    # Section 1: the blanked sentences, numbered from 1.
    print("Match the following questions with the most appropriate answer:")
    for number, (prompt, _) in enumerate(question_set, start=1):
        print(f"{number}. {prompt}")

    # Section 2: the correct answers, in the same order as the questions.
    print("\nAnswers:")
    for number, (_, solution) in enumerate(question_set, start=1):
        print(f"{number}. {solution}")

    # Section 3: the wrong options, followed by a separator rule.
    print("\nPossible Distractors:")
    for number, wrong_option in enumerate(distractor_set, start=1):
        print(f"{number}. {wrong_option}")
    print("-" * 40)

Match the following questions with the most appropriate answer:
1. The quick brown fox ______ over the lazy dog.
2. The quick ______ fox jumps over the lazy dog.

Answers:
1. jumps
2. brown

Possible Distractors:
1. sat
2. intelligence
----------------------------------------
Match the following questions with the most appropriate answer:
1. This is a simple ______.
2. This is a ______ sentence.

Answers:
1. sentence
2. simple

Possible Distractors:
1. evolving
2. rapidly
----------------------------------------
Match the following questions with the most appropriate answer:
1. It contains various parts of ______.
2. It ______ various parts of speech.

Answers:
1. speech
2. contains

Possible Distractors:
1. computers
2. parts
----------------------------------------
Match the following questions with the most appropriate answer:
1. The cat sat on the ______.
2. The ______ sat on the mat.

Answers:
1. mat
2. cat

Possible Distractors:
1. Artificial
2. Artificial
-----------------------

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger_eng is already up-to-
[nltk_data]       date!
