In [4]:
import random
import re
from collections import Counter

import spacy

In [31]:
# Load spaCy's small English pipeline ('en_core_web_sm'). The returned `nlp`
# callable turns raw text into a parsed Doc; later cells use it for sentence
# splitting and part-of-speech tags.
nlp = spacy.load('en_core_web_sm')



In [41]:
def _blank_out(sentence, word, blank="___"):
    """Replace whole-word occurrences of ``word`` in ``sentence`` with ``blank``."""
    # Look-arounds instead of a plain substring replace, so blanking "river"
    # does not also mangle "rivers" or "riverbank".
    pattern = r"(?<!\w)" + re.escape(word) + r"(?!\w)"
    blanked, hits = re.subn(pattern, lambda _match: blank, sentence)
    # If the tokenizer carved the noun out of a longer alphanumeric run there
    # is no whole-word match; fall back to substring replacement so the stem
    # always contains a blank.
    return blanked if hits else sentence.replace(word, blank)


def _pick_distractors(subject, nouns, fallback_nouns, count=3):
    """Return up to ``count`` distinct wrong options, preferring sentence nouns."""
    # dict.fromkeys de-duplicates while keeping first-seen order, so results
    # are reproducible under random.seed() (set ordering of strings is not).
    sentence_pool = list(dict.fromkeys(n for n in nouns if n != subject))
    distractors = random.sample(sentence_pool, min(count, len(sentence_pool)))

    missing = count - len(distractors)
    if missing:
        taken = {subject, *distractors}
        fallback_pool = [n for n in fallback_nouns if n not in taken]
        distractors += random.sample(fallback_pool, min(missing, len(fallback_pool)))
    return distractors


def generate_mcqs(text, num_questions=5):
    """Build fill-in-the-blank multiple-choice questions from ``text``.

    Sentences are visited in random order. For each sentence containing at
    least two NOUN tokens, the most frequent noun (first-seen wins ties) is
    blanked out and offered alongside three distractors, taken from the
    sentence's other nouns first and from a small fallback list otherwise.

    Parameters
    ----------
    text : str
        Passage to draw questions from. Parsed with the module-level ``nlp``
        spaCy pipeline.
    num_questions : int, default 5
        Maximum number of questions to return. Values <= 0 yield ``[]``.

    Returns
    -------
    list[tuple[str, list[str], str]]
        ``(question_stem, answer_choices, correct_answer)`` per question.
        ``question_stem`` is the lower-cased sentence with the answer replaced
        by ``"___"``, ``answer_choices`` is a shuffled list of four options and
        ``correct_answer`` is the letter ("A".."D") of the right option.
        Fewer than ``num_questions`` items are returned only when the text
        does not contain enough eligible sentences.
    """
    fallback_nouns = ["grape", "mango", "peach", "pineapple", "strawberry"]

    if num_questions <= 0:
        return []

    doc = nlp(text)
    # Strip the whitespace/newlines that sentence spans can carry so they do
    # not leak into the printed question; drop sentences that end up empty.
    sentences = [s for s in (sent.text.strip() for sent in doc.sents) if s]

    # Shuffle everything and stop once enough questions exist, rather than
    # sampling num_questions sentences up front: sentences with too few nouns
    # are then skipped without shrinking the result.
    random.shuffle(sentences)

    mcqs = []
    for sentence in sentences:
        if len(mcqs) >= num_questions:
            break

        # The lower-cased sentence is what gets tagged and shown (behaviour
        # kept from the original). spaCy's tagger does not require lower-case
        # input. NOTE(review): lower-casing appears to let capitalised names
        # be tagged NOUN and become answers - confirm this is intended.
        sentence = sentence.lower()
        sent_doc = nlp(sentence)
        nouns = [token.text for token in sent_doc if token.pos_ == "NOUN"]

        if len(nouns) < 2:
            continue

        subject = Counter(nouns).most_common(1)[0][0]
        question_stem = _blank_out(sentence, subject)

        answer_choices = [subject] + _pick_distractors(subject, nouns, fallback_nouns)
        random.shuffle(answer_choices)

        # Option letter of the correct answer: index 0 -> "A", 1 -> "B", ...
        correct_answer = chr(ord("A") + answer_choices.index(subject))
        mcqs.append((question_stem, answer_choices, correct_answer))

    return mcqs



In [42]:
# Sample passage (about the Ganges) fed to generate_mcqs in the demo below.
text = """The main stem of the Ganges begins at the town of Devprayag,at the confluence of the Alaknanda, which is the source stream in hydrology on account of its greater length, and the Bhagirathi, which is considered the source stream in Hindu mythology.
The Ganges is a lifeline to hundreds of millions of people who
live in its basin and depend on it for their daily needs.
It has been important historically, with many former provincial or imperial capitals such as Pataliputra,Kannauj,Sonargaon, Dhaka, Bikrampur, Kara, Munger, Kashi, Patna, Hajipur, Kanpur, Delhi, Bhagalpur, Murshidabad, Baharampur, Kampilya, and Kolkata located on its banks or those of its tributaries and connected waterways. The river is home to approximately 140 species of fish, 90 species of amphibians, and also reptiles and mammals, including critically endangered species such as the gharial and South Asian river dolphin.The Ganges is the most sacred river to Hindus.
It is worshipped as the goddess Ganga in Hinduism."""

# Seed Python's RNG so that re-running this cell draws the same sequence of
# random numbers instead of producing a different quiz on every run.
random.seed(42)

results = generate_mcqs(text, num_questions=5)

# Print each question, its lettered options (A, B, C, ...) and the answer key.
for number, (question_stem, answer_choices, correct_answer) in enumerate(results, start=1):
    print(f"Q{number}: {question_stem}")

    for j, choice in enumerate(answer_choices):
        print(f"{chr(ord('A') + j)}. {choice}")
    print(f"Correct answer: {correct_answer}")
    print()

Q1: the river is home to approximately 140 ___ of fish, 90 ___ of amphibians, and also reptiles and mammals, including critically endangered ___ such as the gharial and south asian river dolphin.
A. species
B. reptiles
C. amphibians
D. mammals
Correct answer: A

Q2: the main stem of the ganges begins at the town of devprayag,at the confluence of the alaknanda, which is the ___ stream in hydrology on account of its greater length, and the bhagirathi, which is considered the ___ stream in hindu mythology.

A. town
B. hydrology
C. bhagirathi
D. source
Correct answer: D

Q3: the ___ is a lifeline to hundreds of millions of people who 
live in its basin and depend on it for their daily needs.

A. needs
B. ganges
C. people
D. hundreds
Correct answer: B

Q4: the ___ is the most sacred river to hindus.

A. river
B. hindus
C. grape
D. ganges
Correct answer: D

Q5: it has been important historically, with many former provincial or imperial ___ such as pataliputra,kannauj,sonargaon, dhaka, bikram