In [31]:
import spacy
import random
from collections import Counter
import re 

In [8]:
text = """
The Greek historian knew what he was talking about. The Nile River fed Egyptian civilization for hundreds of years. 
The Longest River the Nile is 4,160 miles long—the world’s longest river. It begins near the equator in Africa and 
flows north to the Mediterranean Sea. In the south the Nile churns with cataracts. A cataract is a waterfall. Near the 
sea the Nile branches into a delta. A delta is an area near a river’s mouth where the water deposits fine soil called silt. 
In the delta, the Nile divides into many streams. The river is called the upper Nile in the south and the lower Nile in the
north. For centuries, heavy rains in Ethiopia caused the Nile to flood every summer. The floods deposited rich soil along the 
Nile’s shores. This soil was fertile, which means it was good for growing crops. Unlike the Tigris and Euphrates,
the Nile River flooded at the same time every year, so farmers could predict when to plant their crops.
"""


In [34]:
num_questions = 5
nlp = spacy.load('en_core_web_md') # لنقدر نعرف وين كل حملة بتبلش وبتنتهي ونعرف معلومات عن كل  توكين
# Process the text with spacy
doc = nlp(text)

# Extract sentences from the text
sentences = [sent.text for sent in doc.sents]

# Randomly select sentences to form questions
selected_sentences = random.sample(sentences, min(num_questions, len(sentences)))


In [17]:
sentences

['\nThe Greek historian knew what he was talking about.',
 'The Nile River fed Egyptian civilization for hundreds of years. \n',
 'The Longest River the Nile is 4,160 miles long—the world’s longest river.',
 'It begins near the equator in Africa and \nflows north to the Mediterranean Sea.',
 'In the south the Nile churns with cataracts.',
 'A cataract is a waterfall.',
 'Near the \nsea the Nile branches into a delta.',
 'A delta is an area near a river’s mouth where the water deposits fine soil called silt. \n',
 'In the delta, the Nile divides into many streams.',
 'The river is called the upper Nile in the south and the lower Nile in the\nnorth.',
 'For centuries, heavy rains in Ethiopia caused the Nile to flood every summer.',
 'The floods deposited rich soil along the \nNile’s shores.',
 'This soil was fertile, which means it was good for growing crops.',
 'Unlike the Tigris and Euphrates,\nthe Nile River flooded at the same time every year, so farmers could predict when to plant t

In [49]:
def generate_mcqs_advanced(text, num_questions=5):
    if not text or not isinstance(text, str):
        return []
    doc = nlp(text)

    potential_distractors = [ent.text.lower() for ent in doc.ents if ent.label_ not in ['CARDINAL', 'DATE', 'QUANTITY']]
    nouns = [token.text.lower() for token in doc if token.pos_ == 'NOUN' and token.is_stop == False]
    potential_distractors.extend(list(set(nouns)))
    potential_distractors = list(set(potential_distractors))


    
    sentences = list(doc.sents)
    
    # نتأكد أن عدد الأسئلة لا يتجاوز عدد الجمل المتاحة
    num_questions = min(num_questions, len(sentences))
    
    # نخربط كل الجمل لنضمن العشوائية
    random.shuffle(sentences)
    
    # رح نمر على كل الجمل (بالترتيب المخربط) لحتى نعبي الحصة
    sentences_to_try = sentences 
    
    mcqs = []

    for sent in sentences_to_try:
        #  إذا وصلنا للعدد المطلوب، منوقف اللوب
        if len(mcqs) == num_questions:
            break
        subject = None
        
        if sent.ents:
            valid_ents = [ent for ent in sent.ents if ent.label_ not in ['CARDINAL', 'DATE', 'QUANTITY']]
            
            # هل بقي كيانات صالحة بعد الفلترة
            if valid_ents:
                # اذا إي منختار واحد منهم عشوائياً
                subject = random.choice(valid_ents)
        else:
            sent_nouns = [token for token in sent if token.pos_ == 'NOUN' and not token.is_stop]
            if sent_nouns:
                noun_counts = Counter(token.text for token in sent_nouns)
                subject_text = noun_counts.most_common(1)[0][0]
                subject = next((token for token in sent if token.text == subject_text), None)

        if not subject:
            continue
        question_stem = re.sub(r'\b' + re.escape(subject.text) + r'\b', "______", sent.text, count=1)

        if question_stem == sent.text:
            continue
            
        answer_choices = [subject.text]
        
        distractors = []
        subject_token = nlp(subject.text)[0] 
        
        sorted_distractors = sorted(
            potential_distractors,
            key=lambda x: subject_token.similarity(nlp(x)[0]) if x and nlp(x).has_vector else 0,
            reverse=True
        )

        for dist in sorted_distractors:
            # نضيف المشتت إذا كان مختلفاً عن الجواب ومشابهاً له بالمعنى
            if dist.lower() != subject.text.lower() and len(distractors) < 3:
                distractors.append(dist)
        
        # في حال لم نجد مشتتات مشابهة كفاية، نملأ الباقي عشوائياً
        remaining_distractors = [d for d in potential_distractors if d.lower() != subject.text.lower() and d not in distractors]
        while len(distractors) < 3 and remaining_distractors:
            distractors.append(random.choice(remaining_distractors))
            # نزيل التكرار مرة أخرى
            distractors = list(set(distractors))
            remaining_distractors = [d for d in remaining_distractors if d not in distractors]


        answer_choices.extend(distractors)
        random.shuffle(answer_choices)
        
        # قد لا تحتوي القائمة على 4 خيارات إذا كان بنك المشتتات صغيراً
        if len(answer_choices) < 2:
            continue

        correct_answer_char = chr(65 + answer_choices.index(subject.text))
        mcqs.append((question_stem, answer_choices, correct_answer_char))

    return mcqs

In [50]:
tech_text = """
The universe is vast and filled with mysteries that continue to captivate scientists and astronomers alike. From the depths of space to the farthest reaches of distant galaxies, the cosmos holds countless wonders waiting to be explored.

One of the fundamental concepts in astrophysics is the Big Bang theory, which posits that the universe originated from a singular, infinitely dense point nearly 13.8 billion years ago. Over time, the universe expanded and cooled, giving rise to the formation of galaxies, stars, and planets.

Galaxies are immense systems containing billions or even trillions of stars, as well as various types of interstellar matter such as gas, dust, and dark matter. The Milky Way, our home galaxy, is a spiral galaxy containing hundreds of billions of stars, including our own Sun.

Stars are the celestial objects that shine brightly in the night sky, fueled by nuclear fusion reactions occurring in their cores. They come in a variety of sizes, colors, and temperatures, with some stars being much larger and hotter than others. The life cycle of a star depends on its mass, with massive stars undergoing supernova explosions at the end of their lives, while smaller stars like our Sun eventually evolve into white dwarfs.

Planets orbit stars and come in different types, including terrestrial planets like Earth, gas giants like Jupiter, and icy worlds like Neptune. In our solar system, eight planets revolve around the Sun, each with its own unique characteristics and features.

Space exploration has allowed humanity to venture beyond Earth and explore the cosmos firsthand. Missions to the Moon, Mars, and beyond have expanded our understanding of the universe and laid the groundwork for future exploration and colonization of other worlds.

The search for extraterrestrial life is a central focus of space exploration, driven by the desire to uncover whether life exists beyond Earth. Scientists study the conditions on other planets and moons in our solar system, as well as exoplanets orbiting distant stars, in the hope of finding signs of life elsewhere in the universe.

The study of black holes, mysterious regions of spacetime where gravity is so strong that nothing, not even light, can escape, is another area of active research in astrophysics. Black holes come in various sizes, from stellar-mass black holes formed from the collapse of massive stars to supermassive black holes that lurk at the centers of galaxies.

Cosmology, the scientific study of the origin, evolution, and eventual fate of the universe, seeks to answer some of the most profound questions about our existence. By analyzing cosmic microwave background radiation, the distribution of galaxies, and the structure of the universe on the largest scales, cosmologists aim to unravel the mysteries of the cosmos and our place within it.

"""
mcqs = generate_mcqs_advanced(tech_text, num_questions=6)
# Ensure each MCQ is formatted correctly as (question_stem, answer_choices, correct_answer)
mcqs_with_index = [(i + 1, mcq) for i, mcq in enumerate(mcqs)]

for question in mcqs_with_index:
    print("Question", question[0], ":", question[1][0])
    print("Options:")
    options = question[1][1]
    for i, option in enumerate(options):
        print(f"{chr(97 + i)}) {option}")
    print("Correct Answer:", question[1][2])
    print("\n")

Question 1 : The ______ of black holes, mysterious regions of spacetime where gravity is so strong that nothing, not even light, can escape, is another area of active research in astrophysics.
Options:
a) gravity
b) research
c) concepts
d) study
Correct Answer: D


Question 2 : By analyzing cosmic ______ background radiation, the distribution of galaxies, and the structure of the universe on the largest scales, cosmologists aim to unravel the mysteries of the cosmos and our place within it.


Options:
a) night
b) microwave
c) dust
d) hope
Correct Answer: B


Question 3 : They come in a ______ of sizes, colors, and temperatures, with some stars being much larger and hotter than others.
Options:
a) variety
b) years
c) types
d) sizes
Correct Answer: A


Question 4 : The search for extraterrestrial life is a central focus of space exploration, driven by the desire to uncover whether life exists beyond ______.
Options:
a) depths
b) lives
c) life
d) Earth
Correct Answer: D


Question 5 : The