In [6]:
import spacy
import numpy as np
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

def preprocess_text(text):
    """Split *text* into a list of non-empty, whitespace-trimmed sentences.

    Sentence boundaries come from the spaCy ``en_core_web_sm`` pipeline,
    which is loaded on every call.

    Args:
        text: Raw input text.

    Returns:
        A list of sentence strings in document order; sentences that are
        empty after stripping are omitted.
    """
    nlp = spacy.load("en_core_web_sm")

    sentences = []
    for span in nlp(text).sents:
        cleaned = span.text.strip()
        # Whitespace-only spans carry no content, so they are dropped.
        if cleaned:
            sentences.append(cleaned)
    return sentences

def extract_key_phrases(text):
    """Collect candidate key phrases from named entities and noun chunks.

    The text is parsed with the spaCy ``en_core_web_sm`` pipeline (loaded on
    every call). The text of every entity in ``doc.ents`` and every chunk in
    ``doc.noun_chunks`` is gathered, with exact duplicates removed.

    Args:
        text: Raw input text.

    Returns:
        A list of unique phrase strings ordered by descending word count.
        Phrases with the same word count are ordered alphabetically so the
        result is identical from run to run.
    """
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)

    key_phrases = {ent.text for ent in doc.ents}
    key_phrases.update(chunk.text for chunk in doc.noun_chunks)

    # A set of strings iterates in an order that depends on per-process hash
    # randomisation, and sorted() is stable, so sorting on word count alone
    # would leave ties in an arbitrary order. The phrase itself is used as a
    # secondary key to make the ordering deterministic.
    return sorted(key_phrases, key=lambda phrase: (-len(phrase.split()), phrase))

def determine_flashcard_count(text):
    """Pick how many flashcards to produce from the word count of *text*.

    Words are counted by splitting on whitespace.

    Args:
        text: Raw input text.

    Returns:
        3 for fewer than 100 words, 5 for 100-299, 7 for 300-599,
        and 10 for 600 or more.
    """
    n_words = len(text.split())

    # (exclusive upper bound on word count, flashcards), ascending by bound.
    tiers = ((100, 3), (300, 5), (600, 7))
    for upper_bound, card_count in tiers:
        if n_words < upper_bound:
            return card_count
    return 10

def compute_sentence_scores(sentences):
    """Score each sentence by the sum of its TF-IDF term weights.

    Every sentence is treated as one document; English stop words are
    excluded from the vocabulary.

    Args:
        sentences: Sequence of sentence strings.

    Returns:
        A 1-D NumPy array holding one score per sentence, in input order.
        When the vectorizer finds no vocabulary at all (for example, every
        sentence consists only of stop words) an all-zero array is returned
        instead of raising.

    Raises:
        ValueError: Any vectorizer error other than the empty-vocabulary case.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(sentences)
    except ValueError as err:
        # scikit-learn signals "no usable terms" with a ValueError whose
        # message mentions an empty vocabulary. Only that case is treated as
        # "no TF-IDF evidence"; anything else is a real error.
        if "empty vocabulary" not in str(err):
            raise
        return np.zeros(len(sentences))

    # Row sums of the sparse matrix come back 2-D; flatten to one value per row.
    return np.asarray(tfidf_matrix.sum(axis=1)).ravel()

def textrank_summary(text, top_n):
    """Select up to *top_n* high-scoring sentences from *text*.

    Each sentence gets a combined score: its PageRank value on a graph whose
    edge weights are cosine similarities between sentence embeddings, plus
    its TF-IDF score from ``compute_sentence_scores``. Sentences are then
    taken in descending score order, skipping any sentence whose first five
    lower-cased words overlap with those of an already selected sentence.

    Args:
        text: Raw input text.
        top_n: Maximum number of sentences to return.

    Returns:
        A list of at most ``top_n`` sentence strings, best first. Empty when
        ``top_n`` is not positive or the text yields no sentences.
    """
    # Nothing was requested: skip the model work entirely. (Checking the
    # limit only after appending would hand back one sentence for top_n <= 0.)
    if top_n <= 0:
        return []

    sentences = preprocess_text(text)
    if not sentences:
        return []

    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(sentences)

    # Cosine similarity can be negative, but PageRank edge weights must be
    # non-negative for the transition probabilities to be meaningful, so
    # negative similarities are treated as "no relation".
    similarity_matrix = np.clip(cosine_similarity(embeddings), 0.0, None)
    graph = nx.from_numpy_array(similarity_matrix)
    textrank_scores = nx.pagerank(graph)

    tfidf_scores = compute_sentence_scores(sentences)

    combined_scores = [
        textrank_scores[i] + tfidf_score
        for i, tfidf_score in enumerate(tfidf_scores)
    ]
    # The sort is stable, so equally scored sentences stay in document order.
    ranked_indices = sorted(
        range(len(combined_scores)),
        key=combined_scores.__getitem__,
        reverse=True,
    )

    selected_sentences = []
    seen_words = set()  # leading words of every sentence selected so far

    for i in ranked_indices:
        sentence = sentences[i]
        leading_words = set(sentence.lower().split()[:5])
        # NOTE(review): any single shared leading word (including common
        # words such as "the") rejects the candidate; this keeps the
        # existing filter semantics -- confirm that strictness is intended.
        if leading_words.isdisjoint(seen_words):
            selected_sentences.append(sentence)
            seen_words |= leading_words
            if len(selected_sentences) >= top_n:
                break

    return selected_sentences

def generate_flashcards(text):
    """Build flashcards that map a short label to a summary sentence.

    The number of cards comes from ``determine_flashcard_count``, the
    sentences from ``textrank_summary`` and the candidate labels from
    ``extract_key_phrases``.

    Args:
        text: Raw input text.

    Returns:
        A dict mapping each label to its sentence. The label is the first
        not-yet-used key phrase found (case-insensitively) in the sentence,
        or the sentence text before its first ``'.'`` when no phrase matches
        or the matched phrase has fewer than two words.
    """
    card_budget = determine_flashcard_count(text)
    summary = textrank_summary(text, card_budget)
    candidates = extract_key_phrases(text)

    consumed = set()
    cards = {}

    for sentence in summary:
        lowered = sentence.lower()
        label = next(
            (
                phrase
                for phrase in candidates
                if phrase.lower() in lowered and phrase not in consumed
            ),
            None,
        )
        # A matched phrase is marked as used even if it is rejected below.
        if label is not None:
            consumed.add(label)

        # Single-word (or missing) labels are not meaningful enough; fall
        # back to the leading part of the sentence itself.
        if not label or len(label.split()) < 2:
            label = sentence.split('.')[0]

        cards[label] = sentence

    return cards

# Read the source text from the user.
input_text = input("Enter your text to generate flashcards:\n")

# Turn the text into a {label: sentence} mapping.
flashcards = generate_flashcards(input_text)

# Show only the sentence side of each card, separated by blank lines.
print("\n📚 Generated Flashcards:\n")
for answer in flashcards.values():
    print(answer, end="\n\n")



📚 Generated Flashcards:

It is the only instance in commercial nuclear power history where radiation-related fatalities occurred.[9][10] As of 2005, 6000 cases of childhood thyroid cancer occurred within the affected populations, "a large fraction" being attributed to the disaster.[11] The United Nations Scientific Committee on the Effects of Atomic Radiation estimates fewer than 100 deaths have resulted from the fallout.[12] Predictions of the eventual total death toll vary; a 2006 World Health Organization study projected 9,000 cancer-related fatalities in Ukraine, Belarus, and Russia.[13]  Pripyat was abandoned and replaced by the purpose-built city of Slavutych.

In case of a total power loss, each of Chernobyl's reactors had three backup diesel generators, but they took 60–75 seconds to reach full load and generate the 5.5 MW needed to run one main pump.[18]: 15  Special counterweights on each pump provided coolant via inertia to bridge the gap to generator startup.[19][20]

RBMK