In [11]:
import spacy
import numpy as np
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

def preprocess_text(text):
    """Segment *text* into sentences with spaCy's ``en_core_web_sm`` pipeline.

    Each sentence is stripped of surrounding whitespace; sentences that are
    empty after stripping are dropped. Returns a list of strings in document
    order.
    """
    pipeline = spacy.load("en_core_web_sm")
    cleaned = []
    for span in pipeline(text).sents:
        stripped = span.text.strip()
        if stripped:
            cleaned.append(stripped)
    return cleaned

def extract_key_phrases(text):
    """Extract candidate key phrases from *text*.

    Candidates are the named entities followed by the noun chunks found by
    spaCy's ``en_core_web_sm`` pipeline, de-duplicated by exact text.

    Returns a list of phrase strings ordered by descending word count.
    Phrases with the same word count keep their first-seen order (entities
    before noun chunks, each in document order), so the result is the same
    on every run for the same input.
    """
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)

    # A dict keeps insertion order while de-duplicating. A set would make the
    # order of equal-length phrases depend on string hash randomisation.
    ordered_phrases = dict.fromkeys(ent.text for ent in doc.ents)
    ordered_phrases.update(dict.fromkeys(chunk.text for chunk in doc.noun_chunks))

    # sorted() is stable (also with reverse=True), so ties keep first-seen order.
    # Longer phrases come first because they tend to be more specific.
    return sorted(ordered_phrases, key=lambda phrase: len(phrase.split()), reverse=True)

def determine_flashcard_count(text):
    """Return how many flashcards to generate for *text*.

    The count grows with the number of whitespace-separated words:
    fewer than 100 -> 3, fewer than 300 -> 5, fewer than 600 -> 7,
    otherwise 10.
    """
    n_words = len(text.split())
    # (exclusive upper bound on word count, flashcards to produce)
    tiers = ((100, 3), (300, 5), (600, 7))
    for upper_bound, cards in tiers:
        if n_words < upper_bound:
            return cards
    return 10

def compute_sentence_scores(sentences):
    """Score each sentence by the sum of its TF-IDF term weights.

    Args:
        sentences: list of sentence strings.

    Returns:
        A 1-D NumPy array with one score per sentence, in input order. If no
        sentence contributes any vocabulary (empty list, or only English stop
        words / punctuation), every score is 0.0 instead of raising.

    Raises:
        ValueError: re-raised from scikit-learn for any reason other than an
            empty vocabulary.
    """
    vectorizer = TfidfVectorizer(stop_words='english')
    try:
        tfidf_matrix = vectorizer.fit_transform(sentences)
    except ValueError as err:
        # scikit-learn refuses to fit when nothing survives tokenisation and
        # stop-word removal; treat that as "no sentence carries weight".
        if "empty vocabulary" not in str(err):
            raise
        return np.zeros(len(sentences))
    # Row sums come back as an (n, 1) matrix; flatten to shape (n,).
    return np.asarray(tfidf_matrix.sum(axis=1)).ravel()

def textrank_summary(text, top_n):
    """Pick up to *top_n* key sentences using TextRank plus TF-IDF scores.

    Sentences are embedded with the ``all-MiniLM-L6-v2`` model, linked in a
    graph weighted by cosine similarity, and ranked with PageRank. Each
    sentence's PageRank score is added to its TF-IDF score from
    ``compute_sentence_scores`` and sentences are visited from highest to
    lowest combined score.

    A sentence is skipped when any of its first five lower-cased,
    whitespace-separated tokens also occurs among the first five tokens of a
    sentence that was already selected.

    Args:
        text: raw input text.
        top_n: maximum number of sentences to return.

    Returns:
        A list of at most *top_n* sentence strings, best first. Empty when
        the text yields no sentences or *top_n* is not positive.
    """
    if top_n <= 0:
        # The selection loop appends before checking the limit, so without
        # this guard a non-positive limit would still yield one sentence.
        return []

    sentences = preprocess_text(text)
    if not sentences:
        return []

    model = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = model.encode(sentences)

    # Cosine similarity can be negative, and negative edge weights are not
    # meaningful transition weights for PageRank, so clamp at zero.
    similarity_matrix = np.clip(cosine_similarity(embeddings), 0.0, None)
    # Drop self-similarity so a sentence does not vote for itself.
    np.fill_diagonal(similarity_matrix, 0.0)

    graph = nx.from_numpy_array(similarity_matrix)
    textrank_scores = nx.pagerank(graph)

    tfidf_scores = compute_sentence_scores(sentences)

    # NOTE(review): the two scores are added unscaled; confirm that the
    # intended balance between TextRank and TF-IDF is really 1:1.
    combined_scores = [
        textrank_scores[i] + tfidf_scores[i] for i in range(len(sentences))
    ]
    ranked_indices = sorted(
        range(len(sentences)), key=combined_scores.__getitem__, reverse=True
    )

    selected_sentences = []
    seen_words = set()  # leading tokens of every sentence selected so far

    for i in ranked_indices:
        sentence = sentences[i]
        leading_words = set(sentence.lower().split()[:5])
        if leading_words.isdisjoint(seen_words):
            selected_sentences.append(sentence)
            seen_words |= leading_words
        if len(selected_sentences) >= top_n:
            break

    return selected_sentences

def generate_flashcards(text):
    """Generate question/answer flashcards from *text*.

    The number of cards is chosen by ``determine_flashcard_count``, the
    answer sentences by ``textrank_summary`` and the question subjects by
    ``extract_key_phrases``. For each selected sentence the first not yet
    used key phrase contained in it (case-insensitive) becomes the subject;
    if none is found, or the phrase has fewer than two words, the subject
    falls back to "this concept".

    Args:
        text: raw input text.

    Returns:
        A dict mapping each question string to its answer string, in
        selection order. Questions that would collide (typically several
        "What is this concept?" fallbacks) get a numeric suffix such as
        " (2)" so that no flashcard overwrites another.
    """
    import re  # local import: only needed for trimming the answer text

    num_flashcards = determine_flashcard_count(text)
    key_sentences = textrank_summary(text, num_flashcards)
    if not key_sentences:
        # Nothing to build cards from; skip the key-phrase extraction.
        return {}
    key_phrases = extract_key_phrases(text)

    # A period ends the answer unless it sits between two digits ("3.5").
    sentence_end = re.compile(r'(?<!\d)\.|\.(?!\d)')

    flashcards = {}
    used_phrases = set()

    for sentence in key_sentences:
        sentence_lower = sentence.lower()
        relevant_phrase = None
        for phrase in key_phrases:
            if phrase not in used_phrases and phrase.lower() in sentence_lower:
                relevant_phrase = phrase
                used_phrases.add(phrase)
                break

        if not relevant_phrase or len(relevant_phrase.split()) < 2:  # Ensure meaningful questions
            relevant_phrase = "this concept"

        base_question = f"What is {relevant_phrase}?"
        question = base_question
        duplicate_number = 2
        # Keep every card: disambiguate instead of overwriting an earlier one.
        while question in flashcards:
            question = f"{base_question} ({duplicate_number})"
            duplicate_number += 1

        # Keep only the text up to the first sentence-ending period.
        answer = sentence_end.split(sentence, maxsplit=1)[0]
        flashcards[question] = answer

    return flashcards

# Read the source passage from the user.
input_text = input("Enter your text to generate flashcards:\n")

# Build the question -> answer mapping for that passage.
flashcards = generate_flashcards(input_text)

# Show a header, then one "Q:/A:" pair per flashcard, each followed by a blank line.
print("\n\U0001F4DA Generated Flashcards:\n")
for question in flashcards:
    answer = flashcards[question]
    print(f"Q: {question}\nA: {answer}\n")


Enter your text to generate flashcards:
Chandrayaan-3 (CHUN-drə-YAHN /ˌtʃʌndrəˈjɑːn/) is the third mission in the Chandrayaan programme, a series of lunar-exploration missions developed by the Indian Space Research Organisation (ISRO).[11] The mission consists of a Vikram lunar lander and a Pragyan lunar rover. It was launched on July 14, 2023, at 2:35 PM IST (14:35 Hrs IST) from the Satish Dhawan Space Centre (SDSC) in Sriharikota. The spacecraft entered lunar orbit on 5 August, and India became the first country to touch down near the lunar south pole, at 69°S, the southernmost lunar landing [12] on 23 August 2023 at 18:04 IST (12:33 UTC), ISRO became the first agency to land near the south pole of the moon in its first attempt and overall the fourth space agency to successfully land on the Moon, after USSR, NASA and the CNSA.[13][note 1]  The spacecraft entered lunar orbit on 5 August, and became the first lander to touch down near the lunar south pole[12] on 23 August at 18:03 IST 