<a href="https://colab.research.google.com/github/Chinmay3775/Mini-Project-II/blob/main/flashcard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import spacy
import numpy as np
import networkx as nx
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

def preprocess_text(text):
    """Removes greetings, stopwords, and unnecessary parts while maintaining readability."""
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)
    sentences = [sent.text.strip() for sent in doc.sents]

    # Use lemmatization instead of direct stopword removal for better semantics
    stopwords = nlp.Defaults.stop_words
    processed_sentences = []

    for sent in sentences:
        tokens = [token.lemma_ for token in nlp(sent) if token.text.lower() not in stopwords and token.is_alpha]
        cleaned_sent = " ".join(tokens)
        if cleaned_sent:
            processed_sentences.append(cleaned_sent)

    return processed_sentences if processed_sentences else sentences  # Ensure we don't return an empty list

def textrank_summary(text, top_n=5):
    """Ranks sentences using a semantic similarity approach and extracts the most important ones."""
    sentences = preprocess_text(text)
    if not sentences:
        return []

    model = SentenceTransformer('all-MiniLM-L6-v2')  # Better semantic representation
    embeddings = model.encode(sentences)

    similarity_matrix = cosine_similarity(embeddings)
    graph = nx.from_numpy_array(similarity_matrix)
    scores = nx.pagerank(graph)
    ranked_sentences = sorted(((scores[i], s) for i, s in enumerate(sentences)), reverse=True)

    return [s[1] for s in ranked_sentences[:min(top_n, len(sentences))]]

def extract_flashcards(text, num_flashcards=5):
    """Generates flashcards with key points from any text, improving coherence."""
    key_sentences = textrank_summary(text, num_flashcards)
    flashcards = {f"Flashcard {i+1}": sentence.capitalize() for i, sentence in enumerate(key_sentences)}
    return flashcards

# Example usage
input_text = """SECURITY ATTACKS
Passive Attacks
Passive attacks are in the nature of eavesdropping on, or monitoring of, transmissions.
The goal of the opponent is to obtain information that is being transmitted. Two types
of passive attacks are the release of message content and traffic analysis. The release
of message contents is easily understood (Figure 1.2a).
A telephone conversation, an electronic mail message, and a transferred file may
contain sensitive or confidential information.
A second type of passive attack, traffic analysis, is subtler (Figure 1.2b). Suppose that
we had a way of masking the contents of messages or other information traffic so that
opponents, even if they captured the message, could not extract the information from
the message.
Passive attacks are very difficult to detect because they do not involve any alteration
of the data. Typically, the message traffic is sent and received in a normal fashion, and
neither the sender nor receiver is aware that a third party has read the messages or
observed the traffic pattern.
Active Attacks
Active attacks involve some modification of the data stream or the creation of a false
stream and can be subdivided into four categories: masquerade, replay, modification
of messages, and denial of service.
A masquerade takes place when one entity pretends to be a different entity (Figure
1.3a). A masquerade attack usually includes one of the other forms of active attack.
For example, authentication sequences can be captured and replayed after a valid
authentication sequence has taken place, thus enabling an authorized entity with few
privileges to obtain extra privileges by impersonating an entity that has those
privileges.
Replay involves the passive capture of a data unit and its subsequent retransmission
to produce an unauthorized effect (Figure 1.3b).
Modification of messages simply means that some portion of a legitimate message is
altered, or that messages are delayed or reordered, to produce an unauthorized effect
(Figure 1.3c). For example, a message meaning “Allow John Smith to read confidential
file accounts” is modified to mean “Allow Fred Brown to read confidential file
accounts.”
The denial of service prevents or inhibits the normal use or management of
communications facilities (Figure 1.3d). This attack may have a specific target
Active attacks present the opposite characteristics of passive attacks. Whereas passive
attacks are difficult to detect, measures are available to prevent their success.
"""
flashcards = extract_flashcards(input_text)
for key, value in flashcards.items():
    print(f"{key}: {value}")

Flashcard 1: Type passive attack release message content traffic analysis
Flashcard 2: Active attacks active attack involve modification data stream creation false stream subdivide category masquerade replay modification message denial service
Flashcard 3: Second type passive attack traffic analysis subtle figure
Flashcard 4: Passive attack difficult detect measure available prevent success
Flashcard 5: Suppose way mask content message information traffic opponent capture message extract information message
