In [24]:
!pip install sentence-transformers
!pip install sentence-transformers spacy scikit-learn nltk
!python -m nltk.downloader punkt stopwords
!python -m spacy download en_core_web_sm


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m44.1 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [25]:
import nltk
import numpy as np
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
from sentence_transformers import SentenceTransformer
from sklearn.cluster import KMeans
from collections import Counter
import spacy

In [26]:
# Setup: make sure the NLTK resources used by the tokenizers and the
# stopword list are available locally.
for _nltk_resource in ("punkt", "stopwords"):
    nltk.download(_nltk_resource)

# English stopwords as a set for fast membership tests.
stop_words = set(stopwords.words("english"))
# spaCy pipeline used by extract_svo.
nlp = spacy.load("en_core_web_sm")

# Load small semantic transformer
sim_model = SentenceTransformer("all-MiniLM-L6-v2")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [19]:
from sentence_transformers import SentenceTransformer
# sim_model is already created in the setup cell; instantiating it again here
# only repeated the model load, so this cell just defines the helper.

def embed(sentences):
    """Encode ``sentences`` with the shared ``sim_model``.

    Args:
        sentences: A sentence or list of sentences, passed straight to
            ``sim_model.encode``.

    Returns:
        Whatever ``sim_model.encode`` returns for the input (the embeddings).
    """
    return sim_model.encode(sentences)


In [51]:
# --- Utility Functions ---
def estimate_token_count(text):
    """Roughly estimate the token count of ``text``.

    The number of NLTK word tokens is scaled by 1.3 and truncated to an int.
    """
    word_total = len(word_tokenize(text))
    scaled = word_total * 1.3
    return int(scaled)

def cosine_sim(a, b):
    """Cosine similarity of two vectors.

    Args:
        a: First vector (array-like accepted by ``np.dot`` / ``np.linalg.norm``).
        b: Second vector, same length as ``a``.

    Returns:
        ``dot(a, b) / (|a| * |b|)``, or ``0.0`` when either vector has zero
        norm (the ratio is undefined there and would otherwise be NaN with a
        RuntimeWarning).
    """
    denom = np.linalg.norm(a) * np.linalg.norm(b)
    if denom == 0:
        return 0.0
    return np.dot(a, b) / denom

def score_sentence(sentence, word_freq):
    """Score a sentence by summing the frequencies of its content words.

    Args:
        sentence: Sentence text to score.
        word_freq: Mapping from lower-cased word to its frequency; words that
            are missing contribute 0.

    Returns:
        The summed frequency of every alphabetic, non-stopword token.
    """
    total = 0
    for token in word_tokenize(sentence):
        word = token.lower()
        # Compare the lower-cased form: the stopword list is lower-case, so
        # testing the raw token let capitalised stopwords ("The") through.
        if token.isalpha() and word not in stop_words:
            total += word_freq.get(word, 0)
    return total

def clean_sentence(s):
    """Strip surrounding whitespace and upper-case the first character.

    The rest of the sentence is left exactly as it was; an empty (or
    whitespace-only) input yields an empty string.
    """
    trimmed = s.strip()
    if not trimmed:
        return trimmed
    head, tail = trimmed[:1], trimmed[1:]
    return head.upper() + tail

def extract_svo(sentence):
    """Extract a rough (subject, verb, object) triple from ``sentence``.

    The sentence is parsed with the spaCy pipeline ``nlp``. Each token fills
    at most one slot, checked in this order: subject dependency, VERB
    part-of-speech, object-like dependency. When several tokens match a slot
    the last one wins; a slot with no match stays ``None``.

    Returns:
        Tuple ``(subj, verb, obj)`` of token texts or ``None``.
    """
    subject_deps = ("nsubj", "nsubjpass")
    object_deps = ("dobj", "pobj", "attr")
    slots = {"subj": None, "verb": None, "obj": None}

    for tok in nlp(sentence):
        if tok.dep_ in subject_deps:
            slot = "subj"
        elif tok.pos_ == "VERB":
            slot = "verb"
        elif tok.dep_ in object_deps:
            slot = "obj"
        else:
            continue
        slots[slot] = tok.text

    return slots["subj"], slots["verb"], slots["obj"]

# --- Unified Summarizer ---
def summarize(text, top_n=7, chunk_sent_limit=15, overlap=2, token_threshold=512):
    """Produce an extractive summary of ``text``.

    Short inputs are summarized directly: sentences are embedded, clustered
    with KMeans, the sentence nearest each centroid becomes a candidate, the
    candidates are ranked by content-word frequency, and the best ``top_n``
    are returned in document order. Long inputs are split into overlapping
    sentence chunks, each chunk is summarized, and the combined result is
    condensed again with a smaller ``top_n``.

    Args:
        text: The text to summarize.
        top_n: Maximum number of sentences kept per summarization pass.
        chunk_sent_limit: Number of sentences per chunk for long inputs.
        overlap: Number of sentences shared by consecutive chunks.
        token_threshold: Estimated token count above which chunking is used
            (chunking is also used above ``2 * chunk_sent_limit`` sentences).

    Returns:
        The selected sentences joined by single spaces, or ``""`` when the
        text contains no sentences.
    """
    sentences = sent_tokenize(text)
    if not sentences:
        return ""
    picked = _condense_sentences(
        sentences, estimate_token_count(text), top_n,
        chunk_sent_limit, overlap, token_threshold,
    )
    return " ".join(picked)


def _condense_sentences(sentences, token_count, top_n, chunk_sent_limit, overlap, token_threshold):
    """Chunk-and-reduce driver: shrink ``sentences`` until they can be clustered directly."""
    too_long = token_count > token_threshold or len(sentences) > chunk_sent_limit * 2
    if too_long:
        size = max(1, chunk_sent_limit)
        # A stride below 1 (overlap >= chunk size) would never advance.
        stride = max(1, size - overlap)

        partial = []
        for start in range(0, len(sentences), stride):
            chunk = sentences[start:start + size]
            chunk_tokens = estimate_token_count(" ".join(chunk))
            # Chunks are summarized directly: re-chunking a chunk that already
            # fits in `size` sentences would reproduce itself and never end.
            partial.extend(_select_key_sentences(chunk, chunk_tokens, top_n))
            if start + size >= len(sentences):
                # This chunk reached the end of the text; a further chunk
                # would only repeat its tail.
                break

        # Overlapping chunks can select the same sentence twice.
        partial = list(dict.fromkeys(partial))

        if len(partial) < len(sentences):
            # Strictly fewer sentences than before, so the recursion terminates.
            return _condense_sentences(
                partial,
                estimate_token_count(" ".join(partial)),
                max(3, top_n // 2),  # Shrink top_n on recursion
                chunk_sent_limit, overlap, token_threshold,
            )
        # Chunking removed nothing; fall through to a single direct pass
        # instead of recursing on the same input.

    return _select_key_sentences(sentences, token_count, top_n)


def _select_key_sentences(sentences, token_count, top_n):
    """Return up to ``top_n`` cleaned sentences chosen by clustering their embeddings."""
    n = len(sentences)
    if n == 0:
        return []

    embeddings = np.asarray(sim_model.encode(sentences))

    # About one cluster per 60 estimated tokens, at least 3, never more than n.
    est_k = min(n, max(3, token_count // 60))
    # Try a few cluster counts from the estimate upwards. For short inputs the
    # range is empty, so fall back to est_k alone instead of indexing an
    # empty list.
    candidate_ks = list(range(est_k, min(est_k + 4, n))) or [est_k]

    models = [KMeans(n_clusters=k, random_state=0).fit(embeddings) for k in candidate_ks]
    deltas = np.diff([model.inertia_ for model in models])
    # Keep the k just before the steepest drop in inertia; the fitted model is
    # reused rather than refitted (same data and seed give the same result).
    best = models[int(np.argmin(deltas))] if len(deltas) > 0 else models[0]

    # Nearest sentence to each centroid; dict.fromkeys drops repeats when two
    # centroids resolve to the same sentence.
    candidates = list(dict.fromkeys(
        int(np.argmin(np.linalg.norm(embeddings - center, axis=1)))
        for center in best.cluster_centers_
    ))

    # Frequencies of content words; the stopword test uses the lower-cased
    # token so capitalised stopwords are excluded too.
    word_freq = Counter(
        w.lower()
        for sent in sentences
        for w in word_tokenize(sent)
        if w.isalpha() and w.lower() not in stop_words
    )
    ranked = sorted(
        candidates,
        key=lambda i: score_sentence(sentences[i], word_freq),
        reverse=True,
    )
    # Restore document order for the sentences that made the cut.
    return [clean_sentence(sentences[i]) for i in sorted(ranked[:top_n])]


In [54]:
text = """There are times when the night sky glows with bands of color. The bands may begin as cloud shapes and then spread into a great arc across the entire sky. They may fall in folds like a curtain drawn across the heavens. The lights usually grow brighter, then suddenly dim. During this time the sky glows with pale yellow, pink, green, violet, blue, and red. These lights are called the Aurora Borealis. Some people call them the Northern Lights. Scientists have been watching them for hundreds of years. They are not quite sure what causes them. In ancient times people were afraid of the Lights. They imagined that they saw fiery dragons in the sky. Some even concluded that the heavens were on fire."""

# Summarize the short sample passage with the default settings and show the result.
output = summarize(text)
print(output)


There are times when the night sky glows with bands of color. They may fall in folds like a curtain drawn across the heavens. Some people call them the Northern Lights.


In [56]:
long_input = """Narration means the art of storytelling, and the purpose of narrative writing is to tell stories. Any time you tell a story to a friend or family member about an event or incident in your day, you engage in a form of narration. In addition, a narrative can be factual or fictional. A factual story is one that is based on, and tries to be faithful to, actual events as they unfolded in real life. A fictional story is a made-up, or imagined, story; the writer of a fictional story can create characters and events as he or she sees fit.The big distinction between factual and fictional narratives is based on a writer’s purpose. The writers of factual stories try to recount events as they actually happened, but writers of fictional stories can depart from real people and events because the writers’ intents are not to retell a real-life event. Biographies and memoirs are examples of factual stories, whereas novels and short stories are examples of fictional stories.Because the line between fact and fiction can often blur, it is helpful to understand what your purpose is from the beginning. Is it important that you recount history, either your own or someone else’s? Or does your interest lie in reshaping the world in your own image—either how you would like to see it or how you imagine it could be? Your answers will go a long way in shaping the stories you tell. Ultimately, whether the story is fact or fiction, narrative writing tries to relay a series of events in an emotionally engaging way. You want your audience to be moved by your story, which could mean through laughter, sympathy, fear, anger, and so on. The more clearly you tell your story, the more emotionally engaged your audience is likely to be. When writing a narrative essay, start by asking yourself if you want to write a factual or fictional story. Then freewrite, brainstorm, or mindmap about topics that are of general interest to you. 
For more information about pre-writing, review the materials in “My Writing Process – Prewriting and Draft.” Once you have a general idea of what you will be writing about, you should sketch out the major events of the story that will compose your plot. Typically, these events will be revealed chronologically and climax at a central conflict that must be resolved by the end of the story. The use of strong details is crucial as you describe the events and characters in your narrative. You want the reader to emotionally engage with the world that you create in writing. To create strong details, keep the human senses in mind. You want your reader to be immersed in the world that you create, so focus on details related to sight, sound, smell, taste, and touch as you describe people, places, and events in your narrative. As always, it is important to start with a strong introduction to hook your reader into wanting to read more. Try opening the essay with an event that is interesting to introduce the story and get it going. Finally, your conclusion should help resolve the central conflict of the story and impress upon your reader the ultimate theme of the piece."""

# Summarize the longer passage with the default settings and show the result.
output = summarize(long_input)
print(output)


Narration means the art of storytelling, and the purpose of narrative writing is to tell stories. A factual story is one that is based on, and tries to be faithful to, actual events as they unfolded in real life. Ultimately, whether the story is fact or fiction, narrative writing tries to relay a series of events in an emotionally engaging way. You want your audience to be moved by your story, which could mean through laughter, sympathy, fear, anger, and so on. When writing a narrative essay, start by asking yourself if you want to write a factual or fictional story. Typically, these events will be revealed chronologically and climax at a central conflict that must be resolved by the end of the story. The use of strong details is crucial as you describe the events and characters in your narrative.
