Extractive Summarizer

In [4]:
import nltk
from nltk.corpus import stopwords
from nltk.tokenize import sent_tokenize, word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np

# Download the NLTK resources used below. 'punkt' holds the tokenizer models
# for older NLTK releases; NLTK >= 3.8.2 loads 'punkt_tab' instead, so both
# are requested to keep sent_tokenize/word_tokenize working on either version.
for _resource in ('punkt', 'punkt_tab', 'stopwords'):
    nltk.download(_resource)

def text_summarizer(text, num_sentences=3):
    """
    Summarizes the given text using extractive summarization.

    Each sentence is scored by the sum of the TF-IDF weights of its
    alphanumeric, non-stop-word tokens, and the highest-scoring sentences
    are returned.

    Parameters:
    - text (str): The input text to summarize.
    - num_sentences (int): The number of sentences to include in the summary.
      A value of zero or less yields an empty summary.

    Returns:
    - str: The selected sentences joined by single spaces, ordered from
      highest to lowest score (not in document order). The input text is
      returned unchanged when it has no more sentences than requested.
    """
    # A non-positive count must not reach the slice at the end: a negative
    # value would silently drop sentences from the tail of the ranking.
    if num_sentences <= 0:
        return ""

    # Tokenize the text into sentences
    sentences = sent_tokenize(text)

    if len(sentences) <= num_sentences:
        return text  # Nothing to cut: the text is already short enough

    # Preprocess: lowercase, keep alphanumeric tokens, drop stop words
    stop_words = set(stopwords.words('english'))
    preprocessed_sentences = [
        ' '.join(
            word.lower()
            for word in word_tokenize(sentence)
            if word.isalnum() and word.lower() not in stop_words
        )
        for sentence in sentences
    ]

    # Compute TF-IDF scores for sentences
    try:
        tfidf_matrix = TfidfVectorizer().fit_transform(preprocessed_sentences)
    except ValueError:
        # The vectorizer refuses an empty vocabulary (e.g. every sentence
        # consisted only of stop words or one-character tokens). There is
        # nothing to rank on, so fall back to the leading sentences.
        return ' '.join(sentences[:num_sentences])

    # Calculate sentence scores by summing TF-IDF values per row
    sentence_scores = np.asarray(tfidf_matrix.sum(axis=1)).ravel()

    # Rank best-first; a stable sort keeps earlier sentences ahead on ties
    # so the result is deterministic.
    ranking = np.argsort(-sentence_scores, kind='stable')

    # Select the top N sentences for the summary
    return ' '.join(sentences[i] for i in ranking[:num_sentences])

if __name__ == "__main__":
    # Sample passage used to demonstrate the extractive summarizer
    input_text = """Climate change refers to significant changes in global temperatures 
        and weather patterns over time. While climate change is a natural
        phenomenon, scientific evidence shows that human activities, particularly since the Industrial Revolution, have accelerated its pace. The burning of fossil fuels like coal, oil, and gas for energy has significantly increased the concentration of greenhouse gases in the atmosphere, leading to global warming. Deforestation,
        industrial processes, and some agricultural practices also 
        contribute to the problem. The impacts of climate change are 
        widespread and severe, including rising sea levels, more frequent and 
        intense heatwaves, changes in precipitation patterns, and increased 
        risks of extreme weather events such as hurricanes and floods.
        Addressing climate change requires coordinated global efforts to reduce greenhouse
        gas emissions, transition to renewable energy sources, and implement
        sustainable practices across industries."""

    # Summarize with the default number of sentences
    summary = text_summarizer(input_text)

    # Show the source passage followed by its summary
    print("Original Text:\n", input_text)
    print("\nSummarized Text:\n", summary)


[nltk_data] Downloading package punkt to /usr/share/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /usr/share/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Original Text:
 Climate change refers to significant changes in global temperatures 
        and weather patterns over time. While climate change is a natural
        phenomenon, scientific evidence shows that human activities, particularly since the Industrial Revolution, have accelerated its pace. The burning of fossil fuels like coal, oil, and gas for energy has significantly increased the concentration of greenhouse gases in the atmosphere, leading to global warming. Deforestation,
        industrial processes, and some agricultural practices also 
        contribute to the problem. The impacts of climate change are 
        widespread and severe, including rising sea levels, more frequent and 
        intense heatwaves, changes in precipitation 

Abstractive Summarizer

In [5]:
from transformers import T5ForConditionalGeneration, T5Tokenizer
import torch

# Loaded (tokenizer, model) pairs keyed by model name, so repeated calls do
# not reload the weights on every invocation.
_T5_CACHE = {}


def _load_t5(model_name):
    """Return the (tokenizer, model) pair for model_name, loading it on first use."""
    if model_name not in _T5_CACHE:
        _T5_CACHE[model_name] = (
            T5Tokenizer.from_pretrained(model_name),
            T5ForConditionalGeneration.from_pretrained(model_name),
        )
    return _T5_CACHE[model_name]


def abstractive_text_summarizer(text, model_name="t5-small", max_length=150, min_length=30):
    """
    Summarizes the given text using an abstractive summarization model.

    The text is prefixed with "summarize: ", encoded (truncated to 512
    tokens), and decoded from a 4-beam search over the T5 model's output.

    Parameters:
    - text (str): The input text to summarize.
    - model_name (str): The pre-trained model to use for summarization. Default is "t5-small".
    - max_length (int): Maximum length of the summary, as passed to model.generate.
    - min_length (int): Minimum length of the summary, as passed to model.generate.

    Returns:
    - str: The summarized text, or an empty string when the input is blank.
    """
    # Blank input has nothing to summarize; skip loading and running the
    # model, which would otherwise generate text from the bare task prefix.
    stripped_text = text.strip()
    if not stripped_text:
        return ""

    # Load the model and tokenizer (cached after the first call per model)
    tokenizer, model = _load_t5(model_name)

    # Preprocess the input text for T5: the task prefix selects summarization
    input_text = "summarize: " + stripped_text
    input_ids = tokenizer.encode(input_text, return_tensors="pt", max_length=512, truncation=True)

    # Generate the summary
    summary_ids = model.generate(
        input_ids,
        max_length=max_length,
        min_length=min_length,
        length_penalty=2.0,
        num_beams=4,
        early_stopping=True
    )

    # Decode and return the summary
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

if __name__ == "__main__":
    # Sample passage used to demonstrate the abstractive summarizer
    input_text = """Climate change refers to significant changes in global temperatures 
        and weather patterns over time. While climate change is a natural
        phenomenon, scientific evidence shows that human activities, particularly since the Industrial Revolution, have accelerated its pace. The burning of fossil fuels like coal, oil, and gas for energy has significantly increased the concentration of greenhouse gases in the atmosphere, leading to global warming. Deforestation,
        industrial processes, and some agricultural practices also 
        contribute to the problem. The impacts of climate change are 
        widespread and severe, including rising sea levels, more frequent and 
        intense heatwaves, changes in precipitation patterns, and increased 
        risks of extreme weather events such as hurricanes and floods.
        Addressing climate change requires coordinated global efforts to reduce greenhouse
        gas emissions, transition to renewable energy sources, and implement
        sustainable practices across industries."""

    # Summarize with the default model and length limits
    summary = abstractive_text_summarizer(input_text)

    # Show the source passage followed by its summary
    print("Original Text:\n", input_text)
    print("\nSummarized Text:\n", summary)


tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

Original Text:
 Climate change refers to significant changes in global temperatures 
        and weather patterns over time. While climate change is a natural
        phenomenon, scientific evidence shows that human activities, particularly since the Industrial Revolution, have accelerated its pace. The burning of fossil fuels like coal, oil, and gas for energy has significantly increased the concentration of greenhouse gases in the atmosphere, leading to global warming. Deforestation,
        industrial processes, and some agricultural practices also 
        contribute to the problem. The impacts of climate change are 
        widespread and severe, including rising sea levels, more frequent and 
        intense heatwaves, changes in precipitation patterns, and increased 
        risks of extreme weather events such as hurricanes and floods.
        Addressing climate change requires coordinated global efforts to reduce greenhouse
        gas emissions, transition to renewable energy