In [None]:
!pip install transformers datasets rouge-score nltk spacy scikit-learn
!python -m nltk.downloader punkt
!python -m spacy download en_core_web_sm


In [None]:
from datasets import load_dataset

# Pull CNN/DailyMail (config 3.0.0) and keep the first training example
# around as the running sample for the cells below.
dataset = load_dataset("cnn_dailymail", "3.0.0")
sample = dataset["train"][0]

# print() with two arguments joins them with a single space, so the f-strings
# keep that space right after the newline.
print(f"Article:\n {sample['article']}")
print(f"\nReference Summary:\n {sample['highlights']}")


In [None]:
import re
import nltk
from nltk.tokenize import sent_tokenize

def preprocess_text(text):
    """Clean raw article text and split it into sentences.

    Square-bracketed spans (e.g. ``[1]``) are removed first, then every run
    of whitespace is collapsed to a single space and the ends are trimmed.
    Stripping brackets *before* collapsing whitespace means that removing a
    span from the middle of a sentence does not leave a doubled space behind.

    Args:
        text: The raw text to clean.

    Returns:
        The list of sentences produced by ``sent_tokenize`` on the cleaned
        text.
    """
    without_brackets = re.sub(r'\[[^\]]*\]', '', text)
    normalized = re.sub(r'\s+', ' ', without_brackets).strip()
    return sent_tokenize(normalized)


In [None]:
import spacy
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the spaCy "en_core_web_sm" pipeline once when this cell runs.
# NOTE(review): none of the cells shown here reference `nlp` afterwards —
# confirm it is still needed before keeping the load cost.
nlp = spacy.load("en_core_web_sm")

def extractive_summary(text, num_sentences=3):
    """Build an extractive summary from the most central sentences of ``text``.

    Each sentence is turned into a TF-IDF vector; a sentence's score is the
    sum of its cosine similarities to every sentence (itself included). The
    ``num_sentences`` best-scoring sentences are joined with single spaces,
    highest score first.

    Args:
        text: Raw document text; it is cleaned and split by
            ``preprocess_text``.
        num_sentences: Maximum number of sentences to keep. Values below 1
            give an empty summary.

    Returns:
        The selected sentences as one string, or ``''`` when there is
        nothing to select.
    """
    # Guard first: a slice of ``[-0:]`` would select *every* sentence, and a
    # negative count would slice from the wrong end.
    if num_sentences < 1:
        return ''

    sentences = preprocess_text(text)
    if not sentences:
        # TfidfVectorizer raises when fitted on an empty list of documents.
        return ''

    tfidf = TfidfVectorizer().fit_transform(sentences)
    similarity_matrix = cosine_similarity(tfidf, tfidf)
    scores = similarity_matrix.sum(axis=1)
    # argsort is ascending, so take the tail and reverse it for best-first.
    top_indices = np.argsort(scores, axis=0)[-num_sentences:][::-1]
    return ' '.join(sentences[i] for i in top_indices)

# Example usage: summarise the sample article with the extractive method.
article_text = sample["article"]
print(f"Extractive Summary:\n {extractive_summary(article_text)}")


In [None]:
from transformers import pipeline

# Create the summarization pipeline (facebook/bart-large-cnn checkpoint) once
# at cell level; abstractive_summary() below calls this shared object.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def abstractive_summary(text, max_length=130, min_length=30):
    """Generate an abstractive summary of ``text`` with the shared pipeline.

    Args:
        text: The document to summarize.
        max_length: Upper bound on the generated summary length, forwarded
            to the pipeline.
        min_length: Lower bound on the generated summary length, forwarded
            to the pipeline.

    Returns:
        The generated summary string, or ``''`` when ``text`` is empty or
        only whitespace.
    """
    # Nothing to summarize: skip the model call entirely.
    if not text or not text.strip():
        return ''

    summary = summarizer(
        text,
        max_length=max_length,
        min_length=min_length,
        do_sample=False,
        # Have the tokenizer cut over-long inputs down to the model's input
        # limit instead of failing inside the model on long articles.
        truncation=True,
    )
    return summary[0]['summary_text']

# Example usage: summarise the first 1024 characters of the sample article.
print(f"Abstractive Summary:\n {abstractive_summary(article_text[:1024])}")


In [None]:
# `datasets.load_metric` was deprecated and later removed from the `datasets`
# package, so importing it fails on current releases. Score directly with the
# `rouge-score` package that the install cell already pulls in.
from rouge_score import rouge_scorer

# Same ROUGE variants and stemming setting the old "rouge" metric used by
# default.
rouge = rouge_scorer.RougeScorer(
    ["rouge1", "rouge2", "rougeL", "rougeLsum"],
    use_stemmer=False,
)

generated = abstractive_summary(article_text[:1024])
reference = sample['highlights']

# RougeScorer.score takes (target, prediction) and returns, for each ROUGE
# type, a Score(precision, recall, fmeasure) tuple.
results = rouge.score(reference, generated)
print("ROUGE Evaluation:\n", results)


In [None]:
## ✅ Conclusion

This project demonstrates two approaches to text summarization:

- **Extractive Summarization** selects important sentences using TF-IDF and cosine similarity.
- **Abstractive Summarization** uses a pre-trained deep learning model (BART) to generate human-like summaries.

Further enhancements can include:
- Fine-tuning models for better performance
- GUI/web app for real-world usage
