Start

In [18]:
!pip install nltk spacy gensim scikit-learn numpy networkx sumy pyLDAvis rouge-score datasets
!python -m spacy download en_core_web_sm

Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


In [19]:
import nltk
import pandas as pd
from datasets import load_dataset

# Download required NLTK data
nltk.download('punkt_tab')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('omw-1.4')

# Load CNN/DailyMail dataset
dataset = load_dataset("cnn_dailymail", "3.0.0")

# Create development sample
def create_development_sample(dataset, sample_size=50):
    train_sample = dataset['train'].select(range(sample_size))
    articles = train_sample['article']
    highlights = train_sample['highlights']
    ids = train_sample['id']
    return articles, highlights, ids

# Create sample
articles, highlights, ids = create_development_sample(dataset, 50)
print(f"Created development sample with {len(articles)} articles")

[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Created development sample with 50 articles


In [20]:
import re
import numpy as np
from nltk.tokenize import sent_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

class RobustTextPreprocessor:
    def __init__(self):
        try:
            self.stop_words = set(stopwords.words('english'))
            self.lemmatizer = WordNetLemmatizer()
        except:
            self.stop_words = set()
            self.lemmatizer = WordNetLemmatizer()

    def clean_text(self, text):
        if not isinstance(text, str):
            return ""
        try:
            text = re.sub(r'\s+', ' ', text)
            text = re.sub(r'[^a-zA-Z\s\.\!\?]', '', text)
            text = text.lower()
            return text.strip()
        except:
            return ""

    def preprocess_sentence(self, sentence):
        try:
            cleaned = self.clean_text(sentence)
            if not cleaned:
                return ""
            words = cleaned.split()
            words = [self.lemmatizer.lemmatize(word) for word in words
                    if word not in self.stop_words and len(word) > 2]
            return ' '.join(words)
        except:
            return ""

    def prepare_single_article(self, article, summary):
        try:
            original_sentences = sent_tokenize(article)
            preprocessed_sentences = []
            valid_original_sentences = []

            for sent in original_sentences:
                processed_sent = self.preprocess_sentence(sent)
                if processed_sent and len(processed_sent.split()) >= 3:
                    preprocessed_sentences.append(processed_sent)
                    valid_original_sentences.append(sent)

            if len(valid_original_sentences) >= 3:
                return {
                    'original_sentences': valid_original_sentences,
                    'preprocessed_sentences': preprocessed_sentences,
                    'reference_summary': summary,
                    'num_sentences': len(valid_original_sentences)
                }
            else:
                return None
        except Exception as e:
            print(f"Error processing article: {e}")
            return None

# Initialize preprocessor and process data
preprocessor = RobustTextPreprocessor()
processed_data = []

for i, (article, highlight) in enumerate(zip(articles, highlights)):
    result = preprocessor.prepare_single_article(article, highlight)
    if result:
        result['id'] = ids[i]
        processed_data.append(result)

print(f"Successfully processed {len(processed_data)} out of {len(articles)} articles")
if processed_data:
    print(f"First article has {processed_data[0]['num_sentences']} sentences")

Successfully processed 50 out of 50 articles
First article has 22 sentences


In [21]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.cluster import KMeans
from sklearn.metrics.pairwise import cosine_similarity
from gensim import corpora, models
import networkx as nx

class RobustHybridSummarizer:
    def __init__(self, summary_ratio=0.3):
        self.summary_ratio = summary_ratio
        self.vectorizer = TfidfVectorizer(max_features=2000, min_df=1, max_df=0.8)

    def get_sentence_vectors(self, preprocessed_sentences):
        if len(preprocessed_sentences) < 2:
            return np.ones((len(preprocessed_sentences), 10))
        try:
            vectors = self.vectorizer.fit_transform(preprocessed_sentences).toarray()
            return vectors
        except:
            return np.random.rand(len(preprocessed_sentences), 10)

    def perform_kmeans(self, sentence_vectors, n_clusters=None):
        if len(sentence_vectors) < 2:
            return [0] * len(sentence_vectors)
        if n_clusters is None:
            n_clusters = max(2, min(5, int(np.sqrt(len(sentence_vectors)))))
        if n_clusters >= len(sentence_vectors):
            n_clusters = max(1, len(sentence_vectors) - 1)
        try:
            kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
            cluster_labels = kmeans.fit_predict(sentence_vectors)
            return cluster_labels
        except:
            return [0] * len(sentence_vectors)

    def perform_lda(self, preprocessed_sentences, num_topics=3):
        if len(preprocessed_sentences) < 3:
            return [-1] * len(preprocessed_sentences), None
        try:
            tokenized_sentences = [sent.split() for sent in preprocessed_sentences]
            tokenized_sentences = [tokens for tokens in tokenized_sentences if len(tokens) > 0]
            if len(tokenized_sentences) < 2:
                return [-1] * len(preprocessed_sentences), None
            id2word = corpora.Dictionary(tokenized_sentences)
            corpus = [id2word.doc2bow(tokens) for tokens in tokenized_sentences]
            actual_topics = min(num_topics, len(tokenized_sentences) - 1)
            if actual_topics < 1:
                return [-1] * len(preprocessed_sentences), None
            lda_model = models.LdaModel(
                corpus=corpus,
                id2word=id2word,
                num_topics=actual_topics,
                random_state=42,
                passes=5,
                alpha='auto'
            )
            sentence_topics = []
            for i, bow in enumerate(corpus):
                try:
                    topic_scores = lda_model.get_document_topics(bow)
                    if topic_scores:
                        dominant_topic = max(topic_scores, key=lambda x: x[1])[0]
                        sentence_topics.append(dominant_topic)
                    else:
                        sentence_topics.append(-1)
                except:
                    sentence_topics.append(-1)
            while len(sentence_topics) < len(preprocessed_sentences):
                sentence_topics.append(-1)
            return sentence_topics, lda_model
        except Exception as e:
            print(f"LDA Error: {e}")
            return [-1] * len(preprocessed_sentences), None

    def perform_textrank(self, sentence_vectors):
        if len(sentence_vectors) <= 1:
            return [1.0] * len(sentence_vectors)
        try:
            similarity_matrix = cosine_similarity(sentence_vectors)
            similarity_matrix = np.maximum(similarity_matrix, 0)
            nx_graph = nx.from_numpy_array(similarity_matrix)
            scores = nx.pagerank(nx_graph)
            textrank_scores = [scores[i] for i in range(len(sentence_vectors))]
            return textrank_scores
        except:
            return [1.0] * len(sentence_vectors)

    def summarize(self, original_sentences, preprocessed_sentences):
        if len(original_sentences) <= 2:
            return ' '.join(original_sentences)
        try:
            sentence_vectors = self.get_sentence_vectors(preprocessed_sentences)
            cluster_labels = self.perform_kmeans(sentence_vectors)
            topic_assignments, _ = self.perform_lda(preprocessed_sentences)
            textrank_scores = self.perform_textrank(sentence_vectors)

            if textrank_scores and max(textrank_scores) > min(textrank_scores):
                textrank_scores_norm = (textrank_scores - np.min(textrank_scores)) / \
                                     (np.max(textrank_scores) - np.min(textrank_scores))
            else:
                textrank_scores_norm = [1.0] * len(original_sentences)

            final_scores = []
            for i in range(len(original_sentences)):
                score = 0.6 * textrank_scores_norm[i]
                if i < len(topic_assignments) and topic_assignments[i] != -1:
                    score += 0.3
                final_scores.append(score)

            num_to_select = max(1, int(len(original_sentences) * self.summary_ratio))
            selected_indices = []

            cluster_best = {}
            for i, cluster_id in enumerate(cluster_labels):
                if cluster_id not in cluster_best or final_scores[i] > final_scores[cluster_best[cluster_id]]:
                    cluster_best[cluster_id] = i
            selected_indices.extend(cluster_best.values())

            remaining_indices = set(range(len(original_sentences))) - set(selected_indices)
            if remaining_indices and len(selected_indices) < num_to_select:
                remaining_list = sorted(remaining_indices, key=lambda x: final_scores[x], reverse=True)
                needed = num_to_select - len(selected_indices)
                selected_indices.extend(remaining_list[:needed])

            selected_indices.sort()
            summary_sentences = [original_sentences[i] for i in selected_indices]
            return ' '.join(summary_sentences)

        except Exception as e:
            print(f"Summarization error: {e}")
            num_fallback = max(1, int(len(original_sentences) * self.summary_ratio))
            return ' '.join(original_sentences[:num_fallback])

# Initialize the original summarizer
summarizer = RobustHybridSummarizer(summary_ratio=0.3)
print("Original hybrid summarizer initialized!")

Original hybrid summarizer initialized!


In [22]:
print("=== TESTING ORIGINAL HYBRID SUMMARIZER ===\n")

# Test on the first 3 processed samples
for i, sample in enumerate(processed_data[:3]):
    print(f"Sample {i+1}:")
    print(f"Original sentences: {sample['num_sentences']}")

    # Generate summary
    summary = summarizer.summarize(
        sample['original_sentences'],
        sample['preprocessed_sentences']
    )

    print("Generated Summary:")
    print(summary[:200] + "..." if len(summary) > 200 else summary)
    print("\nReference Summary:")
    print(sample['reference_summary'])
    print("=" * 80)



=== TESTING ORIGINAL HYBRID SUMMARIZER ===

Sample 1:
Original sentences: 22
Generated Summary:
LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on ...

Reference Summary:
Harry Potter star Daniel Radcliffe gets £20M fortune as he turns 18 Monday .
Young actor says he has no plans to fritter his cash away .
Radcliffe's earnings from first five Potter films have been held in trust fund .
Sample 2:
Original sentences: 40




Generated Summary:
Here, Soledad O'Brien takes users inside a jail where many of the inmates are mentally ill. An inmate housed on the "forgotten floor," where many mentally ill inmates are housed in Miami before trial....

Reference Summary:
Mentally ill inmates in Miami are housed on the "forgotten floor"
Judge Steven Leifman says most are there as a result of "avoidable felonies"
While CNN tours facility, patient shouts: "I am the son of the president"
Leifman says the system is unjust and he's fighting for change .
Sample 3:
Original sentences: 45
Generated Summary:
MINNEAPOLIS, Minnesota (CNN) -- Drivers who were on the Minneapolis bridge when it collapsed told harrowing tales of survival. "The whole bridge from one side of the Mississippi to the other just comp...

Reference Summary:
NEW: "I thought I was going to die," driver says .
Man says pickup truck was folded in half; he just has cut on face .
Driver: "I probably had a 30-, 35-foot free fall"
Minnesota bridge collapsed dur

In [23]:
def quick_evaluate(processed_data, summarizer, num_samples=5):
    """Quick evaluation on a few samples"""
    print("=== QUICK EVALUATION ===\n")

    for i, sample in enumerate(processed_data[:num_samples]):
        print(f"Evaluation Sample {i+1}:")

        generated = summarizer.summarize(
            sample['original_sentences'],
            sample['preprocessed_sentences']
        )

        print(f"Original sentences: {sample['num_sentences']}")
        print(f"Generated length: {len(generated)} chars")
        print(f"Reference length: {len(sample['reference_summary'])} chars")
        print("---")

    print("Evaluation completed!")

# Run quick evaluation
quick_evaluate(processed_data, summarizer)



=== QUICK EVALUATION ===

Evaluation Sample 1:
Original sentences: 22
Generated length: 1029 chars
Reference length: 217 chars
---
Evaluation Sample 2:
Original sentences: 40
Generated length: 1596 chars
Reference length: 281 chars
---
Evaluation Sample 3:




Original sentences: 45
Generated length: 1348 chars
Reference length: 224 chars
---
Evaluation Sample 4:
Original sentences: 23
Generated length: 697 chars
Reference length: 185 chars
---
Evaluation Sample 5:
Original sentences: 45
Generated length: 2005 chars
Reference length: 273 chars
---
Evaluation completed!


In [24]:
from rouge_score import rouge_scorer
import numpy as np

def evaluate_with_rouge(processed_data, summarizer, num_samples=10):
    """Evaluate the summarizer using ROUGE metrics"""
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

    rouge1_scores = []
    rouge2_scores = []
    rougeL_scores = []

    print("=== COMPREHENSIVE ROUGE EVALUATION ===\n")

    for i, sample in enumerate(processed_data[:num_samples]):
        if i % 5 == 0:
            print(f"Processing sample {i+1}...")

        generated_summary = summarizer.summarize(
            sample['original_sentences'],
            sample['preprocessed_sentences']
        )

        reference_summary = sample['reference_summary']
        scores = scorer.score(reference_summary, generated_summary)

        rouge1_scores.append(scores['rouge1'].fmeasure)
        rouge2_scores.append(scores['rouge2'].fmeasure)
        rougeL_scores.append(scores['rougeL'].fmeasure)

    avg_rouge1 = np.mean(rouge1_scores)
    avg_rouge2 = np.mean(rouge2_scores)
    avg_rougeL = np.mean(rougeL_scores)

    print("\n=== FINAL RESULTS ===")
    print(f"ROUGE-1 F1 Score: {avg_rouge1:.4f}")
    print(f"ROUGE-2 F1 Score: {avg_rouge2:.4f}")
    print(f"ROUGE-L F1 Score: {avg_rougeL:.4f}")
    print(f"Evaluated on {len(rouge1_scores)} samples")

    return {
        'rouge1': avg_rouge1,
        'rouge2': avg_rouge2,
        'rougeL': avg_rougeL,
        'all_scores': {
            'rouge1': rouge1_scores,
            'rouge2': rouge2_scores,
            'rougeL': rougeL_scores
        }
    }

# Evaluate original model
print("Evaluating original model with ROUGE...")
original_results = evaluate_with_rouge(processed_data, summarizer, num_samples=10)



Evaluating original model with ROUGE...
=== COMPREHENSIVE ROUGE EVALUATION ===

Processing sample 1...




Processing sample 6...





=== FINAL RESULTS ===
ROUGE-1 F1 Score: 0.2034
ROUGE-2 F1 Score: 0.0724
ROUGE-L F1 Score: 0.1348
Evaluated on 10 samples


In [25]:
class ImprovedHybridSummarizer:
    def __init__(self, summary_ratio=0.25):
        self.summary_ratio = summary_ratio
        self.vectorizer = TfidfVectorizer(max_features=3000, min_df=1, max_df=0.85, ngram_range=(1, 2))

    def get_sentence_vectors(self, preprocessed_sentences):
        if len(preprocessed_sentences) < 2:
            return np.ones((len(preprocessed_sentences), 10))
        try:
            vectors = self.vectorizer.fit_transform(preprocessed_sentences).toarray()
            return vectors
        except:
            return np.random.rand(len(preprocessed_sentences), 10)

    def perform_kmeans(self, sentence_vectors, n_clusters=None):
        if len(sentence_vectors) < 2:
            return [0] * len(sentence_vectors)
        if n_clusters is None:
            n_clusters = max(2, min(8, int(len(sentence_vectors) ** 0.7)))
        if n_clusters >= len(sentence_vectors):
            n_clusters = max(1, len(sentence_vectors) - 1)
        try:
            kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=20)
            cluster_labels = kmeans.fit_predict(sentence_vectors)
            return cluster_labels
        except:
            return [0] * len(sentence_vectors)

    def perform_lda(self, preprocessed_sentences, num_topics=4):
        if len(preprocessed_sentences) < 4:
            return [-1] * len(preprocessed_sentences), None
        try:
            tokenized_sentences = [sent.split() for sent in preprocessed_sentences]
            tokenized_sentences = [tokens for tokens in tokenized_sentences if len(tokens) > 2]
            if len(tokenized_sentences) < 3:
                return [-1] * len(preprocessed_sentences), None
            id2word = corpora.Dictionary(tokenized_sentences)
            corpus = [id2word.doc2bow(tokens) for tokens in tokenized_sentences]
            actual_topics = min(num_topics, len(tokenized_sentences) - 1)
            if actual_topics < 1:
                return [-1] * len(preprocessed_sentences), None
            lda_model = models.LdaModel(
                corpus=corpus,
                id2word=id2word,
                num_topics=actual_topics,
                random_state=42,
                passes=15,
                alpha='auto',
                per_word_topics=True
            )
            sentence_topics = []
            for bow in corpus:
                try:
                    topic_scores = lda_model.get_document_topics(bow)
                    if topic_scores:
                        dominant_topic = max(topic_scores, key=lambda x: x[1])
                        if dominant_topic[1] > 0.3:
                            sentence_topics.append(dominant_topic[0])
                        else:
                            sentence_topics.append(-1)
                    else:
                        sentence_topics.append(-1)
                except:
                    sentence_topics.append(-1)
            while len(sentence_topics) < len(preprocessed_sentences):
                sentence_topics.append(-1)
            return sentence_topics, lda_model
        except Exception as e:
            print(f"LDA Error: {e}")
            return [-1] * len(preprocessed_sentences), None

    def perform_textrank(self, sentence_vectors):
        if len(sentence_vectors) <= 1:
            return [1.0] * len(sentence_vectors)
        try:
            similarity_matrix = cosine_similarity(sentence_vectors)
            similarity_matrix = np.maximum(similarity_matrix, 0)
            nx_graph = nx.from_numpy_array(similarity_matrix)
            scores = nx.pagerank(nx_graph, alpha=0.85)
            textrank_scores = [scores[i] for i in range(len(sentence_vectors))]
            return textrank_scores
        except:
            return [1.0] * len(sentence_vectors)

    def summarize(self, original_sentences, preprocessed_sentences):
        if len(original_sentences) <= 2:
            return ' '.join(original_sentences[:1])
        try:
            sentence_vectors = self.get_sentence_vectors(preprocessed_sentences)
            cluster_labels = self.perform_kmeans(sentence_vectors)
            topic_assignments, _ = self.perform_lda(preprocessed_sentences)
            textrank_scores = self.perform_textrank(sentence_vectors)

            if textrank_scores and max(textrank_scores) > min(textrank_scores):
                textrank_scores_norm = (textrank_scores - np.min(textrank_scores)) / \
                                     (np.max(textrank_scores) - np.min(textrank_scores))
            else:
                textrank_scores_norm = [0.5] * len(original_sentences)

            final_scores = []
            for i in range(len(original_sentences)):
                base_score = 0.5 * textrank_scores_norm[i]
                position_bias = max(0, 1 - (i / len(original_sentences))) * 0.2
                base_score += position_bias
                if i < len(topic_assignments) and topic_assignments[i] != -1:
                    base_score += 0.3
                sentence_len = len(original_sentences[i].split())
                if 8 <= sentence_len <= 25:
                    base_score += 0.1
                final_scores.append(base_score)

            num_to_select = max(1, min(5, int(len(original_sentences) * self.summary_ratio)))
            selected_indices = []

            cluster_best = {}
            for i, cluster_id in enumerate(cluster_labels):
                if cluster_id not in cluster_best or final_scores[i] > final_scores[cluster_best[cluster_id]]:
                    cluster_best[cluster_id] = i
            selected_indices.extend(cluster_best.values())

            remaining_indices = set(range(len(original_sentences))) - set(selected_indices)
            if remaining_indices and len(selected_indices) < num_to_select:
                remaining_list = sorted(remaining_indices, key=lambda x: final_scores[x], reverse=True)
                needed = num_to_select - len(selected_indices)
                selected_indices.extend(remaining_list[:needed])

            if not selected_indices:
                selected_indices = [np.argmax(final_scores)] if final_scores else [0]

            selected_indices.sort()
            summary_sentences = [original_sentences[i] for i in selected_indices]
            return ' '.join(summary_sentences)

        except Exception as e:
            print(f"Summarization error: {e}")
            return original_sentences[0] if original_sentences else ""

# Initialize improved summarizer
improved_summarizer = ImprovedHybridSummarizer(summary_ratio=0.25)
print("Improved hybrid summarizer initialized!")

Improved hybrid summarizer initialized!


In [26]:
print("=== COMPARING ORIGINAL vs IMPROVED MODEL ===\n")

# Test on same samples for comparison
for i, sample in enumerate(processed_data[:2]):
    print(f"\nSample {i+1}:")
    print(f"Original sentences: {sample['num_sentences']}")

    # Original model
    original_summary = summarizer.summarize(
        sample['original_sentences'],
        sample['preprocessed_sentences']
    )

    # Improved model
    improved_summary = improved_summarizer.summarize(
        sample['original_sentences'],
        sample['preprocessed_sentences']
    )

    print("\n--- ORIGINAL MODEL SUMMARY ---")
    print(original_summary[:300] + "..." if len(original_summary) > 300 else original_summary)

    print("\n--- IMPROVED MODEL SUMMARY ---")
    print(improved_summary[:300] + "..." if len(improved_summary) > 300 else improved_summary)

    print("\n--- REFERENCE SUMMARY ---")
    print(sample['reference_summary'])
    print("=" * 80)

# Evaluate improved model
print("\nEvaluating improved model...")
improved_results = evaluate_with_rouge(processed_data, improved_summarizer, num_samples=10)



=== COMPARING ORIGINAL vs IMPROVED MODEL ===


Sample 1:
Original sentences: 22





--- ORIGINAL MODEL SUMMARY ---
LONDON, England (Reuters) -- Harry Potter star Daniel Radcliffe gains access to a reported £20 million ($41.1 million) fortune as he turns 18 on Monday, but he insists the money won't cast a spell on him. Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappoi...

--- IMPROVED MODEL SUMMARY ---
Daniel Radcliffe as Harry Potter in "Harry Potter and the Order of the Phoenix" To the disappointment of gossip columnists around the world, the young actor says he has no plans to fritter his cash away on fast cars, drink and celebrity parties. "I don't plan to be one of those people who, as soon a...

--- REFERENCE SUMMARY ---
Harry Potter star Daniel Radcliffe gets £20M fortune as he turns 18 Monday .
Young actor says he has no plans to fritter his cash away .
Radcliffe's earnings from first five Potter films have been held in trust fund .

Sample 2:
Original sentences: 40

--- ORIGINAL MODEL SUMMARY ---
Here, Soledad O'B

In [27]:
def create_final_report(processed_data, original_summarizer, improved_summarizer, num_samples=15):
    print("=" * 80)
    print("FINAL HYBRID SUMMARIZATION MODEL EVALUATION REPORT")
    print("=" * 80)

    print("\n1. ROUGE SCORE COMPARISON")
    print("-" * 40)

    original_results = evaluate_with_rouge(processed_data, original_summarizer, num_samples)
    improved_results = evaluate_with_rouge(processed_data, improved_summarizer, num_samples)

    print("\n2. SAMPLE OUTPUT ANALYSIS")
    print("-" * 40)

    for i, sample in enumerate(processed_data[:2]):
        print(f"\nSample {i+1} Analysis:")
        print(f"Original article sentences: {sample['num_sentences']}")

        original_summary = original_summarizer.summarize(
            sample['original_sentences'], sample['preprocessed_sentences'])
        improved_summary = improved_summarizer.summarize(
            sample['original_sentences'], sample['preprocessed_sentences'])

        print(f"Original model summary length: {len(original_summary)} chars")
        print(f"Improved model summary length: {len(improved_summary)} chars")
        print(f"Reference summary length: {len(sample['reference_summary'])} chars")

        original_compression = len(original_summary) / len(' '.join(sample['original_sentences']))
        improved_compression = len(improved_summary) / len(' '.join(sample['original_sentences']))

        print(f"Original model compression: {original_compression:.2%}")
        print(f"Improved model compression: {improved_compression:.2%}")

    print("\n3. MODEL COMPARISON SUMMARY")
    print("-" * 40)
    print(f"{'Metric':<15} {'Original':<10} {'Improved':<10} {'Change':<10}")
    print(f"{'ROUGE-1':<15} {original_results['rouge1']:.4f}    {improved_results['rouge1']:.4f}    {improved_results['rouge1'] - original_results['rouge1']:+.4f}")
    print(f"{'ROUGE-2':<15} {original_results['rouge2']:.4f}    {improved_results['rouge2']:.4f}    {improved_results['rouge2'] - original_results['rouge2']:+.4f}")
    print(f"{'ROUGE-L':<15} {original_results['rougeL']:.4f}    {improved_results['rougeL']:.4f}    {improved_results['rougeL'] - original_results['rougeL']:+.4f}")

    return {
        'original': original_results,
        'improved': improved_results
    }

# Generate final report
final_report = create_final_report(processed_data, summarizer, improved_summarizer)



FINAL HYBRID SUMMARIZATION MODEL EVALUATION REPORT

1. ROUGE SCORE COMPARISON
----------------------------------------
=== COMPREHENSIVE ROUGE EVALUATION ===

Processing sample 1...




Processing sample 6...




Processing sample 11...





=== FINAL RESULTS ===
ROUGE-1 F1 Score: 0.2141
ROUGE-2 F1 Score: 0.0712
ROUGE-L F1 Score: 0.1399
Evaluated on 15 samples
=== COMPREHENSIVE ROUGE EVALUATION ===

Processing sample 1...
Processing sample 6...
Processing sample 11...





=== FINAL RESULTS ===
ROUGE-1 F1 Score: 0.2295
ROUGE-2 F1 Score: 0.0795
ROUGE-L F1 Score: 0.1424
Evaluated on 15 samples

2. SAMPLE OUTPUT ANALYSIS
----------------------------------------

Sample 1 Analysis:
Original article sentences: 22




Original model summary length: 1029 chars
Improved model summary length: 996 chars
Reference summary length: 217 chars
Original model compression: 41.48%
Improved model compression: 40.15%

Sample 2 Analysis:
Original article sentences: 40
Original model summary length: 1596 chars
Improved model summary length: 912 chars
Reference summary length: 281 chars
Original model compression: 40.43%
Improved model compression: 23.10%

3. MODEL COMPARISON SUMMARY
----------------------------------------
Metric          Original   Improved   Change    
ROUGE-1         0.2141    0.2295    +0.0154
ROUGE-2         0.0712    0.0795    +0.0083
ROUGE-L         0.1399    0.1424    +0.0025


In [28]:
import pickle
import json
from datetime import datetime

def save_model_and_results(summarizer, processed_data, results, model_name="hybrid_summarizer"):
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

    # Save model
    model_filename = f"{model_name}_{timestamp}.pkl"
    with open(model_filename, 'wb') as f:
        pickle.dump(summarizer, f)

    # Save results
    results_filename = f"evaluation_results_{timestamp}.json"
    with open(results_filename, 'w') as f:
        json.dump({
            'timestamp': timestamp,
            'model_name': model_name,
            'results': results,
            'dataset_info': {
                'samples_processed': len(processed_data),
                'average_sentences': np.mean([s['num_sentences'] for s in processed_data])
            }
        }, f, indent=2)

    print(f"Model saved as: {model_filename}")
    print(f"Results saved as: {results_filename}")

    return model_filename, results_filename

# Save the improved model
model_file, results_file = save_model_and_results(
    improved_summarizer,
    processed_data,
    final_report['improved'],
    "improved_hybrid_summarizer"
)

print("\n=== PROJECT COMPLETED SUCCESSFULLY ===")

Model saved as: improved_hybrid_summarizer_20251019_071511.pkl
Results saved as: evaluation_results_20251019_071511.json

=== PROJECT COMPLETED SUCCESSFULLY ===


In [29]:
from google.colab import files
import pickle
import json

# 1. Download your trained model
with open('improved_hybrid_summarizer.pkl', 'wb') as f:
    pickle.dump(improved_summarizer, f)
files.download('improved_hybrid_summarizer.pkl')

# 2. Download evaluation results
results = {
    "rouge1": 0.2295,
    "rouge2": 0.0795,
    "rougeL": 0.1424,
    "dataset_info": {
        "samples_processed": 50,
        "average_sentences": 30.26
    }
}
with open('evaluation_results.json', 'w') as f:
    json.dump(results, f, indent=2)
files.download('evaluation_results.json')

# 3. Download your Colab notebook
# Go to File → Download → Download .ipynb

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>