# Merged Notebook: Text Summarization, Paraphrasing, and Comparative Analysis

This notebook combines text summarization, paraphrasing, and a comparative analysis of different transformer models. It is a merger of two separate notebooks, streamlined for clarity and efficiency.

## Section 1: Session Setup & Library Installation

In [None]:
#@title 1.1: Install Required Libraries
!pip install transformers sentencepiece sentence-transformers scikit-learn matplotlib pandas --quiet
print("✅ All required libraries installed.")

### 1.2: Import Libraries

In [None]:
#@title 1.2: Import All Necessary Libraries
# Core NLP libraries from Hugging Face
from transformers import T5ForConditionalGeneration, T5Tokenizer
from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from sentence_transformers import SentenceTransformer

# For data manipulation and analysis
import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity
from collections import Counter

# For visualization
import matplotlib.pyplot as plt

# Standard Python libraries
import textwrap
import re
import requests
import ssl

# Handle SSL for file downloads if necessary.
# NOTE(review): this replaces the ssl module's default HTTPS context factory
# with the *unverified* one for the whole process, i.e. certificate checks are
# turned off for any code that relies on ssl._create_default_https_context.
# Confirm this is really required before shipping the notebook.
if hasattr(ssl, "_create_unverified_context"):
    # Interpreters that lack the private factory are simply left untouched.
    _create_unverified_https_context = ssl._create_unverified_context
    ssl._create_default_https_context = _create_unverified_https_context

print("✅ All libraries imported successfully.")

## Section 2: Model Loading
Here, we load all the pre-trained models we'll need for summarization, paraphrasing, and similarity analysis.

In [None]:
#@title 2.1: Load All Models and Tokenizers
# Every model/tokenizer pair below is fetched with `from_pretrained` and bound
# to a module-level name that the functions in Section 3 read as globals.
print("⏳ Loading Summarization models...")
t5_model = T5ForConditionalGeneration.from_pretrained('t5-base')
t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')

# BART needs a checkpoint that was fine-tuned for summarization. The plain
# 'facebook/bart-base' checkpoint is only pre-trained (denoising objective) and
# is meant to be fine-tuned before use, so its "summaries" are not a fair
# baseline next to T5 and PEGASUS. 'facebook/bart-large-cnn' is the
# CNN/DailyMail summarization checkpoint and loads with the same classes.
bart_model = BartForConditionalGeneration.from_pretrained('facebook/bart-large-cnn')
bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')

pegasus_sum_model = PegasusForConditionalGeneration.from_pretrained('google/pegasus-xsum')
pegasus_sum_tokenizer = PegasusTokenizer.from_pretrained('google/pegasus-xsum')
print("✅ Summarization models loaded: T5, BART, PEGASUS.")

print("\n⏳ Loading Paraphrasing models...")
pegasus_para_model = PegasusForConditionalGeneration.from_pretrained('tuner007/pegasus_paraphrase')
pegasus_para_tokenizer = PegasusTokenizer.from_pretrained('tuner007/pegasus_paraphrase')

paraphrase_t5_model = T5ForConditionalGeneration.from_pretrained('Vamsi/T5_Paraphrase_Paws')
paraphrase_t5_tokenizer = T5Tokenizer.from_pretrained('Vamsi/T5_Paraphrase_Paws')

paraphrase_bart_model = BartForConditionalGeneration.from_pretrained('eugenesiow/bart-paraphrase')
paraphrase_bart_tokenizer = BartTokenizer.from_pretrained('eugenesiow/bart-paraphrase')
print("✅ Paraphrasing models loaded: PEGASUS, T5-Paraphrase, BART-Paraphrase.")

print("\n⏳ Loading Similarity model...")
# Sentence-embedding model used by compute_similarity().
similarity_model = SentenceTransformer('all-MiniLM-L6-v2')
print("✅ Similarity model loaded.")

## Section 3: Core Functions (Our NLP Toolbox)
This section contains all the core functions for performing summarization, paraphrasing, and calculating similarity.

In [None]:
#@title 3.1: Summarization Functions

def generate_t5_summary(text, min_len=40, max_len=150, beams=4):
    """Produce an abstractive summary of ``text`` with the global T5 model.

    Args:
        text: Source passage. Surrounding whitespace is stripped and newlines
            are replaced by spaces before tokenization.
        min_len: Forwarded to ``generate`` as ``min_length``.
        max_len: Forwarded to ``generate`` as ``max_length``.
        beams: Forwarded to ``generate`` as ``num_beams``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # T5 is steered by a task prefix, so the prompt starts with "summarize: ".
    single_line = text.strip().replace("\n", " ")
    prompt = "summarize: " + single_line

    # The tokenizer truncates anything beyond 1024 tokens.
    prompt_ids = t5_tokenizer.encode(
        prompt, return_tensors='pt', max_length=1024, truncation=True
    )

    decoding_options = {
        'max_length': max_len,
        'min_length': min_len,
        'num_beams': beams,
        'no_repeat_ngram_size': 3,
        'length_penalty': 2.0,
        'early_stopping': True,
    }
    candidates = t5_model.generate(prompt_ids, **decoding_options)
    return t5_tokenizer.decode(candidates[0], skip_special_tokens=True)

def generate_bart_summary(text, min_len=40, max_len=150, beams=4):
    """Produce an abstractive summary of ``text`` with the global BART model.

    Args:
        text: Source passage, tokenized as-is (no prefix, no whitespace cleanup).
        min_len: Forwarded to ``generate`` as ``min_length``.
        max_len: Forwarded to ``generate`` as ``max_length``.
        beams: Forwarded to ``generate`` as ``num_beams``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # The tokenizer truncates anything beyond 1024 tokens.
    source_ids = bart_tokenizer.encode(
        text, return_tensors='pt', max_length=1024, truncation=True
    )

    decoding_options = {
        'max_length': max_len,
        'min_length': min_len,
        'num_beams': beams,
        'no_repeat_ngram_size': 3,
        'length_penalty': 2.0,
        'early_stopping': True,
    }
    candidates = bart_model.generate(source_ids, **decoding_options)
    return bart_tokenizer.decode(candidates[0], skip_special_tokens=True)

def generate_pegasus_summary(text, min_len=40, max_len=150, beams=4):
    """Abstractive summarization with PEGASUS.

    Args:
        text: Source passage, tokenized as-is.
        min_len: Forwarded to ``generate`` as ``min_length``.
        max_len: Forwarded to ``generate`` as ``max_length``.
        beams: Forwarded to ``generate`` as ``num_beams``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    # PEGASUS looks positions up in a fixed-size embedding table, so an input
    # longer than the model's `max_position_embeddings` (512 for
    # google/pegasus-xsum) fails inside the encoder instead of being truncated.
    # Cap the tokenizer at whatever the loaded model supports, never above the
    # 1024-token limit used by the other summarizers.
    model_config = getattr(pegasus_sum_model, 'config', None)
    position_limit = getattr(model_config, 'max_position_embeddings', None)
    max_input_tokens = min(1024, position_limit) if position_limit else 1024

    inputs = pegasus_sum_tokenizer.encode(
        text, return_tensors='pt', max_length=max_input_tokens, truncation=True
    )
    summary_ids = pegasus_sum_model.generate(
        inputs, max_length=max_len, min_length=min_len, num_beams=beams,
        no_repeat_ngram_size=3, length_penalty=2.0, early_stopping=True
    )
    return pegasus_sum_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

In [None]:
#@title 3.2: Paraphrasing Functions

def generate_pegasus_paraphrase(text, num_return=3, beams=10):
    """Generate paraphrases with PEGASUS.

    Args:
        text: Sentence to paraphrase; truncated to 60 tokens.
        num_return: Number of paraphrases to return.
        beams: Requested beam width. It is raised to ``num_return`` when it is
            smaller, because beam search cannot return more sequences than it
            has beams and ``generate`` raises in that case.

    Returns:
        List of ``num_return`` decoded paraphrases (special tokens skipped).
    """
    effective_beams = max(beams, num_return)
    inputs = pegasus_para_tokenizer.encode(text, return_tensors='pt', truncation=True, max_length=60)
    paraphrase_ids = pegasus_para_model.generate(
        inputs, max_length=60, num_beams=effective_beams,
        num_return_sequences=num_return, early_stopping=True
    )
    return pegasus_para_tokenizer.batch_decode(paraphrase_ids, skip_special_tokens=True)

def generate_t5_paraphrase(text, num_return=3, beams=10):
    """Generate paraphrases with T5-Paraphrase.

    Args:
        text: Sentence to paraphrase; it is wrapped as
            ``"paraphrase: <text> </s>"`` and truncated to 512 tokens.
        num_return: Number of paraphrases to return.
        beams: Requested beam width. It is raised to ``num_return`` when it is
            smaller, because beam search cannot return more sequences than it
            has beams and ``generate`` raises in that case.

    Returns:
        List of ``num_return`` decoded paraphrases (special tokens skipped).
    """
    effective_beams = max(beams, num_return)
    input_text = f"paraphrase: {text} </s>"
    inputs = paraphrase_t5_tokenizer.encode(input_text, return_tensors='pt', max_length=512, truncation=True)
    paraphrase_ids = paraphrase_t5_model.generate(
        inputs, max_length=60, num_beams=effective_beams,
        num_return_sequences=num_return, early_stopping=True
    )
    return paraphrase_t5_tokenizer.batch_decode(paraphrase_ids, skip_special_tokens=True)

def generate_bart_paraphrase(text, num_return=3, beams=10):
    """Generate paraphrases with BART-Paraphrase.

    Args:
        text: Sentence to paraphrase; truncated to 60 tokens.
        num_return: Number of paraphrases to return.
        beams: Requested beam width. It is raised to ``num_return`` when it is
            smaller, because beam search cannot return more sequences than it
            has beams and ``generate`` raises in that case.

    Returns:
        List of ``num_return`` decoded paraphrases (special tokens skipped).
    """
    effective_beams = max(beams, num_return)
    inputs = paraphrase_bart_tokenizer.encode(text, return_tensors='pt', truncation=True, max_length=60)
    paraphrase_ids = paraphrase_bart_model.generate(
        inputs, max_length=60, num_beams=effective_beams,
        num_return_sequences=num_return, early_stopping=True
    )
    return paraphrase_bart_tokenizer.batch_decode(paraphrase_ids, skip_special_tokens=True)

In [None]:
#@title 3.3: Helper & Analysis Functions

def compute_similarity(text1, text2):
    """Return the cosine similarity between the embeddings of two texts.

    Both texts are embedded in a single ``similarity_model.encode`` call, and
    the single entry of the resulting 1x1 similarity matrix is returned.
    """
    first_vec, second_vec = similarity_model.encode([text1, text2])
    score_matrix = cosine_similarity([first_vec], [second_vec])
    return score_matrix[0][0]

def clean_text(text):
    """Return ``text`` reduced to ASCII with whitespace collapsed.

    Each run of non-ASCII characters becomes a single space, then every run of
    whitespace is squeezed to one space and the ends are trimmed.
    """
    ascii_only = re.sub(r'[^\x00-\x7F]+', ' ', text)
    single_spaced = re.sub(r'\s+', ' ', ascii_only)
    return single_spaced.strip()

## Section 4: Task 1 - Text Summarization
Let's start by using the T5 model to summarize different types of text.

In [None]:
#@title 4.1: Example 1 - Technical Text (Quantum Computing)
# Sample passage: dense technical prose.
technical_text = """
Quantum computing is a revolutionary type of computation that harnesses the collective properties of quantum states,
such as superposition and entanglement, to perform calculations. While classical computers use bits that can be either
a 0 or a 1, a quantum computer uses qubits, which can be a 0, a 1, or both at the same time. This fundamental difference
allows quantum computers to solve complex problems that are intractable for even the most powerful classical supercomputers,
with potential applications in cryptography, materials science, and drug discovery.
"""

# A tight 25-50 token window is requested for this passage.
summary = generate_t5_summary(technical_text, min_len=25, max_len=50)

# One print call; sep="\n" puts each section on its own line.
print(
    "----------- TECHNICAL TEXT -----------",
    textwrap.fill(technical_text, width=100),
    "\n✨---------- T5 SUMMARY -----------✨",
    textwrap.fill(summary, width=100),
    sep="\n",
)

In [None]:
#@title 4.2: Example 2 - Business Text (Market Analysis)
# Sample passage: business / market-analysis prose.
business_text = """
The global market for renewable energy is projected to experience robust growth over the next decade. Key drivers include
increasing government incentives for clean energy, declining costs of solar and wind technologies, and growing consumer
awareness regarding climate change. However, challenges remain, such as the intermittency of renewable sources and the
need for significant grid infrastructure upgrades. Companies that can innovate in energy storage solutions and grid
management are best positioned to capitalize on this market trend.
"""

# A 30-90 token window is requested for this passage.
summary = generate_t5_summary(business_text, min_len=30, max_len=90)

# One print call; sep="\n" puts each section on its own line.
print(
    "----------- BUSINESS TEXT -----------",
    textwrap.fill(business_text, width=100),
    "\n✨---------- T5 SUMMARY -----------✨",
    textwrap.fill(summary, width=100),
    sep="\n",
)

In [None]:
#@title 4.3: Example 3 - Creative Text (Literary Description)
# Sample passage: descriptive literary prose.
creative_text = """
The ancient library was a labyrinth of shadows and whispered knowledge. Sunlight struggled through a high,
arched window, illuminating motes of dust that danced like tiny sprites in the golden shafts of light.
The air smelled of aging paper, leather, and a faint, sweet hint of vanilla. Every towering bookshelf
was a gateway to another world, each leather-bound volume a silent promise of adventure, history,
or forgotten magic. It was a place where time itself seemed to slow down, holding its breath in reverence
for the stories it contained.
"""

# A 20-70 token window is requested for this passage.
summary = generate_t5_summary(creative_text, min_len=20, max_len=70)

# One print call; sep="\n" puts each section on its own line.
print(
    "----------- CREATIVE TEXT -----------",
    textwrap.fill(creative_text, width=100),
    "\n✨---------- T5 SUMMARY -----------✨",
    textwrap.fill(summary, width=100),
    sep="\n",
)

### 4.4: Interactive Summarizer Studio

In [None]:
#@title Your Interactive T5 Summarizer Studio! ⚡️
#@markdown ### 👈 Paste your text below and tune the parameters!
input_text = 'The James Webb Space Telescope (JWST) is a space telescope designed primarily to conduct infrared astronomy. As the largest optical telescope in space, its high resolution and sensitivity allow it to view objects too old, distant, or faint for the Hubble Space Telescope. This has enabled investigations in many fields of astronomy and cosmology, such as observation of the first stars, the formation of the first galaxies, and detailed atmospheric characterization of potentially habitable exoplanets. The U.S. National Aeronautics and Space Administration (NASA) led JWST\'s development in collaboration with the European Space Agency (ESA) and the Canadian Space Agency (CSA).' #@param {type:"string"}
min_length = 45 #@param {type:"slider", min:10, max:100, step:5}
max_length = 140 #@param {type:"slider", min:50, max:200, step:10}
num_beams = 5 #@param {type:"slider", min:2, max:8, step:1}

# --- Run the summarizer with your settings ---
# The two sliders overlap (min_length can reach 100 while max_length can drop
# to 50), so clamp the minimum to avoid sending contradictory length limits.
effective_min_length = min(min_length, max_length)
generated_summary = generate_t5_summary(input_text, min_len=effective_min_length, max_len=max_length, beams=num_beams)

# --- Display the results and analysis ---
original_word_count = len(input_text.split())
summary_word_count = len(generated_summary.split())
# An empty or whitespace-only input has zero words; report 0% instead of
# dividing by zero.
if original_word_count:
    reduction = 100 - (summary_word_count / original_word_count * 100)
else:
    reduction = 0.0

print("----------- YOUR INPUT TEXT -----------")
print(textwrap.fill(input_text, width=100))
print("\n✨---------- GENERATED SUMMARY -----------✨")
print(textwrap.fill(generated_summary, width=100))
print("\n📊---------- ANALYSIS -----------📊")
print(f"Original Word Count: {original_word_count}")
print(f"Summary Word Count: {summary_word_count}")
print(f"Text Reduction: {reduction:.1f}%")

## Section 5: Task 2 - Text Paraphrasing
Now we'll use the PEGASUS model to generate alternative phrasings for a given sentence.

In [None]:
#@title 5.1: Example 1 - Formal / Academic Sentence
# Formal / academic register example.
formal_sentence = "The empirical data indicates a statistically significant correlation between the two variables."
paraphrases = generate_pegasus_paraphrase(formal_sentence, num_return=4)

print(
    f"----------- ORIGINAL FORMAL SENTENCE -----------\n'{formal_sentence}'\n",
    "🤖---------- PEGASUS PARAPHRASES ----------🤖",
    sep="\n",
)
# Number the candidates from 1 for display.
for rank, candidate in enumerate(paraphrases, start=1):
    print(f"  {rank}. {candidate}")

In [None]:
#@title 5.2: Example 2 - Casual / Idiomatic Sentence
# Casual / idiomatic register example.
casual_sentence = "To be honest, that new project is a real pain in the neck."
paraphrases = generate_pegasus_paraphrase(casual_sentence, num_return=4)

print(
    f"----------- ORIGINAL CASUAL SENTENCE -----------\n'{casual_sentence}'\n",
    "🤖---------- PEGASUS PARAPHRASES ----------🤖",
    sep="\n",
)
# Number the candidates from 1 for display.
for rank, candidate in enumerate(paraphrases, start=1):
    print(f"  {rank}. {candidate}")

In [None]:
#@title 5.3: Example 3 - Marketing Call-to-Action
# Marketing call-to-action example.
marketing_sentence = "Don't miss out on our exclusive offer – shop now to save 50%!"
paraphrases = generate_pegasus_paraphrase(marketing_sentence, num_return=4)

print(
    f"----------- ORIGINAL MARKETING SENTENCE -----------\n'{marketing_sentence}'\n",
    "🤖---------- PEGASUS PARAPHRASES ----------🤖",
    sep="\n",
)
# Number the candidates from 1 for display.
for rank, candidate in enumerate(paraphrases, start=1):
    print(f"  {rank}. {candidate}")

### 5.4: Interactive Paraphraser Playground

In [None]:
#@title Your Interactive Paraphraser Playground! ⚡️
#@markdown ### 👈 Type your sentence and choose your settings! (Uses PEGASUS)
input_sentence = "Learning new skills is essential for career growth." #@param {type:"string"}
num_paraphrases = 5 #@param {type:"slider", min:1, max:10, step:1}
quality_vs_speed_beams = 9 #@param {type:"slider", min:2, max:15, step:1}

# --- Run the paraphraser with your settings ---
# Beam search cannot return more sequences than it has beams, and the sliders
# above allow num_paraphrases > quality_vs_speed_beams (e.g. 10 vs 2), which
# makes generate() raise. Widen the beam to the requested count in that case.
effective_beams = max(quality_vs_speed_beams, num_paraphrases)
generated_paraphrases = generate_pegasus_paraphrase(input_sentence, num_return=num_paraphrases, beams=effective_beams)

# --- Display the results ---
print(f"----------- ORIGINAL SENTENCE -----------\n'{input_sentence}'\n")
# Report the beam width that was actually used.
print(f"🤖---------- {len(generated_paraphrases)} GENERATED PARAPHRASES (Quality: {effective_beams}) ----------🤖")
for i, p in enumerate(generated_paraphrases):
    print(f"  {i+1}. {p}")

## Section 6: Task 3 - Comparative Model Analysis
Now, let's compare the performance of different models (T5, BART, PEGASUS) on both summarization and paraphrasing tasks.

In [None]:
#@title 6.1: Define Enhanced Comparison Functions
def compare_summarizers_enhanced(text, min_len=40, max_len=150, beams=4):
    """Run T5, BART and PEGASUS on the same excerpt and report basic metrics.

    Only the first 2000 characters of ``text`` are summarized and used as the
    similarity reference.

    Args:
        text: Source document.
        min_len, max_len, beams: Passed positionally to each summarizer.

    Returns:
        Dict keyed by ``'t5'``, ``'bart'`` and ``'pegasus'``; each value holds
        ``'summary'``, ``'length'`` (whitespace-separated word count) and
        ``'sim_to_original'`` (``compute_similarity`` against the excerpt).
    """
    excerpt = text[:2000]
    # (metrics key, printed label, summarizer) in reporting order.
    summarizers = (
        ('t5', 'T5', generate_t5_summary),
        ('bart', 'BART', generate_bart_summary),
        ('pegasus', 'PEGASUS', generate_pegasus_summary),
    )

    # Generate every summary first, then score them.
    summaries = {
        key: summarize(excerpt, min_len, max_len, beams)
        for key, _, summarize in summarizers
    }

    metrics = {}
    for key, _, _ in summarizers:
        generated = summaries[key]
        metrics[key] = {
            'summary': generated,
            'length': len(generated.split()),
            'sim_to_original': compute_similarity(generated, excerpt),
        }

    print("----------- Enhanced Summarization Comparison -----------")
    for position, (key, label, _) in enumerate(summarizers):
        stats = metrics[key]
        # Every section after the first is preceded by a blank line.
        lead = "" if position == 0 else "\n"
        print(f"{lead}{label} Summary (Length: {stats['length']}, Sim to Original: {stats['sim_to_original']:.3f}):")
        print(textwrap.fill(stats['summary'], width=100))
    return metrics

def compare_paraphrasers_enhanced(text, num_return=3, beams=10):
    """Run the three paraphrasers on ``text`` and report averaged metrics.

    Args:
        text: Sentence to paraphrase.
        num_return, beams: Passed positionally to each paraphraser.

    Returns:
        Dict keyed by ``'pegasus'``, ``'t5_paraphrase'`` and
        ``'bart_paraphrase'``; each value holds ``'paraphrases'`` (the list),
        ``'avg_length'`` (mean whitespace-separated word count) and
        ``'avg_sim_to_original'`` (mean ``compute_similarity`` against ``text``).
    """
    # (metrics key, printed label, paraphraser) in reporting order.
    contenders = (
        ('pegasus', 'PEGASUS', generate_pegasus_paraphrase),
        ('t5_paraphrase', 'T5-Paraphrase', generate_t5_paraphrase),
        ('bart_paraphrase', 'BART-Paraphrase', generate_bart_paraphrase),
    )

    # Generate every candidate list first, then score them.
    outputs = {
        key: paraphrase(text, num_return, beams)
        for key, _, paraphrase in contenders
    }

    metrics = {}
    for key, _, _ in contenders:
        candidates = outputs[key]
        total = len(candidates)
        metrics[key] = {
            'paraphrases': candidates,
            'avg_length': sum(len(c.split()) for c in candidates) / total,
            'avg_sim_to_original': sum(compute_similarity(c, text) for c in candidates) / total,
        }

    print("----------- Enhanced Paraphrasing Comparison -----------")
    for position, (key, label, _) in enumerate(contenders):
        stats = metrics[key]
        # Every section after the first is preceded by a blank line.
        lead = "" if position == 0 else "\n"
        print(f"{lead}{label} Paraphrases (Avg Length: {stats['avg_length']:.1f}, Avg Sim to Original: {stats['avg_sim_to_original']:.3f}):")
        for rank, candidate in enumerate(stats['paraphrases'], 1):
            print(f"  {rank}. {candidate}")
    return metrics

In [None]:
#@title 6.2: Load Texts for Analysis from URLs
def _strip_gutenberg_boilerplate(text):
    """Return the part of ``text`` between the Gutenberg START and END markers.

    The text is returned unchanged unless a "*** START OF" marker is followed
    by a "*** END OF" marker. The START marker line itself is dropped so the
    result begins with the book's own content.
    """
    start_idx = text.find("*** START OF")
    if start_idx == -1:
        return text
    # Only accept an END marker located after the START marker; otherwise the
    # slice would be empty and the whole document would be lost.
    end_idx = text.find("*** END OF", start_idx)
    if end_idx == -1:
        return text
    # Skip the remainder of the START marker line ("*** START OF ... ***").
    marker_line_end = text.find("\n", start_idx, end_idx)
    body_start = start_idx if marker_line_end == -1 else marker_line_end + 1
    return text[body_start:end_idx]


def load_text_from_url(url):
    """Load and clean text from a URL.

    Args:
        url: Address of a plain-text document.

    Returns:
        The downloaded text with Project Gutenberg header/footer boilerplate
        removed (when both markers are present) and surrounding whitespace
        stripped. On any failure the error is printed and ``""`` is returned,
        so callers must be prepared for an empty string.
    """
    try:
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return _strip_gutenberg_boilerplate(response.text).strip()
    except Exception as e:
        # Deliberate best effort: keep the notebook running and report why.
        print(f"Error loading text from {url}: {e}")
        return ""

# Project Gutenberg plain-text editions used as source material.
file_urls = {
    "Frankenstein": "https://www.gutenberg.org/files/84/84-0.txt",
    "Pride and Prejudice": "https://www.gutenberg.org/files/1342/1342-0.txt"
}

loaded_texts = {}
for name, url in file_urls.items():
    print(f"⏳ Loading '{name}'...")
    loaded_texts[name] = load_text_from_url(url)
    # load_text_from_url returns "" on failure, so only report success when
    # something was actually downloaded.
    if loaded_texts[name]:
        print(f"✅ Loaded '{name}' ({len(loaded_texts[name])} characters).")
    else:
        print(f"⚠️ Could not load '{name}'; later cells will receive an empty text.")

frankenstein_text = loaded_texts["Frankenstein"]
pride_prejudice_text = loaded_texts["Pride and Prejudice"]

In [None]:
#@title 6.3: Run Summarization Comparison on 'Frankenstein'
print("----------- Summarization Comparison on 'Frankenstein'-----------\n")
# compare_summarizers_enhanced only uses the first 2000 characters of the text.
# The returned metrics dict is kept so the plotting cell (6.5) can chart it.
comparison_frankenstein = compare_summarizers_enhanced(frankenstein_text)

In [None]:
#@title 6.4: Run Summarization Comparison on 'Pride and Prejudice'
print("----------- Summarization Comparison on 'Pride and Prejudice' -----------\n")
# compare_summarizers_enhanced only uses the first 2000 characters of the text.
# The returned metrics dict is kept so the plotting cell (6.5) can chart it.
comparison_pride = compare_summarizers_enhanced(pride_prejudice_text)

### 6.5: Visualize Comparison Metrics

In [None]:
#@title 6.5: Define Visualization Functions and Plot Results

def plot_summarization_comparison(metrics, title="Summarization Comparison"):
    """Draw side-by-side bar charts of summary length and similarity.

    Args:
        metrics: Dict with ``'t5'``, ``'bart'`` and ``'pegasus'`` entries, each
            providing ``'length'`` and ``'sim_to_original'`` (the structure
            returned by ``compare_summarizers_enhanced``).
        title: Figure-level title.
    """
    fig, (length_ax, similarity_ax) = plt.subplots(1, 2, figsize=(12, 5))
    model_keys = ('t5', 'bart', 'pegasus')
    models = ['T5', 'BART', 'PEGASUS']

    # (axis, metrics field, bar colours, panel title, y-axis label)
    panels = (
        (length_ax, 'length', ['#ff9999','#66b3ff','#99ff99'],
         'Summary Length (Word Count)', 'Words'),
        (similarity_ax, 'sim_to_original', ['#ffcc99','#c2c2f0','#ffb3e6'],
         'Semantic Similarity to Original Text', 'Cosine Similarity'),
    )
    for axis, field, palette, heading, y_label in panels:
        values = [metrics[key][field] for key in model_keys]
        axis.bar(models, values, color=palette)
        axis.set_title(heading)
        axis.set_ylabel(y_label)

    # Fix the similarity axis to 0-1 so figures are comparable across texts.
    similarity_ax.set_ylim(0, 1)

    plt.suptitle(title, fontsize=16)
    # Leave the top 5% of the figure free for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

def plot_paraphrasing_comparison(metrics, title="Paraphrasing Comparison"):
    """Draw side-by-side bar charts of paraphrase length and similarity.

    Args:
        metrics: Dict with ``'pegasus'``, ``'t5_paraphrase'`` and
            ``'bart_paraphrase'`` entries, each providing ``'avg_length'`` and
            ``'avg_sim_to_original'`` (the structure returned by
            ``compare_paraphrasers_enhanced``).
        title: Figure-level title.
    """
    fig, (length_ax, similarity_ax) = plt.subplots(1, 2, figsize=(12, 5))
    model_keys = ('pegasus', 't5_paraphrase', 'bart_paraphrase')
    models = ['PEGASUS', 'T5', 'BART']

    # (axis, metrics field, bar colours, panel title, y-axis label)
    panels = (
        (length_ax, 'avg_length', ['#ff9999','#66b3ff','#99ff99'],
         'Average Paraphrase Length (Words)', 'Words'),
        (similarity_ax, 'avg_sim_to_original', ['#ffcc99','#c2c2f0','#ffb3e6'],
         'Average Similarity to Original Sentence', 'Cosine Similarity'),
    )
    for axis, field, palette, heading, y_label in panels:
        values = [metrics[key][field] for key in model_keys]
        axis.bar(models, values, color=palette)
        axis.set_title(heading)
        axis.set_ylabel(y_label)

    # Fix the similarity axis to 0-1 so figures are comparable across runs.
    similarity_ax.set_ylim(0, 1)

    plt.suptitle(title, fontsize=16)
    # Leave the top 5% of the figure free for the suptitle.
    plt.tight_layout(rect=[0, 0, 1, 0.95])
    plt.show()

# Plot the summarization results
# comparison_frankenstein / comparison_pride are the metrics dicts produced by
# cells 6.3 and 6.4, so those cells must have been run first.
plot_summarization_comparison(comparison_frankenstein, title="Summarization Comparison (Frankenstein)")
plot_summarization_comparison(comparison_pride, title="Summarization Comparison (Pride and Prejudice)")

# Run and plot a paraphrasing comparison
# compare_paraphrasers_enhanced is called with its defaults (num_return=3, beams=10).
paraphrase_test_sentence = "It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife."
paraphrase_metrics = compare_paraphrasers_enhanced(paraphrase_test_sentence)
plot_paraphrasing_comparison(paraphrase_metrics, title="Paraphrasing Comparison (Pride & Prejudice Sentence)")

## Section 7: Task 4 - Deeper Text Analysis
Let's perform a simple bigram analysis to find the most common word pairs in our source texts. To keep it fast, only the first 5,000 characters of each text are analyzed.

In [None]:
#@title 7.1: Bigram Analysis Function and Visualization
def analyze_and_plot_bigrams(text, title="Top 5 Bigrams"):
    """Plot the five most frequent adjacent word pairs in ``text``.

    Only the first 5000 characters are considered. They are passed through
    ``clean_text``, lower-cased and split on whitespace before pairs are
    counted. When no pair can be formed, a message is printed instead of a
    chart.

    Args:
        text: Source text.
        title: Chart title (also quoted in the fallback message).
    """
    sample = clean_text(text[:5000])  # Clean and limit for performance
    tokens = sample.lower().split()
    # Pair each token with its successor.
    top_pairs = Counter(zip(tokens, tokens[1:])).most_common(5)

    if not top_pairs:
        print(f"Could not generate bigrams for '{title}'.")
        return

    labels = tuple(f"{first} {second}" for (first, second), _ in top_pairs)
    counts = tuple(count for _, count in top_pairs)

    plt.figure(figsize=(8, 4))
    plt.bar(labels, counts, color='teal')
    plt.title(title, fontsize=14)
    plt.xticks(rotation=45, ha='right')
    plt.ylabel('Frequency')
    plt.tight_layout()
    plt.show()

# Run analysis on our loaded texts
# Only the first 5000 characters of each text are analyzed (the slice is
# applied inside analyze_and_plot_bigrams).
analyze_and_plot_bigrams(frankenstein_text, title="Top 5 Bigrams in 'Frankenstein'")
analyze_and_plot_bigrams(pride_prejudice_text, title="Top 5 Bigrams in 'Pride and Prejudice'")

## Section 8: Putting It All Together: The TextMorph Pipeline
Finally, we can encapsulate all these steps into a simple, reusable pipeline class.

In [None]:
#@title 8.1: Define and Run the Full Pipeline
class TextMorphPipeline:
    """Thin façade over the notebook's summarization, paraphrasing and
    similarity helpers.

    The pipeline keeps a list of reference texts; ``process`` prints a summary
    of the input, optional paraphrases of a given sentence, and the similarity
    of the input to every stored reference.
    """

    def __init__(self):
        # Texts that analyze_similarity() compares the input against.
        self.reference_texts = []

    def add_reference(self, text):
        """Append ``text`` to the list of reference texts."""
        self.reference_texts.append(text)

    def summarize(self, input_text, summarizer='t5', **kwargs):
        """Summarize ``input_text`` with the chosen backend.

        ``summarizer`` is one of ``'t5'``, ``'bart'`` or ``'pegasus'``; extra
        keyword arguments are forwarded to the backend function. Raises
        ``ValueError`` for any other name (after the header line is printed).
        """
        print(f"\n--- Generating {summarizer.upper()} Summary ---")
        if summarizer not in ('t5', 'bart', 'pegasus'):
            raise ValueError("Invalid summarizer. Choose 't5', 'bart', or 'pegasus'.")
        backends = {
            't5': generate_t5_summary,
            'bart': generate_bart_summary,
            'pegasus': generate_pegasus_summary,
        }
        return backends[summarizer](input_text, **kwargs)

    def paraphrase(self, input_text, paraphraser='pegasus', **kwargs):
        """Paraphrase ``input_text`` with the chosen backend.

        ``paraphraser`` is one of ``'pegasus'``, ``'t5'`` or ``'bart'``; extra
        keyword arguments are forwarded to the backend function. Raises
        ``ValueError`` for any other name (after the header line is printed).
        """
        print(f"\n--- Generating {paraphraser.upper()} Paraphrases ---")
        if paraphraser not in ('pegasus', 't5', 'bart'):
            raise ValueError("Invalid paraphraser. Choose 'pegasus', 't5', or 'bart'.")
        backends = {
            'pegasus': generate_pegasus_paraphrase,
            't5': generate_t5_paraphrase,
            'bart': generate_bart_paraphrase,
        }
        return backends[paraphraser](input_text, **kwargs)

    def analyze_similarity(self, input_text):
        """Print the similarity of ``input_text`` to each reference text.

        Prints nothing when no reference has been added.
        """
        if not self.reference_texts:
            return
        print("\n--- Calculating Similarity to Reference Texts ---")
        for position, reference in enumerate(self.reference_texts, start=1):
            score = compute_similarity(input_text, reference)
            print(f"Similarity to Reference {position}: {score:.3f}")

    def process(self, input_text, summarizer='t5', paraphrase_sentence=None):
        """Run the whole pipeline on ``input_text`` and print every result.

        The summary is requested with ``min_len=50`` and ``max_len=150``.
        Paraphrases are produced only when ``paraphrase_sentence`` is truthy.
        Nothing is returned.
        """
        rule = '=' * 20
        print(f"\n{rule} PIPELINE RUN {rule}")

        summary = self.summarize(input_text, summarizer=summarizer, min_len=50, max_len=150)
        print(textwrap.fill(summary, width=100))

        if paraphrase_sentence:
            candidates = self.paraphrase(paraphrase_sentence)
            for rank, candidate in enumerate(candidates, start=1):
                print(f"  {rank}. {candidate}")

        self.analyze_similarity(input_text)
        print('=' * 55)

# --- Initialize and run the pipeline ---
pipeline = TextMorphPipeline()
# Reference texts: process() prints the similarity of its input to each of these.
pipeline.add_reference("A story of creation, ambition, and tragedy.")
pipeline.add_reference("A social commentary on class and love in Regency England.")

# Process Frankenstein with a T5 summary
# Only the first 2000 characters of the book are passed in.
pipeline.process(
    frankenstein_text[:2000],
    summarizer='t5',
    paraphrase_sentence="It was on a dreary night of November that I beheld the accomplishment of my toils."
)

# Process Pride and Prejudice with a PEGASUS summary
# Only the first 2000 characters of the book are passed in.
pipeline.process(
    pride_prejudice_text[:2000],
    summarizer='pegasus',
    paraphrase_sentence="It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife."
)