In [None]:
pip install datasets

## TextRank

In [None]:
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Read the classified test set into a DataFrame; the cells below read its
# 'article' column and append new columns to it.
dataset_path = "processed_testdataset.csv"
data = pd.read_csv(filepath_or_buffer=dataset_path)

# Function for TextRank summarization
def textrank_summary(article, num_sentences=3):
    """Build an extractive summary from the most central sentences of an article.

    The article is split on ``'. '``. Each sentence is scored by the sum of its
    bag-of-words cosine similarities to every sentence of the article (itself
    included), and the ``num_sentences`` best-scoring sentences are joined in
    descending score order.

    Args:
        article: Article text. Any non-string value (for example the float NaN
            pandas uses for a missing cell) is treated as "no text".
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The summary string. The article is returned unchanged when it contains
        at most ``num_sentences`` non-blank sentences; ``""`` is returned when
        ``article`` is not a string.
    """
    # Missing cells reach .apply() as float NaN, which has no .split().
    if not isinstance(article, str):
        return ""

    # Drop blank fragments (e.g. the empty tail produced by text ending in
    # '. ') so they neither count as sentences nor get ranked.
    sentences = [part.strip() for part in article.split('. ') if part.strip()]
    if len(sentences) <= num_sentences:
        return article  # Nothing to select from: the article is its own summary

    try:
        term_counts = CountVectorizer().fit_transform(sentences)
    except ValueError:
        # CountVectorizer raises when no sentence yields a token (empty
        # vocabulary); there is nothing to rank, so keep the leading sentences.
        top_sentences = sentences[:num_sentences]
    else:
        # Centrality of a sentence = row sum of the similarity matrix.
        scores = cosine_similarity(term_counts).sum(axis=1)
        top_indices = np.argsort(scores)[::-1][:num_sentences]
        top_sentences = [sentences[i] for i in top_indices]

    # Splitting on '. ' removed the period of every sentence except those that
    # ended a line or the article. Remove that single leftover period (but
    # leave ellipses alone) so re-joining does not produce "..".
    cleaned = [
        s[:-1] if s.endswith('.') and not s.endswith('..') else s
        for s in top_sentences
    ]
    summary = '. '.join(cleaned)

    # Restore the terminal period that the split took away.
    if summary and summary[-1].isalnum():
        summary += '.'
    return summary

# Summarise every article and store the result alongside its source text
data['textrank_summary'] = [textrank_summary(text) for text in data['article']]

# Persist the augmented table (row index omitted) and report where it went
data.to_csv("textrank_summarized_dataset.csv", index=False)
print("TextRank summaries saved to textrank_summarized_dataset.csv")

## ROUGE Score - TextRank

In [None]:
pip install rouge

pip install rouge-score

from rouge import Rouge

# One scorer instance, reused for every row
rouge = Rouge()


def _textrank_rouge(row):
    """Return the ROUGE scores (avg=True) of a row's TextRank summary vs. its article."""
    return rouge.get_scores(row['textrank_summary'], row['article'], avg=True)


# NOTE(review): the second argument is the source article itself, so these
# scores presumably measure overlap with the input text rather than with a
# human-written reference summary - confirm this is the intended evaluation.
data['rouge_scores_textrank'] = data.apply(_textrank_rouge, axis=1)

# Write the table, now including the score column, to its own CSV
data.to_csv("summary_with_rouge_scores_textrank.csv", index=False)
print("ROUGE scores saved to summary_with_rouge_scores_textrank.csv")

## BLEU Score - TextRank


In [None]:
from nltk.translate.bleu_score import sentence_bleu

def _textrank_bleu(row):
    """Return sentence-level BLEU of a row's TextRank summary.

    Both texts are tokenised on whitespace; the article is the only reference.
    """
    reference_tokens = row['article'].split()
    candidate_tokens = row['textrank_summary'].split()
    return sentence_bleu([reference_tokens], candidate_tokens)


# NOTE(review): the reference is the full article, which is much longer than
# the summary - BLEU's brevity penalty presumably drives these scores toward
# zero; confirm this metric setup is intended.
data['bleu_scores_textrank'] = data.apply(_textrank_bleu, axis=1)

# Write the table, now including the BLEU column, to its own CSV
data.to_csv("summary_with_bleu_scores_textrank.csv", index=False)
print("BLEU scores saved to summary_with_bleu_scores_textrank.csv")

## LexRank

In [None]:
pip install nltk

pip install lexrank

from lexrank import LexRank
from lexrank.mappings.stopwords import STOPWORDS
from nltk.tokenize import sent_tokenize
import pandas as pd
import nltk
# Fetch the 'punkt_tab' NLTK resource (presumably what sent_tokenize relies on)
nltk.download('punkt_tab')

# Re-read the classified dataset so this section starts from the file on disk
dataset_path = "processed_testdataset.csv"
data = pd.read_csv(dataset_path)

# Background corpus for LexRank: every non-missing article, split into sentences
corpus = data['article'].dropna().tolist()
tokenized_corpus = list(map(sent_tokenize, corpus))

# Build the summarizer from that corpus with the package's English stopword list
lxr = LexRank(tokenized_corpus, stopwords=STOPWORDS['en'])

# Function for LexRank summarization
def lexrank_summary(article, num_sentences=3):
    """Summarise an article with the module-level LexRank instance ``lxr``.

    Args:
        article: Article text. Any non-string value (for example the float NaN
            pandas uses for a missing cell) is treated as "no text".
        num_sentences: Number of sentences requested from LexRank.

    Returns:
        The selected sentences joined by single spaces. The article is returned
        unchanged when it has at most ``num_sentences`` sentences; ``""`` is
        returned when ``article`` is not a string.
    """
    # The corpus is built with dropna(), but this function is applied to the
    # un-dropped column, so missing cells (float NaN) can arrive here and
    # sent_tokenize cannot handle them.
    if not isinstance(article, str):
        return ""

    # Split article into sentences using nltk
    sentences = sent_tokenize(article)
    if len(sentences) <= num_sentences:
        return article  # Return full article if too short

    # Generate LexRank summary
    summary = lxr.get_summary(sentences, summary_size=num_sentences)
    return ' '.join(summary)

# Summarise every article and store the result alongside its source text
data['lexrank_summary'] = [lexrank_summary(text) for text in data['article']]

# Persist the augmented table (row index omitted) and report where it went
data.to_csv("lexrank_summarized_dataset.csv", index=False)
print("LexRank summaries saved to lexrank_summarized_dataset.csv")

## ROUGE Score - LexRank


In [None]:
from rouge import Rouge

# One scorer instance, reused for every row
rouge = Rouge()


def _lexrank_rouge(row):
    """Return the ROUGE scores (avg=True) of a row's LexRank summary vs. its article."""
    return rouge.get_scores(row['lexrank_summary'], row['article'], avg=True)


# NOTE(review): the second argument is the source article itself, so these
# scores presumably measure overlap with the input text rather than with a
# human-written reference summary - confirm this is the intended evaluation.
data['rouge_scores_lexrank'] = data.apply(_lexrank_rouge, axis=1)

# Write the table, now including the score column, to its own CSV
data.to_csv("summary_with_rouge_scores_lexrank.csv", index=False)
print("ROUGE scores saved to summary_with_rouge_scores_lexrank.csv")

## BLEU Score - LexRank

In [None]:
from nltk.translate.bleu_score import sentence_bleu

def _lexrank_bleu(row):
    """Return sentence-level BLEU of a row's LexRank summary.

    Both texts are tokenised on whitespace; the article is the only reference.
    """
    reference_tokens = row['article'].split()
    candidate_tokens = row['lexrank_summary'].split()
    return sentence_bleu([reference_tokens], candidate_tokens)


# NOTE(review): the reference is the full article, which is much longer than
# the summary - BLEU's brevity penalty presumably drives these scores toward
# zero; confirm this metric setup is intended.
data['bleu_scores_lexrank'] = data.apply(_lexrank_bleu, axis=1)

# Write the table, now including the BLEU column, to its own CSV
data.to_csv("summary_with_bleu_scores_lexrank.csv", index=False)
print("BLEU scores saved to summary_with_bleu_scores_lexrank.csv")

## BERTSum

In [None]:
pip install transformers

from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch

# Load the tokenizer and the sentence-scoring model
model_name = "bert-base-uncased"  # Replace with an actual pre-trained extractive summarization model

# NOTE(review): the plain BERT checkpoint presumably ships without weights for
# the classification head, so that head is freshly initialised on load. Seeding
# torch first makes the initialisation - and therefore the sentence ranking -
# repeatable across kernel restarts. The scores remain uninformative until a
# fine-tuned checkpoint is substituted above.
torch.manual_seed(42)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model.eval()  # Inference only: make sure dropout is disabled

# Function for BERTSum summarization
def bertsum_summary(article, num_sentences=3):
    """Pick the highest-scoring sentences of an article using the loaded classifier.

    The article is split on ``'. '``, all sentences are passed through the
    module-level ``tokenizer`` and ``model`` in one batch, and the logit at
    class index 1 is used as each sentence's score. The ``num_sentences``
    best-scoring sentences are joined in descending score order.

    NOTE(review): the scores are only meaningful if ``model`` was fine-tuned
    for sentence selection - confirm which checkpoint is loaded.

    Args:
        article: Article text. Any non-string value (for example the float NaN
            pandas uses for a missing cell) is treated as "no text".
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The summary string. The article is returned unchanged when it contains
        at most ``num_sentences`` non-blank sentences; ``""`` is returned when
        ``article`` is not a string.
    """
    # Missing cells reach .apply() as float NaN, which has no .split().
    if not isinstance(article, str):
        return ""

    # Drop blank fragments (e.g. the empty tail produced by text ending in
    # '. ') so they neither count as sentences nor get scored.
    sentences = [part.strip() for part in article.split('. ') if part.strip()]
    if len(sentences) <= num_sentences:
        return article  # Nothing to select from: the article is its own summary

    # Encode all sentences as one padded batch (each truncated to 512 tokens)
    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Forward pass without gradient tracking
    with torch.no_grad():
        outputs = model(**inputs)

    # Column 1 of the logits is taken as the per-sentence importance score
    scores = outputs.logits[:, 1].numpy()
    top_indices = np.argsort(scores)[::-1][:num_sentences]
    top_sentences = [sentences[i] for i in top_indices]

    # Splitting on '. ' removed the period of every sentence except those that
    # ended a line or the article. Remove that single leftover period (but
    # leave ellipses alone) so re-joining does not produce "..".
    cleaned = [
        s[:-1] if s.endswith('.') and not s.endswith('..') else s
        for s in top_sentences
    ]
    summary = '. '.join(cleaned)

    # Restore the terminal period that the split took away.
    if summary and summary[-1].isalnum():
        summary += '.'
    return summary

# Summarise every article and store the result alongside its source text
data['bertsum_summary'] = [bertsum_summary(text) for text in data['article']]

# Persist the augmented table (row index omitted) and report where it went
data.to_csv("bertsum_summarized_dataset.csv", index=False)
print("BERTSum summaries saved to bertsum_summarized_dataset.csv")
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import numpy as np

# Load processed test dataset
test_dataset_path = "processed_testdataset.csv"  # Replace with the correct path
data = pd.read_csv(test_dataset_path)

# Keep only the first 5 rows so the model-based run below stays small
data = data.head(5)

# Load the tokenizer and the sentence-scoring model
model_name = "bert-base-uncased"  # Replace with a fine-tuned BERTSum model name if available

# NOTE(review): the plain BERT checkpoint presumably ships without weights for
# the classification head, so that head is freshly initialised on load. Seeding
# torch first makes the initialisation - and therefore the sentence ranking -
# repeatable across kernel restarts. The scores remain uninformative until a
# fine-tuned checkpoint is substituted above.
torch.manual_seed(42)

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForSequenceClassification.from_pretrained(model_name)
model.eval()  # Inference only: make sure dropout is disabled

# Function for BERTSum summarization
def bertsum_summary(article, num_sentences=3):
    """Pick the highest-scoring sentences of an article using the loaded classifier.

    The article is split on ``'. '``, all sentences are passed through the
    module-level ``tokenizer`` and ``model`` in one batch, and the logit at
    class index 1 is used as each sentence's score. The ``num_sentences``
    best-scoring sentences are joined in descending score order.

    NOTE(review): the scores are only meaningful if ``model`` was fine-tuned
    for sentence selection - confirm which checkpoint is loaded.

    Args:
        article: Article text. Any non-string value (for example the float NaN
            pandas uses for a missing cell) is treated as "no text".
        num_sentences: Maximum number of sentences in the summary.

    Returns:
        The summary string. The article is returned unchanged when it contains
        at most ``num_sentences`` non-blank sentences; ``""`` is returned when
        ``article`` is not a string.
    """
    # Missing cells reach .apply() as float NaN, which has no .split().
    if not isinstance(article, str):
        return ""

    # Drop blank fragments (e.g. the empty tail produced by text ending in
    # '. ') so they neither count as sentences nor get scored.
    sentences = [part.strip() for part in article.split('. ') if part.strip()]
    if len(sentences) <= num_sentences:
        return article  # Nothing to select from: the article is its own summary

    # Encode all sentences as one padded batch (each truncated to 512 tokens)
    inputs = tokenizer(sentences, return_tensors="pt", padding=True, truncation=True, max_length=512)

    # Forward pass without gradient tracking
    with torch.no_grad():
        outputs = model(**inputs)

    # Column 1 of the logits is taken as the per-sentence importance score
    scores = outputs.logits[:, 1].numpy()
    top_indices = np.argsort(scores)[::-1][:num_sentences]
    top_sentences = [sentences[i] for i in top_indices]

    # Splitting on '. ' removed the period of every sentence except those that
    # ended a line or the article. Remove that single leftover period (but
    # leave ellipses alone) so re-joining does not produce "..".
    cleaned = [
        s[:-1] if s.endswith('.') and not s.endswith('..') else s
        for s in top_sentences
    ]
    summary = '. '.join(cleaned)

    # Restore the terminal period that the split took away.
    if summary and summary[-1].isalnum():
        summary += '.'
    return summary

# Run the BERT-based summarizer over each loaded article
data['bertsum_summary'] = [bertsum_summary(text) for text in data['article']]

# Write the result to disk (row index omitted)
output_path = "bertsum_summarized_first5.csv"
data.to_csv(output_path, index=False)

# Left as the cell's last expression so the notebook displays the path
output_path

## ROUGE Score - BERTSum

In [None]:
from rouge import Rouge

# One scorer instance, reused for every row
rouge = Rouge()


def _bertsum_rouge(row):
    """Return the ROUGE scores (avg=True) of a row's BERTSum summary vs. its article."""
    return rouge.get_scores(row['bertsum_summary'], row['article'], avg=True)


# NOTE(review): the second argument is the source article itself, so these
# scores presumably measure overlap with the input text rather than with a
# human-written reference summary - confirm this is the intended evaluation.
data['rouge_scores_bertsum'] = data.apply(_bertsum_rouge, axis=1)

# Write the table, now including the score column, to its own CSV
data.to_csv("summary_with_rouge_scores_bertsum.csv", index=False)
print("ROUGE scores saved to summary_with_rouge_scores_bertsum.csv")


## BLEU Score - BERTSum

In [None]:
from nltk.translate.bleu_score import sentence_bleu

def _bertsum_bleu(row):
    """Return sentence-level BLEU of a row's BERTSum summary.

    Both texts are tokenised on whitespace; the article is the only reference.
    """
    reference_tokens = row['article'].split()
    candidate_tokens = row['bertsum_summary'].split()
    return sentence_bleu([reference_tokens], candidate_tokens)


# NOTE(review): the reference is the full article, which is much longer than
# the summary - BLEU's brevity penalty presumably drives these scores toward
# zero; confirm this metric setup is intended.
data['bleu_scores_bertsum'] = data.apply(_bertsum_bleu, axis=1)

# Write the table, now including the BLEU column, to its own CSV
data.to_csv("summary_with_bleu_scores_bertsum.csv", index=False)
print("BLEU scores saved to summary_with_bleu_scores_bertsum.csv")