In [1]:
import torch
from transformers import AutoTokenizer, AutoModel 
import spacy 
from nltk.corpus import stopwords 
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np 

In [2]:


class SummarizationBenchmark:
    """Score a summary against a reference text with two similarity metrics.

    ``semantic_similarity`` is the cosine similarity between mean-pooled
    transformer embeddings of the two texts; ``jaccard_similarity`` is the
    overlap between their sets of spaCy content tokens. ``benchmark`` returns
    both scores in a dict.
    """

    def __init__(self, model_name="allenai/scibert_scivocab_uncased"):
        """Load the encoder, its tokenizer, the spaCy pipeline and stop words.

        Args:
            model_name: Identifier passed to ``AutoModel.from_pretrained`` and
                ``AutoTokenizer.from_pretrained``.
        """
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.model = AutoModel.from_pretrained(model_name).to(self.device)
        # Make inference mode explicit so the embeddings are deterministic.
        self.model.eval()
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.nlp = spacy.load("en_core_web_sm")
        # Not used by the methods of this class (jaccard_similarity relies on
        # spaCy's own stop-word flag); kept because it is a public attribute.
        self.stop_words = set(stopwords.words('english'))

    def get_embedding(self, text, max_length=512):
        """Embed text as the attention-masked mean of the last hidden states.

        Padding positions are excluded from the average, so embedding a list
        of texts of different lengths gives each text the same vector it would
        get when embedded on its own.

        Args:
            text: A string, or a list of strings embedded as one padded batch.
            max_length: Token limit handed to the tokenizer. Longer inputs are
                truncated, so only the first ``max_length`` tokens of a long
                document contribute to its embedding.

        Returns:
            A numpy array with one row per input text.
        """
        inputs = self.tokenizer(
            text,
            return_tensors="pt",
            truncation=True,
            max_length=max_length,
            padding=True,
        ).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
        hidden = outputs.last_hidden_state

        # Tokenizers that emit no attention mask fall back to a plain mean.
        if "attention_mask" not in inputs:
            return hidden.mean(dim=1).cpu().numpy()

        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        # clamp guards against division by zero for an all-padding row.
        token_counts = mask.sum(dim=1).clamp(min=1)
        pooled = (hidden * mask).sum(dim=1) / token_counts
        return pooled.cpu().numpy()

    def semantic_similarity(self, doc, summary):
        """Return the cosine similarity of the two texts' embeddings as a float."""
        doc_emb = self.get_embedding(doc)
        sum_emb = self.get_embedding(summary)
        return float(cosine_similarity(doc_emb, sum_emb)[0][0])

    def _content_tokens(self, text):
        """Return the set of token texts that are not stop words, punctuation or whitespace."""
        return {
            token.text
            for token in self.nlp(text)
            if not (token.is_stop or token.is_punct or token.is_space)
        }

    def jaccard_similarity(self, doc, summary):
        """Calculates Jaccard similarity between two texts.

        The texts are compared as sets of spaCy token texts (case-sensitive)
        after dropping stop words, punctuation and whitespace tokens.

        Returns:
            ``|intersection| / |union|`` as a float, or ``0.0`` when neither
            text has any content tokens.
        """
        doc_tokens = self._content_tokens(doc)
        sum_tokens = self._content_tokens(summary)
        union = doc_tokens | sum_tokens
        if not union:
            return 0.0
        return len(doc_tokens & sum_tokens) / len(union)

    def benchmark(self, document, summary):
        """Calculates and returns benchmark scores.

        Returns:
            A dict with the keys ``'semantic_similarity'`` and
            ``'jaccard_similarity'``.
        """
        return {
            'semantic_similarity': self.semantic_similarity(document, summary),
            'jaccard_similarity': self.jaccard_similarity(document, summary),
        }

In [None]:
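# NOTE: disabled example. Before re-enabling it, `standard_summary`,
# `meta_summary` and `large_book_summary` must already be defined, and the cell
# that defines `score_metrics` (the example-usage cell below) must have been run.
# benchmark = SummarizationBenchmark()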

# # Compare meta summary to standard summary
# meta_results = benchmark.benchmark(standard_summary, meta_summary)
# meta_similarity = score_metrics(meta_results)

# # Compare large book summary to standard summary
# large_book_results = benchmark.benchmark(standard_summary, large_book_summary)
# large_book_similarity = score_metrics(large_book_results)

# print(f"Meta Summary Similarity Score: {meta_similarity:.4f}")
# print(f"Large Book Summary Similarity Score: {large_book_similarity:.4f}")

In [None]:
# Example usage: score a placeholder document/summary pair.
def score_metrics(m):
    """Reduce a benchmark result dict to one score: its semantic similarity."""
    return m['semantic_similarity']


benchmark = SummarizationBenchmark()
document = "document here"
summary = "summary here"
results = benchmark.benchmark(document, summary)

# Last expression of the cell, so the notebook displays the score.
score_metrics(results)