In [1]:
from transformers import AutoTokenizer, PegasusForConditionalGeneration, PegasusTokenizer
import torch 
model_name = 'google/pegasus-large' 

# Load the Pegasus tokenizer 
tokenizer = AutoTokenizer.from_pretrained(model_name) 
model = PegasusForConditionalGeneration.from_pretrained(model_name)


with open('./content/justafile.txt', 'r') as f: 
 arxiv_paper = f.read() 
input_chunks = [arxiv_paper[i:i+512] for i in range(0, len(arxiv_paper), 512)] 
summaries = []

for chunk in input_chunks: 
 input_ids = tokenizer.encode(chunk, return_tensors='pt') 
 output = model.generate(input_ids, max_length=100, num_beams=5, length_penalty=0.8) 
 summary = tokenizer.decode(output[0], skip_special_tokens=True) 
 summaries.append(summary) 
summary = ' '.join(summaries) 
print(summary)

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-large and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Machine learning, a subset of AI, enables systems to learn and improve from experience without explicit programming. OpenAI's GPT-3, for instance, is a powerful NLP model that has demonstrated remarkable language understanding and generation capabilities. As we embrace these advancements, it is crucial to address ethical considerations and work towards responsible AI development.


In [2]:
reference_summary="""Artificial intelligence (AI) is transforming industries by enhancing efficiency and enabling data-driven decisions. Machine learning, a subset of AI, facilitates learning from experience without explicit programming. Natural language processing (NLP) is a key application, allowing computers to understand and generate human-like text. OpenAI's GPT-3 exemplifies powerful NLP capabilities. Despite advancements, ethical concerns like bias, data privacy, and job displacement persist. Striking a balance between progress and ethical responsibility is crucial for the positive impact of AI on society.
"""

In [3]:
from rouge import Rouge 
import json
rouge = Rouge() 
scores = rouge.get_scores(summary, reference_summary) 
print(json.dumps(scores, indent=4)) 

import nltk 
# download punkt tokenizer 
nltk.download('punkt') 
sentences = nltk.sent_tokenize(summary) 
num_sentences = len(sentences) 
print("Number of sentences:", num_sentences)

[
    {
        "rouge-1": {
            "r": 0.34285714285714286,
            "p": 0.5,
            "f": 0.4067796561907498
        },
        "rouge-2": {
            "r": 0.15384615384615385,
            "p": 0.23076923076923078,
            "f": 0.18461537981538473
        },
        "rouge-l": {
            "r": 0.3142857142857143,
            "p": 0.4583333333333333,
            "f": 0.37288135110600407
        }
    }
]
Number of sentences: 3


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\USER\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [4]:
import os
directory = 'results'
if not os.path.exists(directory):
    os.makedirs(directory)

# Save the ROUGE scores to a text file in the new directory
file_path = os.path.join(directory, 'pagus_rouge_scores.txt')
with open(file_path, 'w') as file:
    file.write(json.dumps(scores))