# 0. Install dependencies

In [46]:
# Install PyTorch
!pip install torch torchvision torchaudio



In [47]:
# Install transformers
!pip install transformers



# 1. Import and Load Model

In [48]:
# Importing dependencies from transformers
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [49]:
!pip install sentencepiece



In [50]:
# Fetch the pretrained tokenizer that matches the "google/pegasus-xsum" checkpoint
tokenizer = PegasusTokenizer.from_pretrained(
    "google/pegasus-xsum",
)

In [51]:
# Fetch the pretrained summarization model from the same checkpoint as the tokenizer.
# NOTE(review): the "newly initialized" warning printed here names only the
# embed_positions weights; presumably these are Pegasus's fixed sinusoidal position
# embeddings, in which case the warning is harmless - confirm.
model = PegasusForConditionalGeneration.from_pretrained(
    "google/pegasus-xsum",
)

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-xsum and are newly initialized: ['model.encoder.embed_positions.weight', 'model.decoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


# 2. Perform Abstractive Summarization

In [52]:
# Passage to summarize; the leading and trailing newline of the original literal are kept.
text = (
    "\n"
    "Global warming is the long-term warming of the planet's overall temperature. "
    "Though this warming trend has been going on for a long time, its pace has "
    "significantly increased in the last hundred years due to the burning of fossil fuels. "
    "As the human population has increased, so has the volume of fossil fuels burned."
    "\n"
)

In [53]:
# Encode the passage into PyTorch tensors of token ids that the model can consume
tokens = tokenizer(
    text,
    return_tensors="pt",
    padding="longest",
    truncation=True,
)

In [54]:
# Inspect the encoded input: it holds the `input_ids` and `attention_mask` tensors
tokens

{'input_ids': tensor([[ 2871,  8309,   117,   109,   300,   121,  1704,  8309,   113,   109,
          3909,   131,   116,  1380,  1972,   107,  4450,   136,  8309,  3012,
           148,   174,   313,   124,   118,   114,   300,   166,   108,   203,
          3644,   148,  2838,  1562,   115,   109,   289,  4062,   231,   640,
           112,   109,  5461,   113, 11881, 13922,   107,   398,   109,   883,
          1948,   148,  1562,   108,   167,   148,   109,  2410,   113, 11881,
         13922,  9856,   107,     1]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [55]:
# Generate summary token ids, passing the two encoded tensors explicitly
summary = model.generate(
    input_ids=tokens["input_ids"],
    attention_mask=tokens["attention_mask"],
)

In [56]:
# Inspect the token ids of the first generated sequence
summary[0]

tensor([    0,   139,   706,  5898,   125, 75112,  6967,   124,  9792,  5297,
          143,   187, 45711,   158,   148,  1291,   203,   807,   731,   124,
         1122,  8309,   107,     1])

In [57]:
# Decode the first generated sequence back into readable text.
# skip_special_tokens=True drops control markers such as <pad> and </s>,
# so only the summary sentence itself is returned.
tokenizer.decode(summary[0], skip_special_tokens=True)

'The United Nations Intergovernmental Panel on Climate Change (IPCC) has released its latest report on global warming.'