# 1. Import and Load Model

In [1]:
import torch 
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

In [2]:
# Run on the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# Hugging Face Hub identifier of the checkpoint used for both the tokenizer
# and the model below.
model_name = 'google/pegasus-xsum'

In [4]:
# Load the tokenizer for the checkpoint named by `model_name`
# (the model itself is loaded in the next cell).
tokenizer = PegasusTokenizer.from_pretrained(
    model_name,
)

In [5]:
# Load the pretrained weights first, then move the model onto the selected device.
model = PegasusForConditionalGeneration.from_pretrained(model_name)
model = model.to(device)

# 2. Perform Abstractive Summarization

In [15]:
# Input article to summarize: three paragraphs about Binance.
# NOTE(review): the bracketed markers such as [2] and [3][4][5] look like
# citation residue from the source article; they are passed to the tokenizer
# unchanged along with the rest of the text.
text = """
Binance is a cryptocurrency exchange which is currently the largest exchange in the world in terms of daily trading volume of cryptocurrencies.[2] It was founded in 2017 and is registered in the Cayman Islands.

Binance was founded by Changpeng Zhao, a developer who had previously created high frequency trading software. Binance was initially based in China, but later moved its headquarters out of China following the Chinese government's increasing regulation of cryptocurrency.

Binance is currently under investigation by both the United States Department of Justice and Internal Revenue Service on allegations of money laundering and tax offenses.[3][4][5] The UK's Financial Conduct Authority ordered Binance to stop all regulated activity in the United Kingdom in June 2021.[6]
"""

In [16]:
# Create tokens: the numeric (token-ID) representation of our text

In [17]:
# Encode the article as PyTorch tensors (truncating over-long input, padding to
# the longest sequence in the batch) and move the encoding to `device`.
tokens = tokenizer(
    text,
    truncation=True,
    padding="longest",
    return_tensors="pt",
).to(device)

In [18]:
# Display the encoded batch to inspect the token IDs produced for `text`.
tokens

{'input_ids': tensor([[62915,   117,   114, 11239,  2264,   162,   117,   767,   109,  1368,
          2264,   115,   109,   278,   115,  1130,   113,  1036,  2430,  2410,
           113,   110, 20751,   107,  4101, 50558,   168,   140,  3271,   115,
          1326,   111,   117,  2451,   115,   109, 28122,  6561,   107, 62915,
           140,  3271,   141, 17284, 71144, 38027,   108,   114,  4369,   170,
           196,  2255,   732,   281,  5099,  2430,   680,   107, 62915,   140,
          4471,   451,   115,  1224,   108,   155,   678,  1652,   203,  7243,
           165,   113,  1224,   645,   109,  1950,   657,   131,   116,  2186,
          5714,   113, 11239,   107, 62915,   117,   767,   365,  3244,   141,
           302,   109,   706,  1013,  1318,   113,  4465,   111, 10894, 11528,
          1255,   124,  9952,   113,   408, 27516,   111,  1035, 25474,   107,
          4101,   726, 32887,   914, 32887,  1343,  1100,   139,   926,   131,
           116,  3650, 16413,  5369,  

In [19]:
# Summarize: generate summary token IDs from the encoded article, unpacking the
# tokenizer output as keyword arguments and using the model's default
# generation settings.
summary = model.generate(
    **tokens,
)

In [20]:
# Display the raw generated token IDs (one row per input sequence).
summary

tensor([[    0,   139,   645,   117,   114,  1245,   112,   181,   113,   109,
           205,   785,   110, 20751,   107,     1]], device='cuda:0')

In [21]:
# Decode the first (and only) generated sequence back into text.
# The generated IDs start with the pad token (0) and end with EOS (1);
# skip_special_tokens=True keeps those markers out of the string regardless of
# tokenizer implementation/version, matching the batch_decode call below.
tokenizer.decode(summary[0], skip_special_tokens=True)

'The following is a guide to some of the most popular cryptocurrencies.'

In [22]:
# Decode every generated sequence in the batch at once, dropping special
# tokens such as padding and end-of-sequence; returns a list of strings.
tokenizer.batch_decode(
    summary,
    skip_special_tokens=True,
)

['The following is a guide to some of the most popular cryptocurrencies.']