### Pipelines

This code summarizes an article.

In [None]:
from transformers import pipeline
import os

In [None]:
# Using pipelines: summarize an article with a ready-made summarization pipeline.

# device = -1 => using the CPU
# device >= 0 => CUDA device with that index
pipe = pipeline("summarization", model="facebook/bart-large-cnn", device=0)

# Read the article inside a context manager so the file handle is closed
# as soon as the text has been read (the bare open() previously leaked it).
with open('./article.txt', mode='r') as file:
    article = file.read()

res = pipe(article)
print(res)

# Save the pipeline to disk so it can be reloaded from a local path later.
pipe.save_pretrained('./model/bart-large-cnn/')


In [None]:
# Load the pretrained model (and its tokenizer) from the local directory
# instead of from a hub model name.
model_path = './model/bart-large-cnn'
pipe2 = pipeline("summarization", model=model_path, tokenizer=model_path, device=0)  # cuda:0

# Read the article inside a context manager so the file handle is closed
# as soon as the text has been read (the bare open() previously leaked it).
with open('./article.txt', mode='r') as file:
    article = file.read()

res = pipe2(article)
print(res)

In [None]:
# %% Using accelerate
# NOTE(review): the heading mentions accelerate, but the pipeline below is placed
# with a plain `device=0` — confirm whether `device_map="auto"` was intended.
import torch
from transformers import pipeline

# Build a pipeline from the model name alone (no explicit task argument),
# requesting bfloat16 as the torch dtype and CUDA device 0.
pipe = pipeline(
    model="facebook/opt-1.3b",
    torch_dtype=torch.bfloat16,
    device=0,
)

# Generate from the prompt with sampling enabled and top_p=0.95.
output = pipe(
    "This is a cool example!",
    do_sample=True,
    top_p=0.95,
)

### Dig into Pipelines


![](./src/pipeline.png)


1. Tokenizer

In [6]:
from transformers import AutoTokenizer

# Load the tokenizer that belongs to the sentiment-analysis checkpoint.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

# Two sentences of different length, tokenized together as one batch.
raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!",
]

# padding=True pads the shorter sentence (see the zeros in the printed
# input_ids / attention_mask); return_tensors accepts 'pt', 'tf', 'np' or 'jax'.
inputs = tokenizer(
    raw_inputs,
    padding=True,
    truncation=True,
    return_tensors="pt",
)
print(inputs)

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}


2. Model

In [7]:
from transformers import AutoModel

# Same checkpoint name as the one used to load the tokenizer.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# Load the model for this checkpoint through the generic AutoModel class.
model = AutoModel.from_pretrained(checkpoint)

In [13]:
# Forward pass: unpack the tokenizer output (input_ids, attention_mask) as keyword arguments.
outputs = model(**inputs)
# Prints torch.Size([2, 16, 768]): 2 sentences x 16 token positions; 768 is presumably the hidden size.
print(outputs.last_hidden_state.shape)

torch.Size([2, 16, 768])


In [16]:
# We won't actually use the `AutoModel` class, but `AutoModelForSequenceClassification`.

from transformers import AutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(checkpoint)

# Run the forward pass again with the newly loaded classification model.
# Without this line `outputs` would still be the result of the earlier
# `AutoModel` call, so the logits printed below would not come from this model
# (and the cell would only work by relying on leftover kernel state).
outputs = model(**inputs)
print(outputs.logits)  # shape: torch.Size([2, 2])

tensor([[-1.5607,  1.6123],
        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>)


In [17]:
# Convert the raw logits into probabilities by applying softmax
# over the last dimension (one row per input sentence).

import torch

predictions = torch.softmax(outputs.logits, dim=-1)
print(predictions)

tensor([[4.0195e-02, 9.5980e-01],
        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward0>)


In [18]:
# Check the labels: look up the model config's mapping from class index to label name.
# The bare expression on the last line is what the cell displays.

model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}