# The pipeline function

In [1]:
from transformers import pipeline

In [2]:
# Build a sentiment classifier. No `model=` is passed, so the library falls
# back to its default checkpoint for this task (named in the log output below).
classifier = pipeline(task="sentiment-analysis")

No model was supplied, defaulted to distilbert/distilbert-base-uncased-finetuned-sst-2-english and revision af0f99b (https://huggingface.co/distilbert/distilbert-base-uncased-finetuned-sst-2-english).
Using a pipeline without specifying a model name and revision in production is not recommended.


In [3]:
# One string in, a one-element list with the predicted label and score out.
classifier(
    "I've been waiting for a HuggingFace Course my whole life"
)

[{'label': 'POSITIVE', 'score': 0.9516072273254395}]

In [4]:
# A list of sentences is classified in a single call; the results are
# returned in the same order as the inputs.
classifier(
    [
        "I've been waiting for a HuggingFace Course my whole life",
        "I hate this so much!",
    ]
)

[{'label': 'POSITIVE', 'score': 0.9516072273254395},
 {'label': 'NEGATIVE', 'score': 0.9994558691978455}]

In [5]:
# Zero-shot classification: score the text against labels supplied at call time.
# This rebinds `classifier`, so the sentiment pipeline created earlier is no
# longer reachable under that name.
# NOTE(review): 'buisness' looks like a misspelling of 'business'; it is kept
# verbatim here so the cell still matches its recorded output.
classifier = pipeline(task="zero-shot-classification")
classifier(
    "This is a course about the Transformers library",
    candidate_labels=["education", "politics", "buisness"],
)

No model was supplied, defaulted to facebook/bart-large-mnli and revision c626438 (https://huggingface.co/facebook/bart-large-mnli).
Using a pipeline without specifying a model name and revision in production is not recommended.


{'sequence': 'This is a course about the Transformers library',
 'labels': ['education', 'buisness', 'politics'],
 'scores': [0.6811392903327942, 0.2838385999202728, 0.03502209484577179]}

In [6]:
# Text generation with the task's default checkpoint: the model continues
# the prompt it is given.
generator = pipeline(task="text-generation")
generator(
    "In this course, we will teach you how to"
)

No model was supplied, defaulted to openai-community/gpt2 and revision 6c0e608 (https://huggingface.co/openai-community/gpt2).
Using a pipeline without specifying a model name and revision in production is not recommended.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'In this course, we will teach you how to use the Windows PowerShell Framework.\n\nWhen you take this course, you will need to download the latest versions for you. Once you do that, you will get a new install which, if you'}]

In [7]:
# Same task, this time with an explicitly chosen checkpoint.
generator = pipeline(task="text-generation", model="distilgpt2")
generator(
    "In this course, we will teach you how to",
    max_length=30,  # length cap measured in tokens (prompt included), not words
    num_return_sequences=2,  # number of generated texts to return
)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': 'In this course, we will teach you how to implement advanced concepts in your favorite language.\n\n\n1. When you first start learning language,'},
 {'generated_text': 'In this course, we will teach you how to improve your posture, and what it is to increase your posture!\n\n\n\nThe course includes'}]

In [8]:
# Fill-mask: propose tokens for the <mask> slot; top_k=2 keeps two candidates.
# NOTE(review): 'modoels' is presumably a typo for 'models'; it is kept
# verbatim because the recorded output was produced with this exact string.
unmasker = pipeline(task="fill-mask")
unmasker(
    "This course will teach you all about <mask> modoels",
    top_k=2,
)

No model was supplied, defaulted to distilbert/distilroberta-base and revision ec58a5b (https://huggingface.co/distilbert/distilroberta-base).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at distilbert/distilroberta-base were not used when initializing RobertaForMaskedLM: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
- This IS expected if you are initializing RobertaForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'score': 0.025584490969777107,
  'token': 39887,
  'token_str': ' CSS',
  'sequence': 'This course will teach you all about CSS modoels'},
 {'score': 0.02498646453022957,
  'token': 15826,
  'token_str': ' Linux',
  'sequence': 'This course will teach you all about Linux modoels'}]

In [9]:
# Named-entity recognition: each tagged word piece is reported with its entity
# type, score and character offsets (see the output below).
# NOTE(review): 'Sylavin' and 'Brookyln' look like misspellings of 'Sylvain'
# and 'Brooklyn'; they are kept verbatim to match the recorded output.
ner = pipeline(task="ner")
ner(
    "My name is Sylavin and I work at Hugging Face in Brookyln"
)

No model was supplied, defaulted to dbmdz/bert-large-cased-finetuned-conll03-english and revision f2482bf (https://huggingface.co/dbmdz/bert-large-cased-finetuned-conll03-english).
Using a pipeline without specifying a model name and revision in production is not recommended.
Some weights of the model checkpoint at dbmdz/bert-large-cased-finetuned-conll03-english were not used when initializing BertForTokenClassification: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


[{'entity': 'I-PER',
  'score': 0.99933594,
  'index': 4,
  'word': 'S',
  'start': 11,
  'end': 12},
 {'entity': 'I-PER',
  'score': 0.9868073,
  'index': 5,
  'word': '##yla',
  'start': 12,
  'end': 15},
 {'entity': 'I-PER',
  'score': 0.99791175,
  'index': 6,
  'word': '##vin',
  'start': 15,
  'end': 18},
 {'entity': 'I-ORG',
  'score': 0.98590523,
  'index': 11,
  'word': 'Hu',
  'start': 33,
  'end': 35},
 {'entity': 'I-ORG',
  'score': 0.9730094,
  'index': 12,
  'word': '##gging',
  'start': 35,
  'end': 40},
 {'entity': 'I-ORG',
  'score': 0.98913074,
  'index': 13,
  'word': 'Face',
  'start': 41,
  'end': 45},
 {'entity': 'I-LOC',
  'score': 0.8641132,
  'index': 15,
  'word': 'Brook',
  'start': 49,
  'end': 54},
 {'entity': 'I-LOC',
  'score': 0.76545364,
  'index': 16,
  'word': '##yl',
  'start': 54,
  'end': 56},
 {'entity': 'I-LOC',
  'score': 0.71794754,
  'index': 17,
  'word': '##n',
  'start': 56,
  'end': 57}]

In [10]:
# Extractive question answering: the answer is a span taken from `context`,
# returned together with its start/end character offsets.
question_answerer = pipeline(task="question-answering")
question_answerer(
    question="Where do I work?",
    context="My name is Sylavin and I work at Hugging Face in Brooklyn",
)

No model was supplied, defaulted to distilbert/distilbert-base-cased-distilled-squad and revision 626af31 (https://huggingface.co/distilbert/distilbert-base-cased-distilled-squad).
Using a pipeline without specifying a model name and revision in production is not recommended.


{'score': 0.7014117240905762, 'start': 33, 'end': 45, 'answer': 'Hugging Face'}

In [11]:
# Summarization with the task's default checkpoint. The passage is written as
# adjacent string literals, which Python joins into one string at compile time.
summarizer = pipeline(task="summarization")
summarizer(
    "A critical phase in Roman history was the Punic Wars against Carthage (264–146 BC). "
    "The First Punic War (264–241 BC) established Rome as a naval power, "
    "while the Second Punic War (218–201 BC) saw the rise of Hannibal, "
    "whose daring crossing of the Alps remains legendary. "
    "Despite early Carthaginian victories, Rome eventually triumphed, "
    "culminating in the Third Punic War (149–146 BC), "
    "which led to the complete destruction of Carthage."
)

No model was supplied, defaulted to sshleifer/distilbart-cnn-12-6 and revision a4f8f3e (https://huggingface.co/sshleifer/distilbart-cnn-12-6).
Using a pipeline without specifying a model name and revision in production is not recommended.
Your max_length is set to 142, but your input_length is only 103. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=51)


[{'summary_text': ' First Punic War (264–241 BC) established Rome as a naval power . Second Punic Wars (218–201 BC) saw the rise of Hannibal, whose daring crossing of the Alps remains legendary . Despite early Carthaginian victories, Rome eventually triumphed, culminating in the Third Punic war (149–146 BC) which led to the complete destruction of Carthage .'}]

In [12]:
# Translation: the checkpoint for the French -> English pair is named explicitly.
translator = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-fr-en",
)
translator("Ce cours est produit par Hugging Face")



[{'translation_text': 'This course is produced by Hugging Face'}]

# What happens inside the pipeline function? (PyTorch)

In [13]:
from transformers import AutoTokenizer

In [14]:
# Load the tokenizer for the SST-2 sentiment checkpoint; the same checkpoint
# name is reused for the models loaded further down.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
tokenizer = AutoTokenizer.from_pretrained(
    pretrained_model_name_or_path=checkpoint,
)

In [15]:
# Two sentences of different lengths; they are tokenized together as one
# batch in the next step.
raw_inputs = [
    "I've been waiting for a HuggingFace course my whole life.",
    "I hate this so much!"
]

In [16]:
# Tokenize the whole batch in one call.
inputs = tokenizer(
    raw_inputs,
    padding=True,  # pad the shorter sentence so both rows have equal length
    truncation=True,  # truncate any input that exceeds the maximum length
    return_tensors="pt",  # "pt" -> PyTorch tensors
)

In [17]:
# Show the encoded batch: token ids plus an attention mask in which 0 marks
# the padding positions of the shorter sentence.
inputs

{'input_ids': tensor([[  101,  1045,  1005,  2310,  2042,  3403,  2005,  1037, 17662, 12172,
          2607,  2026,  2878,  2166,  1012,   102],
        [  101,  1045,  5223,  2023,  2061,  2172,   999,   102,     0,     0,
             0,     0,     0,     0,     0,     0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
        [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])}

In [18]:
from transformers import AutoModel

In [19]:
# Load the checkpoint through AutoModel. The output used from this model below
# is `last_hidden_state`, not classification logits.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModel.from_pretrained(
    pretrained_model_name_or_path=checkpoint,
)

In [20]:
# Forward pass: `**inputs` unpacks the tokenizer output (input_ids,
# attention_mask) into keyword arguments.
outputs = model(**inputs)

In [21]:
# Shape of the hidden states. The printed value [2, 16, 768] lines up with
# 2 input sentences and 16 token positions; 768 is presumably the hidden size.
print(outputs.last_hidden_state.shape)

torch.Size([2, 16, 768])


In [22]:
from transformers import AutoModelForSequenceClassification

In [23]:
# Reload the same checkpoint with a sequence-classification model class. This
# rebinds `model`; its output exposes `.logits` (used below).
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
model = AutoModelForSequenceClassification.from_pretrained(
    pretrained_model_name_or_path=checkpoint,
)

In [24]:
# Forward pass through the classification model; this rebinds `outputs`,
# replacing the result of the earlier AutoModel call.
outputs = model(**inputs)

In [25]:
# Raw, unnormalized scores: one row per sentence, one column per class.
# These are not probabilities yet (note the negative values).
outputs.logits

tensor([[-1.5607,  1.6123],
        [ 4.1692, -3.3464]], grad_fn=<AddmmBackward0>)

In [26]:
import torch

# Normalize the logits over the last (class) dimension so each row sums to 1.
predictions = torch.softmax(outputs.logits, dim=-1)

In [27]:
# Per-sentence class probabilities produced by the softmax above.
predictions

tensor([[4.0195e-02, 9.5980e-01],
        [9.9946e-01, 5.4418e-04]], grad_fn=<SoftmaxBackward0>)

In [28]:
# Mapping from class index (column of the logits/probabilities) to label name.
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

# Instantiate a Transformers model (PyTorch)


In [29]:
from transformers import AutoConfig, AutoModel

In [30]:
# Load the pretrained BERT model, then fetch only the checkpoint's configuration.
# The config must come from AutoConfig: calling AutoModel.from_pretrained again
# would build a second full model and bind it to a name that promises a config.
bert_model = AutoModel.from_pretrained("bert-base-cased")
bert_config = AutoConfig.from_pretrained("bert-base-cased")

In [31]:
# Load the pretrained GPT-2 model, then fetch only the checkpoint's configuration.
# The config must come from AutoConfig: calling AutoModel.from_pretrained again
# would build a second full model and bind it to a name that promises a config.
gpt_model = AutoModel.from_pretrained("gpt2")
gpt_config = AutoConfig.from_pretrained("gpt2")

In [32]:
# Load the pretrained BART model, then fetch only the checkpoint's configuration.
# The config must come from AutoConfig: calling AutoModel.from_pretrained again
# would build a second full model and bind it to a name that promises a config.
bart_model = AutoModel.from_pretrained("facebook/bart-base")
bart_config = AutoConfig.from_pretrained("facebook/bart-base")

In [38]:
from transformers import BertConfig, BertModel

# Fetch the configuration of the "bert-base-cased" checkpoint, then construct a
# BertModel directly from that configuration object. Both names are rebound
# here, replacing the values assigned in the AutoModel cell above.
# NOTE(review): building a model from a config (rather than from_pretrained)
# presumably leaves the weights randomly initialized — confirm this is intended.
bert_config = BertConfig.from_pretrained("bert-base-cased")
bert_model = BertModel(config=bert_config)