## Huggingface Configuration

In [32]:
from transformers import AutoConfig,AutoModel,AutoTokenizer,AdamW,get_linear_schedule_with_warmup,logging
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset,SequentialSampler,RandomSampler,DataLoader

In [34]:
MODEL_NAME="roberta-large"
config = AutoConfig.from_pretrained(MODEL_NAME)
config

Downloading: 100%|██████████| 482/482 [00:00<00:00, 319kB/s]


RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "transformers_version": "4.19.2",
  "type_vocab_size": 1,
  "use_cache": true,
  "vocab_size": 50265
}

In [36]:
config.num_labels

2

In [37]:
config.num_labels = 12

In [39]:
config

RobertaConfig {
  "_name_or_path": "roberta-large",
  "architectures": [
    "RobertaForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_11": 11,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "LABEL_7": 7,
    "LABEL_8": 8,
    "LABEL_9": 9
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "roberta",
  "num_attention_heads": 16,
  "num_hidden_layers": 24,
  "

## Huggingface Tokenizer

In [40]:
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
tokenizer

Downloading: 100%|██████████| 878k/878k [00:00<00:00, 1.83MB/s]
Downloading: 100%|██████████| 446k/446k [00:00<00:00, 935kB/s] 
Downloading: 100%|██████████| 1.29M/1.29M [00:00<00:00, 2.25MB/s]


PreTrainedTokenizerFast(name_or_path='roberta-large', vocab_size=50265, model_max_len=512, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'bos_token': '<s>', 'eos_token': '</s>', 'unk_token': '<unk>', 'sep_token': '</s>', 'pad_token': '<pad>', 'cls_token': '<s>', 'mask_token': AddedToken("<mask>", rstrip=False, lstrip=True, single_word=False, normalized=False)})

In [41]:
tokenizer.all_special_ids

[0, 2, 3, 1, 50264]

In [42]:
tokenizer.all_special_tokens

['<s>', '</s>', '<unk>', '<pad>', '<mask>']

In [43]:
tokenizer.vocab_size


50265

Text to ids

In [44]:
text = 'I work in Beijing'
token_ids=tokenizer.encode(text)
token_ids

[0, 100, 173, 11, 3332, 2]

Ids to tokens

In [45]:
tokenizer.convert_ids_to_tokens(token_ids)

['<s>', 'I', 'Ġwork', 'Ġin', 'ĠBeijing', '</s>']

Method 1 of Padding

In [46]:
token_ids=tokenizer.encode(text,padding=True,max_length=30,add_special_tokens=True)
token_ids



[0, 100, 173, 11, 3332, 2]

In [47]:
token_ids=tokenizer.encode(text,padding="max_length",max_length=30,add_special_tokens=True)
token_ids

[0,
 100,
 173,
 11,
 3332,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1]

In [48]:
token_ids=tokenizer.encode(text,padding="max_length",max_length=30,add_special_tokens=True,return_tensors='pt')
token_ids

tensor([[   0,  100,  173,   11, 3332,    2,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1]])

Method 2 of padding

In [49]:
token_ids=tokenizer.encode_plus(
    text,padding="max_length",
    max_length=30,
    add_special_tokens=True,
    return_tensors='pt',
    return_token_type_ids=True,
    return_attention_mask=True
)
token_ids

{'input_ids': tensor([[   0,  100,  173,   11, 3332,    2,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,    1,
            1,    1,    1,    1,    1,    1]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0]])}

### Huggingface Modeling

In [50]:
model=AutoModel.from_pretrained(MODEL_NAME)
model

Downloading: 100%|██████████| 1.33G/1.33G [03:51<00:00, 6.17MB/s]
Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


RobertaModel(
  (embeddings): RobertaEmbeddings(
    (word_embeddings): Embedding(50265, 1024, padding_idx=1)
    (position_embeddings): Embedding(514, 1024, padding_idx=1)
    (token_type_embeddings): Embedding(1, 1024)
    (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): RobertaEncoder(
    (layer): ModuleList(
      (0): RobertaLayer(
        (attention): RobertaAttention(
          (self): RobertaSelfAttention(
            (query): Linear(in_features=1024, out_features=1024, bias=True)
            (key): Linear(in_features=1024, out_features=1024, bias=True)
            (value): Linear(in_features=1024, out_features=1024, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): RobertaSelfOutput(
            (dense): Linear(in_features=1024, out_features=1024, bias=True)
            (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
            (d

In [51]:
# outputs=model(token_ids['input_ids'],token_ids['token_type_ids'])
outputs=model(token_ids['input_ids'],token_ids['attention_mask'])

# outputs=model(token_ids['input_ids'],token_ids['attention_mask'],token_ids['token_type_ids'])
outputs

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.0062, -0.0401, -0.0286,  ..., -0.0385,  0.0794,  0.0556],
         [ 0.2347, -0.0184, -0.1813,  ..., -0.0050, -0.3350,  0.2581],
         [ 0.0338, -0.1492, -0.2547,  ..., -0.2282, -0.0268,  0.1119],
         ...,
         [ 0.1340,  0.0578, -0.2684,  ...,  0.0360,  0.0731,  0.1547],
         [ 0.1340,  0.0578, -0.2684,  ...,  0.0360,  0.0731,  0.1547],
         [ 0.1340,  0.0578, -0.2684,  ...,  0.0360,  0.0731,  0.1547]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[ 0.1655,  0.7004,  0.4744,  ..., -0.0157,  0.3987, -0.3642]],
       grad_fn=<TanhBackward0>), hidden_states=None, past_key_values=None, attentions=None, cross_attentions=None)

In [52]:
last_hidden_state=outputs[0]
outputs[0].shape # last_hidden_state

torch.Size([1, 30, 1024])

In [53]:
outputs[1].shape # pooler_output

torch.Size([1, 1024])

In [54]:
cls_embeddings=last_hidden_state[:,0] 
last_hidden_state[:,0].shape

torch.Size([1, 1024])

Change output of BERT

In [55]:
config.update({'output_hidden_states':True}) 

In [56]:
model=AutoModel.from_pretrained(MODEL_NAME,config=config)

outputs=model(token_ids['input_ids'],token_ids['token_type_ids'])

Some weights of the model checkpoint at roberta-large were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.bias', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [57]:
outputs.keys()

odict_keys(['last_hidden_state', 'pooler_output', 'hidden_states'])

In [58]:
outputs['last_hidden_state'].shape

torch.Size([1, 30, 1024])

In [59]:
outputs['pooler_output'].shape

torch.Size([1, 1024])

In [60]:
len(outputs['hidden_states'])

25

In [61]:
outputs['hidden_states'][-1].shape

torch.Size([1, 30, 1024])

## Huggingface Use Case

In [22]:
# !pip install transformers

In [65]:
from transformers import (
    AutoModelForSequenceClassification,
    AutoTokenizer,
    BertTokenizer,
    BertForMaskedLM,
    TrainingArguments,
    Trainer,
    pipeline
)
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
model = AutoModelForMaskedLM.from_pretrained("bert-base-cased")
model

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(28996, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=Tr

In [3]:
import torch
from transformers import BertTokenizer
from IPython.display import clear_output

PRETRAINED_MODEL_NAME = "bert-base-cased" 

tokenizer = BertTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)

clear_output()
print("PyTorch Version：", torch.__version__)

PyTorch Version： 1.11.0


In [5]:
vocab = tokenizer.vocab
print("Vocabulary Size：", len(vocab))

Vocabulary Size： 28996


In [6]:
import random
random_tokens = random.sample(list(vocab), 10)
random_ids = [vocab[t] for t in random_tokens]

print("{0:20}{1:15}".format("token", "index"))
print("-" * 25)
for t, id in zip(random_tokens, random_ids):
    print("{0:15}{1:10}".format(t, id))

token               index          
-------------------------
Easy                12167
salmon              17646
Russia               2733
Relegated           17997
tents               19544
exported            16578
reacted             15510
Audience            23590
subdivided          16224
brace               22913


In [19]:
# A quote from Albert Einstein
text = "[CLS] life is like riding a bicycle. to keep your [MASK], you must keep moving."
tokens = tokenizer.tokenize(text)
ids = tokenizer.convert_tokens_to_ids(tokens)

print(text)
print(tokens[:18], '...')
print(ids[:18], '...')

[CLS] life is like riding a bicycle. to keep your [MASK], you must keep moving.
['[CLS]', 'life', 'is', 'like', 'riding', 'a', 'bicycle', '.', 'to', 'keep', 'your', '[MASK]', ',', 'you', 'must', 'keep', 'moving', '.'] ...
[101, 1297, 1110, 1176, 5569, 170, 11639, 119, 1106, 1712, 1240, 103, 117, 1128, 1538, 1712, 2232, 119] ...


In [21]:
from transformers import BertForMaskedLM

tokens_tensor = torch.tensor([ids])  # (1, seq_len)
segments_tensors = torch.zeros_like(tokens_tensor)  # (1, seq_len)
maskedLM_model = BertForMaskedLM.from_pretrained(PRETRAINED_MODEL_NAME)

# Use masked LM to estimate token at [MASK]
maskedLM_model.eval()
with torch.no_grad():
    outputs = maskedLM_model(tokens_tensor, segments_tensors)
    predictions = outputs[0]
    # (1, seq_len, num_hidden_units)
del maskedLM_model

# Get top k tokens from the probability distribution
masked_index = 11
k = 3
probs, indices = torch.topk(torch.softmax(predictions[0, masked_index], -1), k)
predicted_tokens = tokenizer.convert_ids_to_tokens(indices.tolist())

print("Input tokens ：", tokens[:18], '...')
print('-' * 50)
for i, (t, p) in enumerate(zip(predicted_tokens, probs), 1):
    tokens[masked_index] = t
    print("Top {} ({:2}%)：{}".format(i, int(p.item() * 100), tokens[:18]), '...')

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Input tokens ： ['[CLS]', 'life', 'is', 'like', 'riding', 'a', 'bicycle', '.', 'to', 'keep', 'your', 'strength', ',', 'you', 'must', 'keep', 'moving', '.'] ...
--------------------------------------------------
Top 1 (31%)：['[CLS]', 'life', 'is', 'like', 'riding', 'a', 'bicycle', '.', 'to', 'keep', 'your', 'balance', ',', 'you', 'must', 'keep', 'moving', '.'] ...
Top 2 ( 9%)：['[CLS]', 'life', 'is', 'like', 'riding', 'a', 'bicycle', '.', 'to', 'keep', 'your', 'equilibrium', ',', 'you', 'must', 'keep', 'moving', '.'] ...
Top 3 ( 4%)：['[CLS]', 'life', 'is', 'like', 'riding', 'a', 'bicycle', '.', 'to', 'keep', 'your', 'strength', ',', 'you', 'must', 'keep', 'moving', '.'] ...


## Visualization

In [62]:
# !pip install bertviz ipywidgets

In [24]:
from transformers import AutoTokenizer, AutoModel
from bertviz import model_view

tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_MODEL_NAME)
model = AutoModelForMaskedLM.from_pretrained(PRETRAINED_MODEL_NAME, output_attentions=True)
inputs = tokenizer.encode("[CLS] life is like riding a bicycle. to keep your [MASK], you must keep moving.",
                          return_tensors='pt')
outputs = model(inputs)
attention = outputs[-1]  
tokens = tokenizer.convert_ids_to_tokens(inputs[0]) 
model_view(attention, tokens)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<IPython.core.display.Javascript object>

In [30]:
from transformers import BertTokenizer, BertModel
from bertviz import head_view
from IPython.display import clear_output
# helper function for visualization in jupyter notebook
def call_html():
  import IPython
  display(IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              "d3": "https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.8/d3.min",
              jquery: '//ajax.googleapis.com/ajax/libs/jquery/2.0.0/jquery.min',
            },
          });
        </script>
        '''))

clear_output()

In [31]:
model_version = PRETRAINED_MODEL_NAME
model = BertModel.from_pretrained(model_version, output_attentions=True)
tokenizer = BertTokenizer.from_pretrained(model_version)

# Scenario 1
sentence_a = "Emma asked Tommy to buy books,"
sentence_b = "She beat him when he came back."

# Get attention from new input
inputs = tokenizer.encode_plus(sentence_a, sentence_b, return_tensors='pt', add_special_tokens=False)
token_type_ids = inputs['token_type_ids']
input_ids = inputs['input_ids']
attention = model(input_ids, token_type_ids=token_type_ids)[-1]
input_id_list = input_ids[0].tolist() # Batch index 0
tokens = tokenizer.convert_ids_to_tokens(input_id_list)
call_html()

# BertViz
head_view(attention, tokens)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


<IPython.core.display.Javascript object>

## Pipeline

1. "feature-extraction" : FeatureExtractionPipeline
1. "text-classification" : TextClassificationPipeline
1. "sentiment-analysis" : TextClassificationPipeline
1. "token-classification" : TokenClassificationPipeline
1. "ner": TokenClassificationPipeline
1. "question-answering" : QuestionAnsweringPipeline
1. "fill-mask": FillMaskPipeline
1. "summarization" : SummarizationPipeline
1. "translation_xx_to_yy" : TranslationPipeline
1. "text2text-generation" : Text2TextGenerationPipeline
1. "text-generation" : TextGenerationPipeline
1. "zero-shot-classification" : ZeroShotClassificationPipeline
1. "conversational" : ConversationalPipeline

In [74]:
classifier = pipeline("sentiment-analysis")
result = classifier("We are very happy to introduce pipeline to the transformers repository.")[0]
print("label: {}, with score: {}".format(result["label"], round(result["score"], 4)))
result = classifier("I hate you")[0]
print("label: {}, with score: {}".format(result["label"], round(result["score"], 4)))
result = classifier("I love you")[0]
print("label: {}, with score: {}".format(result["label"], round(result["score"], 4)))

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


label: POSITIVE, with score: 0.9998
label: NEGATIVE, with score: 0.9991
label: POSITIVE, with score: 0.9999


In [76]:
question_answerer = pipeline("question-answering")
context = r"""
Extractive Question Answering is the task of extracting an answer from a text given a question. 
An example of a question answering dataset is the SQuAD dataset, which is entirely based on that task. 
If you would like to fine-tune a model on a SQuAD task, you may leverage the run_squad.py.
"""
result = question_answerer(
    question="What is extractive question answering?",
    context=context
)
print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}")
result = question_answerer(
    question="What is a good example of a question answering dataset?",
    context=context
)
print(f"Answer: '{result['answer']}', score: {round(result['score'], 4)}, start: {result['start']}")

No model was supplied, defaulted to distilbert-base-cased-distilled-squad (https://huggingface.co/distilbert-base-cased-distilled-squad)


Answer: 'the task of extracting an answer from a text given a question', score: 0.6166, start: 34
Answer: 'SQuAD dataset', score: 0.4985, start: 148


In [79]:
summarizer = pipeline("summarization")
ARTICLE = """
    America has changed dramatically during recent years. Not only has the number of 
    graduates in traditional engineering disciplines such as mechanical, civil, 
    electrical, chemical, and aeronautical engineering declined, but in most of 
    the premier American universities engineering curricula now concentrate on 
    and encourage largely the study of engineering science. As a result, there 
    are declining offerings in engineering subjects dealing with infrastructure, 
    the environment, and related issues, and greater concentration on high 
    technology subjects, largely supporting increasingly complex scientific 
    developments. While the latter is important, it should not be at the expense 
    of more traditional engineering.

    Rapidly developing economies such as China and India, as well as other 
    industrial countries in Europe and Asia, continue to encourage and advance 
    the teaching of engineering. Both China and India, respectively, graduate 
    six and eight times as many traditional engineers as does the United States. 
    Other industrial countries at minimum maintain their output, while America 
    suffers an increasingly serious decline in the number of engineering graduates 
    and a lack of well-educated engineers.
"""

result = summarizer(ARTICLE, max_length=130, min_length=30, do_sample=False)
result

[{'summary_text': ' America has changed dramatically during recent years . The number of engineering graduates in the U.S. has declined in traditional engineering disciplines such as mechanical, civil, electrical, chemical, and aeronautical engineering . Rapidly developing economies such as China and India continue to encourage and advance the teaching of engineering .'}]

In [80]:
classifier = pipeline("zero-shot-classification")
classifier(
    "This is a course about the Transformers library",
    candidate_labels=["education", "politics", "business"],
)

No model was supplied, defaulted to facebook/bart-large-mnli (https://huggingface.co/facebook/bart-large-mnli)
Downloading: 100%|██████████| 1.13k/1.13k [00:00<00:00, 480kB/s]
Downloading: 100%|██████████| 1.52G/1.52G [03:22<00:00, 8.04MB/s]
Downloading: 100%|██████████| 26.0/26.0 [00:00<00:00, 10.9kB/s]
Downloading: 100%|██████████| 878k/878k [00:00<00:00, 925kB/s] 
Downloading: 100%|██████████| 446k/446k [00:00<00:00, 3.46MB/s]
Downloading: 100%|██████████| 1.29M/1.29M [00:00<00:00, 5.39MB/s]


{'sequence': 'This is a course about the Transformers library',
 'labels': ['education', 'business', 'politics'],
 'scores': [0.8445994257926941, 0.11197381466627121, 0.04342671111226082]}

In [81]:
generator = pipeline("text-generation", model="distilgpt2")
generator(
    "In this course, we will teach you how to",
    max_length=30,
    num_return_sequences=2,
)

Downloading: 100%|██████████| 762/762 [00:00<00:00, 346kB/s]
Downloading: 100%|██████████| 336M/336M [01:07<00:00, 5.21MB/s] 
Downloading: 100%|██████████| 0.99M/0.99M [00:01<00:00, 1.03MB/s]
Downloading: 100%|██████████| 446k/446k [00:00<00:00, 1.35MB/s]
Downloading: 100%|██████████| 1.29M/1.29M [00:00<00:00, 1.67MB/s]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


[{'generated_text': "In this course, we will teach you how to control and hold your own on the world through the skills of the young. But if you don't"},
 {'generated_text': 'In this course, we will teach you how to apply to the National Student Federal Reserve.'}]

In [82]:
unmasker = pipeline("fill-mask")
unmasker("This course will teach you all about <mask> models.", top_k=2)

No model was supplied, defaulted to distilroberta-base (https://huggingface.co/distilroberta-base)
Downloading: 100%|██████████| 480/480 [00:00<00:00, 210kB/s]
Downloading: 100%|██████████| 316M/316M [01:01<00:00, 5.41MB/s] 
Downloading: 100%|██████████| 878k/878k [00:00<00:00, 2.97MB/s]
Downloading: 100%|██████████| 446k/446k [00:00<00:00, 1.18MB/s]
Downloading: 100%|██████████| 1.29M/1.29M [00:00<00:00, 2.03MB/s]


[{'score': 0.19619736075401306,
  'token': 30412,
  'token_str': ' mathematical',
  'sequence': 'This course will teach you all about mathematical models.'},
 {'score': 0.04052709415555,
  'token': 38163,
  'token_str': ' computational',
  'sequence': 'This course will teach you all about computational models.'}]

In [None]:
ner = pipeline("ner", grouped_entities=True)
ner("My name is Sylvain and I work at Hugging Face in Brooklyn.")

In [None]:
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-fr-en")
translator("Ce cours est produit par Hugging Face.")

## Fine Tune Pretrained Model

In [None]:
# from datasets import load_dataset, load_metric

# # load data
# raw_datasets = load_dataset("imdb")

# # tokenization
# tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
# inputs = tokenizer(sentences, padding="max_length", truncation=True)
# def tokenize_function(examples):
#     return tokenizer(
#     examples["text"], padding="max_length", truncation=True
#     )
# tokenized_datasets = raw_datasets.map(tokenize_function, batched=True)
# small_train_dataset = tokenized_datasets["train"].shuffle(seed=42).select(range(1000))
# small_eval_dataset = tokenized_datasets["test"].shuffle(seed=42).select(range(1000))
# full_train_dataset = tokenized_datasets["train"]
# full_eval_dataset = tokenized_datasets["test"]

# # Model
# model = AutoModelForSequenceClassification.from_pretrained(
#     "bert-base-cased", num_labels=2
# )

# # Evaluation
# metric = load_metric("accuracy")
# def compute_metrics(eval_pred):
#     logits, labels = eval_pred
#     predictions = np.argmax(logits, axis=-1)
#     return metric.compute(predictions=predictions, references=labels)

# # Train
# training_args = TrainingArguments("test_trainer", evaluation_strategy="epoch")
# trainer = Trainer(
#     model=model,
#     args=training_args,
#     train_dataset=small_train_dataset,
#     eval_dataset=small_eval_dataset,
#     compute_metrics=compute_metrics,
# )
# trainer.train()
# trainer.evaluate()