# Getting started

Start with a simple example that uses the high-level `pipeline` API.

In [145]:
import tensorflow as tf
from transformers import TFAutoModel
from transformers import AutoModel
from transformers import pipeline
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification
from transformers import DistilBertTokenizer, TFDistilBertForSequenceClassification

In [146]:
# Build a ready-to-use sentiment classifier. No model or tokenizer is passed,
# so the pipeline relies on the library's default checkpoint for this task.
classifier = pipeline(task='sentiment-analysis')

In [147]:
# Classify a single sentence; the result shown below is a one-element list
# holding a dict with 'label' and 'score' keys.
classifier('We are very happy to show you the 🤗 Transformers library.')

[{'label': 'POSITIVE', 'score': 0.9997795224189758}]

In [148]:
# The pipeline also accepts a list of sentences and returns one prediction
# (a dict with 'label' and 'score') per input, in the same order.
sentences = [
    "We are very happy to show you the 🤗 Transformers library.",
    "We hope you don't hate it.",
]
results = classifier(sentences)

# Print each prediction on its own line, with the score rounded to 4 decimals.
for result in results:
    print(f"label: {result['label']}, with score: {round(result['score'], 4)}")


label: POSITIVE, with score: 0.9998
label: NEGATIVE, with score: 0.5309


# Internal

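Take a look at what happens under the hood: load a model and its tokenizer explicitly, tokenize a batch, run the model, and turn the logits into probabilities.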

In [149]:
# Assemble a sentiment pipeline by hand from an explicitly chosen checkpoint
# instead of relying on the pipeline's default model.
model_name = "nlptown/bert-base-multilingual-uncased-sentiment"

# from_pt=True: initialize the TensorFlow model from PyTorch weights
# (the log printed below confirms the weights came from the PyTorch model).
tf_model = TFAutoModelForSequenceClassification.from_pretrained(
    model_name,
    from_pt=True,
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Wire the model and its matching tokenizer into a pipeline.
classifier_i18n = pipeline(
    task='sentiment-analysis',
    model=tf_model,
    tokenizer=tokenizer,
)

All PyTorch model weights were used when initializing TFBertForSequenceClassification.

All the weights of TFBertForSequenceClassification were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertForSequenceClassification for predictions without further training.


In [150]:
# Run two example sentences through the hand-built pipeline. This checkpoint
# labels inputs with star ratings (see the '5 stars' labels printed below).
example_sentences = [
    "We are very happy to show you the 🤗 Transformers library.",
    "We hope you don't hate it.",
]
pipeline_results = classifier_i18n(example_sentences)
print(pipeline_results)

[{'label': '5 stars', 'score': 0.7725350260734558}, {'label': '5 stars', 'score': 0.23652462661266327}]


In [151]:
# Tokenize both sentences as a single batch, i.e. do by hand the
# preprocessing step that the pipeline performs internally.
batch_sentences = [
    "We are very happy to show you the 🤗 Transformers library.",
    "We hope you don't hate it.",
]
tf_batch = tokenizer(
    batch_sentences,
    padding=True,         # pad the shorter sentence (trailing 0s in the output below)
    truncation=True,      # cut inputs that exceed max_length
    max_length=512,
    return_tensors="tf",  # return TensorFlow tensors
)

# Show every tensor in the encoded batch as a plain nested list.
for key, value in tf_batch.items():
    print(f"{key}: {value.numpy().tolist()}")

input_ids: [[101, 11312, 10320, 12495, 19308, 10114, 11391, 10855, 10103, 100, 58263, 13299, 119, 102], [101, 11312, 18763, 10855, 11530, 112, 162, 39487, 10197, 119, 102, 0, 0, 0]]
token_type_ids: [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]
attention_mask: [[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0]]


In [152]:
# Forward pass on the encoded batch. No labels are given, so the printed
# output below has loss=None and raw logits of shape (2, 5).
tf_outputs = tf_model(tf_batch)
print(tf_outputs)

TFSequenceClassifierOutput(loss=None, logits=<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[-2.6222003 , -2.7745316 , -0.8966624 ,  2.0137324 ,  3.3063858 ],
       [ 0.0063588 , -0.12577434, -0.05034703, -0.16553083,  0.13285917]],
      dtype=float32)>, hidden_states=None, attentions=None)


In [153]:
# Softmax over the last axis turns each row of logits into probabilities.
# The largest value in each row (0.7725 and 0.2365 below) matches the scores
# that the classifier_i18n pipeline printed for the same two sentences.
tf_predictions = tf.nn.softmax(tf_outputs.logits, axis=-1)
print(tf_predictions)

tf.Tensor(
[[0.00205668 0.00176608 0.01154935 0.21209283 0.7725351 ]
 [0.20841938 0.18262215 0.19692962 0.17550415 0.2365247 ]], shape=(2, 5), dtype=float32)


In [154]:
# Supplying labels makes the model return a loss as well: the output below
# carries one loss value per example (shape (2,)) next to the same logits.
example_labels = tf.constant([1, 1])
tf_outputs_with_loss = tf_model(tf_batch, labels=example_labels)
print(tf_outputs_with_loss)

TFSequenceClassifierOutput(loss=<tf.Tensor: shape=(2,), dtype=float32, numpy=array([6.338995 , 1.7003361], dtype=float32)>, logits=<tf.Tensor: shape=(2, 5), dtype=float32, numpy=
array([[-2.6222003 , -2.7745316 , -0.8966624 ,  2.0137324 ,  3.3063858 ],
       [ 0.0063588 , -0.12577434, -0.05034703, -0.16553083,  0.13285917]],
      dtype=float32)>, hidden_states=None, attentions=None)


In [155]:
# Directory (relative to the notebook's working directory) used by the
# following cells to save and then reload the tokenizer and model.
save_directory = './tmp'

In [156]:
# Persist the tokenizer and the TensorFlow model into the same directory;
# the next cell reloads both from this path.
tokenizer.save_pretrained(save_directory)
tf_model.save_pretrained(save_directory)

In [157]:
# Reload both artifacts from disk. Note that this rebinds `tokenizer`, replacing
# the instance created from `model_name` earlier in the notebook.
tokenizer = AutoTokenizer.from_pretrained(save_directory)
# from_tf=True: the files in save_directory were written by the TensorFlow model
# above. NOTE(review): the log below reports a plain BertModel being initialized,
# not a sequence-classification class — confirm that loading the base model
# (rather than the classifier) is what this cell is meant to demonstrate.
model = AutoModel.from_pretrained(save_directory, from_tf=True)

All TF 2.0 model weights were used when initializing BertModel.

All the weights of BertModel were initialized from the TF 2.0 model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use BertModel for predictions without further training.


# Getting deeper

Look at the TensorFlow-specific model classes that sit under the hood.

In [158]:
# Load a checkpoint through the concrete DistilBERT classes rather than the
# Auto* classes used earlier in the notebook.
distilbert_model_name = "distilbert-base-uncased-finetuned-sst-2-english"
distilbert_model = TFDistilBertForSequenceClassification.from_pretrained(distilbert_model_name)
distilbert_tokenizer = DistilBertTokenizer.from_pretrained(distilbert_model_name)

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized: ['dropout_778']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [159]:
# Start from the base (not fine-tuned) checkpoint and ask for 50 output labels.
# The log below lists 'classifier' and 'pre_classifier' as newly initialized,
# so this model has to be trained before its predictions are meaningful.
custom_label_model = TFDistilBertForSequenceClassification.from_pretrained("distilbert-base-uncased", num_labels=50)

Some layers from the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertForSequenceClassification: ['vocab_projector', 'activation_13', 'vocab_layer_norm', 'vocab_transform']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased and are newly initialized: ['classifier', 'dropout_798', 'pre_classifier']
You should probably TRAIN this model on a down-stream task to be able to use 