In [None]:
pip install transformers

In [26]:
from transformers import pipeline

# Name the checkpoint explicitly instead of relying on the pipeline's default:
# a default can change between library releases, and omitting it triggers the
# "No model was supplied" warning. This is the checkpoint that warning reported,
# and the same one the rest of the notebook loads by hand.
classifier = pipeline(
    "sentiment-analysis",
    model="distilbert-base-uncased-finetuned-sst-2-english",
)

# Last expression of the cell, so the list of {label, score} dicts is displayed.
classifier(
    [
        "I love you",
        "I am angry with you",
    ]
)

No model was supplied, defaulted to distilbert-base-uncased-finetuned-sst-2-english (https://huggingface.co/distilbert-base-uncased-finetuned-sst-2-english)


[{'label': 'POSITIVE', 'score': 0.9998656511306763},
 {'label': 'NEGATIVE', 'score': 0.9985038042068481}]

## Preprocessing with a tokenizer

In [27]:
from transformers import AutoTokenizer

# Checkpoint the sentiment-analysis pipeline above runs on; loading the tokenizer
# from the same name keeps the token ids consistent with that model.
checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# AutoTokenizer resolves the concrete tokenizer class from the checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)

In [28]:
# Two example sentences of different token lengths, so padding has work to do.
raw_inputs = ["I love you", "I am angry with you"]

# padding=True pads the shorter sentence to the longest one in the batch (the
# zeros in input_ids / attention_mask below), truncation=True caps over-long
# input, and return_tensors="tf" returns TensorFlow tensors.
inputs = tokenizer(
    raw_inputs,
    padding=True,
    truncation=True,
    return_tensors="tf",
)
print(inputs)

{'input_ids': <tf.Tensor: shape=(2, 7), dtype=int32, numpy=
array([[ 101, 1045, 2293, 2017,  102,    0,    0],
       [ 101, 1045, 2572, 4854, 2007, 2017,  102]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(2, 7), dtype=int32, numpy=
array([[1, 1, 1, 1, 1, 0, 0],
       [1, 1, 1, 1, 1, 1, 1]], dtype=int32)>}


## Going through the model

In [29]:
from transformers import TFAutoModel

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# TFAutoModel loads the checkpoint without its classification layers (the load
# message lists 'pre_classifier' and 'classifier' as unused), so calling this
# model yields hidden states rather than class logits.
model = TFAutoModel.from_pretrained(checkpoint)

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertModel: ['pre_classifier', 'dropout_19', 'classifier']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFDistilBertModel were initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


In [30]:
outputs = model(inputs)
print(outputs.last_hidden_state.shape)

(2, 7, 768)


In [31]:
outputs

TFBaseModelOutput([('last_hidden_state',
                    <tf.Tensor: shape=(2, 7, 768), dtype=float32, numpy=
                    array([[[ 0.51096743,  0.41755652, -0.08299181, ...,  0.4442657 ,
                              0.8320096 , -0.33735096],
                            [ 0.63406163,  0.45391536, -0.03473741, ...,  0.33013564,
                              0.8103205 , -0.24367695],
                            [ 0.84637016,  0.57502276,  0.11380929, ...,  0.24522883,
                              0.77054554, -0.33237526],
                            ...,
                            [ 1.2287459 ,  0.28054962,  0.3941112 , ...,  0.6245968 ,
                              0.42625168, -0.8306236 ],
                            [ 0.20518051,  0.4855185 , -0.15691999, ...,  0.43059927,
                              0.7913891 , -0.3208361 ],
                            [ 0.146653  ,  0.412667  , -0.13948853, ...,  0.43119532,
                              0.7967104 , -0.34413463]],


## Postprocessing the output

In [32]:
from transformers import TFAutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"
# Same checkpoint as before, but loaded together with its sequence-classification
# head, so the forward pass returns logits instead of hidden states.
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)
# `inputs` is the padded two-sentence batch built in the tokenizer section.
outputs = model(inputs)

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized: ['dropout_96']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [33]:
outputs

TFSequenceClassifierOutput([('logits',
                             <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
                             array([[-4.275578 ,  4.6392555],
                                    [ 3.5825093, -2.9207718]], dtype=float32)>)])

In [34]:
outputs.logits

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[-4.275578 ,  4.6392555],
       [ 3.5825093, -2.9207718]], dtype=float32)>

In [35]:
outputs.logits.shape

TensorShape([2, 2])

In [36]:
import tensorflow as tf

# Turn each row of raw logits into probabilities that sum to 1 across the labels.
# tf.nn.softmax is the same op as tf.math.softmax, exposed under a second name.
logits = outputs.logits
predictions = tf.nn.softmax(logits, axis=-1)
predictions

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1.3436274e-04, 9.9986565e-01],
       [9.9850380e-01, 1.4962723e-03]], dtype=float32)>

In [37]:
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

## Creating a Transformer

In [38]:
from transformers import BertConfig, TFBertModel

# Default BERT hyperparameters; the next cell displays the resulting values.
config = BertConfig()

# Instantiate the architecture described by `config`. No checkpoint is loaded
# here, so this model does not carry pretrained weights.
model = TFBertModel(config)

In [39]:
config

BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.20.0",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

In [40]:
model

<transformers.models.bert.modeling_tf_bert.TFBertModel at 0x7f78cb165f50>

**OR**

In [98]:
from transformers import TFBertModel

model = TFBertModel.from_pretrained("bert-base-cased")

Some layers from the model checkpoint at bert-base-cased were not used when initializing TFBertModel: ['nsp___cls', 'mlm___cls']
- This IS expected if you are initializing TFBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the layers of TFBertModel were initialized from the model checkpoint at bert-base-cased.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions without further training.


In [42]:
model

<transformers.models.bert.modeling_tf_bert.TFBertModel at 0x7f78cb072a90>

In [52]:
model.save_pretrained("bert_model")

## Using a Transformer model for inference

In [113]:
sequences = ["Hello!", "Cool.", "Nice!"]

# padding=True is needed to batch sentences into one rectangular tensor; without
# it the call only succeeds when every sentence happens to tokenize to the same
# length, as these three do.
token_ids = tokenizer(sequences, padding=True, return_tensors="tf")
encoded_sequences = token_ids.input_ids

# Pass the whole encoding rather than input_ids alone, so the attention mask
# travels with the ids and padded positions are ignored by the model.
# NOTE(review): the recorded output of this cell is a TFSequenceClassifierOutput,
# so it was executed with a sequence-classification model in the kernel, not the
# TFBertModel loaded above. On a fresh top-to-bottom run `model` is that
# bert-base-cased TFBertModel while `tokenizer` is still the uncased DistilBERT
# one — confirm the intended model/tokenizer pairing.
output = model(**token_ids)

In [114]:
token_ids

{'input_ids': <tf.Tensor: shape=(3, 4), dtype=int32, numpy=
array([[ 101, 7592,  999,  102],
       [ 101, 4658, 1012,  102],
       [ 101, 3835,  999,  102]], dtype=int32)>, 'attention_mask': <tf.Tensor: shape=(3, 4), dtype=int32, numpy=
array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]], dtype=int32)>}

In [115]:
encoded_sequences

<tf.Tensor: shape=(3, 4), dtype=int32, numpy=
array([[ 101, 7592,  999,  102],
       [ 101, 4658, 1012,  102],
       [ 101, 3835,  999,  102]], dtype=int32)>

In [116]:
output

TFSequenceClassifierOutput([('logits',
                             <tf.Tensor: shape=(3, 2), dtype=float32, numpy=
                             array([[-3.7234993,  3.9690619],
                                    [-4.2218633,  4.5806665],
                                    [-4.285249 ,  4.6165533]], dtype=float32)>)])

## Word-based tokenizer

In [65]:
# Simplest possible tokenizer: cut the sentence at every single space character.
sentence = "My name is Nilavo Boral"
tokenized_text = sentence.split(' ')
tokenized_text

['My', 'name', 'is', 'Nilavo', 'Boral']

## Using a pretrained tokenizer

In [66]:
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

Downloading:   0%|          | 0.00/29.0 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/208k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/426k [00:00<?, ?B/s]

In [67]:
tokenizer('I love to play chess.')

{'input_ids': [101, 146, 1567, 1106, 1505, 10924, 119, 102], 'token_type_ids': [0, 0, 0, 0, 0, 0, 0, 0], 'attention_mask': [1, 1, 1, 1, 1, 1, 1, 1]}

In [70]:
tokens = tokenizer.tokenize('I love to play chess.')
tokens

['I', 'love', 'to', 'play', 'chess', '.']

In [72]:
ids = tokenizer.convert_tokens_to_ids(tokens)
ids

[146, 1567, 1106, 1505, 10924, 119]

In [68]:
# save tokenizer

tokenizer.save_pretrained("tokenizer")

('tokenizer/tokenizer_config.json',
 'tokenizer/special_tokens_map.json',
 'tokenizer/vocab.txt',
 'tokenizer/added_tokens.json',
 'tokenizer/tokenizer.json')

## Decoding

In [73]:
decoded_string = tokenizer.decode(ids)
decoded_string

'I love to play chess.'

## Try it: sentiment analysis by hand, without the pipeline

In [129]:
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# Tokenizer and model come from the same checkpoint, so the ids the tokenizer
# emits index the vocabulary this model was loaded with.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sentence = 'Hello, how are you?'

# Calling the tokenizer directly (instead of tokenize + convert_tokens_to_ids)
# also adds the special start/end tokens and returns a batch of one as tensors.
encoded_sentence = tokenizer(sentence, return_tensors = 'tf')
encoded_sentence_ids = encoded_sentence.input_ids

# Feed the full encoding (ids plus attention mask), matching how the batched
# example later in the notebook calls the model.
output = model(**encoded_sentence)

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized: ['dropout_367']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [130]:
encoded_sentence_ids

<tf.Tensor: shape=(1, 8), dtype=int32, numpy=array([[ 101, 7592, 1010, 2129, 2024, 2017, 1029,  102]], dtype=int32)>

In [131]:
output

TFSequenceClassifierOutput([('logits',
                             <tf.Tensor: shape=(1, 2), dtype=float32, numpy=array([[-2.5492408,  2.6930792]], dtype=float32)>)])

In [160]:
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# Load the matching tokenizer / classification model pair for the checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sentences = ["I love you", "I am angry with you"]

# Batch both sentences: pad to the longest, truncate over-long input, and
# return TensorFlow tensors.
encoded_sentences = tokenizer(
    sentences,
    padding=True,
    truncation=True,
    return_tensors='tf',
)

# Unpack the encoding so every tensor it holds is passed as a keyword argument.
output = model(**encoded_sentences)

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized: ['dropout_407']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [161]:
output

TFSequenceClassifierOutput([('logits',
                             <tf.Tensor: shape=(2, 2), dtype=float32, numpy=
                             array([[-4.275578 ,  4.6392555],
                                    [ 3.5825093, -2.9207718]], dtype=float32)>)])

In [162]:
tf.math.softmax(output.logits)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1.3436274e-04, 9.9986565e-01],
       [9.9850380e-01, 1.4962723e-03]], dtype=float32)>

In [163]:
tf.math.softmax(output.logits, axis = -1)

<tf.Tensor: shape=(2, 2), dtype=float32, numpy=
array([[1.3436274e-04, 9.9986565e-01],
       [9.9850380e-01, 1.4962723e-03]], dtype=float32)>

In [164]:
import numpy as np
np.argmax(tf.math.softmax(output.logits, axis = -1)[0])

1

In [165]:
np.argmax(tf.math.softmax(output.logits, axis = -1)[1])

0

In [166]:
model.config.id2label

{0: 'NEGATIVE', 1: 'POSITIVE'}

In [167]:
model.config.id2label[np.argmax(tf.math.softmax(output.logits, axis = -1)[1])]

'NEGATIVE'

In [172]:
tf.math.softmax(output.logits, axis = -1)[1][np.argmax(tf.math.softmax(output.logits, axis = -1)[1])].numpy()

0.9985038

In [169]:
# Run softmax once for the whole batch; the original recomputed it three times
# for every sentence inside the loop.
probabilities = tf.math.softmax(output.logits, axis = -1).numpy()

sentiment_analysis_list = []

# One row of probabilities per input sentence, in input order.
for sentence_probs in probabilities:
  # Index of the most likely label for this sentence.
  best = int(np.argmax(sentence_probs))

  # [label name, probability of that label]
  sentiment_analysis_list.append([model.config.id2label[best], sentence_probs[best]])

In [170]:
sentiment_analysis_list

[['POSITIVE', 0.99986565], ['NEGATIVE', 0.9985038]]

## Sentiment-analysis pipeline

In [174]:
# numpy is imported here as well so the cell does not depend on an earlier cell
# having brought `np` into the kernel.
import numpy as np
import tensorflow as tf
from transformers import AutoTokenizer, TFAutoModelForSequenceClassification

checkpoint = "distilbert-base-uncased-finetuned-sst-2-english"

# Step 1: load the tokenizer and the classification model from one checkpoint.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(checkpoint)

sentences = ["I love you", "I am angry with you"]

# Step 2: preprocess — pad/truncate the batch and return TensorFlow tensors.
encoded_sentences = tokenizer(sentences, padding = True, truncation = True, return_tensors = 'tf')

# Step 3: forward pass; the output carries one row of logits per sentence.
output = model(**encoded_sentences)

# Step 4: postprocess. Softmax is computed once for the whole batch instead of
# being recomputed three times per sentence inside the loop.
probabilities = tf.math.softmax(output.logits, axis = -1).numpy()

sentiment_analysis_list = []
for sentence_probs in probabilities:
  # Index of the most likely label for this sentence.
  best = int(np.argmax(sentence_probs))

  # [label name, probability of that label]
  sentiment_analysis_list.append([model.config.id2label[best], sentence_probs[best]])

Some layers from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english were not used when initializing TFDistilBertForSequenceClassification: ['dropout_19']
- This IS expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFDistilBertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFDistilBertForSequenceClassification were not initialized from the model checkpoint at distilbert-base-uncased-finetuned-sst-2-english and are newly initialized: ['dropout_447']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [175]:
sentiment_analysis_list

[['POSITIVE', 0.99986565], ['NEGATIVE', 0.9985038]]