In [1]:
from transformers import BertModel, BertConfig
from transformers import BertTokenizer

In [2]:
import logging
# Configure the root logger at INFO level so informational log records are shown.
logging.basicConfig(level=logging.INFO)

In [3]:
# Load the tokenizer published under the 'bert-base-uncased' name,
# explicitly requesting lower-casing of the input text.
tokenizer = BertTokenizer.from_pretrained(
    'bert-base-uncased',
    do_lower_case=True,
)

In [4]:
# Sample sentence fed to the tokenizer and model in the cells below.
text = "Hello all! This is a test for the BERT model?"
# Bare name as the last expression so the notebook echoes the value.
text

'Hello all! This is a test for the BERT model?'

In [5]:
# Tokenize the sample sentence and request PyTorch tensors ("pt").
# The recorded result carries input_ids, token_type_ids and attention_mask.
encoding = tokenizer(
    text,
    add_special_tokens=True,
    truncation=True,
    padding=True,
    return_attention_mask=True,
    return_tensors="pt",
)

In [6]:
# Display the tokenizer result; the recorded output holds 14 token ids
# with matching token_type_ids and attention_mask rows.
encoding

{'input_ids': tensor([[  101,  7592,  2035,   999,  2023,  2003,  1037,  3231,  2005,  1996,
         14324,  2944,  1029,   102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [7]:
# Echo the input sentence again for reference (unchanged since it was defined).
text

'Hello all! This is a test for the BERT model?'

In [8]:
# Build a BertConfig with hidden-state output disabled; every other field is
# whatever the BertConfig constructor defaults to (see the repr displayed below).
# NOTE(review): this config is constructed locally rather than loaded from the
# 'bert-base-uncased' checkpoint, so it relies on the constructor defaults matching
# that checkpoint — consider BertConfig.from_pretrained(...) instead; confirm.
configuration = BertConfig(output_hidden_states=False)

In [9]:
# Display the full configuration to inspect the values it carries.
configuration

BertConfig {
  "attention_probs_dropout_prob": 0.1,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.4.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

In [10]:
# Instantiate BertModel from the 'bert-base-uncased' checkpoint, passing in
# the configuration object created earlier in this notebook.
model = BertModel.from_pretrained(
    'bert-base-uncased',
    config=configuration,
)

In [11]:
# Show the concrete class of the loaded model.
type(model)

transformers.models.bert.modeling_bert.BertModel

In [12]:
import torch

In [13]:
# Sanity check: the loaded model must be a torch.nn.Module; the cell raises
# AssertionError otherwise.
assert isinstance(model, torch.nn.Module)

In [14]:
# Run one forward pass purely to inspect the outputs. No backward pass follows,
# so disable autograd for the call: this avoids building the computation graph
# and retaining intermediate activations that would never be used.
with torch.no_grad():
    output = model(
        input_ids=encoding["input_ids"],
        attention_mask=encoding["attention_mask"],
        token_type_ids=encoding["token_type_ids"],
    )

In [15]:
# Show the container type returned by the model's forward pass.
type(output)

transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions

In [16]:
# Shape of the last hidden state. The recorded result is [1, 14, 768], which
# lines up with the 14 token ids in the encoding and the config's hidden_size of 768.
output.last_hidden_state.shape

torch.Size([1, 14, 768])

In [17]:
# Shape of the pooled output; the recorded result is [1, 768].
output.pooler_output.shape

torch.Size([1, 768])

In [18]:
# List the fields present on the output. Only last_hidden_state and pooler_output
# are recorded; the configuration was created with output_hidden_states=False.
output.keys()

odict_keys(['last_hidden_state', 'pooler_output'])

In [19]:
# Re-check the model class (same inspection as the earlier type(model) cell).
type(model)

transformers.models.bert.modeling_bert.BertModel

In [24]:
# Read the hidden size from the configuration attached to the model; recorded value is 768.
model.config.hidden_size

768

In [26]:
# Show the container type returned by the tokenizer call.
type(encoding)

transformers.tokenization_utils_base.BatchEncoding