In [1]:
import torch
from transformers import BertModel, BertTokenizer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
model = BertModel.from_pretrained("./bert-base-chinese/")
tokenizer = BertTokenizer.from_pretrained("./bert-base-chinese")

Some weights of the model checkpoint at ./bert-base-chinese/ were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
input_text = "你好"
input_ids = torch.tensor([tokenizer.encode(input_text, add_special_tokens=True)])

In [31]:
input_ids

tensor([[ 101, 5245, 7807,  102]])

In [27]:
input_ids

tensor([[ 101,  872, 1962,  102]])

In [26]:
tokenizer.decode(101)

'[ C L S ]'

In [4]:
inputs = tokenizer("你好", return_tensors="pt")

In [5]:
inputs

{'input_ids': tensor([[ 101,  872, 1962,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1]])}

In [13]:
model(**inputs, output_hidden_states = True)

BaseModelOutputWithPoolingAndCrossAttentions(last_hidden_state=tensor([[[-0.2852,  0.4501,  0.2862,  ...,  0.4362,  0.4979, -0.2372],
         [-0.2079,  0.0369,  0.0398,  ..., -0.5650,  0.3733,  0.0951],
         [ 0.7796,  0.0815, -0.0798,  ..., -0.0146,  0.8973, -0.2835],
         [-0.3637,  0.3618,  0.1234,  ...,  0.6489,  0.6392, -0.2132]]],
       grad_fn=<NativeLayerNormBackward0>), pooler_output=tensor([[ 0.9939,  0.9999,  0.9605,  0.9619,  0.8260,  0.8557, -0.4677, -0.5561,
          0.9954, -0.9910,  1.0000,  0.9811, -0.7397, -0.9134,  0.9996, -0.9983,
         -0.8414,  0.9996,  0.9935,  0.0913,  0.9988, -1.0000, -0.9656,  0.3223,
          0.0429,  0.9642,  0.7766, -0.2238, -0.9999,  0.9853,  0.7119,  0.9974,
          0.8192, -1.0000, -0.9977,  0.2347,  0.0275,  0.9781, -0.7894, -0.9872,
         -0.9213, -0.7144,  0.5856, -0.9844, -0.9948,  0.3433, -1.0000, -0.9999,
         -0.0335,  0.9995, -0.9089, -0.9999,  0.1662, -0.1775, -0.4278,  0.9861,
         -0.9997,  0.8741,

In [7]:
with torch.no_grad():
    outputs = model(input_ids)
    embeddings = outputs[0]

In [8]:
outputs[0].shape

torch.Size([1, 4, 768])

In [9]:
embeddings[0].__len__()

4

In [12]:
embeddings[0][1:3]

tensor([[-0.2079,  0.0369,  0.0398,  ..., -0.5650,  0.3733,  0.0951],
        [ 0.7796,  0.0815, -0.0798,  ..., -0.0146,  0.8973, -0.2835]])

In [11]:
for i, token in enumerate(input_text.split()):
    print(token, embeddings[0][i+1])

你好 tensor([-2.0793e-01,  3.6918e-02,  3.9811e-02,  1.7311e-01,  1.3073e+00,
        -1.1926e+00,  1.7733e-01,  3.7741e-01, -9.1014e-01,  1.3228e+00,
         1.5831e-01, -3.7892e-01,  1.4100e+00, -3.5151e-01,  2.0441e+00,
        -1.0238e+00,  7.2505e-01, -3.9941e-02, -7.0018e-01, -9.2059e-01,
         8.3011e-01,  5.5756e-01, -1.8788e+00, -2.4900e-01,  8.9637e-01,
         7.2073e-01, -7.6161e-01,  2.0468e-01,  9.4741e-01,  1.1170e+00,
         2.6147e-01,  7.9134e-01, -1.3510e+00, -2.3269e-01,  6.5817e-01,
        -5.2823e-02,  9.6547e-01,  6.0924e-01, -6.2540e-02, -6.5123e-01,
        -6.3861e-01,  1.2747e+00,  2.5888e-01,  1.0379e-01,  2.3293e-01,
         5.2488e-02,  2.8147e-01,  5.3963e-01, -1.1568e+00,  2.9894e-01,
         6.4489e-01,  7.5942e+00,  3.2715e-01,  1.2338e+00,  2.5323e-01,
         5.1191e-01,  2.9790e-01,  3.6909e-01, -4.6086e-02,  1.5273e-01,
         2.2011e-02, -1.0319e+00,  9.1532e-01,  1.4048e+00,  3.3801e-01,
        -1.0447e+00, -5.2274e-01, -1.6431e-01, -