# transformers

In [7]:
from transformers import AutoTokenizer

# Load the tokenizer for the bert-base-chinese checkpoint, explicitly
# requesting the fast implementation.
tokenizer = AutoTokenizer.from_pretrained(
    'bert-base-chinese',
    use_fast=True,
)

# Two sentences of different lengths, encoded together as one batch.
sentences = [
    "我觉得不行",
    "我觉得其实还可以",
]

# padding=True pads within the batch; return_tensors="pt" yields PyTorch tensors.
batch_inputs = tokenizer(sentences, padding=True, return_tensors="pt")

# Show the token ids, their Python type, and then their shape.
print(
    batch_inputs["input_ids"],
    "\n",
    type(batch_inputs["input_ids"]),
)
print(batch_inputs["input_ids"].shape)

# Bare last expression: the notebook displays the whole encoding.
batch_inputs

tensor([[ 101, 2769, 6230, 2533,  679, 6121,  102,    0,    0,    0],
        [ 101, 2769, 6230, 2533, 1071, 2141, 6820, 1377,  809,  102]]) 
 <class 'torch.Tensor'>
torch.Size([2, 10])


{'input_ids': tensor([[ 101, 2769, 6230, 2533,  679, 6121,  102,    0,    0,    0],
        [ 101, 2769, 6230, 2533, 1071, 2141, 6820, 1377,  809,  102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [8]:
# Integer indexing returns the i-th `tokenizers.Encoding`. This is a feature
# of fast tokenizers (use_fast=True), not of slow ones.
print(batch_inputs[0], "\n", type(batch_inputs[0]))

# By contrast, a string key such as batch_inputs["input_ids"] returns a torch.Tensor.


Encoding(num_tokens=10, attributes=[ids, type_ids, tokens, offsets, attention_mask, special_tokens_mask, overflowing]) 
 <class 'tokenizers.Encoding'>


| 写法                          | 实际含义                           |
| --------------------------- | ------------------------------ |
| `batch_inputs["input_ids"]` | 从 `data` 里取模型输入                |
| `batch_inputs[i]`           | 从 `encodings` 里取第 i 个 Encoding（仅 fast tokenizer 支持） |


## Model Forward

In [9]:
from transformers import AutoModel

# Load the bert-base-chinese checkpoint via AutoModel (the same checkpoint
# name that the tokenizer was loaded from).
model = AutoModel.from_pretrained("bert-base-chinese")


In [10]:
import torch

# Inference only: run the forward pass without recording autograd history.
with torch.no_grad():
    outputs = model(**batch_inputs)  # ** unpacks the encoding into keyword arguments

# Report the output container type and the shape of last_hidden_state
# (the hidden states of the final layer).
print(
    f"输出对象的类型: {type(outputs)}",
    f"向量形状: {outputs.last_hidden_state.shape}",
    sep="\n",
)

输出对象的类型: <class 'transformers.modeling_outputs.BaseModelOutputWithPoolingAndCrossAttentions'>
向量形状: torch.Size([2, 10, 768])


In [11]:
# Take the vector at sequence position 0 for every sentence in the batch;
# indexing with [:, 0] leaves the trailing hidden dimension intact.
cls_embeddings = outputs.last_hidden_state[:, 0]
print(cls_embeddings.shape)  # expected: [2, 768]

torch.Size([2, 768])


In [12]:
from transformers import AutoModel

# Load the bert-base-chinese checkpoint again, rebinding `model` to a fresh instance.
model = AutoModel.from_pretrained("bert-base-chinese")

# Uncomment to print each parameter's name and its requires_grad flag.
# for name, param in model.named_parameters():
#     print(name, param.requires_grad)


In [13]:
# A single hand-built example: the same token ids as the first tokenized
# sentence, without padding, plus an all-ones attention mask.
inputs = {
    "input_ids": torch.tensor([[101, 2769, 6230, 2533, 679, 6121, 102]]),
    "attention_mask": torch.tensor([[1, 1, 1, 1, 1, 1, 1]]),
}

# Drop gradients left over from a previous run of this cell: backward()
# accumulates into .grad, so without this a re-run would sum gradients
# instead of recomputing them.
model.zero_grad()

# Forward pass with autograd enabled (no torch.no_grad() here).
outputs = model(**inputs)

# Reduce the hidden states to a scalar so backward() can be called on it.
loss = outputs.last_hidden_state.sum()
loss.backward()

# False means the word-embedding weights received a gradient.
print(model.embeddings.word_embeddings.weight.grad is None)


False
