In [1]:
import sys
import os.path
import argparse
import math
import json

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
from torch.autograd import Variable
import torch.backends.cudnn as cudnn
from tqdm import tqdm

import config
import data
import utils

import model

# Preliminary BERT test

In [2]:
from transformers import BertModel, BertTokenizer, file_utils

In [3]:
from huggingface_hub import hf_hub_download

In [4]:
from transformers import BertModel, BertConfig

In [5]:
# Local directory that holds the downloaded bert-base-uncased checkpoint.
model_dir = './model_download/bert-base-uncased/'

# The tokenizer does not depend on the config or the weights, so load it first.
tokenizer = BertTokenizer.from_pretrained(model_dir)

# Read the configuration, then build a plain BertModel from it. The checkpoint's
# pretraining-head weights (`cls.*`) are not used by BertModel, which is what
# the warning printed by this cell reports.
# NOTE(review): `config` and `model` rebind the names of the project modules
# imported in the first cell (`import config`, `import model`); those modules
# are no longer reachable under these names once this cell has run.
config = BertConfig.from_pretrained(model_dir)
model = BertModel.from_pretrained(model_dir, config=config)

Some weights of the model checkpoint at ./model_download/bert-base-uncased/ were not used when initializing BertModel: ['cls.seq_relationship.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [6]:
# Example text
text = "I love using transformers for NLP tasks!"

In [7]:
# Length of the raw string in characters (this is not the token count).
len(text)

40

In [8]:
# Encode the sample sentence as PyTorch tensors. Sequences longer than 512
# tokens are truncated; padding=True pads to the longest sequence in the batch.
inputs = tokenizer(
    text,
    max_length=512,
    truncation=True,
    padding=True,
    return_tensors='pt',
)

In [9]:
# Show the encoding returned by the tokenizer call.
inputs

{'input_ids': tensor([[  101,  1045,  2293,  2478, 19081,  2005, 17953,  2361,  8518,   999,
           102]]), 'token_type_ids': tensor([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}

In [10]:
# Shape of the token-id tensor: (number of sequences, tokens per sequence).
inputs['input_ids'].shape

torch.Size([1, 11])

In [11]:
# Inference: run the forward pass with gradient tracking disabled.
with torch.no_grad():
    outputs = model(**inputs)


In [25]:
# One hidden vector per token: (sequences, tokens, hidden size).
outputs.last_hidden_state.shape

torch.Size([1, 11, 768])

In [24]:
# One pooled vector per input sequence: (sequences, hidden size).
outputs.pooler_output.shape

torch.Size([1, 768])

In [27]:
# Display the BertConfig loaded from model_dir.
# NOTE(review): at this point `config` is the BertConfig object, not the
# project module imported as `config` in the first cell.
config

BertConfig {
  "_name_or_path": "./model_download/bert-base-uncased/",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.30.2",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}



In [28]:
# Display the module tree of the object currently bound to `model`.
model

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0): BertLayer(
        (attention): BertAttention(
          (self): BertSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False)
          

In [29]:
# Display the tokenizer's settings (vocabulary size, max length, special tokens).
tokenizer

BertTokenizer(name_or_path='./model_download/bert-base-uncased/', vocab_size=30522, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True)

In [26]:
from transformers import BertTokenizer, BertForSequenceClassification
import torch

# The checkpoint contains no classification head, so from_pretrained creates
# one with random weights (this is what the "not initialized" warning reports).
# Seed the RNG first so the head, and therefore the logits, are the same on
# every run.
torch.manual_seed(0)

tokenizer = BertTokenizer.from_pretrained(model_dir)
# NOTE: this rebinds `model` from the BertModel loaded earlier to a
# BertForSequenceClassification instance.
model = BertForSequenceClassification.from_pretrained(model_dir)

# Example text
text = "Example text for classification."
# padding=True pads only to the longest sequence in the batch, matching the
# earlier tokenizer call. padding='max_length' would make BERT process 512
# positions for this one short sentence.
inputs = tokenizer(text, return_tensors='pt', max_length=512, truncation=True, padding=True)

# Forward pass without gradient tracking; read the classification logits.
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits

# The head is untrained, so these values carry no meaning until fine-tuning.
print(logits)


Some weights of the model checkpoint at ./model_download/bert-base-uncased/ were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the m

tensor([[ 0.1389, -0.2861]])


In [28]:
# Display the logits tensor produced by the classification forward pass.
logits

tensor([[ 0.1389, -0.2861]])