### 导入前置依赖

In [None]:
from torch import nn
from transformers import AutoTokenizer, BertModel, BertForTokenClassification

### 设置分词器和 BERT 预训练模型

In [None]:
# Load the tokenizer and a token-classification BERT model from the same
# "bert-base-uncased" checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    "bert-base-uncased",
)
model = BertForTokenClassification.from_pretrained(
    "bert-base-uncased",
)

### 数据集

In [None]:
# Tokenize a single sample sentence; return_tensors="pt" asks the tokenizer
# for PyTorch tensors.
inputs = tokenizer(
    "Hello, my dog is cute",
    return_tensors="pt",
)

### BERT 混合模型

In [None]:
class FusionBERT(nn.Module):
    """Token-level keyword tagger on top of a pretrained BERT encoder.

    Input: a sentence (paper title + authors + abstract), already tokenized.
    Output: for every token, scores for whether it belongs to a keyword.

    Args:
        dropout: Dropout probability applied to the encoder's per-token
            hidden states before classification.
        hidden_size: Input feature size of the classifier. It has to match
            the encoder's hidden size (768 for "bert-base-uncased"),
            because the classifier consumes ``last_hidden_state`` directly.
        ner_labels: Number of output classes per token.
    """

    def __init__(self, dropout=0.5, hidden_size=768, ner_labels=2):
        super().__init__()
        # Pretrained encoder. The pooling layer is loaded as well, although
        # forward() below only reads the per-token hidden states.
        self.model = BertModel.from_pretrained(
            "bert-base-uncased",
            add_pooling_layer=True,
        )
        self.dropout = nn.Dropout(p=dropout)
        # Per-token classification head: hidden_size -> ner_labels.
        self.ner_classifier = nn.Linear(
            in_features=hidden_size,
            out_features=ner_labels,
        )

    def forward(self, x):
        """Compute per-token classification logits.

        Args:
            x: Mapping of keyword arguments for the BERT encoder (it is
                unpacked with ``**``), e.g. the tokenizer output produced
                with ``return_tensors="pt"``.

        Returns:
            Raw (unnormalized) scores from the linear head. Per the
            Hugging Face documentation ``last_hidden_state`` is
            (batch, seq_len, hidden_size), so the result is
            (batch, seq_len, ner_labels).
        """
        encoder_output = self.model(**x)
        token_states = encoder_output.last_hidden_state
        return self.ner_classifier(self.dropout(token_states))

### 测试

In [None]:
# Smoke test: push one tokenized sentence through a freshly built FusionBERT.
# NOTE: FusionBERT itself is left in nn.Module's default training mode here,
# so its nn.Dropout is active and the displayed output varies between runs.
inputs = tokenizer(
    "Hello, my dog is cute",
    return_tensors="pt",
)
fusion_model = FusionBERT()
fusion_model(inputs)

In [None]:
# Loss: add the keyword (token-level) loss and the classification loss together, then compute gradients on the summed loss.