## 首先对hook的应用场景做一下总结
①在自己构建模型的时候对某一层的输入进行相应的变换
②别人已经训练好的模型，希望能够得到某一层的输出

In [325]:
# 以BERT为例子
import torch

In [326]:
from torch import nn, Tensor

class VerboseExecution(nn.Module):
    """Wrap a model and print the name of each direct child as it executes.

    A forward hook is attached to every module yielded by
    ``model.named_children()`` (direct children only, not nested
    sub-modules). Each hook prints the child's name once that child's
    forward pass has finished; the output itself is not inspected.
    """

    def __init__(self, model: nn.Module):
        super().__init__()
        self.model = model

        def _print_name(module, _inputs, _output):
            # The name is read from the module itself, so one hook serves every child.
            print(f"{module.__name__}")

        for child_name, child in self.model.named_children():
            # Tag the child so the shared hook can report which module fired.
            child.__name__ = child_name
            child.register_forward_hook(_print_name)

    def forward(self, x, y, z, step):
        """Pass the four arguments positionally to the wrapped model and return its result."""
        return self.model(x, y, z, step)

In [327]:
from transformers import BertTokenizer,BertModel

In [328]:
class BMRC(nn.Module):
    """BERT encoder followed by three linear heads.

    ``classifier_start`` and ``classifier_end`` map every token's hidden state
    to 2 scores each; ``_classifier_sentiment`` maps the hidden state at
    sequence position 0 to 3 scores. Which heads run is chosen by the ``step``
    argument of :meth:`forward`.

    On construction a forward hook is registered on every direct child of the
    BERT model; each hook prints that child's name when it runs.
    """

    def __init__(self,
                 args):
        """
        :param args: configuration object; ``hidden_size`` and
            ``bert_model_type`` are the attributes read here
        :raises KeyError: if ``args.bert_model_type`` does not contain
            ``'bert-base-uncased'``
        """
        hidden_size = args.hidden_size

        super().__init__()

        # Only the plain BERT base model is supported.
        if 'bert-base-uncased' not in args.bert_model_type:
            raise KeyError(
                "Config.args.bert_model_type should contain 'bert-base-uncased', "
                f"got {args.bert_model_type!r}."
            )

        # output_attentions=True makes the model output carry `.attentions`,
        # which forward() reads.
        self._bert = BertModel.from_pretrained('../../bert/bert-base-uncased',output_attentions=True)
        self._tokenizer = BertTokenizer.from_pretrained('../../bert/bert-base-uncased')
        print('Bertbase model loaded')

        self.classifier_start = nn.Linear(hidden_size, 2)

        self.classifier_end = nn.Linear(hidden_size, 2)

        self._classifier_sentiment = nn.Linear(hidden_size, 3)

        for name, layer in self._bert.named_children():
            # Store the name on the module so the hook can print it.
            layer.__name__ = name
            # The hook prints only the child's name; the output is ignored.
            layer.register_forward_hook(
                lambda layer,_,__: print(f"{layer.__name__}")
            )

    def forward(self,
                query_tensor,
                query_mask,
                query_seg,
                step):
        """
        Encode the query with BERT and apply the head(s) selected by ``step``.

        :param query_tensor: token ids fed to BERT; for this task the layout is
            [CLS] question (adjust as needed) [SEP] original sentence [PADDING]...[PADDING]
        :param query_mask: passed to BERT as ``attention_mask``
        :param query_seg: passed to BERT as ``token_type_ids``
        :param step: selects the task: 0 for span extraction, anything else
            for sentiment classification
        :return: for ``step == 0`` the tuple ``(start_scores, end_scores, att)``;
            otherwise the sentiment scores computed from sequence position 0
        """

        bert_output = self._bert(query_tensor, attention_mask=query_mask, token_type_ids=query_seg)
        # NOTE(review): attentions[0][0][0] is presumably layer 0, batch item 0,
        # head 0 — confirm against the transformers output documentation.
        att = bert_output.attentions[0][0][0]
        hidden_states = bert_output.last_hidden_state
        if step == 0:  # predict the entity span (aspect or opinion)
            out_scores_start = self.classifier_start(hidden_states)
            out_scores_end = self.classifier_end(hidden_states)
            return out_scores_start, out_scores_end, att
        # predict the sentiment from the hidden state at position 0
        cls_hidden_states = hidden_states[:, 0, :]
        cls_hidden_scores = self._classifier_sentiment(cls_hidden_states)
        return cls_hidden_scores

In [340]:
class OPTION:
    """Plain container of hard-coded settings used to build the model in this notebook."""

    def __init__(self):
        # Model selection; BMRC checks this string for 'bert-base-uncased'.
        self.bert_model_type = 'bert-base-uncased'
        self.acc_batch_size = 1
        self.cuda = True
        self.work_nums = 1
        self.model_name = 'ROBMRC'
        # Width of the encoder output fed to the linear heads.
        self.hidden_size = 768

# Build the settings object and construct the BMRC model from it.
opt=OPTION()
model=BMRC(opt)

Some weights of the model checkpoint at ../../bert/bert-base-uncased were not used when initializing BertModel: ['cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Bertbase model loaded


In [330]:
# Wrapping registers a name-printing forward hook on each direct child of `model`.
# The hooks live on `model`'s children, so they also fire when `model` is called directly.
# NOTE(review): the name says "resnet" but the wrapped model is the BERT-based BMRC.
verbose_resnet = VerboseExecution(model)

In [331]:
from transformers import AutoTokenizer

In [332]:
# Load the tokenizer from the same local directory BMRC loads its BERT weights from.
tokenizer=AutoTokenizer.from_pretrained('../../bert/bert-base-uncased')

In [333]:
# Tokenize one sentence and pull out the three inputs BERT expects.
dummy_input = 'hello world'
S_A_tokenized = tokenizer(dummy_input)
_forward_S_A_query = S_A_tokenized['input_ids']
_forward_S_A_query_mask = S_A_tokenized['attention_mask']
_forward_S_A_query_seg = S_A_tokenized['token_type_ids']
# torch.tensor(..., dtype=torch.long) builds the integer tensor directly, instead of
# going through the legacy float32 Tensor(...) constructor and casting afterwards.
_forward_S_A_query_mask = torch.tensor(_forward_S_A_query_mask, dtype=torch.long)

In [334]:
# Build the token-id tensor directly as int64; the legacy Tensor(...) constructor
# would create float32 values first, which cannot represent every large integer exactly.
_forward_S_A_query = torch.tensor(_forward_S_A_query, dtype=torch.long)

In [335]:
# Build the segment-id tensor directly as int64 rather than via the legacy
# float32 Tensor(...) constructor followed by a cast.
_forward_S_A_query_seg = torch.tensor(_forward_S_A_query_seg, dtype=torch.long)

In [336]:
# Insert a new leading dimension of size 1 on each input
# (presumably the batch axis — confirm against the model's expected input shape).
_forward_S_A_query = torch.unsqueeze(_forward_S_A_query, 0)
_forward_S_A_query_mask = torch.unsqueeze(_forward_S_A_query_mask, 0)
_forward_S_A_query_seg = torch.unsqueeze(_forward_S_A_query_seg, 0)

In [337]:
# Run the model with step=0, the span-extraction branch of BMRC.forward, which
# returns (start scores, end scores, att). The registered forward hooks print
# each child module's name as it finishes.
d = model(_forward_S_A_query,_forward_S_A_query_mask,_forward_S_A_query_seg,step=0)
# Marker printed after the forward pass returns.
print(1)

embeddings
encoder
pooler
_bert
classifier_start
classifier_end
1


In [338]:
# Display the tuple returned by the forward pass.
d

(tensor([[[-0.0560, -0.5592],
          [-0.2164, -0.1846],
          [-0.1693,  0.6753],
          [-0.1545,  0.4091]]], grad_fn=<ViewBackward0>),
 tensor([[[ 0.5829,  0.5612],
          [ 0.2780,  0.6958],
          [ 0.2564,  1.2112],
          [-0.0422,  0.4670]]], grad_fn=<ViewBackward0>),
 tensor([[0.1494, 0.1146, 0.0919, 0.6441],
         [0.1728, 0.2124, 0.3702, 0.2446],
         [0.1655, 0.3525, 0.2016, 0.2804],
         [0.3115, 0.1846, 0.1493, 0.3546]], grad_fn=<SelectBackward0>))