# Evaluate interpretation results for a pretrained model

In [1]:
import sys

sys.path.insert(0, "..")
sys.path.insert(0, "../../")
import paddle
import paddlenlp
from paddlenlp.transformers import ErnieForMaskedLM, ErnieTokenizer

Load model parameters

In [2]:
# Name of the pretrained checkpoint; the model and the tokenizer below are both
# loaded from this same name.
MODEL_NAME = "ernie-1.0"
 
# Load the masked-language-model variant of ERNIE and its tokenizer.
# NOTE(review): `num_classes=2` looks like a leftover from a classification
# example; a masked-LM head presumably does not use it -- confirm before removing.
model = ErnieForMaskedLM.from_pretrained(MODEL_NAME, num_classes=2)
tokenizer = ErnieTokenizer.from_pretrained(MODEL_NAME)

[32m[2022-04-10 20:50:25,353] [    INFO][0m - Already cached /home/zhangshuai/.paddlenlp/models/ernie-1.0/ernie_v1_chn_base.pdparams[0m
W0410 20:50:25.355801   565 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.4, Runtime API Version: 10.2
W0410 20:50:25.361073   565 device_context.cc:465] device: 0, cuDNN Version: 8.2.
[32m[2022-04-10 20:50:29,934] [    INFO][0m - Already cached /home/zhangshuai/.paddlenlp/models/ernie-1.0/vocab.txt[0m


## Prepare for Interpretations

In [3]:
from trustai.interpretation.token_level import IntGradInterpreter
import numpy as np
from assets.utils import convert_example, load_data
from paddlenlp.data import Stack, Tuple, Pad


def masked_one_hot(input_ids, mask_id):
    """Mark which positions of a token-id sequence hold the mask id.

    Args:
        input_ids: Iterable of token ids.
        mask_id: The id to look for.

    Returns:
        A list with one entry per element of ``input_ids``: ``1`` where the
        element equals ``mask_id`` and ``0`` everywhere else.
    """
    return [1 if token_id == mask_id else 0 for token_id in input_ids]
    
# preprocess data functions
def preprocess_fn(data):
    """Turn the raw examples into padded model inputs.

    Args:
        data: Mapping from example id to the raw example; only the values are
            used, in the mapping's iteration order.

    Returns:
        A 3-tuple of tensors ``(input_ids, segment_ids, masked)``, each created
        with ``stop_gradient=False``. ``masked`` is the 0/1 indicator produced
        by ``masked_one_hot`` for the ``[MASK]`` token, padded with zeros.
    """
    # The [MASK] id is the same for every example, so look it up only once.
    mask_id = tokenizer.convert_tokens_to_ids('[MASK]')

    examples = []
    for text in data.values():
        input_ids, segment_ids = convert_example(text, tokenizer, max_seq_length=128, is_test=True)
        examples.append((input_ids, segment_ids, masked_one_hot(input_ids, mask_id)))

    # Each field gets its own pad value. Previously all three were padded with
    # `pad_token_id`, which is only correct when that id happens to be 0.
    batchify_fn = Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input id
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # segment id
        Pad(axis=0, pad_val=0),  # masked_one_hot: a padded position is never a [MASK]
    )

    input_ids, segment_ids, masked = batchify_fn(examples)
    return (
        paddle.to_tensor(input_ids, stop_gradient=False),
        paddle.to_tensor(segment_ids, stop_gradient=False),
        paddle.to_tensor(masked, stop_gradient=False),
    )



In [4]:
# download data
# `-c` resumes/skips files that are already present, `-P` sets the target directory.
# NOTE(review): `--no-check-certificate` turns off TLS certificate verification
# for these downloads; drop the flag if the host's certificate validates in
# your environment -- confirm.
!wget --no-check-certificate -c https://trustai.bj.bcebos.com/data_samples/pretrain_predict -P ../assets/
!wget --no-check-certificate -c https://trustai.bj.bcebos.com/data_samples/pretrain_golden -P ../assets/

# data to run prediction and interpretation on
data = load_data("../assets/pretrain_predict")
print("data:\n", list(data.values())[:2])

# golden data used for evaluation
goldens = load_data("../assets/pretrain_golden")
print("goldens:\n", list(goldens.values())[:2])


--2022-04-10 20:50:31--  https://trustai.bj.bcebos.com/data_samples/pretrain_predict
Resolving trustai.bj.bcebos.com (trustai.bj.bcebos.com)... 10.70.0.165
Connecting to trustai.bj.bcebos.com (trustai.bj.bcebos.com)|10.70.0.165|:443... connected.
HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable

    The file is already fully retrieved; nothing to do.

--2022-04-10 20:50:31--  https://trustai.bj.bcebos.com/data_samples/pretrain_golden
Resolving trustai.bj.bcebos.com (trustai.bj.bcebos.com)... 10.70.0.165
Connecting to trustai.bj.bcebos.com (trustai.bj.bcebos.com)|10.70.0.165|:443... connected.
HTTP request sent, awaiting response... 416 Requested Range Not Satisfiable

    The file is already fully retrieved; nothing to do.

data:
 [{'id': 0, 'context': '迈向充满希望的[MASK]世纪――一九九八年新年讲话（附图片１张）', 'sent_token': ['迈', '向', '充', '满', '希', '望', '的', '[MASK]', '世', '纪', '―', '―', '一', '九', '九', '八', '年', '新', '年', '讲', '话', '（', '附', '图', '片', '１', '张', '）']}, {'id': 100,

## IG Interpreter

In [5]:
from trustai.interpretation.token_level.common import get_word_offset
from trustai.interpretation.token_level.data_processor import VisualizationTextRecord, visualize_text

# For every example: the raw text and its word list, both wrapped in the
# [CLS] / [SEP] markers.
contexts = ["[CLS] " + sample['context'] + " [SEP]" for sample in data.values()]
batch_words = [["[CLS]"] + sample['sent_token'] + ["[SEP]"] for sample in data.values()]

# Offsets of the words and of the tokenizer's sub-words within each context
# string; both lists are consumed later by the interpreters' `alignment` call.
word_offset_maps = [
    get_word_offset(context, words) for context, words in zip(contexts, batch_words)
]
subword_offset_maps = [tokenizer.get_offset_mapping(context) for context in contexts]

In [6]:
from trustai.interpretation.token_level.common import ig_predict_fn_on_paddlenlp_pretrain

# Build the interpreter around the loaded model.
# NOTE(review): device is hard-coded to "gpu"; this presumably fails on a CPU-only machine -- confirm.
ig = IntGradInterpreter(model, device="gpu", predict_fn=ig_predict_fn_on_paddlenlp_pretrain)
# `steps` is presumably the number of integration steps used by the method -- TODO confirm.
result = ig(preprocess_fn(data), steps=100)
# Align the interpreter output with the word lists built above; the listed special tokens are passed separately.
align_res = ig.alignment(result, contexts, batch_words, word_offset_maps, subword_offset_maps, special_tokens=["[CLS]", '[SEP]', '[MASK]'])

In [7]:
def prepare_eval_data(data, results, paddle_model=None):
    """Repackage aligned interpretation results into per-example dicts.

    Args:
        data: Iterable of example ids (iterating a dict yields its keys). It is
            paired positionally with ``results``; surplus items on either side
            are ignored, as with ``zip``.
        results: Iterable of result objects exposing ``pred_label``,
            ``pred_proba``, ``rationale``, ``non_rationale``,
            ``rationale_tokens`` and ``non_rationale_tokens``.
        paddle_model: Unused. Kept (now optional) so existing callers that
            pass the model keep working.

    Returns:
        dict: ``{data_id: eval_data}`` where ``eval_data`` carries the id, the
        prediction fields as-is, and each rationale field wrapped in a
        one-element list.
    """
    res = {}
    for data_id, inter_res in zip(data, results):
        # The rationale fields are wrapped in a list; the prediction fields are not.
        res[data_id] = {
            'id': data_id,
            'pred_label': inter_res.pred_label,
            'pred_proba': inter_res.pred_proba,
            'rationale': [inter_res.rationale],
            'non_rationale': [inter_res.non_rationale],
            'rationale_tokens': [inter_res.rationale_tokens],
            'non_rationale_tokens': [inter_res.non_rationale_tokens],
        }
    return res


# Repackage the aligned IG results into the per-example dicts that are passed
# to the evaluator below, and show the first entry as a sanity check.
predicts = prepare_eval_data(data, align_res, model)
print(list(predicts.values())[0])

{'id': 0, 'pred_label': array([8566], dtype=int64), 'pred_proba': array([[1.2374003e-16, 9.4894001e-16, 1.3985180e-12, ..., 3.1619304e-11,
        8.0197359e-12, 1.4950932e-06]], dtype=float32), 'rationale': [(1, 11, 13, 15, 17)], 'non_rationale': [(2, 3, 4, 5, 6, 7, 9, 10, 12, 14, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28)], 'rationale_tokens': [('迈', '―', '一', '九', '年')], 'non_rationale_tokens': [('向', '充', '满', '希', '望', '的', '世', '纪', '―', '九', '八', '新', '年', '讲', '话', '（', '附', '图', '片', '１', '张', '）')]}


Evaluate the interpretation results

In [8]:
from trustai.evaluation import Evaluator

evaluator = Evaluator()

# Score `predicts` against `goldens` with each metric in turn and print it.
# `result` is reassigned on every pass and ends up holding the last score.
for metric_label, metric_fn in (
    ("map score:", evaluator.cal_map),
    ("plausibility f1:", evaluator.cal_f1),
    ("plausibility iou f1:", evaluator.calc_iou_f1),
):
    result = metric_fn(goldens, predicts)
    print(metric_label, result)


map score: 0.46866666666666673
plausibility f1: 0.29399877899877896
plausibility iou f1: 0.0


## Attention Interpreter

In [9]:
from trustai.interpretation.token_level.common import attention_predict_fn_on_paddlenlp_pretrain
from trustai.interpretation.token_level import AttentionInterpreter

att = AttentionInterpreter(model, device="gpu", predict_fn=attention_predict_fn_on_paddlenlp_pretrain)

# Interpret the same inputs with the attention-based interpreter, then align
# the output with the word lists and repackage it for the evaluator.
result = att(preprocess_fn(data))
align_res = att.alignment(
    result,
    contexts,
    batch_words,
    word_offset_maps,
    subword_offset_maps,
    special_tokens=["[CLS]", '[SEP]', '[MASK]'],
)
predicts = prepare_eval_data(data, align_res, model)

# Score `predicts` against `goldens` with each metric in turn and print it.
# `result` is reassigned on every pass and ends up holding the last score.
for metric_label, metric_fn in (
    ("map score:", evaluator.cal_map),
    ("plausibility f1:", evaluator.cal_f1),
    ("plausibility iou f1:", evaluator.calc_iou_f1),
):
    result = metric_fn(goldens, predicts)
    print(metric_label, result)

map score: 0.7146666666666667
plausibility f1: 0.29318070818070824
plausibility iou f1: 0.0
