# Application for similarity

In [1]:
import sys
# Extend the module search path with the parent directory and the trustai
# directory. Later cells import `assets.utils`, `interpretation` and
# `evaluation`, which are presumably resolved through these entries —
# verify against the repository layout.
sys.path.append("..")
sys.path.append("../../trustai/")
import paddle
import paddlenlp
from paddlenlp.transformers import ErnieForSequenceClassification, ErnieTokenizer

Initialize the model and tokenizer

In [2]:
# Name of the pretrained checkpoint; it is also used further down to build the
# path of the fine-tuned weights file.
MODEL_NAME = "ernie-1.0"
 
# Sequence classifier with two output classes, and the tokenizer loaded from
# the same checkpoint name.
model = ErnieForSequenceClassification.from_pretrained(MODEL_NAME, num_classes=2)
tokenizer = ErnieTokenizer.from_pretrained(MODEL_NAME)

[32m[2022-04-26 11:01:14,070] [    INFO][0m - Already cached /home/zhangshuai/.paddlenlp/models/ernie-1.0/ernie_v1_chn_base.pdparams[0m
W0426 11:01:14.073176 14110 device_context.cc:447] Please NOTE: device: 0, GPU Compute Capability: 7.0, Driver API Version: 11.4, Runtime API Version: 10.2
W0426 11:01:14.078840 14110 device_context.cc:465] device: 0, cuDNN Version: 8.2.
[32m[2022-04-26 11:01:18,771] [    INFO][0m - Already cached /home/zhangshuai/.paddlenlp/models/ernie-1.0/vocab.txt[0m


Load the dataset and the trained model parameters

In [3]:
from paddlenlp.datasets import load_dataset

DATASET_NAME = 'lcqmc'
train_ds, dev_ds, test_ds = load_dataset(DATASET_NAME, splits=["train", "dev", "test"])

# Load the trained model.
!wget --no-check-certificate -c https://trustai.bj.bcebos.com/lcqmc-ernie-1.0.tar
!tar -xvf ./lcqmc-ernie-1.0.tar -C ../assets/
!rm ./lcqmc-ernie-1.0.tar

state_dict = paddle.load(f'../assets/{DATASET_NAME}-{MODEL_NAME}/model_state.pdparams')
model.set_dict(state_dict)

--2022-04-26 11:01:19--  https://trustai.bj.bcebos.com/lcqmc-ernie-1.0.tar
Resolving trustai.bj.bcebos.com (trustai.bj.bcebos.com)... 10.70.0.165
Connecting to trustai.bj.bcebos.com (trustai.bj.bcebos.com)|10.70.0.165|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 399595520 (381M) [application/x-tar]
Saving to: ‘lcqmc-ernie-1.0.tar’


2022-04-26 11:01:23 (94.9 MB/s) - ‘lcqmc-ernie-1.0.tar’ saved [399595520/399595520]

lcqmc-ernie-1.0/
lcqmc-ernie-1.0/tokenizer_config.json
lcqmc-ernie-1.0/vocab.txt
lcqmc-ernie-1.0/model_state.pdparams
lcqmc-ernie-1.0/model_config.json


In [4]:
from assets.utils import predict

label_map = {0 : 'negative', 1 : 'positive'}

true_labels = [1, 1, 0]
batch_size = 32
predict_results = predict(model, dev_ds, tokenizer, label_map, batch_size=batch_size)

# For every dev example, record whether the gold label name matches the
# prediction at the same position.
matches = [
    label_map[example['label']] == predict_results[idx]
    for idx, example in enumerate(dev_ds)
]
count = len(matches)
right = sum(1 for hit in matches if hit)
print('data size:', count)
print('acc:', right / count)
    

data size: 8802
acc: 0.895251079300159


## Prepare for Interpretations

In [5]:
from interpretation.token_level import IntGradInterpreter
import numpy as np
from assets.utils import convert_example, load_data
from paddlenlp.data import Stack, Tuple, Pad

def preprocess_fn(data):
    """Convert one raw example, or a list of them, into batched model inputs.

    Args:
        data: A single example or a list of examples. Each one is passed
            unchanged to ``convert_example`` together with the notebook's
            ``tokenizer``.

    Returns:
        A pair ``(input_ids, segment_ids)`` of paddle tensors, both created
        with ``stop_gradient=False``.
    """
    # Accept a single example as well as a list of examples.
    if not isinstance(data, list):
        data = [data]

    examples = []
    for text in data:
        input_ids, segment_ids = convert_example(text, tokenizer, max_seq_length=128, is_test=True)
        examples.append((input_ids, segment_ids))

    # Each field gets its own padding value: token ids use the pad token id,
    # segment ids use the pad token *type* id. The original used pad_token_id
    # for both, which only works while the two values happen to coincide.
    batchify_fn = Tuple(
        Pad(axis=0, pad_val=tokenizer.pad_token_id),  # input id
        Pad(axis=0, pad_val=tokenizer.pad_token_type_id),  # segment id
    )

    input_ids, segment_ids = batchify_fn(examples)
    return paddle.to_tensor(input_ids, stop_gradient=False), paddle.to_tensor(segment_ids, stop_gradient=False)

## IG Interpreter
This process will take some time.

In [6]:
from interpretation.token_level import IntGradInterpreter

# Run the interpreter over every dev example and collect the results in
# dataset order.
ig = IntGradInterpreter(model, device="gpu")
interp_results = []
for idx, example in enumerate(dev_ds):
    # Print a progress marker every 1000 examples.
    if idx % 1000 == 0:
        print(idx)
    model_inputs = preprocess_fn(example)
    interp_results.extend(ig(model_inputs, steps=50))



0
1000
2000
3000
4000
5000
6000
7000
8000


## Calculate sentence pair map scores.

In [7]:
from interpretation.token_level.common import get_rationales_and_non_ratioanles
from evaluation import Evaluator


def _tokens_by_attribution(tokens, scores):
    """Return ``tokens`` reordered by ascending score; ties keep their original order."""
    order = sorted(range(len(tokens)), key=lambda pos: scores[pos])
    return [tokens[pos] for pos in order]


evaluator = Evaluator()
map_scores = []
for idx, example in enumerate(dev_ds):
    # Tokenize both sentences of the pair into subwords.
    subwords_a = tokenizer.tokenize(example['query'])
    subwords_b = tokenizer.tokenize(example['title'])
    len_a = len(subwords_a)
    len_b = len(subwords_b)

    # Slice the attribution vector per sentence: position 0 is skipped before
    # the first sentence and one more position is skipped between the two
    # (presumably the special tokens — confirm against convert_example).
    attributions = interp_results[idx].attributions
    attributions_a = attributions[1:1 + len_a]
    start_b = len_a + 2
    attributions_b = attributions[start_b:start_b + len_b]

    # Order each sentence's subwords by their attribution.
    sorted_tokens_a = _tokens_by_attribution(subwords_a, attributions_a)
    sorted_tokens_b = _tokens_by_attribution(subwords_b, attributions_b)

    # Average the score over both directions.
    forward_score = evaluator._calc_map_by_bin(sorted_tokens_a, sorted_tokens_b)
    backward_score = evaluator._calc_map_by_bin(sorted_tokens_b, sorted_tokens_a)
    map_scores.append((forward_score + backward_score) / 2)

print("map_scores mean:", np.mean(map_scores))
print("map_scores median:", np.median(map_scores))
print("map_scores min:", np.min(map_scores))
print("map_scores max:", np.max(map_scores))

map_scores mean: 0.5252031447275877
map_scores median: 0.5311459836459835
map_scores min: 0.0
map_scores max: 0.9795918367346939


Filter the data under different thresholds and calculate the accuracy

In [8]:
# For each threshold, keep the examples predicted 'positive' whose map score
# does not exceed the threshold, then report how many of them are correct.
for threshold in np.linspace(0, 1, 11):
    kept = [
        label_map[example['label']] == predict_results[idx]
        for idx, example in enumerate(dev_ds)
        if predict_results[idx] == 'positive' and map_scores[idx] <= threshold
    ]
    count = len(kept)
    right = sum(1 for hit in kept if hit)
    # An empty selection is reported with accuracy 1.
    acc = right / count if count != 0 else 1
    print("thresholds:", threshold, "data size:", count, "acc:", acc)

thresholds: 0.0 data size: 0 acc: 1
thresholds: 0.1 data size: 4 acc: 1.0
thresholds: 0.2 data size: 60 acc: 0.9666666666666667
thresholds: 0.30000000000000004 data size: 303 acc: 0.7986798679867987
thresholds: 0.4 data size: 921 acc: 0.8089033659066233
thresholds: 0.5 data size: 1898 acc: 0.8261327713382508
thresholds: 0.6000000000000001 data size: 3010 acc: 0.8528239202657807
thresholds: 0.7000000000000001 data size: 3914 acc: 0.8730199284619315
thresholds: 0.8 data size: 4392 acc: 0.8825136612021858
thresholds: 0.9 data size: 4516 acc: 0.8844109831709478
thresholds: 1.0 data size: 4530 acc: 0.8841059602649006
