Skip to content

Commit

Permalink
更新ocnli csl模版,并更新得分
Browse files Browse the repository at this point in the history
  • Loading branch information
wellinxu committed Aug 13, 2021
1 parent 1c0c647 commit cdc4520
Show file tree
Hide file tree
Showing 7 changed files with 423 additions and 24 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ FewCLUE: A Chinese Few-shot Learning Evaluation Benchmark
| <a href="https://github.com/ymcui/Chinese-BERT-wwm">FineTuningB</a> | 39.35 |61.9N | 54.1N | 33.6N | 25.6N |40.5N | 50.3N |22.6N | 50.5N| 15.0N|
| <a href="https://github.com/CLUEbenchmark/FewCLUE/tree/main/baselines/models_keras/pet">PET</a> | 57.44 | 86.66(1.02, 88.2) | 56.04(4.98, 62.19) | 44.02(0.42, 49.25) | 51.69(1.04, 58.80) |54.47(1.21, 55.12) | 57.52(2.70, 64.65)| 46.01(1.07, 51.34) | 59.35(1.27, 66.28) | 61.21(1.10, 62.39) |
| <a href="https://github.com/CLUEbenchmark/FewCLUE/tree/main/baselines/models_keras/ptuning">PtuningB</a> | 51.81| 88.5N | 65.4 | 35.0N | 44.4N | 48.2N | 51.0N | 32.0N| 50.0N | 57.6N |
| <a href="https://github.com/CLUEbenchmark/FewCLUE/tree/main/baselines/models_keras/ptuning_origin">ori-PtuningB</a> | 57.77| 86.88(1.1, 89.34) | 60.92(2.9,65.01) | 35.81(1.8,44.44) | 56.02(1.1,63.50) | 54.23(1.0,55.91) | 57.52(2.4,67.52) | 57.63(0.9,62.26) | 50.87(1.3,51.86) | 60.04(1.2,60.14) |
| <a href="https://github.com/CLUEbenchmark/FewCLUE/tree/main/baselines/models_keras/ptuning_origin">ori-PtuningB</a> | 59.91| 88.26(0.7, 89.83) | 60.92(2.9,65.01) | 41.90(1.9,49.60) | 56.02(1.1,63.50) | 54.23(1.0,55.91) | 58.11(2.2,67.52) | 57.63(0.9,62.26) | 62.91(2.3,70.82) | 59.27(1.4,61.19) |
| <a href="https://arxiv.org/pdf/2009.07118.pdf">PtuningGPT</a> | 46.44| 75.65N | 54.9N | 35.75N | 33.69N | 45.3N | 49.0N | 24.0N | 53.5N | 13.7N |
| <a href="https://github.com/CLUEbenchmark/FewCLUE/tree/main/baselines/models_keras/gpt">Zero-shotG</a> | 43.36N | 57.54N | 50N | 34.4N | 26.23N | 36.96N | 50.31N | 19.04N | 50.14N | 65.63N |
| <a href="https://arxiv.org/abs/2005.14165">Zero-shotR</a> | 44.61N | 85.2N | 50.6N | 40.3N | 12.6N | 25.3N | 50.0N | 27.7N | 52.2N | 57.6N |
Expand Down
16 changes: 8 additions & 8 deletions baselines/models_keras/ptuning_origin/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,11 @@ python ptuning_iflytek.py 0 # 运行iflytek任务,并使用第0个数据集
## 结果
| 数据集 | score | eprstmt | bustm | ocnli | csldcp | tnews | wsc | iflytek| csl | chid |
| :----:| :----: | :----: |:----: |:----: |:----: |:----: |:----: |:----: |:----: |:----: |
|0|0.578132222| 0.85902 | 0.59086 | 0.38135 | 0.57063 | 0.55277 | 0.55328 | 0.57791 | 0.51797 | 0.5994 |
|1|0.584647778| 0.88361 | 0.6456 | 0.34405 | 0.56166 | 0.54158 | 0.61373 | 0.57599 | 0.49471 | 0.6009 |
|2|0.576458889| 0.87705 | 0.61456 | 0.34246 | 0.54877 | 0.53038 | 0.58402 | 0.57216 | 0.50035 | 0.61838 |
|3|0.57269| 0.85738 | 0.62415 | 0.37302 | 0.54933 | 0.53412 | 0.5625 | 0.56577 | 0.50352 | 0.58442 |
|4|0.576698889| 0.86721 | 0.57111 | 0.35 | 0.57063 | 0.55277 | 0.5625 | 0.59004 | 0.52713 | 0.5989 |
|few_all|0.622237778| 0.89344 | 0.65011 | 0.44444 | 0.63509 | 0.55917 | 0.6752 | 0.62261 | 0.51868 | 0.6014 |
|avg|0.577725556| 0.868854 | 0.609256 | 0.358176 | 0.560204 | 0.542324 | 0.575206 | 0.576374 | 0.508736 | 0.6004 |
|std|-| 0.011357563 | 0.029001638 | 0.017823875 | 0.010822466 | 0.010353214 | 0.024317789 | 0.008936847 | 0.013396514 | 0.012063797
|0|0.611124444| 0.88525 | 0.59086 | 0.44405 | 0.57063 | 0.55277 | 0.60656 | 0.57791 | 0.67019 | 0.6019 |
|1|0.604553333| 0.88197 | 0.6456 | 0.42659 | 0.56166 | 0.54158 | 0.60246 | 0.57599 | 0.62121 | 0.58392 |
|2|0.593363333| 0.87049 | 0.61456 | 0.40556 | 0.54877 | 0.53038 | 0.5666 | 0.57216 | 0.62086 | 0.61089 |
|3|0.591682222| 0.88689 | 0.62415 | 0.39603 | 0.54933 | 0.53412 | 0.57172 | 0.56577 | 0.62121 | 0.57592 |
|4|0.595266667| 0.88852 | 0.57111 | 0.42262 | 0.57063 | 0.55277 | 0.5584 | 0.59004 | 0.6124 | 0.59091 |
|few_all|0.650745556| 0.89836 | 0.65011 | 0.49603 | 0.63509 | 0.55917 | 0.6752 | 0.62261 | 0.70825 | 0.61189 |
|avg|0.599198| 0.882624 | 0.609256 | 0.41897 | 0.560204 | 0.542324 | 0.581148 | 0.576374 | 0.629174 | 0.592708 |
|std|-| 0.007202762 | 0.029001638 | 0.018751433 | 0.010822466 | 0.010353214 | 0.02189734 | 0.008936847 | 0.023236031 | 0.013940738
1 change: 1 addition & 0 deletions baselines/models_keras/ptuning_origin/pet_chid.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def __iter__(self, random=False):
target_ids[ind] = label_ids[i]
else:
source_ids[ind] = i - 3
target_ids[ind] = i - 3
# for i, label_id_ in zip(mask_idxs, label_ids):
# source_ids[i] = tokenizer._token_mask_id # i: 7(mask1的index) ;j: 1093(农); i:8 (mask2的index) ;j: 689(业)
# target_ids[i] = label_id_
Expand Down
192 changes: 192 additions & 0 deletions baselines/models_keras/ptuning_origin/pet_csl.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
#! -*- coding:utf-8 -*-
# P-tuning baseline for the FewCLUE csl task (can these keywords summarize this abstract?).

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
import numpy as np
from bert4keras.backend import keras, K
from bert4keras.snippets import sequence_padding, DataGenerator
from bert4keras.snippets import open
import json
import sys
from modeling import tokenizer


maxlen = 256        # maximum token sequence length after truncation
batch_size = 16
unused_length=2     # number of learnable prompt positions prepended to the input



# Prompt template.
# input_str_format = "{},黴鹹{}几点内容" # 黴鹹: rare-character pair later replaced by the label words 强调 or 提到; its token ids [7957, 7919] make the mask positions easy to locate
input_str_format = "#"*unused_length+"黴鹹用{}概括{}" # 黴鹹: rare-character pair later replaced by 不能 or 可以; its token ids [7957, 7919] make the mask positions easy to locate
labels = ["不能", "可以"]
label2words = {"0": "不能", "1":"可以"}

num_classes = 2
acc_list = []


def load_data(filename):
    """Load a csl jsonl file and build prompt-formatted samples.

    Each input line is a JSON object with "keyword" (list of strings),
    "abst" (abstract text) and optionally "label" ("0" or "1"; test files
    without labels default to "0").

    Returns a list of tuples:
        ((content, content_ids, segment_ids), label_word, mask_idxs)
    where mask_idxs holds the two positions of the rare-character
    placeholder 黴鹹 (token ids 7957, 7919) that the label words replace.
    """
    D = []
    with open(filename, encoding='utf-8') as f:
        for i, l in enumerate(f):
            l = json.loads(l.strip())
            keyword = ",".join(l["keyword"])
            abst = l['abst']
            content = input_str_format.format(keyword, abst)
            content_ids, segment_ids = tokenizer.encode(content)
            # Truncate from position -2 (the token just before the final
            # [SEP]) so only the tail of abst is dropped; the prompt and
            # placeholder sit at the front and are never cut.  Uses the
            # module constant maxlen instead of a hard-coded 256.
            while len(content_ids) > maxlen:
                content_ids.pop(-2)
                segment_ids.pop(-2)

            # The placeholder pair 黴鹹 encodes to [7957, 7919]; record both positions.
            mask_idxs = [idx for idx, c in enumerate(content_ids)
                         if c == 7957 and content_ids[idx + 1] == 7919]
            mask_idxs.append(mask_idxs[0] + 1)
            label = l["label"] if "label" in l else "0"  # unlabeled test data
            D.append(((content, content_ids, segment_ids), label2words[label], mask_idxs))
    return D


path = '../../../datasets/csl'  # dataset root, relative to this script
data_num = sys.argv[1]          # which few-shot split to use (index passed on the command line)

# Load the selected train/dev split plus the public test set
train_data = load_data('{}/train_{}.json'.format(path,data_num))
valid_data = load_data('{}/dev_{}.json'.format(path,data_num))
test_data = load_data('{}/test_public.json'.format(path))


def random_masking(token_ids):
    """Apply BERT-style random masking to the input token ids.

    15% of positions are selected; of those, 80% become [MASK], 10% are
    kept unchanged, and 10% are replaced by a random vocabulary id.
    Returns (masked_ids, target_ids) where target_ids is 0 at positions
    that contribute no MLM loss.
    """
    draws = np.random.random(len(token_ids))
    masked, targets = [], []
    for prob, token in zip(draws, token_ids):
        if prob >= 0.15:
            # not selected: keep the token, no prediction target
            masked.append(token)
            targets.append(0)
        elif prob < 0.15 * 0.8:
            # replace with [MASK], predict the original token
            masked.append(tokenizer._token_mask_id)
            targets.append(token)
        elif prob < 0.15 * 0.9:
            # keep the token but still predict it
            masked.append(token)
            targets.append(token)
        else:
            # replace with a random non-zero vocabulary id
            masked.append(np.random.choice(tokenizer._vocab_size - 1) + 1)
            targets.append(token)
    return masked, targets


class data_generator(DataGenerator):
    """Batch generator yielding [token_ids, segment_ids, output_ids] triples.

    Each sample is ((content, content_ids, segment_ids), label_word, mask_idx)
    as produced by load_data.  The two placeholder positions are overwritten
    with [MASK] in the input and with the label-word token ids in the target;
    positions 1..unused_length carry the learnable prompt ids.
    """
    def __iter__(self, random=False):
        batch_token_ids, batch_segment_ids, batch_output_ids = [], [], []
        for is_end, (content_ids, label, mask_idx) in self.sample(random):
            # content_ids is the (content, content_ids, segment_ids) tuple from load_data
            content, token_ids, segment_ids = content_ids[0], content_ids[1], content_ids[2]
            if random:
                # training path: extra BERT-style random masking as MLM regularization
                source_ids, target_ids = random_masking(token_ids)
            else:
                source_ids, target_ids = token_ids[:], token_ids[:]
            if len(label) == 2:  # label is a two-character word (不能 / 可以)
                # label_ids: vocab ids of the two label characters, e.g. [1093, 689];
                # tokenizer.encode(label)[0] is [CLS, c1, c2, SEP], strip CLS/SEP
                label_ids = tokenizer.encode(label)[0][1:-1]
                for i, label_id_ in zip(mask_idx, label_ids):
                    # mask the placeholder position in the input, put the
                    # label token id at the same position in the target
                    source_ids[i] = tokenizer._token_mask_id
                    target_ids[i] = label_id_
                # positions 1..unused_length (right after [CLS]) hold the
                # trainable prompt tokens; ids 1..unused_length map to
                # [unused1]..[unusedN] in the BERT vocabulary
                for i in range(1, unused_length+1):
                    source_ids[i] = i
                    target_ids[i] = i
            batch_token_ids.append(source_ids)
            batch_segment_ids.append(segment_ids)
            batch_output_ids.append(target_ids)

            if len(batch_token_ids) == self.batch_size or is_end:  # pad and emit per batch
                batch_token_ids = sequence_padding(batch_token_ids)
                batch_segment_ids = sequence_padding(batch_segment_ids)
                batch_output_ids = sequence_padding(batch_output_ids)
                yield [
                    batch_token_ids, batch_segment_ids, batch_output_ids
                ], None
                batch_token_ids, batch_segment_ids, batch_output_ids = [], [], []


from modeling import get_model
# Shared model builder; pattern_len reserves unused_length trainable prompt
# positions.  NOTE(review): `model` (prediction) vs `train_model` (compiled
# for fitting) semantics live in modeling.get_model — confirm there.
model, train_model = get_model(pattern_len=unused_length, trainable=True, lr=3e-5)


# Wrap the datasets in batch generators
train_generator = data_generator(train_data, batch_size)
valid_generator = data_generator(valid_data, batch_size)
test_generator = data_generator(test_data, batch_size)


class Evaluator(keras.callbacks.Callback):
    """Per-epoch evaluation callback.

    Scores the dev split after every epoch, remembers the best dev
    accuracy seen so far, and also scores the public test split for
    reporting.
    """

    def __init__(self):
        self.best_val_acc = 0.

    def on_epoch_end(self, epoch, logs=None):
        val_acc = evaluate(valid_generator)
        # keep the running best dev accuracy
        self.best_val_acc = max(self.best_val_acc, val_acc)
        test_acc = evaluate(test_generator)
        print(
            u'val_acc: %.5f, best_val_acc: %.5f, test_acc: %.5f\n' %
            (val_acc, self.best_val_acc, test_acc)
        )


def evaluate(data):
    """Return accuracy of the model over the batches yielded by `data`.

    For every sample, the model predicts token distributions at the two
    [MASK] positions.  The joint probability of each two-character
    candidate label (不能 / 可以) is the product of the two per-position
    token probabilities; the argmax candidate is compared against the
    ground-truth label decoded from the target ids.
    """
    # Vocab ids of each candidate label's two characters, shape (num_labels, 2);
    # tokenizer.encode(l)[0] is [CLS, c1, c2, SEP], so strip CLS/SEP.
    label_ids = np.array([tokenizer.encode(l)[0][1:-1] for l in labels])
    total, right = 0., 0.
    for x_true, _ in data:
        # x_true -> [batch_token_ids, batch_segment_ids]; y_true -> batch_output_ids
        x_true, y_true = x_true[:2], x_true[2]
        # Two [MASK] positions per sample: (batch_size, 2)
        mask_idxs = np.where(x_true[0] == tokenizer._token_mask_id)[1].reshape(y_true.shape[0], 2)

        y_pred = model.predict(x_true)
        # Keep only the distributions / targets at the mask positions: (batch, 2, vocab) and (batch, 2)
        y_pred = np.array([y_pred[i][mask_idx] for i, mask_idx in enumerate(mask_idxs)])
        y_true = np.array([y_true[i][mask_idx] for i, mask_idx in enumerate(mask_idxs)])

        # Joint probability of each candidate label: (batch, num_labels),
        # then argmax over candidates.
        y_pred = y_pred[:, 0, label_ids[:, 0]] * y_pred[:, 1, label_ids[:, 1]]
        y_pred = y_pred.argmax(axis=1)
        # Decode the gold mask tokens back to a label word and map to its index
        y_true = np.array([labels.index(tokenizer.decode(y)) for y in y_true])
        total += len(y_true)
        right += (y_true == y_pred).sum()
    return right / total


if __name__ == '__main__':
    evaluator = Evaluator()

    # The few-shot training split is tiny, so each "epoch" cycles through
    # it five times (steps_per_epoch = 5 * generator length) before the
    # callback evaluates on dev/test.
    train_model.fit_generator(
        train_generator.forfit(),
        steps_per_epoch=len(train_generator) * 5,
        epochs=10,
        callbacks=[evaluator]
    )
Loading

0 comments on commit cdc4520

Please sign in to comment.