-
Notifications
You must be signed in to change notification settings - Fork 9
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
add: similarity and nli pipeline (#15)
* add: similarity and nli pipeline * update: similarity and nli pipeline Co-authored-by: wuxiaojun <wuxiaojun@idea.edu.cn>
- Loading branch information
Showing
18 changed files
with
2,082 additions
and
17 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -127,3 +127,8 @@ dmypy.json | |
|
||
# Pyre type checker | ||
.pyre/ | ||
|
||
# test file | ||
test**.sh | ||
output/ | ||
tasks/ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{"id": 0, "sentence1": "村里还有一个土地庙,里面也装饰得十分气派", "sentence2": "村里只有一个土地庙", "label": "contradiction"} | ||
{"id": 1, "sentence1": "再有一个,我要跟您汇报我的一个改变,就是闻过则喜,我体会到了", "sentence2": "我不懂得闻过则善的意思", "label": "contradiction"} | ||
{"id": 2, "sentence1": "为什么吃饭总剩下", "sentence2": "小孩子因为食欲差不想吃完饭。", "label": "neutral"} | ||
{"id": 3, "sentence1": "任何自卑的心理、民族虚无主义的论调,萎靡不振的精神状态,都是错误的", "sentence2": "萎靡不振的人会更受到人们的欢迎", "label": "contradiction"} | ||
{"id": 4, "sentence1": "他所以去见姨母,是为了同她商量去苏联的事情", "sentence2": "他与姨母就是否去苏联这件事进行了协商", "label": "entailment"} | ||
{"id": 5, "sentence1": "她不知道,爱丽丝公寓,那一套套的房间里,盛的全是各色各样的等", "sentence2": "爱丽丝公寓只有一个房间", "label": "contradiction"} | ||
{"id": 6, "sentence1": "推进一流大学和一流学科建设,支持中西部建设有特色、高水平大学", "sentence2": "大学要实现“双一流", "label": "entailment"} | ||
{"id": 7, "sentence1": "李主任说他中午前就得走,是抽空回来看看的", "sentence2": "李主任一会儿又要离开。", "label": "entailment"} | ||
{"id": 8, "sentence1": "到锅里它不甘心接受成为盘中餐、碗中汤的命运,还得跳,是悲剧英雄", "sentence2": "它不想成为食物", "label": "entailment"} | ||
{"id": 9, "sentence1": "正在安徽视察的李鹏总理,心情焦虑", "sentence2": "李鹏总理在安徽旅行", "label": "contradiction"} | ||
{"id": 10, "sentence1": "对治安混乱地区进行重点整治", "sentence2": "各类治安和安全隐患完全消除", "label": "contradiction"} | ||
{"id": 11, "sentence1": "这次会议后,我们将进一步调查研究,做更细的工作", "sentence2": "工作可以直接进行", "label": "contradiction"} | ||
{"id": 12, "sentence1": "像那视频里边那个太极宗师,一推,他十几个弟子,多米诺骨牌那个", "sentence2": "视频中的太极宗师是杨氏太极拳的传人", "label": "neutral"} | ||
{"id": 13, "sentence1": "这孩子是不是有点犯二呢", "sentence2": "这个孩子做事谨慎而妥帖,大家都很满意。", "label": "contradiction"} | ||
{"id": 14, "sentence1": "因为我记得我看着那个水一滴一滴一滴的往下滴", "sentence2": "我只听到了水声", "label": "contradiction"} | ||
{"id": 15, "sentence1": "他就是说到那个时候说是十七岁的时候", "sentence2": "他三十岁了。", "label": "neutral"} | ||
{"id": 16, "sentence1": "就是他这人太精,但是具体说他精在哪儿", "sentence2": "他有的时候也“犯傻”", "label": "neutral"} | ||
{"id": 17, "sentence1": "第二天是个阴雨的天气,潮湿而温暖", "sentence2": "第二天非常干燥。", "label": "contradiction"} | ||
{"id": 18, "sentence1": "动员更多科技人员投身经济建设第一线,推广技术、研发产品、创办科技型企业", "sentence2": "科技人员的作用只有研发产品。", "label": "contradiction"} | ||
{"id": 19, "sentence1": "提高产业发展和集聚人口能力,促进农业转移人口就近从业", "sentence2": "人口分散生产成为流行", "label": "neutral"} | ||
{"id": 20, "sentence1": "身上裹一件工厂发的棉大衣,手插在袖筒里", "sentence2": "身上至少一件衣服", "label": "entailment"} | ||
{"id": 21, "sentence1": "她不想去娘家,伯家里人问这问那,更不想让他们来,也是怕问这问那,连电话都懒得打,几乎断了来往", "sentence2": "她现在不在娘家。", "label": "entailment"} | ||
{"id": 22, "sentence1": "呃,还有那个荣,荣庆啊.", "sentence2": "除了荣庆,还有别的(人)", "label": "entailment"} | ||
{"id": 23, "sentence1": "对,咱懂都不懂人家搞的这个学术", "sentence2": "我不懂人家的学术", "label": "entailment"} | ||
{"id": 24, "sentence1": "要为8400万山东人民的利益负责", "sentence2": "山东有8400万人口", "label": "entailment"} | ||
{"id": 25, "sentence1": "西餐馆里西餐也走样走得厉害,杯盘碗碟都缺了口,那调面的器具二十年都没洗似的,结了老厚的锅巴", "sentence2": "西餐馆里西餐没有缺点", "label": "contradiction"} | ||
{"id": 26, "sentence1": "没有,这次确实是住的最长一次在纽约,终于把纽约家给住热乎了,差不多10个月吧没回来", "sentence2": "之前最长的一次是6个月", "label": "neutral"} | ||
{"id": 27, "sentence1": "把球传给十秒后的对决", "sentence2": "传球对赢得对决有帮助", "label": "entailment"} | ||
{"id": 28, "sentence1": "你知道,我刚才突然想到", "sentence2": "我刚才大脑一片空白", "label": "contradiction"} | ||
{"id": 29, "sentence1": "他腾讯肯定未来火啊", "sentence2": "他的发展前途一帆风顺", "label": "neutral"} | ||
{"id": 30, "sentence1": "高楼就像海上的浮标", "sentence2": "海上从来不会建浮标", "label": "contradiction"} | ||
{"id": 31, "sentence1": "这个我看到那个莽弟回来以后吧.", "sentence2": "我已经办妥了这件事", "label": "contradiction"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
#!/bin/bash
# Example: run gts_engine inference for the NLI task.
#
# Resolves the directory three levels above this script, switches into it,
# checks that the NLI example task directory exists, and then invokes
# gts_engine/gts_engine_inference.py on examples/nli/test.json, writing the
# result to <task dir>/output.json.
#
# Exit status: 1 if the working directory cannot be entered or the task
# directory is missing; otherwise the exit status of the python command.
set -euo pipefail

# Every command substitution is quoted so that a checkout path containing
# spaces or glob characters is not word-split.
SCRIPT_PATH=$(readlink -f "$0")
WORK_DIR=$(dirname "$(dirname "$(dirname "$SCRIPT_PATH")")")
echo "working directory: $WORK_DIR"

# The python entry point and the input path below are relative to WORK_DIR,
# so continuing after a failed cd would run against the wrong directory.
cd "$WORK_DIR" || exit 1

TASK_DIR="$WORK_DIR/tasks/nli_example"

# NOTE(review): the task directory is presumably produced by the matching
# training script (the message below says "please train first") -- confirm.
if [[ ! -d "$TASK_DIR" ]]; then
  echo "task dir $TASK_DIR not exists, please train first." >&2
  exit 1
fi

# Expose only GPU 0 to the python process.
export CUDA_VISIBLE_DEVICES=0
python gts_engine/gts_engine_inference.py \
  --task_dir="$TASK_DIR" \
  --task_type=nli \
  --input_path=examples/nli/test.json \
  --output_path="$TASK_DIR/output.json"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#!/bin/bash
# Example: train the NLI task with gts_engine.
#
# Resolves the directory three levels above this script, switches into it,
# creates the tasks/, pretrained/ and tasks/nli_example/ directories if they
# are missing, and then invokes gts_engine/gts_engine_train.py on the data in
# examples/nli (train.json / dev.json / test.json).
#
# Exit status: non-zero if the working directory cannot be entered or a
# directory cannot be created; otherwise the exit status of the python command.
set -euo pipefail

# Every command substitution is quoted so that a checkout path containing
# spaces or glob characters is not word-split.
SCRIPT_PATH=$(readlink -f "$0")
WORK_DIR=$(dirname "$(dirname "$(dirname "$SCRIPT_PATH")")")
echo "working directory: $WORK_DIR"

# The python entry point below is relative to WORK_DIR, so continuing after a
# failed cd would run against the wrong directory.
cd "$WORK_DIR" || exit 1

PRETRAINED_DIR="$WORK_DIR/pretrained"
TASK_DIR="$WORK_DIR/tasks/nli_example"

# mkdir -p creates missing parents (including tasks/) and is a no-op for
# directories that already exist; a failure aborts the script via set -e.
mkdir -p "$WORK_DIR/tasks" "$PRETRAINED_DIR" "$TASK_DIR"

# Expose only GPU 0 to the python process.
export CUDA_VISIBLE_DEVICES=0
python gts_engine/gts_engine_train.py \
  --task_dir="$TASK_DIR" \
  --task_type=nli \
  --train_data=train.json \
  --valid_data=dev.json \
  --test_data=test.json \
  --data_dir="$WORK_DIR/examples/nli" \
  --save_path="$TASK_DIR/outputs" \
  --pretrained_model_dir="$PRETRAINED_DIR" \
  --train_batchsize=2 \
  --valid_batchsize=4 \
  --max_len=512 \
  --max_epochs=1 \
  --min_epochs=1 \
  --seed=123
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{"id": 0, "sentence1": "村里还有一个土地庙,里面也装饰得十分气派", "sentence2": "村里只有一个土地庙", "label": "contradiction"} | ||
{"id": 1, "sentence1": "再有一个,我要跟您汇报我的一个改变,就是闻过则喜,我体会到了", "sentence2": "我不懂得闻过则善的意思", "label": "contradiction"} | ||
{"id": 2, "sentence1": "为什么吃饭总剩下", "sentence2": "小孩子因为食欲差不想吃完饭。", "label": "neutral"} | ||
{"id": 3, "sentence1": "任何自卑的心理、民族虚无主义的论调,萎靡不振的精神状态,都是错误的", "sentence2": "萎靡不振的人会更受到人们的欢迎", "label": "contradiction"} | ||
{"id": 4, "sentence1": "他所以去见姨母,是为了同她商量去苏联的事情", "sentence2": "他与姨母就是否去苏联这件事进行了协商", "label": "entailment"} | ||
{"id": 5, "sentence1": "她不知道,爱丽丝公寓,那一套套的房间里,盛的全是各色各样的等", "sentence2": "爱丽丝公寓只有一个房间", "label": "contradiction"} | ||
{"id": 6, "sentence1": "推进一流大学和一流学科建设,支持中西部建设有特色、高水平大学", "sentence2": "大学要实现“双一流", "label": "entailment"} | ||
{"id": 7, "sentence1": "李主任说他中午前就得走,是抽空回来看看的", "sentence2": "李主任一会儿又要离开。", "label": "entailment"} | ||
{"id": 8, "sentence1": "到锅里它不甘心接受成为盘中餐、碗中汤的命运,还得跳,是悲剧英雄", "sentence2": "它不想成为食物", "label": "entailment"} | ||
{"id": 9, "sentence1": "正在安徽视察的李鹏总理,心情焦虑", "sentence2": "李鹏总理在安徽旅行", "label": "contradiction"} | ||
{"id": 10, "sentence1": "对治安混乱地区进行重点整治", "sentence2": "各类治安和安全隐患完全消除", "label": "contradiction"} | ||
{"id": 11, "sentence1": "这次会议后,我们将进一步调查研究,做更细的工作", "sentence2": "工作可以直接进行", "label": "contradiction"} | ||
{"id": 12, "sentence1": "像那视频里边那个太极宗师,一推,他十几个弟子,多米诺骨牌那个", "sentence2": "视频中的太极宗师是杨氏太极拳的传人", "label": "neutral"} | ||
{"id": 13, "sentence1": "这孩子是不是有点犯二呢", "sentence2": "这个孩子做事谨慎而妥帖,大家都很满意。", "label": "contradiction"} | ||
{"id": 14, "sentence1": "因为我记得我看着那个水一滴一滴一滴的往下滴", "sentence2": "我只听到了水声", "label": "contradiction"} | ||
{"id": 15, "sentence1": "他就是说到那个时候说是十七岁的时候", "sentence2": "他三十岁了。", "label": "neutral"} | ||
{"id": 16, "sentence1": "就是他这人太精,但是具体说他精在哪儿", "sentence2": "他有的时候也“犯傻”", "label": "neutral"} | ||
{"id": 17, "sentence1": "第二天是个阴雨的天气,潮湿而温暖", "sentence2": "第二天非常干燥。", "label": "contradiction"} | ||
{"id": 18, "sentence1": "动员更多科技人员投身经济建设第一线,推广技术、研发产品、创办科技型企业", "sentence2": "科技人员的作用只有研发产品。", "label": "contradiction"} | ||
{"id": 19, "sentence1": "提高产业发展和集聚人口能力,促进农业转移人口就近从业", "sentence2": "人口分散生产成为流行", "label": "neutral"} | ||
{"id": 20, "sentence1": "身上裹一件工厂发的棉大衣,手插在袖筒里", "sentence2": "身上至少一件衣服", "label": "entailment"} | ||
{"id": 21, "sentence1": "她不想去娘家,伯家里人问这问那,更不想让他们来,也是怕问这问那,连电话都懒得打,几乎断了来往", "sentence2": "她现在不在娘家。", "label": "entailment"} | ||
{"id": 22, "sentence1": "呃,还有那个荣,荣庆啊.", "sentence2": "除了荣庆,还有别的(人)", "label": "entailment"} | ||
{"id": 23, "sentence1": "对,咱懂都不懂人家搞的这个学术", "sentence2": "我不懂人家的学术", "label": "entailment"} | ||
{"id": 24, "sentence1": "要为8400万山东人民的利益负责", "sentence2": "山东有8400万人口", "label": "entailment"} | ||
{"id": 25, "sentence1": "西餐馆里西餐也走样走得厉害,杯盘碗碟都缺了口,那调面的器具二十年都没洗似的,结了老厚的锅巴", "sentence2": "西餐馆里西餐没有缺点", "label": "contradiction"} | ||
{"id": 26, "sentence1": "没有,这次确实是住的最长一次在纽约,终于把纽约家给住热乎了,差不多10个月吧没回来", "sentence2": "之前最长的一次是6个月", "label": "neutral"} | ||
{"id": 27, "sentence1": "把球传给十秒后的对决", "sentence2": "传球对赢得对决有帮助", "label": "entailment"} | ||
{"id": 28, "sentence1": "你知道,我刚才突然想到", "sentence2": "我刚才大脑一片空白", "label": "contradiction"} | ||
{"id": 29, "sentence1": "他腾讯肯定未来火啊", "sentence2": "他的发展前途一帆风顺", "label": "neutral"} | ||
{"id": 30, "sentence1": "高楼就像海上的浮标", "sentence2": "海上从来不会建浮标", "label": "contradiction"} | ||
{"id": 31, "sentence1": "这个我看到那个莽弟回来以后吧.", "sentence2": "我已经办妥了这件事", "label": "contradiction"} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,32 @@ | ||
{"id": 0, "sentence1": "七五期间开始,国家又投资将武汉市区的部分土堤改建为钢筋泥凝土防水墙", "sentence2": "八五期间会把剩下的土堤都改建完", "label": "neutral"} | ||
{"id": 1, "sentence1": "相反,一些小摊小贩乘机抬高食品的价格,主要是风味小吃和饮料,来抠我们本已羞涩的腰包", "sentence2": "我们手头目前都比较宽裕", "label": "contradiction"} | ||
{"id": 2, "sentence1": "它是没有章法,乱了套的,也不按规矩来,到哪算哪的,有点流氓地痞气的", "sentence2": "这里章法指的是中华人民共和国宪法", "label": "neutral"} | ||
{"id": 3, "sentence1": "我们要继续向贫困宣战,决不让贫困代代相传", "sentence2": "贫困现象已经有所好转。", "label": "neutral"} | ||
{"id": 4, "sentence1": "她又说:阿二怎么养阿姐呢", "sentence2": "她从没听说过阿二和阿姐", "label": "contradiction"} | ||
{"id": 5, "sentence1": "张永红禁不住惭愧地想:她们这时代的时尚,只不过是前朝几代的零头,她们要补的课实在太多了", "sentence2": "张永红为她们这个时代的时尚感到骄傲", "label": "contradiction"} | ||
{"id": 6, "sentence1": "中国一贯奉行独立自主的和平外交政策,支持世界人民的正义事业,反对霸权主义,维护世界和平,促进共同发展", "sentence2": "我国只注重自己发展", "label": "contradiction"} | ||
{"id": 7, "sentence1": "年内要重点抓好五个方面", "sentence2": "要重点避免这五个方面", "label": "contradiction"} | ||
{"id": 8, "sentence1": "散会后,几个负责人边走边议论说:刁市长动真格啦", "sentence2": "几个负责人没有参加会议", "label": "contradiction"} | ||
{"id": 9, "sentence1": "通过增加居民收入提高消费能力,完善消费政策,培育消费热点", "sentence2": "消费是经济增长的重要因素。", "label": "entailment"} | ||
{"id": 10, "sentence1": "其实我觉得你不懂球啊", "sentence2": "你不懂篮球。", "label": "neutral"} | ||
{"id": 11, "sentence1": "散步后,小平同志在省市负责人陪同下,乘车观光深圳市容", "sentence2": "省市负责人是深圳的", "label": "entailment"} | ||
{"id": 12, "sentence1": "你看潘老师,这是主要符合观众特点的", "sentence2": "潘老师是一名受到观众欢迎的演员", "label": "neutral"} | ||
{"id": 13, "sentence1": ",“我瞅你咋地", "sentence2": "我就要瞅你", "label": "entailment"} | ||
{"id": 14, "sentence1": "柔弱如王琦瑶,除了耐心还有什么可作争取的武器", "sentence2": "王琦瑶做事情很急躁", "label": "contradiction"} | ||
{"id": 15, "sentence1": "他现在钱都没有留给孩子,从现在开始,目前查克正以每年4亿美元的速度散财", "sentence2": "查克每年都在散财", "label": "entailment"} | ||
{"id": 16, "sentence1": "(开始各说各话),你讲,你讲,(讲真,印度英语真不容易懂)", "sentence2": "印度英语可以学会", "label": "neutral"} | ||
{"id": 17, "sentence1": "嗯,买计算机也要钱啊.", "sentence2": "除了买计算机外还要买别的东西。", "label": "entailment"} | ||
{"id": 18, "sentence1": "迹象表明,在前6轮谈判取得进展的基础上,双方有可能克服最后分歧,达成停火协议", "sentence2": "双方处于和平状态", "label": "contradiction"} | ||
{"id": 19, "sentence1": "过圣诞的事,是由这城市里最摩登的人物担任", "sentence2": "过圣诞的事情,由外城人来办", "label": "contradiction"} | ||
{"id": 20, "sentence1": "重点建设取得新成就", "sentence2": "建设必须抓重点", "label": "neutral"} | ||
{"id": 21, "sentence1": "双方官员以及中英、中葡联合联络小组通过磋商,就有关顺利过渡的一系列问题达成了协议", "sentence2": "中国官员在此次磋商中起了重要的作用。", "label": "neutral"} | ||
{"id": 22, "sentence1": "算了,我回去再跟你讲啦.", "sentence2": "是很重要的事情。", "label": "neutral"} | ||
{"id": 23, "sentence1": "紧扣国家发展战略,加快实施一批重点项目", "sentence2": "很多项目将会被实施", "label": "entailment"} | ||
{"id": 24, "sentence1": "中国有一句俗话你可能听过,叫把它生米先做成熟饭,就是这个意思", "sentence2": "中国的俗话我一句都不知道", "label": "contradiction"} | ||
{"id": 25, "sentence1": "全是叫生计熬炼的,挤子汁,沥干水,凝结成块,怎么样的激荡也泛不起来", "sentence2": "温度很低", "label": "neutral"} | ||
{"id": 26, "sentence1": "然后,另外一个人进来,他说你看我眼睛,一看他说就是他,他说没审我就知道肯定是这个人了", "sentence2": "这个人是罪犯", "label": "neutral"} | ||
{"id": 27, "sentence1": "五是改进社会治理方式,保持社会和谐稳定", "sentence2": "社会治理无需关注", "label": "contradiction"} | ||
{"id": 28, "sentence1": "以后,想帮家里安个电话.", "sentence2": "是孩子说的", "label": "neutral"} | ||
{"id": 29, "sentence1": "以前要多的多啊,没事儿.", "sentence2": "以前是现在的两倍", "label": "neutral"} | ||
{"id": 30, "sentence1": "这样的夜晚真是很凄凉,无思无想,也没有梦,就像死了一样", "sentence2": "这个夜晚很冷", "label": "neutral"} | ||
{"id": 31, "sentence1": "国内生产总值突破3万亿元大关,比上年增长百分之十三点四", "sentence2": "去年国内生产总值小于三万亿", "label": "entailment"} |
Oops, something went wrong.