# Loading

In [8]:
import torch
from tqdm import tqdm
import json
import random

In [2]:
# Load the four pre-computed bert-base-chinese embedding files in the order
# FN-static, FN-dynamic, DN-static, DN-dynamic and bind each to its own name.
(fn_static_embeddings,
 fn_dynamic_embeddings,
 dn_static_embeddings,
 dn_dynamic_embeddings) = (
    torch.load(embedding_path)
    for embedding_path in (
        '../ROT/data/Weibo/FN_bert-base-chinese_embeddings_static.pt',
        '../ROT/data/Weibo/FN_bert-base-chinese_embeddings_dynamic.pt',
        '../ROT/data/Weibo/DN_bert-base-chinese_embeddings_static.pt',
        '../ROT/data/Weibo/DN_bert-base-chinese_embeddings_dynamic.pt',
    )
)

# Cell output: number of entries in each loaded collection, same order as above.
tuple(
    len(embeddings)
    for embeddings in (
        fn_static_embeddings,
        fn_dynamic_embeddings,
        dn_static_embeddings,
        dn_dynamic_embeddings,
    )
)

(11934, 11934, 27505, 27505)

In [3]:
# Read the raw FN and DN records.  The encoding is passed explicitly because
# open() otherwise falls back to the platform's locale encoding, which can
# fail to decode (or mis-decode) the Chinese text on non-UTF-8 systems.
with open('../../dataset/Weibo/raw/FN_11934_filtered.json', 'r',
          encoding='utf-8') as f:
    FN = json.load(f)

with open('../../dataset/Weibo/raw/DN_27505_filtered.json', 'r',
          encoding='utf-8') as f:
    DN = json.load(f)

# Record '_id' -> record.
fnOid2item = {fn['_id']: fn for fn in FN}
dnOid2item = {dn['_id']: dn for dn in DN}

# Position in the loaded list -> record.
fnIdx2item = dict(enumerate(FN))
dnIdx2item = dict(enumerate(DN))

# Record '_id' -> position in the loaded list.
fnOid2idx = {fn['_id']: i for i, fn in enumerate(FN)}
dnOid2idx = {dn['_id']: i for i, dn in enumerate(DN)}

# Cell output: number of records in each set.
len(FN), len(DN)

(11934, 27505)

In [4]:
def pytorch_euclidean_distance(a, b):
    """Return the Euclidean (L2) distance between tensors ``a`` and ``b``.

    Thin wrapper around ``torch.dist`` with the norm order written out
    explicitly (``p=2`` is also that function's default).
    """
    distance = torch.dist(a, b, p=2)
    return distance

# Claim-sentence scores

In [11]:
def get_sim_scores(fn_emb, dn_emb, eps=1e-8):
    """Score each sentence of every linked DN item against its FN entry.

    Iterates over the notebook-level ``FN`` list.  For every id listed in an
    entry's ``'debunking_ids'``, the Euclidean distance between the entry's
    embedding and each sentence embedding of that DN item is computed.  The
    distances are then min-max scaled and inverted, so the closest sentence
    scores (almost exactly) 1 and the farthest scores 0.

    Args:
        fn_emb: indexable by position in ``FN``; ``fn_emb[qidx]`` is the
            embedding of the ``qidx``-th FN entry.
        dn_emb: indexable by position in ``DN`` (looked up via ``dnOid2idx``);
            ``dn_emb[didx]`` is an iterable of sentence embeddings.
        eps: small constant added to the scaling denominator so that a DN
            item whose sentences are all equally distant does not divide by
            zero.

    Returns:
        dict: ``{qidx: {didx: [score, ...]}}`` with one score per sentence,
        in sentence order.  FN entries without debunking ids get no key.
        A DN item with no sentence embeddings maps to an empty list.
    """
    score_dict = dict()

    for qidx, fn in enumerate(tqdm(FN)):
        dn_oids = fn['debunking_ids']
        if not dn_oids:
            # Nothing linked to this entry: leave it out of the result.
            continue

        # The query embedding is the same for every linked DN item.
        query = fn_emb[qidx]
        per_query = score_dict.setdefault(qidx, {})

        for did in dn_oids:
            didx = dnOid2idx[did]

            distances = [pytorch_euclidean_distance(query, sent)
                         for sent in dn_emb[didx]]

            if not distances:
                # min()/max() would raise on an empty sequence.
                per_query[didx] = []
                continue

            # Min-max scale, then flip so that smaller distance -> higher score.
            m, M = min(distances), max(distances)
            span = M - m + eps
            per_query[didx] = [1 - (d - m) / span for d in distances]

    return score_dict

In [12]:
# Sentence scores computed from the dynamic embeddings.
scores_dynamic = get_sim_scores(fn_emb=fn_dynamic_embeddings,
                                dn_emb=dn_dynamic_embeddings)

100%|██████████| 11934/11934 [00:46<00:00, 258.20it/s]


In [13]:
# Sentence scores computed from the static embeddings.
scores_static = get_sim_scores(fn_emb=fn_static_embeddings,
                               dn_emb=dn_static_embeddings)

100%|██████████| 11934/11934 [00:46<00:00, 253.97it/s]


In [14]:
def print_in_color(s, cint=31, end='\n'):
    """Print ``s`` wrapped in an ANSI escape sequence.

    ``cint`` is the numeric code placed in the opening escape sequence
    (31 by default); the text is followed by the reset sequence.  ``end`` is
    forwarded unchanged to ``print``.
    """
    ansi_template = '\x1b[{}m{}\x1b[0m'
    colored = ansi_template.format(cint, s)
    print(colored, end=end)

In [18]:
# Spot check: the dynamic scores stored under qidx 0 / didx 19, highest first.
sorted(scores_dynamic[0][19], reverse=True)

[tensor(1.),
 tensor(0.9864),
 tensor(0.9283),
 tensor(0.8933),
 tensor(0.8823),
 tensor(0.7885),
 tensor(0.7509),
 tensor(0.7465),
 tensor(0.7306),
 tensor(0.7234),
 tensor(0.7197),
 tensor(0.7056),
 tensor(0.6803),
 tensor(0.6667),
 tensor(0.6539),
 tensor(0.6325),
 tensor(0.6219),
 tensor(0.5724),
 tensor(0.5424),
 tensor(0.5198),
 tensor(0.4378),
 tensor(0.3717),
 tensor(0.3205),
 tensor(0.1436),
 tensor(0.)]

## Compare Dynamic & Static

In [25]:
# Number of highest-scoring sentences to highlight per embedding type.
TOP = 3

# Pick one FN entry at random.  No seed is set, so a different example is
# shown on every run.
qidx = random.randint(0, len(FN) - 1)
fn = FN[qidx]

print('[Fake News]\n{}'.format(fn['content_all']))

# Walk through every DN item linked to the chosen entry.
for did in fn['debunking_ids']:
    didx = dnOid2idx[did]
    dn = DN[didx]

    # Per-sentence scores of this (qidx, didx) pair for both embedding types.
    s_dynamic = scores_dynamic[qidx][didx]
    s_static = scores_static[qidx][didx]

    # The TOP largest score values of each list.
    top_s_dynamic = sorted(s_dynamic, reverse=True)[:TOP]
    top_s_static = sorted(s_static, reverse=True)[:TOP]

    print('\n---------------------------------------------')
    print('qidx = {}, didx = {}\n'.format(qidx, didx))

    print('[Debunking News]')
    # assumes dn['content_all'] holds one entry per scored sentence, in the
    # same order as the score lists — TODO confirm
    for j, sent in enumerate(dn['content_all']):
        print('[Sent-{}]['.format(j), end='')

        # Highlight via print_in_color when this score's value is among the
        # TOP values.  Membership is tested by value, so a score that ties
        # with a top value is highlighted as well.
        print_func = print_in_color if s_dynamic[j] in top_s_dynamic else print
        print_func('Dynamic: {:.3f}'.format(s_dynamic[j]), end='')

        print(', ', end='')

        # Same highlighting rule for the static score.
        print_func = print_in_color if s_static[j] in top_s_static else print
        print_func('Static: {:.3f}'.format(s_static[j]), end='')

        print(']\t{}'.format(sent))

[Fake News]
新冠病毒来源于美国的证据来了。应该是美国那家军事实验室泄露@魔法部之声 求大神解释

---------------------------------------------
qidx = 11580, didx = 22830

[Debunking News]
[Sent-0][[31mDynamic: 0.839[0m, Static: 0.581]	#每日疫情快报#【最新辟谣】1. 新型冠状病毒是实验室制造的生物武器？
[Sent-1][[31mDynamic: 1.000[0m, [31mStatic: 1.000[0m]	辟谣：2月19日，在世卫组织东地中海区域办事处新闻发布会上，世卫组织东地中海区域主任称，没有证据表明新型冠状病毒是实验室制造的，也没有证据表明新冠病毒是以生物武器的身份制造出来的，新冠病毒来自动物界。
[Sent-2][Dynamic: 0.484, Static: 0.316]	 2. 新冠肺炎预警人之一，艾芬医生去世？
[Sent-3][[31mDynamic: 0.736[0m, [31mStatic: 0.787[0m]	辟谣：2月20日13时许，武汉经济广播官方微博发布消息称，2月20日中午12点44分，仍在一线工作的武汉市中心医院急诊科主任艾芬，利用午饭间隙，向所有关心关注她的朋友表示感谢。
[Sent-4][Dynamic: 0.000, Static: 0.000]	 3. 网传江苏省学校3月2日陆续开学？
[Sent-5][Dynamic: 0.685, Static: 0.611]	辟谣：这条信息来源于前几天网上流传的江苏某地开会会议讨论稿上的信息。
[Sent-6][Dynamic: 0.370, Static: 0.339]	江苏省政府、省教育厅、市政府、市教育局等官方网站、微信公众号、微博等从未发布过类似信息。
[Sent-7][Dynamic: 0.531, [31mStatic: 0.652[0m]	江苏省教育厅只在2月6日发布过通知：根据疫情发展情况，省政府决定，全省各级各类学校(高校、中小学、中职学校、幼儿园、托育机构)2月底前不开学。
[Sent-8][Dynamic: 0.282, Static: 0.509]	 【紧急寻人】最新患者同乘信息公示：1

# Pattern-sentence scores

# Key Sentence Selection