## Data loading

In [1]:
from clef.utils.data_loading import load_datasets
from clef.utils.data_loading import write_trec_format_output

# Load the training and dev splits with preprocess=False; the loader prints
# how many JSON lines it read for each split.
train, dev = load_datasets(preprocess=False)

# Directory prefix that later cells use when building their output paths.
out_dir = 'data-out'

loaded 96 training json lines and 32 dev json lines.


In [2]:
import json

# Pretty-print the second training item to inspect the record layout.
print(json.dumps(train[1], indent=4))

{
    "id": "AuRED_037",
    "rumor": "Macron to Sky News: After my visit to Mrs. Fairouz last night and the visit to the Jaj Cedar Reserve, I realized the love of a large section of the Lebanese people for their President of the Republic. Fairouz also told me about her appreciation and love for the President and the reform project that the President of the Republic wants to implement. I also salute the President of the Republic and his efforts and patience. https://t.co/7pMab8yWCD",
    "label": "REFUTES",
    "timeline": [
        [
            "https://twitter.com/skynewsarabia",
            "1302029928867729411",
            "#Iraq.. Record infections with #Corona and hospitals are on the verge of collapse #Sky_News #Covid19 #covid19 https://t.co/zSr70lBncb"
        ],
        [
            "https://twitter.com/skynewsarabia",
            "1302028670446444544",
            "The drums of war are beating in the #Mediterranean. Follow us in the latest episodes of the #Sky_News_Room pr

In [3]:
import json

# Pretty-print the first dev item to inspect the record layout.
print(json.dumps(dev[0], indent=4))

{
    "id": "AuRED_142",
    "rumor": "Naturalization decree in preparation: Lebanese passports for sale?! https://t.co/UuQ7yMbSWJ https://t.co/Jf1K1NbZJD",
    "label": "REFUTES",
    "timeline": [
        [
            "https://twitter.com/LBpresidency",
            "1556600039211925504",
            "Today, the President of the Republic, General Michel Aoun, signed 9 laws that were previously approved by the House of Representatives. Details at the following link: https://t.co/wmrSaaEwDu"
        ],
        [
            "https://twitter.com/LBpresidency",
            "1556559119045332992",
            "President Aoun received the Minister of Foreign Affairs and Expatriates, Dr. Abdullah Bouhabib, and the Minister of Social Affairs, Hector Hajjar, and discussed with them developments related to the file of displaced Syrians in Lebanon https://t.co/QLQAJSKzs1"
        ],
        [
            "https://twitter.com/LBpresidency",
            "1556558220533157890",
            "Presiden

## RQ2

In [4]:
# individual step RQ2
# Each item's own 'evidence' entries are copied into 'retrieved_evidence', so
# the verification step runs without a real retrieval step in front of it.
# The rank and score attached to every entry are therefore faked.

import json
from copy import deepcopy

# Work on a deep copy so the loaded dev split is not modified.
rq2_dataset = deepcopy(dev)

for item in rq2_dataset:
    # Entry layout: [author_account, authority_tweet_id, doc_text, rank, score].
    # Rank and score are both fixed to 1 for every entry.
    item['retrieved_evidence'] = [
        [author_account, tweet_id, tweet_text, 1, 1]
        for author_account, tweet_id, tweet_text in item['evidence']
    ]
# print(json.dumps(rq2_dataset[0], indent=2))

### RQ2 NLI

In [5]:
from clef.verification.verify import check_dataset_with_model
from clef.utils.data_loading import write_jsonlines_from_dicts

# Run verification over the RQ2 dataset with the 'bart' model option.
# NOTE(review): judging by the claims printed in the log, preprocess=True
# strips URLs and most punctuation before inference — confirm in
# clef.verification.verify.
result = check_dataset_with_model(rq2_dataset, 'bart', preprocess=True)

# Write the results to the path that the evaluation section reads back as
# rq2_nli_submission_file.
outfile = f'{out_dir}/zeroshot-ver-rq2-nli.jsonl'
write_jsonlines_from_dicts(outfile, result)

  0%|          | 0/32 [00:00<?, ?it/s]

Naturalization decree in preparation Lebanese passports for sale !
	-0.8310818076133728 The Information Office of the Presidency of the Republic denies a false news broadcast by the MTV station about Baabda Palace preparing a decree naturalizing 4 000 people and recalls that it had denied yesterday the false information published by the French magazine Liberation about the same fabricated news
	-0.7493147253990173 The Information Office of the Presidency of the Republic What was published by the French newspaper Liberation about the selling of Lebanese passports to non-Lebanese is false and baseless news
label: REFUTES
predicted: REFUTES

In the video The spread of unidentified gunmen east of the capital Baghdad
	-0.6212475895881653 The security forces of all types and formations are the ones who hold the reins in all governorates of the country and there is no truth to what some social media sites have reported about the spread of unknown gunmen in various areas As there are strict or

In [6]:
# spot check for data cleaning impact
from clef.verification.models.bart import inference_bart

# Hand-written claim/evidence pair for a single inference call.
# NOTE(review): the text looks already cleaned (no URLs, little punctuation),
# presumably to mimic the preprocess=True path — confirm.
claim = "Qatar dispenses with Al-Annabi coach"
evidence = "The Qatar Football Association and Spain's Felix Sanchez decide not to extend the contract concluded between the two parties Al-Ittihad expresses its sincere gratitude to the coach wishing him all the best"

print(claim)
print(evidence)

# Use the NLI model to infer the relationship
# NOTE(review): this rebinds `result`, which the preceding cell used for the
# dataset-level verification output; that output has already been written to
# disk at this point.
result = inference_bart(claim, evidence)

# Print the result; the recorded output is a (label, score) tuple.
print(result)

Qatar dispenses with Al-Annabi coach
The Qatar Football Association and Spain's Felix Sanchez decide not to extend the contract concluded between the two parties Al-Ittihad expresses its sincere gratitude to the coach wishing him all the best
('REFUTES', 0.5920832753181458)


### RQ2 openai

In [5]:
from clef.verification.verify import check_dataset_with_model
from clef.utils.data_loading import write_jsonlines_from_dicts

# Run verification over the RQ2 dataset with the 'openai' model option.
# preprocess=False: the claims printed in the log keep their URLs and
# punctuation.
result = check_dataset_with_model(rq2_dataset, 'openai', preprocess=False)

# Write the results to the path that the evaluation section reads back as
# rq2_openai_submission_file.
outfile = f'{out_dir}/zeroshot-ver-rq2-openai.jsonl'
write_jsonlines_from_dicts(outfile, result)

  0%|          | 0/32 [00:00<?, ?it/s]

Naturalization decree in preparation: Lebanese passports for sale?! https://t.co/UuQ7yMbSWJ https://t.co/Jf1K1NbZJD
	-1.0 “The Information Office of the Presidency of the Republic denies a false news broadcast by the MTV station about Baabda Palace preparing a decree naturalizing 4,000 people, and recalls that it had denied yesterday the false information published by the French magazine ‘Liberation’ about the same fabricated news. "
	1.0 “The Information Office of the Presidency of the Republic: What was published by the French newspaper “Liberation” about the “selling” of Lebanese passports to non-Lebanese is false and baseless news.
label: REFUTES
predicted: NOT ENOUGH INFO

In the video.. The spread of unidentified gunmen east of the capital, Baghdad https://t.co/L18KV8tKuZ
	1.0 The security forces of all types and formations are the ones who hold the reins in all governorates of the country, and there is no truth to what some social media sites have reported about the spread of un

## RQ3

In [6]:
# for RQ3
# we add real similarity scores from real TREC file

from clef.utils.data_loading import combine_rumors_with_trec_file_judgements, clef_base_path
import os, json

# Two TREC-format retrieval runs can supply the evidence for RQ3.
# organizers:
organizers_submission_path = os.path.join(clef_base_path,
                                          'submission_samples',
                                          'KGAT_zeroShot_evidence_English_dev.txt')

# own retrieval:
own_submission_path = f'{out_dir}/OPENAI-dev.trec.txt'

# Explicitly select the run to use. Switch to organizers_submission_path to
# verify against the organizers' sample run instead.
submission_path = own_submission_path

# Attach the judgements from the selected TREC file to the dev rumors.
rq3_dataset = combine_rumors_with_trec_file_judgements(dev, submission_path)

# print(json.dumps(rq3_dataset[2], indent=4))

### RQ3 NLI

In [9]:
from clef.verification.verify import check_dataset_with_model
from clef.utils.data_loading import write_jsonlines_from_dicts

# Run verification over the RQ3 dataset (dev rumors combined with the selected
# TREC run) with the 'bart' model option.
# NOTE(review): judging by the claims printed in the log, preprocess=True
# strips URLs and most punctuation before inference — confirm in
# clef.verification.verify.
result = check_dataset_with_model(rq3_dataset, 'bart', preprocess=True)

# Write the results to the path that the evaluation section reads back as
# rq3_nli_submission_file.
outfile = f'{out_dir}/zeroshot-ver-rq3-nli.jsonl'
write_jsonlines_from_dicts(outfile, result)

  0%|          | 0/32 [00:00<?, ?it/s]

Naturalization decree in preparation Lebanese passports for sale !
	-0.15375000408385064 Today the President of the Republic General Michel Aoun signed 9 laws that were previously approved by the House of Representatives Details at the following link
	0.21294425436336212 President Aoun received the Minister of Foreign Affairs and Expatriates Dr Abdullah Bouhabib and the Minister of Social Affairs Hector Hajjar and discussed with them developments related to the file of displaced Syrians in Lebanon
	-0.31924682199183335 The Information Office of the Presidency of the Republic denies a false news broadcast by the MTV station about Baabda Palace preparing a decree naturalizing 4 000 people and recalls that it had denied yesterday the false information published by the French magazine Liberation about the same fabricated news
	0.26596748953074056 The President of the Republic awarded the Papal Ambassador the National Order of the Cedar with the rank of Grand Officer wishing him success and

### RQ3 openai

In [7]:
from clef.verification.verify import check_dataset_with_model
from clef.utils.data_loading import write_jsonlines_from_dicts

# Run verification over the RQ3 dataset (dev rumors combined with the selected
# TREC run) with the 'openai' model option.
# preprocess=False: the claims printed in the log keep their URLs and
# punctuation.
result = check_dataset_with_model(rq3_dataset, 'openai', preprocess=False)

# Write the results to the path that the evaluation section reads back as
# rq3_openai_submission_file.
outfile = f'{out_dir}/zeroshot-ver-rq3-openai.jsonl'
write_jsonlines_from_dicts(outfile, result)

  0%|          | 0/32 [00:00<?, ?it/s]

Naturalization decree in preparation: Lebanese passports for sale?! https://t.co/UuQ7yMbSWJ https://t.co/Jf1K1NbZJD
	0.0 Today, the President of the Republic, General Michel Aoun, signed 9 laws that were previously approved by the House of Representatives. Details at the following link: https://t.co/wmrSaaEwDu
	0.0 President Aoun received the Minister of Foreign Affairs and Expatriates, Dr. Abdullah Bouhabib, and the Minister of Social Affairs, Hector Hajjar, and discussed with them developments related to the file of displaced Syrians in Lebanon https://t.co/QLQAJSKzs1
	0.38413405162678044 “The Information Office of the Presidency of the Republic denies a false news broadcast by the MTV station about Baabda Palace preparing a decree naturalizing 4,000 people, and recalls that it had denied yesterday the false information published by the French magazine ‘Liberation’ about the same fabricated news. "
	0.0 The President of the Republic awarded the Papal Ambassador the National Order of 

## Evaluation

In [8]:
from clef.utils.scoring import eval_run_custom
from clef.utils.data_loading import clef_base_path

import pandas as pd
# display_html is part of the public IPython.display API; IPython.core.display
# is the internal module behind it.
from IPython.display import display_html

import datetime
import os
from csv import writer

# Organizers' sample verification run, evaluated as the baseline.
sample_submission_file = os.path.join(clef_base_path,
                                      'submission_samples',
                                      'KGAT_zeroShot_verification_English_dev.json')

# Verification outputs written by the RQ2 cells.
rq2_nli_submission_file = f'{out_dir}/zeroshot-ver-rq2-nli.jsonl'
rq2_openai_submission_file = f'{out_dir}/zeroshot-ver-rq2-openai.jsonl'

# Verification outputs written by the RQ3 cells.
rq3_nli_submission_file = f'{out_dir}/zeroshot-ver-rq3-nli.jsonl'
rq3_openai_submission_file = f'{out_dir}/zeroshot-ver-rq3-openai.jsonl'

# Dev file passed to eval_run_custom as the ground truth.
ground_truth_file = os.path.join(clef_base_path, 'data', 'English_dev.json')

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8



In [9]:
import os

# The timestamp in the file name gives each evaluation run its own CSV.
time_now = datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
out_file = f'{out_dir}/eval/RQ2-{time_now}.csv'

# DataFrame.to_csv does not create missing parent directories, so make sure
# the eval folder exists before anything is written there.
os.makedirs(os.path.dirname(out_file), exist_ok=True)

# One row per run: [method label, *scores returned by eval_run_custom].
# NOTE(review): out_file is also passed to eval_run_custom; what that helper
# does with it is not visible here — confirm it does not clash with the
# to_csv call below.
eval_data = [
    ['baseline',    *eval_run_custom(sample_submission_file, ground_truth_file, out_file)],
    ['RQ2-nli',     *eval_run_custom(rq2_nli_submission_file, ground_truth_file, out_file)],
    ['RQ2-openai',  *eval_run_custom(rq2_openai_submission_file, ground_truth_file, out_file)],
]

eval_df = pd.DataFrame(eval_data, columns=['method', 'macro-F1', 'strict-macro-F1'])

# One ranking per metric, best run first.
df_macro_f1 = eval_df[['method', 'macro-F1']].sort_values('macro-F1', axis=0, ascending=False)
df_strict_macro_f1 = eval_df[['method', 'strict-macro-F1']].sort_values('strict-macro-F1', axis=0, ascending=False)

# display:inline so the two tables can sit next to each other.
df1_styler = df_macro_f1.style.set_table_attributes("style='display:inline'").set_caption('macro-F1')
df2_styler = df_strict_macro_f1.style.set_table_attributes("style='display:inline'").set_caption('strict-macro-F1')

# Save the unsorted score table.
eval_df.to_csv(out_file)

display_html(df1_styler._repr_html_() + df2_styler._repr_html_(), raw=True)

Unnamed: 0,method,macro-F1
1,RQ2-nl,0.791925
0,baseline,0.508159
2,RQ2-openai,0.357143

Unnamed: 0,method,strict-macro-F1
1,RQ2-nl,0.791925
0,baseline,0.508159
2,RQ2-openai,0.357143


In [10]:
import os

# The timestamp in the file name gives each evaluation run its own CSV.
time_now = datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
out_file = f'{out_dir}/eval/RQ3-{time_now}.csv'

# DataFrame.to_csv does not create missing parent directories, so make sure
# the eval folder exists before anything is written there.
os.makedirs(os.path.dirname(out_file), exist_ok=True)

# One row per run: [method label, *scores returned by eval_run_custom].
# NOTE(review): out_file is also passed to eval_run_custom; what that helper
# does with it is not visible here — confirm it does not clash with the
# to_csv call below.
eval_data = [
    ['baseline',    *eval_run_custom(sample_submission_file, ground_truth_file, out_file)],
    ['RQ3-nli',     *eval_run_custom(rq3_nli_submission_file, ground_truth_file, out_file)],
    ['RQ3-openai',  *eval_run_custom(rq3_openai_submission_file, ground_truth_file, out_file)],
]

eval_df = pd.DataFrame(eval_data, columns=['method', 'macro-F1', 'strict-macro-F1'])

# One ranking per metric, best run first.
df_macro_f1 = eval_df[['method', 'macro-F1']].sort_values('macro-F1', axis=0, ascending=False)
df_strict_macro_f1 = eval_df[['method', 'strict-macro-F1']].sort_values('strict-macro-F1', axis=0, ascending=False)

# display:inline so the two tables can sit next to each other.
df1_styler = df_macro_f1.style.set_table_attributes("style='display:inline'").set_caption('macro-F1')
df2_styler = df_strict_macro_f1.style.set_table_attributes("style='display:inline'").set_caption('strict-macro-F1')

# Save the unsorted score table.
eval_df.to_csv(out_file)

display_html(df1_styler._repr_html_() + df2_styler._repr_html_(), raw=True)


Unnamed: 0,method,macro-F1
0,baseline,0.508159
2,RQ3-openai,0.31677
1,RQ3-nli,0.287037

Unnamed: 0,method,strict-macro-F1
0,baseline,0.508159
2,RQ3-openai,0.31677
1,RQ3-nli,0.287037
