## Data loading

In [1]:
from clef.utils.data_loading import load_datasets
from clef.verification.verify import check_dataset_with_model
from clef.utils.data_loading import write_jsonlines_from_dicts

import os

# Locations used throughout the notebook (relative to the notebook's
# working directory).
root_path = '../../'
out_dir = './data-out/train-setup2'

# Load the train and dev splits; author names are added, author bios are not.
train, dev = load_datasets(preprocess=True,
                           add_author_name=True,
                           add_author_bio=False,
                           root_path=root_path)

# Ensure out_dir and its 'eval' sub-directory exist for later cells.
# makedirs creates missing parents, and exist_ok=True makes the cell safe to
# re-run. The previous nested check only created 'eval' when out_dir itself
# was missing, so an existing out_dir without 'eval' was left incomplete.
os.makedirs(os.path.join(out_dir, 'eval'), exist_ok=True)

loaded 96 training json lines and 32 dev json lines.


In [2]:
# for RQ3 we add real similarity scores from real TREC file

from clef.utils.data_loading import combine_rumors_with_trec_file_judgements, task5_dir
import os, json

# Candidate TREC run files. Each gets its own name so that choosing one is an
# explicit, single-line decision instead of a silent overwrite.

# Run file provided by the organizers (a dev-split sample, per its file name).
organizers_submission_path = os.path.join(root_path,
                                          task5_dir,
                                          'submission_samples',
                                          'KGAT_zeroShot_evidence_English_dev.txt')

# Run file produced by our own retrieval (Terrier DPH on train, per its file name).
own_retrieval_submission_path = os.path.join(root_path,
                                             'clef',
                                             'RQ1',
                                             'data-out',
                                             'train-setup2',
                                             'TERRIER-DPH-train.trec.txt')

# Select the run to combine with the rumors.
# NOTE(review): before switching to organizers_submission_path, check which
# separator that file uses and whether it should be paired with `dev` rather
# than `train` (its file name refers to the dev split).
submission_path = own_retrieval_submission_path

# terrier outputs trec files using " " as separator, our own files use "\t" as separator
rq3_dataset = combine_rumors_with_trec_file_judgements(train, submission_path, sep=' ')

# Show the first combined record as a sanity check.
print(json.dumps(rq3_dataset[0], indent=4))

{
    "id": "AuRED_014",
    "rumor": "Urgent Ramallah Ministry of Health spokesman Kamal Al-Shakhra: We received 2 000 doses of the American Moderna Corona vaccine and this batch will be designated for President Abbas the Fatah Central Committee and VIPs",
    "label": "REFUTES",
    "timeline": [
        [
            "https://twitter.com/ibrahimmilhim",
            "1357270458756960257",
            "Account: Ibrahim Melhem\n Ibraim Milhim\nText: Qalqilya (5) Bethlehem (10) Nablus (24) Jericho and the Jordan Valley (9) Ramallah and Al-Bireh (0) Tulkarm (28) Hebron (23) Jenin (19) Gaza Strip ( 275) The Ministry of Health noted that there were 55 patients in intensive care rooms including 20 patients on ventilators"
        ],
        [
            "https://twitter.com/ibrahimmilhim",
            "1357270456915685377",
            "Account: Ibrahim Melhem\n Ibraim Milhim\nText: Salfit (49) Jerusalem suburbs (61) Tubas (18) Qalqilya (12) Bethlehem (42) Nablus (53) Jericho and Al-Aghwar

### RQ3 NLI

In [None]:
# Run the 'bart' checker over the RQ3 dataset (debug output enabled) and
# store whatever it returns via write_jsonlines_from_dicts under out_dir.
outfile = f'{out_dir}/zeroshot-ver-rq3-nli.jsonl'
result = check_dataset_with_model(
    rq3_dataset,
    'bart',
    debug=True,
)
write_jsonlines_from_dicts(outfile, result)

### RQ3 OpenAI

In [3]:
# Run the 'openai' checker over the RQ3 dataset (debug output enabled) and
# store whatever it returns via write_jsonlines_from_dicts under out_dir.
# The evaluation section reads this file back by the same path.
outfile = f'{out_dir}/zeroshot-ver-rq3-openai.jsonl'
result = check_dataset_with_model(
    rq3_dataset,
    'openai',
    debug=True,
)
write_jsonlines_from_dicts(outfile, result)

  0%|          | 0/96 [00:00<?, ?it/s]

Urgent Ramallah Ministry of Health spokesman Kamal Al-Shakhra: We received 2 000 doses of the American Moderna Corona vaccine and this batch will be designated for President Abbas the Fatah Central Committee and VIPs
	0.0 Account: Ibrahim Melhem Ibraim Milhim Text: Vaccination will begin in the middle of this month and we will provide the Gaza Strip with a share of all the vaccines we receive We expect the first batch of vaccines that the government has contracted to purchase to arrive at the end of this month of February or the beginning of the following month The Ministry of Health was directed to prepare a health protocol for the electoral process
	0.0 Account: Ibrahim Melhem Ibraim Milhim Text: During todays Cabinet session: - The Supreme Emergency Committee recommends to His Excellency the President to extend the state of emergency for 30 days - To continue the procedures in place now for another two weeks - The government will obtain the first batch of Corona vaccines amounting t

## Evaluation

In [4]:
from clef.utils.scoring import eval_run_custom
from clef.utils.data_loading import task5_dir

import pandas as pd
from IPython.core.display import display_html

import datetime

# Ground-truth labels that every run is scored against.
ground_truth_file = f'{root_path}/{task5_dir}/data/English_train.json'

# Run file to evaluate: the output written by the RQ3 openai cell.
rq3_openai_submission_file = f'{out_dir}/zeroshot-ver-rq3-openai.jsonl'

# Optional additional runs; uncomment together with the matching rows in the
# evaluation cell.
# sample_submission_file = f'{root_path}/{task5_dir}/submission_samples/KGAT_zeroShot_verification_English_dev.json'
# rq3_nli_submission_file = f'{out_dir}/zeroshot-ver-rq3-nli.jsonl'

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8



### Evaluate RQ3

In [5]:
# Put a timestamp in the report name so that re-runs get distinct file names.
time_now = datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
out_file = f'{out_dir}/eval/RQ3-{time_now}.csv'

# One row per evaluated run: [method, macro-F1, strict-macro-F1].
# NOTE(review): out_file is handed to eval_run_custom and is also the target
# of to_csv below; if eval_run_custom writes to it, that content is replaced.
eval_data = [
    # ['baseline',    *eval_run_custom(sample_submission_file, ground_truth_file, out_file)],
    # ['RQ3-nli',     *eval_run_custom(rq3_nli_submission_file, ground_truth_file, out_file)],
    ['RQ3-openai',  *eval_run_custom(rq3_openai_submission_file, ground_truth_file, out_file)],
]

eval_df = pd.DataFrame(eval_data, columns=['method', 'macro-F1', 'strict-macro-F1'])

# Build one inline table per metric, each sorted best-first and captioned
# with the metric name, and concatenate their HTML so they render side by side.
metric_tables_html = ''.join(
    eval_df[['method', metric]]
    .sort_values(metric, axis=0, ascending=False)
    .style
    .set_table_attributes("style='display:inline'")
    .set_caption(metric)
    ._repr_html_()
    for metric in ('macro-F1', 'strict-macro-F1')
)

# Persist the full score table, then show the per-metric views.
eval_df.to_csv(out_file)

display_html(metric_tables_html, raw=True)


Unnamed: 0,method,macro-F1
0,RQ3-openai,0.784026

Unnamed: 0,method,strict-macro-F1
0,RQ3-openai,0.762686
