# Data loading

In [1]:
from clef.utils.data_loading import load_datasets
from clef.verification.verify import check_dataset_with_model
from clef.utils.data_loading import write_jsonlines_from_dicts

import os

# Location of the repository root relative to this notebook, and the
# directory that receives every artefact written by this notebook.
root_path = '../../'
out_dir = './data-out/setup1-ollama'

# Load the train/dev splits with preprocessing enabled, author names added
# and author bios left out (see the keyword flags below).
train, dev = load_datasets(preprocess=True,
                           add_author_name=True,
                           add_author_bio=False,
                           root_path=root_path)

# Ensure the output directories exist for later cells.
# makedirs creates the intermediate `out_dir` as well, and exist_ok=True makes
# the call idempotent. The previous nested check only created `eval/` when
# `out_dir` itself was missing, so an existing `out_dir` without an `eval/`
# sub-directory made the evaluation cell fail when writing its CSV.
os.makedirs(os.path.join(out_dir, 'eval'), exist_ok=True)

loaded 96 training json lines and 32 dev json lines.


In [2]:
# for RQ3 we add real similarity scores from real TREC file

from clef.utils.data_loading import combine_rumors_with_trec_file_judgements, task5_dir
import os, json

# Option A - sample evidence run shipped by the task organizers.
# This value is immediately overridden by option B below; it is kept so the
# two sources can be swapped by reordering/commenting the assignments.
submission_path = os.path.join(
    root_path, task5_dir, 'submission_samples',
    'KGAT_zeroShot_evidence_English_dev.txt',
)

# Option B (active) - our own retrieval run produced in RQ1 / setup2.
submission_path = os.path.join(
    root_path, 'clef', 'RQ1', 'data-out', 'setup2',
    'TERRIER-BM25-dev.trec.txt',
)

# Separator depends on who wrote the TREC file: Terrier output is
# space-separated (" "), files written by our own code are tab-separated ("\t").
rq3_dataset = combine_rumors_with_trec_file_judgements(
    dev,
    submission_path,
    sep=' ',
)

# Uncomment to inspect the first combined item:
# print(json.dumps(rq3_dataset[0], indent=4))

# RQ3 llama3

Ensure you have an active Ollama server running – start it with `ollama serve`!

## 8b-instruct

In [None]:
# Verification backend for the 8b-instruct run; also becomes part of the output file name.
method = 'ollama'

In [None]:
# Verify every item of the RQ3 dataset with the currently selected `method`
# (debug output enabled, model string 'instruct'), then store the returned
# records as JSON lines under `out_dir`.
result = check_dataset_with_model(
    rq3_dataset,
    method,
    debug=True,
    model_string='instruct',
)
outfile = f'{out_dir}/zeroshot-ver-rq3-{method}-8b.jsonl'
write_jsonlines_from_dicts(outfile, result)

## 70b-instruct (custom modelfile)

In [3]:
# Verification backend for the 70b-instruct run; also becomes part of the output file name.
method = 'llama3'

In [4]:
# Verify every item of the RQ3 dataset with the currently selected `method`
# (debug output enabled, no explicit model string), then store the returned
# records as JSON lines under `out_dir`.
result = check_dataset_with_model(
    rq3_dataset,
    method,
    debug=True,
)
outfile = f'{out_dir}/zeroshot-ver-rq3-{method}-70b.jsonl'
write_jsonlines_from_dicts(outfile, result)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


  0%|          | 0/32 [00:00<?, ?it/s]

Naturalization decree in preparation: Lebanese passports for sale !
	0.0 Account: Lebanese Presidency Text: President Aoun received the new British Ambassador to Lebanon Hamish Cowell and discussed with him Lebanese-British relations and ways to enhance them in all fields
	-0.9 Account: Lebanese Presidency Text: The Information Office of the Presidency of the Republic denies a false news broadcast by the MTV station about Baabda Palace preparing a decree naturalizing 4 000 people and recalls that it had denied yesterday the false information published by the French magazine Liberation about the same fabricated news
	0.0 Account: Lebanese Presidency Text: President Aoun received the Lebanese Ambassador to Canada Ambassador Fadi Ziadeh who briefed him on the conditions of the Lebanese community in Canada Lebanese-Canadian relations and ways to develop them in all fields
	-0.9 Account: Lebanese Presidency Text: The Information Office of the Presidency of the Republic: What was published b

# Evaluation

In [8]:
from clef.utils.scoring import eval_run_custom
from clef.utils.data_loading import task5_dir

import pandas as pd # type: ignore
from IPython.core.display import display_html # type: ignore

import datetime
from csv import writer

# Gold labels for the English dev split.
ground_truth_file = f'{root_path}/{task5_dir}/data/English_dev.json'

# Baseline: the organizers' sample verification submission.
sample_submission_file = f'{root_path}/{task5_dir}/submission_samples/KGAT_zeroShot_verification_English_dev.json'

# Prediction files written under `out_dir`.
# NOTE(review): the 8b cell in this notebook runs with method = 'ollama' and
# therefore writes 'zeroshot-ver-rq3-ollama-8b.jsonl', while the file evaluated
# here is the '-llama3-8b' one - confirm which run is meant to be scored.
# Alternative 8b file:
# ollama_8b_submission_file = f'{out_dir}/zeroshot-ver-rq3-ollama-8b.jsonl'
ollama_8b_submission_file = f'{out_dir}/zeroshot-ver-rq3-llama3-8b.jsonl'
ollama_70b_submission_file = f'{out_dir}/zeroshot-ver-rq3-llama3-70b.jsonl'

eval RQ3

In [10]:
# Timestamped CSV path so repeated evaluations do not overwrite each other.
time_now = datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
out_file = f'{out_dir}/eval/RQ3-{time_now}.csv'

# (label, submission file) pairs, scored in this order against the gold file.
# NOTE(review): `out_file` is also handed to eval_run_custom; if that helper
# writes to it, the to_csv call further down replaces that content - confirm.
runs = [
    ('baseline', sample_submission_file),
    ('llama3:8b-instruct', ollama_8b_submission_file),
    ('llama3:70b-instruct', ollama_70b_submission_file),
]
eval_data = [
    [label, *eval_run_custom(run_file, ground_truth_file, out_file)]
    for label, run_file in runs
]

# One row per run: label followed by the two scores returned by eval_run_custom.
eval_df = pd.DataFrame(eval_data, columns=['method', 'macro-F1', 'strict-macro-F1'])

# Per-metric views, best run first.
# (`df_r5` holds the macro-F1 ranking and `df_map` the strict-macro-F1 ranking;
# the variable names do not reflect the metrics they contain.)
df_r5 = eval_df[['method', 'macro-F1']].sort_values(by='macro-F1', ascending=False)  # type: ignore
df_map = eval_df[['method', 'strict-macro-F1']].sort_values(by='strict-macro-F1', ascending=False)  # type: ignore

# Inline table styling lets both rankings render side by side.
inline_attr = "style='display:inline'"
df1_styler = df_r5.style.set_table_attributes(inline_attr).set_caption('macro-F1')
df2_styler = df_map.style.set_table_attributes(inline_attr).set_caption('strict-macro-F1')

# Persist the unsorted scores, then show both ranked tables in one output.
eval_df.to_csv(out_file)

combined_html = df1_styler._repr_html_() + df2_styler._repr_html_()
display_html(combined_html, raw=True)

Unnamed: 0,method,macro-F1
2,llama3:70b-instruct,0.728151
1,llama3:8b-instruct,0.617193
0,baseline,0.508159

Unnamed: 0,method,strict-macro-F1
2,llama3:70b-instruct,0.712825
1,llama3:8b-instruct,0.599415
0,baseline,0.508159
