## data loading and imports

In [1]:
from clef.utils.data_loading import load_datasets
from clef.utils.data_loading import write_trec_format_output
from clef.retrieval.retrieve import retrieve_evidence

# Load the train and dev splits with preprocessing switched on.
train, dev = load_datasets(preprocess=True)

out_dir = 'data-out/exp-add-author-info'

# Ensure the run directory and its 'eval' sub-directory exist for later cells.
# makedirs(..., exist_ok=True) also creates missing parents ('data-out') and,
# unlike a check nested under "out_dir is missing", still creates 'eval' when
# out_dir is already present from an earlier run.
import os
os.makedirs(os.path.join(out_dir, 'eval'), exist_ok=True)

loaded 96 training json lines and 32 dev json lines.


## pyserini

In [2]:
# Retrieve evidence for the dev split with the LUCENE method and store the run.
method = 'LUCENE'
lucene_run_file = f'{out_dir}/{method}-dev.trec.txt'
data = retrieve_evidence(dev, method, kwargs={})
write_trec_format_output(lucene_run_file, data, method)

  0%|          | 0/32 [00:00<?, ?it/s]

wrote 153 lines to data-out/exp-preprocessing-off/LUCENE-dev.trec.txt


In [4]:
# method = 'LUCENE'
# data = retrieve_evidence(train, method, kwargs={})
# write_trec_format_output(f'{out_dir}/{method}-train.trec.txt', data, method)

## naive tfidf

In [2]:
# Retrieve evidence for the dev split with the TFIDF method and store the run.
method = 'TFIDF'
tfidf_run_file = f'{out_dir}/{method}-dev.trec.txt'
data = retrieve_evidence(dev, method, kwargs={})
write_trec_format_output(tfidf_run_file, data, method)

  0%|          | 0/32 [00:00<?, ?it/s]

wrote 160 lines to data-out/exp-preprocessing-off/TFIDF-dev.trec.txt


## sentence_transformers

In [3]:
# Retrieve evidence for the dev split with the SBERT method and store the run.
method = 'SBERT'
sbert_run_file = f'{out_dir}/{method}-dev.trec.txt'
data = retrieve_evidence(dev, method, kwargs={})
write_trec_format_output(sbert_run_file, data, method)

  0%|          | 0/32 [00:00<?, ?it/s]

wrote 160 lines to data-out/exp-preprocessing-off/SBERT-dev.trec.txt


In [4]:
# method = 'SBERT'
# data = retrieve_evidence(train, method, kwargs={})
# write_trec_format_output(f'{out_dir}/{method}-train.trec.txt', data, method)

## openai embeddings

In [5]:
# Retrieve evidence for the dev split with the OPENAI method and store the run.
method = 'OPENAI'
openai_run_file = f'{out_dir}/{method}-dev.trec.txt'
data = retrieve_evidence(dev, method, kwargs={})
write_trec_format_output(openai_run_file, data, method)

  0%|          | 0/32 [00:00<?, ?it/s]

wrote 160 lines to data-out/exp-preprocessing-off/OPENAI-dev.trec.txt


## terrier

In [6]:
import pandas as pd

def jsons_to_pandas(jsons):
    """Flatten rumor records into one row per (rumor, timeline tweet) pair.

    Parameters
    ----------
    jsons : iterable of dict
        Each record must provide the keys 'id', 'rumor' and 'timeline'.
        Every timeline item is unpacked as ``(author, tweet_id, tweet_text)``;
        the author is not used.

    Returns
    -------
    pandas.DataFrame
        Columns ``qid`` (rumor id), ``query`` (rumor text with every
        non-alphanumeric character replaced by a space), ``docno`` (tweet id)
        and ``text`` (tweet text). Records with an empty timeline contribute
        no rows; an empty input yields an empty frame with the same columns.
    """
    columns = ["qid", "query", "docno", "text"]
    rows = []
    for entry in jsons:
        rumor_id = entry['id']
        # Sanitise once per rumor instead of once per tweet: the result does
        # not depend on the timeline entry.
        # NOTE(review): presumably done so punctuation is not interpreted by
        # the downstream query parser - confirm.
        query = "".join(ch if ch.isalnum() else " " for ch in entry['rumor'])
        rows.extend(
            [rumor_id, query, tw_id, tw]
            for _author, tw_id, tw in entry['timeline']
        )

    return pd.DataFrame(rows, columns=columns)

# Flatten the dev split into the (qid, query, docno, text) frame that the Terrier cells below score.
df = jsons_to_pandas(dev)

In [7]:
import pyterrier as pt

# Start PyTerrier only if it has not been started yet, so re-running this cell
# does not call init() a second time.
if not pt.started():
    pt.init()

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8



### query expansion (qe) off

In [8]:
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import P, R, MAP

from pyterrier.batchretrieve import TextScorer

# Score the (query, tweet) rows of `df` with Terrier's BM25, query expansion off.
wmodel = 'BM25'
top_k = 5  # single cut-off shared by the run filter and the @k metrics

textscorer = TextScorer(takes="docs",
                        returns="queries",
                        body_attr="text",
                        wmodel=wmodel,
                        controls={"qe":"off"})

rtr = textscorer.transform(df)

method = 'TERRIER'
tag = wmodel
fn = f'{out_dir}/{method}-{wmodel}-dev.trec.txt'

# Keep only rows with rank < top_k, write them through the public pt.io API
# (instead of the underscore-prefixed helpers) and read the run back from disk.
ptio.write_results(rtr[rtr['rank'] < top_k], fn, format='trec', run_name=tag)
d = ptio.read_results(fn, format='trec')


task5_dir = '../clef2024-checkthat-lab/task5'
golden_labels_file = task5_dir + '/data/dev_qrels.txt'

golden = ptio.read_qrels(golden_labels_file)
# Named `terrier_scores` rather than `eval` so the builtin eval() is not shadowed.
terrier_scores = ptpipelines.Evaluate(d, golden, metrics=[P@top_k, R@top_k, MAP], perquery=False)
terrier_scores



{'P@5': 0.2947368421052632,
 'R@5': 0.7210526315789473,
 'AP': 0.7000000000000001}

In [9]:
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import P, R, MAP

from pyterrier.batchretrieve import TextScorer

# Score the (query, tweet) rows of `df` with Terrier's DPH, query expansion off.
# The run file written here is the one the evaluation cell reads as the
# 'terrier' submission.
wmodel = 'DPH'
top_k = 5  # single cut-off shared by the run filter and the @k metrics

textscorer = TextScorer(takes="docs",
                        returns="queries",
                        body_attr="text",
                        wmodel=wmodel,
                        controls={"qe":"off"})

rtr = textscorer.transform(df)

method = 'TERRIER'
tag = wmodel
fn = f'{out_dir}/{method}-{wmodel}-dev.trec.txt'

# Keep only rows with rank < top_k, write them through the public pt.io API
# (instead of the underscore-prefixed helpers) and read the run back from disk.
ptio.write_results(rtr[rtr['rank'] < top_k], fn, format='trec', run_name=tag)
d = ptio.read_results(fn, format='trec')


task5_dir = '../clef2024-checkthat-lab/task5'
golden_labels_file = task5_dir + '/data/dev_qrels.txt'

golden = ptio.read_qrels(golden_labels_file)
# Named `terrier_scores` rather than `eval` so the builtin eval() is not shadowed.
terrier_scores = ptpipelines.Evaluate(d, golden, metrics=[P@top_k, R@top_k, MAP], perquery=False)
terrier_scores



{'P@5': 0.2947368421052632,
 'R@5': 0.7210526315789473,
 'AP': 0.6890350877192982}

### query expansion (qe) on

In [10]:
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import P, R, MAP

from pyterrier.batchretrieve import TextScorer

# Score the (query, tweet) rows of `df` with Terrier's BM25, query expansion on.
wmodel = 'BM25'
top_k = 5  # single cut-off shared by the run filter and the @k metrics

# "qemodel" is set explicitly to Bo1, the model Terrier reports falling back to
# when the control is missing, so the choice is visible and the
# "qemodel control not set" warning is not emitted.
textscorer = TextScorer(takes="docs",
                        returns="queries",
                        body_attr="text",
                        wmodel=wmodel,
                        controls={"qe":"on", "qemodel":"Bo1"})

rtr = textscorer.transform(df)

method = 'TERRIER'
tag = wmodel
fn = f'{out_dir}/{method}-{wmodel}-qe-dev.trec.txt'

# Keep only rows with rank < top_k, write them through the public pt.io API
# (instead of the underscore-prefixed helpers) and read the run back from disk.
ptio.write_results(rtr[rtr['rank'] < top_k], fn, format='trec', run_name=tag)
d = ptio.read_results(fn, format='trec')


task5_dir = '../clef2024-checkthat-lab/task5'
golden_labels_file = task5_dir + '/data/dev_qrels.txt'

golden = ptio.read_qrels(golden_labels_file)
# Named `terrier_qe_scores` rather than `eval` so the builtin eval() is not shadowed.
terrier_qe_scores = ptpipelines.Evaluate(d, golden, metrics=[P@top_k, R@top_k, MAP], perquery=False)
terrier_qe_scores

23:42:21.485 [main] WARN org.terrier.querying.QueryExpansion - qemodel control not set for QueryExpansion post process. Using default model Bo1


{'P@5': 0.2736842105263158,
 'R@5': 0.7035087719298246,
 'AP': 0.6442982456140351}

In [11]:
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import P, R, MAP

from pyterrier.batchretrieve import TextScorer

# Score the (query, tweet) rows of `df` with Terrier's DPH, query expansion on.
wmodel = 'DPH'
top_k = 5  # single cut-off shared by the run filter and the @k metrics

# "qemodel" is set explicitly to Bo1, the model Terrier reports falling back to
# when the control is missing, so the choice is visible and the
# "qemodel control not set" warning is not emitted.
textscorer = TextScorer(takes="docs",
                        returns="queries",
                        body_attr="text",
                        wmodel=wmodel,
                        controls={"qe":"on", "qemodel":"Bo1"})

rtr = textscorer.transform(df)

method = 'TERRIER'
tag = wmodel
fn = f'{out_dir}/{method}-{wmodel}-qe-dev.trec.txt'

# Keep only rows with rank < top_k, write them through the public pt.io API
# (instead of the underscore-prefixed helpers) and read the run back from disk.
ptio.write_results(rtr[rtr['rank'] < top_k], fn, format='trec', run_name=tag)
d = ptio.read_results(fn, format='trec')


task5_dir = '../clef2024-checkthat-lab/task5'
golden_labels_file = task5_dir + '/data/dev_qrels.txt'

golden = ptio.read_qrels(golden_labels_file)
# Named `terrier_qe_scores` rather than `eval` so the builtin eval() is not shadowed.
terrier_qe_scores = ptpipelines.Evaluate(d, golden, metrics=[P@top_k, R@top_k, MAP], perquery=False)
terrier_qe_scores

23:42:24.094 [main] WARN org.terrier.querying.QueryExpansion - qemodel control not set for QueryExpansion post process. Using default model Bo1


{'P@5': 0.2631578947368421, 'R@5': 0.6771929824561403, 'AP': 0.637719298245614}

## evaluation

In [12]:
import datetime
import os
from clef.utils.scoring import eval_run_retrieval
from clef.utils.data_loading import clef_base_path

import pandas as pd
# display_html is imported from the public IPython.display module rather than
# the internal IPython.core.display path.
from IPython.display import display_html

sample_submission_file = clef_base_path + '/submission_samples/KGAT_zeroShot_evidence_English_dev.txt'

lucene_submission_file = f'{out_dir}/LUCENE-dev.trec.txt'
tfidf_submission_file = f'{out_dir}/TFIDF-dev.trec.txt'
terrier_submission_file = f'{out_dir}/TERRIER-DPH-dev.trec.txt'
sbert_submission_file = f'{out_dir}/SBERT-dev.trec.txt'
openai_submission_file = f'{out_dir}/OPENAI-dev.trec.txt'

golden_labels_file = clef_base_path + '/data/dev_qrels.txt'

time_now = datetime.datetime.now().strftime('%d_%m_%Y_%H_%M_%S')
out_file = f'{out_dir}/eval/RQ1-{time_now}.csv'
# Writing the CSV must not depend on an earlier cell having created 'eval/'.
os.makedirs(os.path.dirname(out_file), exist_ok=True)

# Row label -> run file, in the order the rows appear in the result table.
# NOTE(review): 'lucence' looks like a typo for 'lucene'; it is kept unchanged
# because the label is written to the CSV and shown in saved outputs.
submissions = {
    'baseline': sample_submission_file,
    'lucence':  lucene_submission_file,
    'tfidf':    tfidf_submission_file,
    'terrier':  terrier_submission_file,
    'sbert':    sbert_submission_file,
    'openai':   openai_submission_file,
}

# Assumes eval_run_retrieval returns a mapping whose values come in the order
# (R@5, MAP), matching the column names below - TODO confirm against clef.utils.scoring.
eval_data = [
    [label, *eval_run_retrieval(run_file, golden_labels_file).values()]
    for label, run_file in submissions.items()
]

eval_df = pd.DataFrame(eval_data, columns=['method', 'R@5', 'MAP'])
df_r5  = eval_df[['method', 'R@5']].sort_values('R@5', axis=0, ascending=False)
df_map = eval_df[['method', 'MAP']].sort_values('MAP', axis=0, ascending=False)

# Inline styling lets the two ranking tables render side by side.
df1_styler = df_r5.style.set_table_attributes("style='display:inline'").set_caption('Recall @ 5')
df2_styler = df_map.style.set_table_attributes("style='display:inline'").set_caption('Mean Average Precision')

eval_df.to_csv(out_file)

display_html(df1_styler._repr_html_()+df2_styler._repr_html_(), raw=True)

Unnamed: 0,method,R@5
5,openai,0.745263
3,terrier,0.721053
4,sbert,0.710175
1,lucence,0.695088
2,tfidf,0.668772
0,baseline,0.635789

Unnamed: 0,method,MAP
3,terrier,0.689035
5,openai,0.644842
4,sbert,0.643965
1,lucence,0.635556
2,tfidf,0.593158
0,baseline,0.561228
