## data loading

In [1]:
from utils import load_rumors_from_jsonl
import os

out_dir = './temp-data'

clef_path = '../clef2024-checkthat-lab/task5'
data_path = os.path.join(clef_path, 'data')

filepath_train = os.path.join(data_path, 'English_train.json')
filepath_dev = os.path.join(data_path, 'English_dev.json')

train_jsons = load_rumors_from_jsonl(filepath_train)
dev_jsons = load_rumors_from_jsonl(filepath_dev)

print(f'loaded {len(train_jsons)} training json objects and {len(dev_jsons)} dev objects')

loaded 96 training json objects and 32 dev objects


### cleaning and normalization

In [2]:
from clef.utils import clean_tweet

def clean_jsons(jsons):
    data_cleaned = []

    for entry in jsons:
        
        tl_clean = []
        for account_url, tl_tweet_id, tl_tweet in entry['timeline']:
            tl_tweet_cleaned = clean_tweet(tl_tweet)
            if tl_tweet_cleaned:
                tl_clean += [[account_url, tl_tweet_id, tl_tweet_cleaned]]

        ev_clean = []
        for account_url, ev_tweet_id, ev_tweet in entry['evidence']:
            ev_tweet_cleaned = clean_tweet(ev_tweet)
            if ev_tweet_cleaned:
                ev_clean += [[account_url, ev_tweet_id, ev_tweet_cleaned]]

        data_cleaned += [{
            'id': entry['id'],
            'rumor': clean_tweet(entry['rumor']),
            'label': entry['label'],
            'timeline': tl_clean,
            'evidence': ev_clean,
        }]
    return data_cleaned

In [3]:
data_cleaned_train = clean_jsons(train_jsons)
data_cleaned_dev = clean_jsons(dev_jsons)

# data_cleaned_train
# data_cleaned_dev

## pyserini

In [7]:
from pyserini.search.lucene import LuceneSearcher
import os
import json
import subprocess

# if you get the error "NameError: name '_C' is not defined" --> restart the Jupyter Kernel

def searchPyserini(rumor_id,
                   query,
                   timeline,
                   k = 5,
                   temp_dir = 'temp-data-dir',
                   index = 'temp-data-dir/index_timeline_dynamic'):
    
    # ensure "working directory" exists (where we store intermediate data like the dynamic index that will be quered later)
    if not os.path.exists(temp_dir):
        os.mkdir(temp_dir)

    # set up "dynamic" (= temporary) index using timeline data
    dynamic_idx_filename = 'eng-train-dynamic.jsonl'
    with open(os.path.join(temp_dir, dynamic_idx_filename), mode='w', encoding='utf8') as file:
        for tweet in timeline:
            id = tweet[1]
            text = tweet[2]
            file.write(json.dumps({'id': id, 'contents': text}) + '\n')
    
    # ensure index directory exists and is empty
    if os.path.exists(index):
        for filename in os.listdir(index):
            if os.path.isfile(os.path.join(index, filename)):
                os.remove(os.path.join(index, filename))
    else:
        os.mkdir(index)

    # set up pyserini command since python embeddable is not out yet
    nthreads = 1
    command = f'python -m pyserini.index.lucene ' \
    f'-input {temp_dir} ' \
    f'-collection JsonCollection ' \
    f'-generator DefaultLuceneDocumentGenerator ' \
    f'-index {index} ' \
    f'-threads {nthreads} ' \
    f'-storePositions ' \
    f'-storeDocvectors ' \
    f'-storeRaw ' \
    f'-language en'

    result = subprocess.run(command, capture_output=True)

    # load searcher from index directoy
    searcher = LuceneSearcher(index)
    hits = searcher.search(query)

    ranked = []

    for i, hit in enumerate(hits[:k]):
        ranked += [[rumor_id, hit.docid, i+1, hit.score]]

        # doc = searcher.doc(hit.docid)
        # json_doc = json.loads(doc.raw())
        # wrap(f'{i+1:2} {hit.docid:4} {hit.score:.5f}\n{json_doc["contents"]}')

    return ranked

In [8]:
# for testing...
test_rumor = data_cleaned_dev[2]
test_rumor = data_cleaned_dev[2]
query = test_rumor['rumor']
timeline = test_rumor['timeline']

ranked_docs = searchPyserini(test_rumor['id'], query, timeline)
display(ranked_docs)

# simple spot check
for evidence in test_rumor['evidence']:
    print(f'{"WAS FOUND" if evidence[1] in [x[1] for x in ranked_docs] else "NOT FOUND"}\t{evidence[1]} {evidence[2]}')

[['AuRED_132', '1590400068208988160', 1, 23.428499221801758],
 ['AuRED_132', '1591489851106668544', 2, 15.630800247192383],
 ['AuRED_132', '1589654877890019331', 3, 11.791999816894531],
 ['AuRED_132', '1589949764107665409', 4, 9.603899955749512],
 ['AuRED_132', '1591404278996168705', 5, 9.46500015258789]]

WAS FOUND	1590400068208988160 After circulating news that the Governor of the Bank of Lebanon Riad Salameh had announced to NBN about raising the value of the dollar and raising the ceiling on banking withdrawals the NBN channel denies the validity of this information that is being circulated citing the channel and confirms that there is no truth to it on this subject
NOT FOUND	1590364198462435329 There is no truth to the information being circulated quoted by the NBN channel regarding a statement by the Governor of the Central Bank regarding banking circulars


In [10]:
from tqdm.auto import tqdm

data = []

for item in tqdm(data_cleaned_dev):
    rumor_id = item['id']
    query = item['rumor']
    timeline = item['timeline']

    data += searchPyserini(rumor_id, query, timeline)


from utils import write_trec_format_output

out_path = 'temp-data/lucene-trec-dev.txt'
write_trec_format_output(out_path, data, 'LUCENE')

# display(data)

  0%|          | 0/32 [00:00<?, ?it/s]

In [11]:
from tqdm.auto import tqdm

data = []

for item in tqdm(data_cleaned_train):
    rumor_id = item['id']
    query = item['rumor']
    timeline = item['timeline']

    data += searchPyserini(rumor_id, query, timeline)

from utils import write_trec_format_output

out_path = 'temp-data/lucene-trec-train.txt'
write_trec_format_output(out_path, data, 'LUCENE')

# display(data)

  0%|          | 0/96 [00:00<?, ?it/s]

## naive tfidf

In [31]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

def retrieve_relevant_documents_tfidf(rumor_id, query, timeline, k=5):
    # Get only doc texts
    documents = [t[2] for t in timeline]
    tweet_ids = [t[1] for t in timeline]

    # Combine query and documents for TF-IDF vectorization
    combined_texts = [query] + documents

    # Generate TF-IDF vectors
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(combined_texts)

    # Calculate similarity of the query to each document
    similarity_scores = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:])
    
    # Rank documents based on similarity scores
    ranked_doc_indices = similarity_scores.argsort()[0][::-1]

    ranked = []
    for i, idx in enumerate(ranked_doc_indices[:k]):
        ranked += [[rumor_id, tweet_ids[idx], i, similarity_scores[0][idx]]]
    
    return ranked

    # # Sort the documents according to rank
    # ranked_documents = [documents[i] for i in ranked_doc_indices]
    # ranked_scores = [similarity_scores[0][i] for i in ranked_doc_indices]
    # ranked_ids = [tweet_ids[i] for i in ranked_doc_indices]

    # # Create a list of tuples of shape (doc, score)
    # ranked_tuples = (list(zip(ranked_ids, ranked_scores, ranked_documents)))
    
    # return ranked_tuples

In [32]:
from tqdm import tqdm

data = []

for item in tqdm(data_cleaned_dev):
    rumor_id = item['id']
    query = item['rumor']
    timeline = item['timeline']
    
    # ranked_docs = retrieve_relevant_documents(rumor_id, query, timeline)
    data += retrieve_relevant_documents_tfidf(rumor_id, query, timeline)

    # for rank, (authority_tweet_id, score, doc_text) in enumerate(ranked_docs[:5]):
    #     data += [(rumor_id, authority_tweet_id, rank+1, score)]


from utils import write_trec_format_output

out_path = 'temp-data/tfidf-trec-dev.txt'
write_trec_format_output(out_path, data, 'TFIDF-BASIC')

# display(data)

100%|██████████| 32/32 [00:00<00:00, 139.12it/s]


## terrier

In [4]:
import pandas as pd

def jsons_to_pandas(jsons):
    data = []
    for entry in jsons:
        rumor_id = entry['id']
        query = entry['rumor']
        timeline = entry['timeline']

        for author, tw_id, tw in timeline:
            data += [
                [rumor_id, "".join([x if x.isalnum() else " " for x in query]), tw_id, tw]
            ]

    df = pd.DataFrame(data,
                      columns=["qid", "query", "docno", "text"],)
    return df

df = jsons_to_pandas(data_cleaned_dev)

In [5]:
import pandas as pd
import pyterrier as pt

from pyterrier.batchretrieve import TextScorer

if not pt.started():
    pt.init()
    
textscorer = TextScorer(takes="docs", returns="queries", body_attr="text", wmodel="BM25", controls={"qe":"on", "qemodel":"Bo1"})
rtr = textscorer.transform(df)
rtr

ValueError: VM is already running, can't set classpath/options; VM started at  File "c:\Users\luisk\miniconda3\envs\clef\lib\runpy.py", line 194, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "c:\Users\luisk\miniconda3\envs\clef\lib\runpy.py", line 87, in _run_code
    exec(code, run_globals)
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\traitlets\config\application.py", line 1075, in launch_instance
    app.start()
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel\kernelapp.py", line 739, in start
    self.io_loop.start()
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\tornado\platform\asyncio.py", line 205, in start
    self.asyncio_loop.run_forever()
  File "c:\Users\luisk\miniconda3\envs\clef\lib\asyncio\base_events.py", line 570, in run_forever
    self._run_once()
  File "c:\Users\luisk\miniconda3\envs\clef\lib\asyncio\base_events.py", line 1859, in _run_once
    handle._run()
  File "c:\Users\luisk\miniconda3\envs\clef\lib\asyncio\events.py", line 81, in _run
    self._context.run(self._callback, *self._args)
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel\kernelbase.py", line 545, in dispatch_queue
    await self.process_one()
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel\kernelbase.py", line 534, in process_one
    await dispatch(*args)
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel\kernelbase.py", line 437, in dispatch_shell
    await result
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel\ipkernel.py", line 359, in execute_request
    await super().execute_request(stream, ident, parent)
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel\kernelbase.py", line 778, in execute_request
    reply_content = await reply_content
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel\ipkernel.py", line 446, in do_execute
    res = shell.run_cell(
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\ipykernel\zmqshell.py", line 549, in run_cell
    return super().run_cell(*args, **kwargs)
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\IPython\core\interactiveshell.py", line 3009, in run_cell
    result = self._run_cell(
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\IPython\core\interactiveshell.py", line 3064, in _run_cell
    result = runner(coro)
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\IPython\core\async_helpers.py", line 129, in _pseudo_sync_runner
    coro.send(None)
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\IPython\core\interactiveshell.py", line 3269, in run_cell_async
    has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\IPython\core\interactiveshell.py", line 3448, in run_ast_nodes
    if await self.run_code(code, result, async_=asy):
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\IPython\core\interactiveshell.py", line 3508, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\luisk\AppData\Local\Temp\ipykernel_11796\1516532247.py", line 4, in <module>
    from pyterrier.batchretrieve import TextScorer
  File "<frozen importlib._bootstrap>", line 991, in _find_and_load
  File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 671, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 843, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\pyterrier\batchretrieve.py", line 1, in <module>
    from jnius import autoclass, cast
  File "<frozen importlib._bootstrap>", line 991, in _find_and_load
  File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 671, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 843, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\jnius\__init__.py", line 36, in <module>
    from .reflect import *  # noqa
  File "<frozen importlib._bootstrap>", line 991, in _find_and_load
  File "<frozen importlib._bootstrap>", line 975, in _find_and_load_unlocked
  File "<frozen importlib._bootstrap>", line 671, in _load_unlocked
  File "<frozen importlib._bootstrap_external>", line 843, in exec_module
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_removed
  File "c:\Users\luisk\miniconda3\envs\clef\lib\site-packages\jnius\reflect.py", line 19, in <module>
    class Class(JavaClass, metaclass=MetaJavaClass):


In [63]:
import pyterrier as pt
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import R, MAP    

ptio._write_results_trec( rtr.query('rank < 5'), 'temp-data/terrier-trec-bm25-qe.txt')
d = ptio._read_results_trec('temp-data/terrier-trec-bm25-qe.txt')


task5_dir = '../clef2024-checkthat-lab/task5'
golden_labels_file = task5_dir + '/data/dev_qrels.txt'

golden = ptio.read_qrels(golden_labels_file)
eval= ptpipelines.Evaluate(d, golden , metrics = [R@5,MAP],perquery=False)
eval

{'R@5': 0.6859649122807018, 'AP': 0.6412280701754386}

In [59]:
import pyterrier as pt
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import R, MAP    

ptio._write_results_trec( rtr.query('rank < 5'), 'temp-data/terrier-trec-bm25-qe.txt')
d = ptio._read_results_trec('temp-data/terrier-trec-bm25-qe.txt')


task5_dir = '../clef2024-checkthat-lab/task5'
golden_labels_file = task5_dir + '/data/dev_qrels.txt'

golden = ptio.read_qrels(golden_labels_file)
eval= ptpipelines.Evaluate(d, golden , metrics = [R@5,MAP],perquery=False)
eval

{'R@5': 0.7189473684210527, 'AP': 0.6810818713450292}

In [60]:
import pyterrier as pt
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import R, MAP    

# ptio._write_results_trec( rtr.query('rank < 5'), 'temp-data/terrier-trec-c.txt')
d = ptio._read_results_trec('temp-data/terrier-trec-c.txt')


task5_dir = '../clef2024-checkthat-lab/task5'
golden_labels_file = task5_dir + '/data/dev_qrels.txt'

golden = ptio.read_qrels(golden_labels_file)
eval= ptpipelines.Evaluate(d, golden , metrics = [R@5,MAP],perquery=False)
eval

{'R@5': 0.7189473684210527, 'AP': 0.6806608187134503}

In [42]:
def write_trec_format_output_from_pandas(filename: str, data, tag: str) -> None:
    """
    Writes data to a file in the TREC format.

    Parameters:
    - filename (str): The name of the file to write to.
    - data (List[Tuple[str, int, int, float]]): A list of tuples, where each tuple contains:
        - rumor_id (str): The unique ID for the given rumor.
        - authority_tweet_id (int): The unique ID for the authority tweet.
        - rank (int): The rank of the authority tweet ID for that given rumor_id.
        - score (float): The score given by the model for the authority tweet ID.
    - tag (str): The string identifier of the team/model.
    """
    with open(filename, 'w') as file:
        for row in range(len(data)):
            i = row%5
            line = f"{data.at[i, 'qid']}\tQ0\t{data.at[i, 'docno']}\t{data.at[i, 'rank']}\t{data.at[i, 'score']}\t{tag}\n"
            file.write(line)

write_trec_format_output_from_pandas('temp-data/terrier-trec.txt', rtr.query('rank < 5'), 'TERRIER-BM25')

In [44]:
import pyterrier as pt
import pyterrier.io as ptio
import pyterrier.pipelines as ptpipelines
from ir_measures import R, MAP    

task5_dir = '../clef2024-checkthat-lab/task5'
golden_labels_file = task5_dir + '/data/dev_qrels.txt'

golden = ptio.read_qrels(golden_labels_file)
eval= ptpipelines.Evaluate(rtr.query('rank < 5'), golden , metrics = [R@5,MAP],perquery=False)
eval

{'R@5': 0.7189473684210527, 'AP': 0.6806608187134503}

In [32]:
d = pd.merge(rtr, df[['docno', 'text']], on='docno', how='left')
d[d['text'].str.contains("Please note that food items unfit for human consumption are destroyed after they are confiscated")]

Unnamed: 0,qid,docid,docno,rank,score,query,text
820,AuRED_099,749,1233784722238705670,149,0.0,Qatar threw Iranian peas into garbage for fear...,Hello my dear brother thank you for your obser...


## sentence_transformers

In [33]:
from sentence_transformers import SentenceTransformer, util
import torch

embedder = SentenceTransformer("all-MiniLM-L6-v2")

def retrieve_relevant_documents_sbert(rumor_id, query, timeline, k=5):
    corpus = [t[2] for t in timeline]
    corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

    top_k = min(k, len(corpus))
    query_embedding = embedder.encode(query, convert_to_tensor=True)

    # We use cosine-similarity and torch.topk to find the highest 5 scores
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    top_results = torch.topk(cos_scores, k=top_k)

    # if debug:
    #     print("\n\n======================\n\n")
    #     print("Query:", query)
    #     evidence_ids = [e[1] for e in evidence]

    found = []
    docs = []

    for i, (score, idx) in enumerate(zip(top_results[0], top_results[1])):
            id = timeline[idx][1]

            # if debug:
            #     is_evidence = id in evidence_ids
            #     star = "(*)" if is_evidence else "\t"
            #     print(star, '\t', "(Rank: {:.0f})".format(i+1), "(Score: {:.4f})".format(score), corpus[idx])
            #     if is_evidence: found += [id]

            docs += [[rumor_id, id, i+1, score.item()]]

    # if debug:    
    #     for _, ev_id, ev_text in evidence:
    #         if ev_id not in found:
    #                 print('(!) ', ev_text)
    
    return docs

In [34]:
data = []
for item in data_cleaned_train[:]:
    query = item['rumor']
    timeline = item['timeline']
    rumor_id = item['id']
    data += retrieve_relevant_documents_sbert(rumor_id, query, timeline)

from utils import write_trec_format_output

out_path = 'temp-data/sbert-trec-train.txt'
write_trec_format_output(out_path, data, 'SBERT')

In [35]:
data = []
for item in data_cleaned_dev[:]:
    query = item['rumor']
    timeline = item['timeline']
    rumor_id = item['id']
    data += retrieve_relevant_documents_sbert(rumor_id, query, timeline)

from utils import write_trec_format_output

out_path = 'temp-data/sbert-trec-dev.txt'
write_trec_format_output(out_path, data, 'SBERT')

## openai embeddings

In [None]:
import os
import numpy as np
from openai import OpenAI

from clef.embedding_utils import cosine_similarity

client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"), # This is the default and can be omitted
)

def get_embedding(text):
    response = client.embeddings.create(
        input = text,
        model = 'text-embedding-3-small'
    )
    return response.data[0].embedding

In [None]:
def retrieve_relevant_documents_openai(rumor_id, query, timeline, k=5):
    # print(rumor_id, query)

    # Generate embedding for the rumor
    rumor_embedding = get_embedding(query)

    # Generate embeddings for each tweet in the timeline
    timeline_embeddings = [get_embedding(tweet[2]) for tweet in timeline]

    # Compute similarities
    similarities = [cosine_similarity(rumor_embedding, tweet_embedding) for tweet_embedding in timeline_embeddings]

    # Select the most relevant tweets based on similarities
    # Here, we choose a simple approach to select the top N most similar tweets. You can adjust N as needed.
    # [-k:][::-1] --> this is: top-k, then reverse the list  so highest-scoring entry is index 0, then second-highest on index 1 etc.
    most_relevant_tweet_indices = np.argsort(similarities)[-k:][::-1] 

    scores = [similarities[i] for i in most_relevant_tweet_indices]
    relevant_tweets = [timeline[i] for i in most_relevant_tweet_indices]

    ranked = []
    for i, (cos_sim, [acc, id, text]) in enumerate(zip(scores, relevant_tweets)):
        ranked += [[rumor_id, id, i+1, cos_sim]]
    
        # print('\t',[rumor_id, id, i+1, cos_sim, text])

    return ranked
        

In [None]:
from tqdm.auto import tqdm

data = []

# using non-cleaned data here
# this is suuuuper-slow, maybe use this for reranking with a reduced search space?

for item in tqdm(dev_jsons):
    rumor_id = item['id']
    query = item['rumor']
    timeline = item['timeline']
    
    data += retrieve_relevant_documents_openai(rumor_id, query, timeline)

from utils import write_trec_format_output

out_path = 'temp-data/openai-trec-dev.txt'
write_trec_format_output(out_path, data, 'OPENAI')

  0%|          | 0/32 [00:00<?, ?it/s]

AuRED_142 Naturalization decree in preparation: Lebanese passports for sale?! https://t.co/UuQ7yMbSWJ https://t.co/Jf1K1NbZJD
	 ['AuRED_142', '1555424541509386240', 1, 0.5292499614547626, '“The Information Office of the Presidency of the Republic: What was published by the French newspaper “Liberation” about the “selling” of Lebanese passports to non-Lebanese is false and baseless news.']
	 ['AuRED_142', '1555986659279360001', 2, 0.38413405162678044, '“The Information Office of the Presidency of the Republic denies a false news broadcast by the MTV station about Baabda Palace preparing a decree naturalizing 4,000 people, and recalls that it had denied yesterday the false information published by the French magazine ‘Liberation’ about the same fabricated news. "']
	 ['AuRED_142', '1556600039211925504', 3, 0.3836070343328534, 'Today, the President of the Republic, General Michel Aoun, signed 9 laws that were previously approved by the House of Representatives. Details at the following li

## evaluation

In [1]:
from scoring_utils import eval_run_retrieval

task5_dir = '../clef2024-checkthat-lab/task5'
sample_submission_file = task5_dir + '/submission_samples/KGAT_zeroShot_evidence_English_dev.txt'
lucene_submission_file = 'temp-data/lucene-trec-dev.txt'
tfidf_submission_file = 'temp-data/tfidf-trec-dev.txt'
terrier_submission_file = 'temp-data/terrier-trec-bm25-qe.txt'
sbert_submission_file = 'temp-data/sbert-trec-dev.txt'
openai_submission_file = 'temp-data/openai-trec-dev.txt'

golden_labels_file = task5_dir + '/data/dev_qrels.txt'
out_file = 'temp-data/out.csv'

print('sample', '\t',eval_run_retrieval(sample_submission_file,golden_labels_file))
print('lucence', '', eval_run_retrieval(lucene_submission_file,golden_labels_file))
print('tfidf', '\t', eval_run_retrieval(tfidf_submission_file,golden_labels_file))
print('terrier', '', eval_run_retrieval(terrier_submission_file,golden_labels_file))
print('sbert', '\t', eval_run_retrieval(sbert_submission_file,golden_labels_file))
print('openai', '\t', eval_run_retrieval(openai_submission_file,golden_labels_file))

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8



sample 	 {'R@5': 0.6357894736842106, 'AP': 0.5612280701754385}
lucence  {'R@5': 0.6971929824561404, 'AP': 0.663766081871345}
tfidf 	 {'R@5': 0.7235087719298245, 'AP': 0.6301754385964913}
terrier  {'R@5': 0.6859649122807018, 'AP': 0.6412280701754386}
sbert 	 {'R@5': 0.7080701754385965, 'AP': 0.6363508771929824}
openai 	 {'R@5': 0.7452631578947368, 'AP': 0.6448421052631579}
