In [1]:
# notebook to evaluate the information retrieval results from an external system (i.e. not pyterrier)
import pyterrier as pt
# Initialise PyTerrier only if it has not been started yet, so that
# re-running this cell in a live kernel does not call pt.init() a second time.
if not pt.started():
    pt.init()


PyTerrier 0.9.2 has loaded Terrier 5.7 (built by craigm on 2022-11-10 18:30) and terrier-helper 0.0.7

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:

import xml.etree.ElementTree as ET
import pandas as pd

def load_topics(path):
    """Read an XML topics file into a two-column ``qid``/``query`` DataFrame.

    The file is expected to have a root element whose direct children are
    ``<topic>`` elements, each carrying an ``<id>`` and a ``<query>`` child.
    Topics where either child is missing or empty are skipped. If an id
    occurs more than once, the last query seen for it wins.

    Queries are lower-cased, every run of non-word characters is replaced by a
    single space, and surrounding whitespace is removed.

    Parameters
    ----------
    path : str or path-like
        Location of the XML topics file.

    Returns
    -------
    pandas.DataFrame
        One row per retained topic with string columns ``qid`` and ``query``.

    Raises
    ------
    OSError
        If the file cannot be opened.
    xml.etree.ElementTree.ParseError
        If the file is not well-formed XML.
    """
    # Let the XML parser open the file itself: it honours the encoding named
    # in the XML declaration instead of relying on the platform's locale.
    root = ET.parse(path).getroot()

    queries_by_id = {}
    for topic in root.findall("topic"):
        topic_id = topic.findtext("id")
        topic_query = topic.findtext("query")
        if topic_id and topic_query:
            # Ids are stripped so that "<id> 1 </id>" still matches qid "1".
            queries_by_id[topic_id.strip()] = topic_query.strip().lower()

    topics = pd.DataFrame(list(queries_by_id.items()), columns=["qid", "query"])
    # Strip again *after* the substitution: punctuation at either end of a
    # query would otherwise be left behind as a stray leading/trailing space.
    topics["query"] = (
        topics["query"].str.replace(r'\W+', ' ', regex=True).str.strip()
    )
    return topics

In [5]:
# Topics (queries) parsed from the XML topics file into a qid/query frame.
topics = load_topics("../data/topics/topics.txt")
# Three assessment files, all read with PyTerrier's qrels reader.
# NOTE(review): judging by the file names these hold relevance, credibility and
# readability judgements respectively — confirm against the dataset docs.
qrels = pt.io.read_qrels("../data/assessments/qrels.txt") # type: ignore
qcred = pt.io.read_qrels("../data/assessments/qcredibility.txt") # type: ignore
qread = pt.io.read_qrels("../data/assessments/qreadability.txt") # type: ignore

# (label, assessments) pairs: run_experiment evaluates the same run once per
# pair and prefixes the reported system name with the label.
all_qs = [("qrels", qrels), ("qcred", qcred), ("qread", qread)]

In [37]:
# Preview the frame read from qrels.txt (qid, docno and label columns).
qrels

Unnamed: 0,qid,docno,label
0,1,a6195d99-f7d7-43ec-907c-435cb7a62ee7,2
1,1,80fd9af1-c9b6-4d82-b62a-fe16d5b9d76f,1
2,1,4a8c5d85-b2d0-43bc-83f4-c1acfe0b3481,2
3,1,22990a4b-6a9f-4e00-9e72-aef0fbee202b,0
4,1,fd6ddab2-e4d9-4151-9972-256f5be6eb21,1
...,...,...,...
12495,132,122cd674-3897-5129-bf7a-5cc639995bf9,1
12496,132,1a9a9729-5ab3-5444-af62-77e69ace6b74,0
12497,132,17912579-67a2-469c-a0c7-6ff1e48eba13,1
12498,132,b80cb282-487e-5197-9391-96513c8eaced,1


In [6]:
import pyterrier as pt

def run_experiment(pipeline, simple_name, topics, qrels, eval_metrics=None):
    """Evaluate one system against several assessment sets.

    ``pt.Experiment`` is called once per assessment set and the per-set result
    frames are stacked into a single table.

    Parameters
    ----------
    pipeline
        The system to evaluate; it is passed to ``pt.Experiment`` as the only
        entry of the systems list (this notebook passes a results DataFrame).
    simple_name : str
        Short system label; each result row is named ``<set name>_<simple_name>``.
    topics
        Topics frame, forwarded unchanged to ``pt.Experiment``.
    qrels
        Iterable of ``(name, assessments)`` pairs — despite the parameter
        name this is *not* a single qrels frame.
    eval_metrics : list, optional
        Metrics forwarded to ``pt.Experiment``. Defaults to
        ``["map", "bpref", "ndcg_cut_10"]``.

    Returns
    -------
    pandas.DataFrame
        The result frames of all ``pt.Experiment`` calls concatenated row-wise
        in the order of ``qrels``; the original row indices are kept.

    Raises
    ------
    ValueError
        Raised by ``pd.concat`` when ``qrels`` yields no pairs.
    """
    # A None sentinel replaces the former list default, so no list object is
    # shared between calls.
    if eval_metrics is None:
        eval_metrics = ["map", "bpref", "ndcg_cut_10"]

    experiments = [
        pt.Experiment(
            [pipeline],
            topics,
            q,
            # Hand every experiment its own copy so that a callee mutating the
            # list cannot leak into later experiments or into the caller.
            list(eval_metrics),
            # Prefix the system label with the assessment-set name.
            names=[name + '_' + simple_name],
        )
        for name, q in qrels
    ]
    return pd.concat(experiments, axis=0)

In [39]:
# Load the rankings that were produced outside this notebook from CSV; no
# retrieval is executed here.
# NOTE(review): the original note said "without index" — presumably the run was
# produced without a Terrier index; confirm the intended meaning.
retrieval = pd.read_csv("rankings/colbert-pubmed-bert-rankings.csv")
# Short system label, reused for the result names and the output file name.
simple_name = "colbert-pubmed-bert"

In [40]:

# Score only the entries whose rank is below 250 against every assessment set
# in all_qs, then write the combined metrics table to the results/ directory.
results = run_experiment(
    retrieval.loc[retrieval['rank'] < 250],
    simple_name,
    topics=topics,
    qrels=all_qs,
    eval_metrics=["map", "bpref", "ndcg_cut_10"],
)
results.to_csv("results/" + simple_name + "_results.csv")