In [36]:
# Environment setup and imports for the whole notebook.
import pyterrier as pt
import os
# NOTE(review): absolute, machine-specific JVM locations. They are assigned
# unconditionally, so any JAVA_HOME / JVM_PATH already set in the environment is
# overwritten. Adjust these two paths when running on another machine.
os.environ["JAVA_HOME"] = "/home/andrew/Java"
os.environ["JVM_PATH"] = '/home/andrew/Java/jre/lib/server/libjvm.so'
# Initialise PyTerrier only if it has not been started yet, which makes this
# cell safe to re-run in a live kernel.
# The terrier-prf boot package is presumably what backs the RM3 rewriter used
# further down — TODO confirm.
if not pt.started():
    pt.init(boot_packages=["com.github.terrierteam:terrier-prf:-SNAPSHOT"])
from pyterrier.model import add_ranks
import json
import pandas as pd
import numpy as np
import itertools
# NOTE(review): the star import makes it hard to tell where names come from;
# the evaluation cell in this notebook passes its metrics as plain strings.
from ir_measures import *
from ir_measures import evaluator
import ir_datasets as irds
from collections import defaultdict

In [37]:
# Load the Terrier index stored at this location.
# NOTE(review): absolute, machine-specific path.
index = pt.IndexFactory.of('/home/andrew/Documents/Data/paced/marcoblocks')

16:32:54.546 [main] WARN org.terrier.structures.BaseCompressingMetaIndex - Structure meta reading data file directly from disk (SLOW) - try index.meta.data-source=fileinmem in the index properties file. 1.9 GiB of memory would be required.


In [38]:
# Two first-stage retrievers over the same index.
# BM25 is given explicit k_1 / b / k_3 controls; DPH is created without any controls.
bm25 = pt.BatchRetrieve(index, wmodel="BM25", controls={"bm25.k_1": 0.45, "bm25.b": 0.55, "bm25.k_3": 0.5})
dph = pt.BatchRetrieve(index, wmodel="DPH")

In [39]:
# Query-rewriting transformers. Bo1, KL and RM3 are built from `index` and are
# chained between two retrieval passes when the pipelines are assembled.
# NOTE(review): `smd` is not referenced by any other cell visible in this
# notebook — confirm whether it is still needed.
smd = pt.rewrite.SequentialDependence()
bo1 = pt.rewrite.Bo1QueryExpansion(index)
kl = pt.rewrite.KLQueryExpansion(index)
rm3 = pt.rewrite.RM3(index)

In [40]:
# Each pipeline is: first-pass retrieval >> query rewriting >> second retrieval
# pass cut off at 1000 results. `%` binds tighter than `>>`, so the cutoff
# applies to the final retriever only; the parentheses make that explicit.
# Note that DPH is paired with Bo1 and KL only (there is no DPH + RM3 pipeline).
models = [
    first_pass >> expansion >> (first_pass % 1000)
    for first_pass, expansion in (
        (bm25, bo1),
        (bm25, kl),
        (bm25, rm3),
        (dph, bo1),
        (dph, kl),
    )
]

In [41]:
def convert_to_dict(result):
    """Index a result frame by query and document.

    Reads the ``qid``, ``docno``, ``score`` and ``rank`` columns of ``result``
    and returns a nested mapping ``{qid: {docno: (score, rank)}}``. Rows are
    consumed in frame order, so if a (qid, docno) pair occurs more than once the
    last occurrence wins.

    An empty frame yields an empty dict. (Going through ``groupby(...).apply``
    returns an empty DataFrame in that case, whose ``to_dict()`` is keyed by
    column name rather than by qid.)
    """
    by_query = {}
    columns = (result['qid'], result['docno'], result['score'], result['rank'])
    for qid, docno, score, rank in zip(*columns):
        by_query.setdefault(qid, {})[docno] = (score, rank)
    return by_query

In [42]:
class EnsembleScorer(pt.Transformer):
    """Fuse the result lists of several pipelines into a single ranking.

    Every pipeline in ``models`` is run on the same input. For each query, the
    fused score of a document is the mean over all pipelines of
    ``1 / (C + rank + 1)``, where ``rank`` is the rank that pipeline gave the
    document. A document a pipeline did not return is treated as having the
    rank stored in ``DEFAULT``. With exactly one pipeline, that pipeline's own
    scores are passed through unchanged.
    """

    # (score, rank) stand-in for a document missing from a pipeline's results.
    DEFAULT = (0, 10000)
    # Columns of the frame produced by get_fusion_scores; given explicitly so
    # that an empty result still has the expected columns.
    _COLUMNS = ['qid', 'docno', 'score']

    def __init__(self, models, C=0) -> None:
        """Store the pipelines to fuse and the constant ``C`` of the rank term."""
        super().__init__()
        self.models = models
        self.C = C

    def get_fusion_scores(self, target_sets, qids):
        """Build the fused ``qid`` / ``docno`` / ``score`` frame.

        Args:
            target_sets: one ``{qid: {docno: (score, rank)}}`` mapping per
                pipeline. The mappings are only read, never modified.
            qids: query ids to emit results for. A query missing from a
                mapping is treated as having no results from that pipeline.

        Returns:
            A DataFrame with string ``qid`` and ``docno`` columns and a
            ``score`` column. It has these columns even when there are no rows.
        """
        records = []
        if len(target_sets) == 1:
            # Single pipeline: nothing to fuse, keep its scores as they are.
            target = target_sets[0]
            for qid in qids:
                for doc_id, (score, _rank) in target.get(qid, {}).items():
                    records.append({
                        'qid': str(qid),
                        'docno': str(doc_id),
                        'score': score,
                    })
            return pd.DataFrame.from_records(records, columns=self._COLUMNS)

        for qid in qids:
            # .get(): a pipeline that retrieved nothing for this query has no
            # entry for it, which must not abort the whole fusion.
            per_model = [target.get(qid, {}) for target in target_sets]
            # Union of all retrieved documents in first-seen order. Unlike a
            # set, this order does not vary between interpreter runs, so any
            # order-dependent tie-breaking downstream is reproducible.
            candidates = dict.fromkeys(
                doc_id for ranked in per_model for doc_id in ranked
            )
            for candidate in candidates:
                scores = [
                    1 / (self.C + ranked.get(candidate, self.DEFAULT)[1] + 1)
                    for ranked in per_model
                ]
                records.append({
                    'qid': str(qid),
                    'docno': str(candidate),
                    'score': np.mean(scores),
                })
        return pd.DataFrame.from_records(records, columns=self._COLUMNS)

    def transform(self, inp):
        """Run every pipeline on ``inp`` and return the fused, ranked results.

        The returned frame holds the columns produced by ``get_fusion_scores``
        plus whatever ``add_ranks`` adds; other columns of ``inp`` are not
        carried over.
        """
        result_sets = [model.transform(inp) for model in self.models]
        sets = [convert_to_dict(res) for res in result_sets]
        qids = list(inp["qid"].unique())

        return add_ranks(self.get_fusion_scores(sets, qids))

In [43]:
# Fuse all pipelines in `models`. With C=0.0 the fusion weight of a document in
# one pipeline reduces to 1 / (rank + 1).
ensemble = EnsembleScorer(models, C=0.0)

In [44]:
# MS MARCO passage / TREC DL 2019 test collection, loaded through PyTerrier's
# "irds:" dataset prefix. The "judged" variant presumably keeps only topics that
# have relevance judgements — TODO confirm.
dataset = pt.get_dataset('irds:msmarco-passage/trec-dl-2019/judged')

In [45]:
# Evaluate the fused ranking on the dataset's topics against its qrels.
rez = pt.Experiment(
    [ensemble],
    dataset.get_topics(),
    dataset.get_qrels(),
    eval_metrics=["map", "recip_rank", "ndcg_cut_10", "recall_1000"],
    # Explicit label: without it the row is named after the object's default
    # repr, which embeds a memory address and changes on every run.
    names=["ensemble"],
)

In [46]:
# Show the evaluation table as this cell's output.
rez

Unnamed: 0,name,map,recip_rank,ndcg_cut_10,recall_1000
0,<__main__.EnsembleScorer object at 0x7f5653d28...,0.442763,0.779511,0.539718,0.786096
