In [89]:
from tira.third_party_integrations import ir_datasets, ensure_pyterrier_is_loaded, persist_and_normalize_run
import pyterrier as pt

ensure_pyterrier_is_loaded()

training_dataset = 'ir-lab-jena-leipzig-wise-2023/training-20231104-training'
validation_dataset = 'ir-lab-jena-leipzig-wise-2023/validation-20231104-training'
from jnius import autoclass

In [90]:
def create_index(documents):
    indexer = pt.IterDictIndexer("/tmp/index", overwrite=True, meta={'docno': 100, 'text': 20480})
    index_ref = indexer.index(({'docno': i.doc_id, 'text': i.text} for i in documents))
    return pt.IndexFactory.of(index_ref)

In [91]:
def run_lm(index, output_dir, queries):
    for mu in [1000, 1200, 1400, 1600, 1800, 2000, 2200, 2400, 2600]:
        system = f'lm-mu={mu}'
        run_output_dir = output_dir + '/' + system
        !rm -Rf {run_output_dir}
        !mkdir -p {run_output_dir}
        print(f'Run {system}')
        dlm = autoclass("org.terrier.matching.models.DirichletLM")()
        dlm.mu=mu
        LM = pt.BatchRetrieve(index, wmodel=dlm, verbose=True)
        #LM = pt.BatchRetrieve(index, verbose=True)
        run = LM(queries)
        persist_and_normalize_run(run, system, run_output_dir)

## Run All Configurations on the Training Data

First, we load the training dataset and index the documents, then we run our `run_lm`.

In [92]:
dataset = ir_datasets.load(training_dataset)
queries = pt.io.read_topics(ir_datasets.topics_file(training_dataset), format='trecxml')

queries.head(3)

Load ir_dataset "ir-lab-jena-leipzig-wise-2023/training-20231104-training" from tira.
No settings given in /root/.tira/.tira-settings.json. I will use defaults.


Unnamed: 0,qid,query
0,q06223196,car shelter
1,q062228,airport
2,q062287,antivirus comparison


In [93]:
index = create_index(dataset.docs_iter())

No settings given in /root/.tira/.tira-settings.json. I will use defaults.


In [94]:
run_lm(index, 'lm/training', queries)

Run lm-mu=1000


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.24q/s]

Done. run file is stored under "lm/training/lm-mu=1000/run.txt".





Run lm-mu=1200


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.27q/s]

Done. run file is stored under "lm/training/lm-mu=1200/run.txt".





Run lm-mu=1400


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.25q/s]

Done. run file is stored under "lm/training/lm-mu=1400/run.txt".





Run lm-mu=1600


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.20q/s]

Done. run file is stored under "lm/training/lm-mu=1600/run.txt".





Run lm-mu=1800


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.08q/s]

Done. run file is stored under "lm/training/lm-mu=1800/run.txt".





Run lm-mu=2000


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.05q/s]

Done. run file is stored under "lm/training/lm-mu=2000/run.txt".





Run lm-mu=2200


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.13q/s]

Done. run file is stored under "lm/training/lm-mu=2200/run.txt".





Run lm-mu=2400


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.12q/s]

Done. run file is stored under "lm/training/lm-mu=2400/run.txt".





Run lm-mu=2600


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.09q/s]

Done. run file is stored under "lm/training/lm-mu=2600/run.txt".





Run All Configurations on the Validation Data

Second, we load the validation dataset and index the documents, then we run our `run_lm`.

In [95]:
dataset = ir_datasets.load(validation_dataset)
queries = pt.io.read_topics(ir_datasets.topics_file(validation_dataset), format='trecxml')

queries.head(3)

Load ir_dataset "ir-lab-jena-leipzig-wise-2023/validation-20231104-training" from tira.
No settings given in /root/.tira/.tira-settings.json. I will use defaults.


Unnamed: 0,qid,query
0,q072224,purchase money
1,q072226,purchase used car
2,q072232,buy gold silver


In [96]:
index = create_index(dataset.docs_iter())

No settings given in /root/.tira/.tira-settings.json. I will use defaults.


In [97]:
run_lm(index, 'lm/validation', queries)

Run lm-mu=1000


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.07q/s]

Done. run file is stored under "lm/validation/lm-mu=1000/run.txt".





Run lm-mu=1200


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.13q/s]

Done. run file is stored under "lm/validation/lm-mu=1200/run.txt".





Run lm-mu=1400


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.11q/s]

Done. run file is stored under "lm/validation/lm-mu=1400/run.txt".





Run lm-mu=1600


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.07q/s]

Done. run file is stored under "lm/validation/lm-mu=1600/run.txt".





Run lm-mu=1800


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.10q/s]

Done. run file is stored under "lm/validation/lm-mu=1800/run.txt".





Run lm-mu=2000


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.11q/s]

Done. run file is stored under "lm/validation/lm-mu=2000/run.txt".





Run lm-mu=2200


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.04q/s]

Done. run file is stored under "lm/validation/lm-mu=2200/run.txt".





Run lm-mu=2400


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.08q/s]

Done. run file is stored under "lm/validation/lm-mu=2400/run.txt".





Run lm-mu=2600


BR(DPH): 100%|█████████████████████████████████████████████████████████████████████████████| 3/3 [00:02<00:00,  1.06q/s]

Done. run file is stored under "lm/validation/lm-mu=2600/run.txt".





Part 2: Evaluate all Configurations of the Grid Search

First, we import the dependencies and load the training and validation qrels.

In [98]:
from trectools import TrecRun, TrecQrel, TrecEval
from tira.rest_api_client import Client
from glob import glob
import pandas as pd
tira = Client()

def load_qrels(dataset):
    return TrecQrel(tira.download_dataset('ir-lab-jena-leipzig-wise-2023', dataset, truth_dataset=True) + '/qrels.txt')

training_qrels = load_qrels('training-20231104-training')
validation_qrels = load_qrels('validation-20231104-training')

No settings given in /root/.tira/.tira-settings.json. I will use defaults.


We download the rusn of the grid search and evaluate them.

In [99]:
!wget https://files.webis.de/teaching/ir-wise-23/ir-lab-sose-grid-search.zip
!unzip ir-lab-sose-grid-search.zip

--2023-12-21 14:35:43--  https://files.webis.de/teaching/ir-wise-23/ir-lab-sose-grid-search.zip
Resolving files.webis.de (files.webis.de)... 141.54.132.200
Connecting to files.webis.de (files.webis.de)|141.54.132.200|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 208266950 (199M) [application/zip]
Saving to: ‘ir-lab-sose-grid-search.zip’


2023-12-21 14:36:00 (11.8 MB/s) - ‘ir-lab-sose-grid-search.zip’ saved [208266950/208266950]

/usr/bin/sh: 1: unzip: not found


In [100]:
def evaluate_run(run_dir, qrels):
    run = TrecRun(run_dir + '/run.txt')
    trec_eval = TrecEval(run, qrels)

    return {
        'run': run.get_runid(),
        'nDCG@10': trec_eval.get_ndcg(depth=10),
        'nDCG@10 (unjudgedRemoved)': trec_eval.get_ndcg(depth=10, removeUnjudged=True),
        'MAP': trec_eval.get_map(depth=10),
        'MRR': trec_eval.get_reciprocal_rank()
    }

In [101]:
df = []
for r in glob('lm/training/lm*'):
    df += [evaluate_run(r, training_qrels)]
df = pd.DataFrame(df)
df.sort_values('nDCG@10', ascending=False)

  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()


Unnamed: 0,run,nDCG@10,nDCG@10 (unjudgedRemoved),MAP,MRR
5,lm-mu=2000,0.063349,0.419171,0.055556,0.124809
6,lm-mu=2200,0.063349,0.419171,0.055556,0.123888
7,lm-mu=2400,0.063349,0.419171,0.055556,0.123093
8,lm-mu=2600,0.063349,0.419171,0.055556,0.122701
0,lm-mu=1000,0.054566,0.425434,0.041667,0.10387
1,lm-mu=1200,0.054566,0.425434,0.041667,0.100908
2,lm-mu=1400,0.054566,0.419171,0.041667,0.099386
3,lm-mu=1600,0.054566,0.419171,0.041667,0.09872
4,lm-mu=1800,0.054566,0.419171,0.041667,0.097549


In [102]:
df = []
for r in glob('lm/validation/lm*'):
    df += [evaluate_run(r, validation_qrels)]
df = pd.DataFrame(df)
df.sort_values('nDCG@10', ascending=False)

  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()
  selection = selection[~selection["rel"].isnull()].groupby("query").first().copy()


Unnamed: 0,run,nDCG@10,nDCG@10 (unjudgedRemoved),MAP,MRR
3,lm-mu=1600,0.029536,0.37889,0.015873,0.049471
4,lm-mu=1800,0.027953,0.37889,0.013889,0.043468
7,lm-mu=2400,0.027953,0.354196,0.013889,0.043325
8,lm-mu=2600,0.027953,0.354196,0.013889,0.043277
0,lm-mu=1000,0.026674,0.435561,0.012346,0.039447
1,lm-mu=1200,0.026674,0.435561,0.012346,0.039355
2,lm-mu=1400,0.026674,0.382329,0.012346,0.038953
5,lm-mu=2000,0.026674,0.376302,0.012346,0.038764
6,lm-mu=2200,0.026674,0.354196,0.012346,0.038721
