In [1]:
import pyterrier as pt
import pandas as pd
from tira.third_party_integrations import ensure_pyterrier_is_loaded
from tira.rest_api_client import Client
from feature_extraction import get_all_features
import pickle
import warnings
from pathlib import Path

# Suppress all Python warnings for the rest of the session (applies globally,
# so genuine deprecation/runtime warnings are hidden as well).
warnings.filterwarnings('ignore')

# Make sure PyTerrier is started through TIRA's helper before any `pt.*` call,
# then create the TIRA client that the cells below use to fetch submissions.
ensure_pyterrier_is_loaded()
tira = Client()

PyTerrier 0.10.0 has loaded Terrier 5.8 (built by craigm on 2023-11-01 18:05) and terrier-helper 0.0.8

No etc/terrier.properties, using terrier.default.properties for bootstrap configuration.


In [2]:
# Deserialize the model object that create_run later hands to
# pt.ltr.apply_learned_model. The path is relative to the working directory.
# NOTE(review): pickle.load can execute arbitrary code during unpickling;
# only load 'lmart.pkl' if it comes from a trusted source.
with open('lmart.pkl', 'rb') as f:
    lmart = pickle.load(f)

In [3]:
# Identifier of this run: used as the output sub-directory, as the file-name
# stem, and as the run tag passed to pt.io.write_results.
RUN_NAME = 'ows_ltr'
# Base directory under which run files are written (relative to the working
# directory the notebook is executed from).
RUN_DIR = Path('../runs/')

In [4]:
def create_run(dataset_name: str, run_suffix: str, top_k: int = 1000) -> None:
    """Re-rank BM25 candidates with the loaded model and write a TREC run file.

    The pipeline takes the BM25 re-rank submission from TIRA, keeps the
    ``top_k`` highest-ranked results per query, passes them through the
    transformer returned by ``get_all_features`` and finally applies the
    module-level ``lmart`` model via ``pt.ltr.apply_learned_model``.

    The result is written to ``RUN_DIR / RUN_NAME / f'{RUN_NAME}.{run_suffix}'``;
    missing parent directories are created.

    Args:
        dataset_name: Dataset id below ``irds:ir-benchmarks/``.
        run_suffix: Extension appended to ``RUN_NAME`` to form the file name.
        top_k: Rank cutoff applied to the BM25 results before feature
            extraction. Defaults to 1000, the value that was previously
            hard-coded.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    # Fail fast, before any dataset or submission download is triggered.
    if top_k < 1:
        raise ValueError(f'top_k must be a positive integer, got {top_k!r}')

    dataset = pt.get_dataset(f'irds:ir-benchmarks/{dataset_name}')

    extract_features = get_all_features(tira, dataset)

    bm25 = tira.pt.from_submission(
        'ir-benchmarks/tira-ir-starter/BM25 Re-Rank (tira-ir-starter-pyterrier)',
        dataset,
    )

    # `%` is PyTerrier's rank-cutoff operator and `>>` composes transformers.
    pipeline = (
        (bm25 % top_k)
        >> extract_features
        >> pt.ltr.apply_learned_model(lmart, form="ltr")
    )

    run = pipeline(dataset.get_topics(variant='query'))

    run_file = RUN_DIR / RUN_NAME / f'{RUN_NAME}.{run_suffix}'
    run_file.parent.mkdir(exist_ok=True, parents=True)

    pt.io.write_results(run, run_file, format='trec', run_name=RUN_NAME)

In [5]:
# Maps each dataset id (resolved below 'irds:ir-benchmarks/' by create_run) to
# the suffix used for its run file name. Commented-out entries are disabled
# and therefore not processed by the loop over DATASETS.
DATASETS = {
    # 'longeval-train-20230513-training': 'train_2023', 
    # 'longeval-heldout-20230513-training': 'WT',
    # 'longeval-short-july-20230513-training': 'ST',
    # 'longeval-long-september-20230513-training': 'LT',
    'longeval-2023-01-20240423-training': 'train_2024',
    'longeval-2023-06-20240418-training': 'lag6',
    'longeval-2023-08-20240418-training': 'lag8',
}

In [6]:
# Produce one run file per configured dataset, in the mapping's insertion order.
for dataset_name in DATASETS:
    suffix = DATASETS[dataset_name]
    print(f'Running {dataset_name}...')
    create_run(dataset_name, suffix)

Running longeval-2023-01-20240423-training...
Running longeval-2023-06-20240418-training...
The download is derived from The LongEval Dataset under the "Qwant LongEval Attribution-NonCommercial-ShareAlike License". Hence, the download is also under this License. By using it, you agree to the terms of this license. Please find details at: https://lindat.mff.cuni.cz/repository/xmlui/page/Qwant_LongEval_BY-NC-SA_License


Download: 110MiB [00:14, 8.19MiB/s] 


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/longeval-2023-06-20240418-training/ows
The download is derived from The LongEval Dataset under the "Qwant LongEval Attribution-NonCommercial-ShareAlike License". Hence, the download is also under this License. By using it, you agree to the terms of this license. Please find details at: https://lindat.mff.cuni.cz/repository/xmlui/page/Qwant_LongEval_BY-NC-SA_License


Download: 3.12MiB [00:00, 4.00MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/longeval-2023-06-20240418-training/ows
Running longeval-2023-08-20240418-training...
The download is derived from The LongEval Dataset under the "Qwant LongEval Attribution-NonCommercial-ShareAlike License". Hence, the download is also under this License. By using it, you agree to the terms of this license. Please find details at: https://lindat.mff.cuni.cz/repository/xmlui/page/Qwant_LongEval_BY-NC-SA_License


Download: 303MiB [00:30, 10.5MiB/s] 


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/longeval-2023-08-20240418-training/ows
The download is derived from The LongEval Dataset under the "Qwant LongEval Attribution-NonCommercial-ShareAlike License". Hence, the download is also under this License. By using it, you agree to the terms of this license. Please find details at: https://lindat.mff.cuni.cz/repository/xmlui/page/Qwant_LongEval_BY-NC-SA_License


Download: 3.15MiB [00:00, 5.86MiB/s]


Download finished. Extract...
Extraction finished:  /root/.tira/extracted_runs/ir-benchmarks/longeval-2023-08-20240418-training/ows
