In [9]:
import csv
from pathlib import Path
import numpy as np
from ranking import Ranking
from index import Mode, InMemoryIndex
from encoder import TCTColBERTQueryEncoder as TCTColBERTQueryEncoderFF
# from pyserini.search.faiss import FaissSearcher, TctColBertQueryEncoder
from ir_measures import read_trec_qrels, calc_aggregate, nDCG, RR, P, R
import sys

In [4]:
# Load a prebuilt passage index from disk, then attach the TCT-ColBERT query
# encoder to it (the encoder is set on the loaded object, not passed to from_disk).
# NOTE(review): the ".pkl" suffix suggests from_disk unpickles this file -- confirm,
# and only load index files that come from a trusted source.
# Alternative kept for reference: construct the index directly instead of loading it.
# in_memory_index = InMemoryIndex(mode=Mode.PASSAGE, encoder=TCTColBERTQueryEncoderFF("castorini/tct_colbert-msmarco"))
in_memory_index = InMemoryIndex.from_disk('../../dev/testing/ffindex_passage_2019_2020.pkl')
in_memory_index.encoder = TCTColBERTQueryEncoderFF("castorini/tct_colbert-msmarco")

Some weights of the model checkpoint at castorini/tct_colbert-msmarco were not used when initializing BertModel: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
# Load the sparse first-stage runs for the 2019 and 2020 test sets. Each run is
# cut at 5000 (presumably the per-query depth -- confirm against Ranking.cut) and
# given a name that is used to label it later on.
sparse_ranking_2019 = Ranking.from_file(Path("../../dev/testing/msmarco-passage-test2019-sparse10000.txt"))
sparse_ranking_2019.cut(5000)
sparse_ranking_2019.name = "sparse_ranking_2019"
sparse_ranking_2020 = Ranking.from_file(Path("../../dev/testing/msmarco-passage-test2020-sparse10000.txt"))
sparse_ranking_2020.cut(5000)
sparse_ranking_2020.name = "sparse_ranking_2020"

# Distinct ids that occur in either run. Starting from an empty set keeps this
# valid when a run has no queries; the unbound `set.union(*[])` form raises
# TypeError in that case. union() accepts the key views directly, so no
# per-query temporary set is needed.
all_ids = set().union(
    *(sparse_ranking_2019[q_id].keys() for q_id in sparse_ranking_2019.q_ids),
    *(sparse_ranking_2020[q_id].keys() for q_id in sparse_ranking_2020.q_ids),
)
print(f"indexing {len(all_ids)} documents or passages")

indexing 440079 documents or passages


In [15]:
# Score the 2019 test queries with three re-ranking variants and compare them
# against the sparse baseline run on nDCG@10, RR(rel=2)@10, P@10, R@10 and an
# F-score derived from the aggregated P@10 and R@10.
with open(
    "../../dev/testing/msmarco-test2019-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    # Each row is expected to hold exactly two tab-separated fields: id, query text.
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")

alpha = 0.2
result = in_memory_index.get_scores(
    sparse_ranking_2019,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    normalise=False
)
result2 = in_memory_index.get_scores_rrf(
    sparse_ranking_2019,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False
)
result3 = in_memory_index.get_scores(
    sparse_ranking_2019,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    normalise=True
)

qrels = list(read_trec_qrels("../../dev/testing/2019qrels-pass.txt"))
measures = [nDCG@10, RR(rel=2)@10, P@10, R@10]

# (label, ranking) pairs, evaluated and printed in this order.
systems_2019 = [
    ("BM25", sparse_ranking_2019),
    ("Interpolation", result[alpha]),
    ("RRF", result2[alpha]),
    ("Interpolation w/ minimax", result3[alpha]),
]

aggregates_2019 = {}
for label, ranking in systems_2019:
    aggregate = calc_aggregate(measures, qrels, ranking.run)

    # Fail with a readable message if a measure needed for the F-score is absent.
    missing = [str(m) for m in (P@10, R@10) if m not in aggregate]
    if missing:
        raise KeyError(
            f"{label}: no aggregate value for {missing}; "
            f"got {[str(m) for m in aggregate]}"
        )

    precision, recall = aggregate[P@10], aggregate[R@10]
    denominator = precision + recall
    # Harmonic mean of P@10 and R@10; defined as 0.0 when both are 0 to avoid
    # a ZeroDivisionError.
    f_score = (2 * precision * recall) / denominator if denominator else 0.0

    print(label, aggregate, 'f-score:{}'.format(f_score))
    aggregates_2019[label] = aggregate

# Keep the original per-system names so any later cell that reads them still works.
bm25Res = aggregates_2019["BM25"]
interpolationRes = aggregates_2019["Interpolation"]
rrfRes = aggregates_2019["RRF"]
interpolationMinMax = aggregates_2019["Interpolation w/ minimax"]

loaded 200 queries
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 66.25it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 68.16it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 70.20it/s]


sparse_ranking_2019 None
P@10
BM25 {P@10: 0.6186046511627907, R@10: 0.12847703679948552, nDCG@10: 0.5058310024399072, RR(rel=2)@10: 0.7024178663713547} f-score:0.2127652004121614
Interpolation {P@10: 0.8069767441860466, R@10: 0.17310336499562765, nDCG@10: 0.7158066715626032, RR(rel=2)@10: 0.901937984496124} f-score:0.28505912645948106
RRF {P@10: 0.7767441860465117, R@10: 0.16565014746118, nDCG@10: 0.6816417471378419, RR(rel=2)@10: 0.8775193798449613} f-score:0.27306571014556874
Interpolation w/ minimax {P@10: 0.8162790697674419, R@10: 0.17355258664965964, nDCG@10: 0.7229942609361284, RR(rel=2)@10: 0.8949612403100775} f-score:0.2862453288247246


In [16]:
# Score the 2020 test queries with three re-ranking variants and compare them
# against the sparse baseline run on nDCG@10, RR(rel=2)@10, P@10, R@10 and an
# F-score derived from the aggregated P@10 and R@10.
with open(
    "../../dev/testing/msmarco-test2020-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    # Each row is expected to hold exactly two tab-separated fields: id, query text.
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")

alpha = 0.2
result = in_memory_index.get_scores(
    sparse_ranking_2020,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    normalise=False
)
result2 = in_memory_index.get_scores_rrf(
    sparse_ranking_2020,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False
)
result3 = in_memory_index.get_scores(
    sparse_ranking_2020,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    normalise=True
)

qrels = list(read_trec_qrels("../../dev/testing/2020qrels-pass.txt"))
measures = [nDCG@10, RR(rel=2)@10, P@10, R@10]

# (label, ranking) pairs, evaluated and printed in this order.
systems_2020 = [
    ("BM25", sparse_ranking_2020),
    ("Interpolation", result[alpha]),
    ("RRF", result2[alpha]),
    ("Interpolation w/ minimax", result3[alpha]),
]

# NOTE(review): the recorded run of this cell stopped with `KeyError: P@10`
# before any score line was printed; the cause is not visible in this notebook.
# If the measure is missing from the aggregate, the check below reports which
# measures were actually returned. If the error is raised inside calc_aggregate
# itself, inspect the 2020 run/qrels and the installed ir_measures version.
aggregates_2020 = {}
for label, ranking in systems_2020:
    aggregate = calc_aggregate(measures, qrels, ranking.run)

    # Fail with a readable message if a measure needed for the F-score is absent.
    missing = [str(m) for m in (P@10, R@10) if m not in aggregate]
    if missing:
        raise KeyError(
            f"{label}: no aggregate value for {missing}; "
            f"got {[str(m) for m in aggregate]}"
        )

    precision, recall = aggregate[P@10], aggregate[R@10]
    denominator = precision + recall
    # Harmonic mean of P@10 and R@10; defined as 0.0 when both are 0 to avoid
    # a ZeroDivisionError.
    f_score = (2 * precision * recall) / denominator if denominator else 0.0

    print(label, aggregate, 'f-score:{}'.format(f_score))
    aggregates_2020[label] = aggregate

# Keep the original per-system names so any later cell that reads them still works.
bm25Res2 = aggregates_2020["BM25"]
interpolation2 = aggregates_2020["Interpolation"]
rrf2 = aggregates_2020["RRF"]
interpolationMinmax2 = aggregates_2020["Interpolation w/ minimax"]

loaded 200 queries
Encoding queries for interpolation...


100%|██████████| 54/54 [00:02<00:00, 21.34it/s]


sparse_ranking_2020 None
Encoding queries for RRF...


100%|██████████| 54/54 [00:00<00:00, 69.63it/s]


sparse_ranking_2020 None
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 64.66it/s]


sparse_ranking_2020 None


KeyError: P@10