In [14]:
import csv
from pathlib import Path
import numpy as np
from ranking import Ranking
from index import Mode, InMemoryIndex
from encoder import TCTColBERTQueryEncoder as TCTColBERTQueryEncoderFF
# from pyserini.search.faiss import FaissSearcher, TctColBertQueryEncoder
from ir_measures import read_trec_qrels, calc_aggregate, nDCG, RR, P, R, AP
import sys
from tqdm import tqdm

# define values of eta to test
etas = [10,20,30,40,50,60,70,80,90,100,500]

# define values of alpha to test
alphas = [0.0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0,-1.0]

In [15]:
# in_memory_index = InMemoryIndex(mode=Mode.PASSAGE, encoder=TCTColBERTQueryEncoderFF("castorini/tct_colbert-msmarco"))
in_memory_index = InMemoryIndex.from_disk('../testing/ffindex_passage_2019_2020.pkl')
in_memory_index.encoder = TCTColBERTQueryEncoderFF("castorini/tct_colbert-msmarco")

Some weights of the model checkpoint at castorini/tct_colbert-msmarco were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [16]:
sparse_ranking_2019 = Ranking.from_file(Path("../testing/msmarco-passage-test2019-sparse10000.txt"))
sparse_ranking_2019.cut(5000)
sparse_ranking_2019.name = "sparse_ranking_2019"
sparse_ranking_2020 = Ranking.from_file(Path("../testing/msmarco-passage-test2020-sparse10000.txt"))
sparse_ranking_2020.cut(5000)
sparse_ranking_2020.name = "sparse_ranking_2020"
all_ids = set.union(
    *[set(sparse_ranking_2019[q_id].keys()) for q_id in sparse_ranking_2019.q_ids],
    *[set(sparse_ranking_2020[q_id].keys()) for q_id in sparse_ranking_2020.q_ids]
)
print(f"indexing {len(all_ids)} documents or passages")

indexing 440079 documents or passages


In [None]:
with open(
    "../testing/msmarco-test2019-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")

alpha = 0.2
result = in_memory_index.get_scores(
    sparse_ranking_2019,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    normalise=False
)
result2 = in_memory_index.get_scores_rrf(
    sparse_ranking_2019,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
)
result3 = in_memory_index.get_scores_rrf(
    sparse_ranking_2019,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    eta=60,
    eta2=10,
)
qrels = list(read_trec_qrels("../testing/2019qrels-pass.txt"))
print(
    "BM25",
    calc_aggregate([nDCG@10, RR(rel=2)@10], qrels, sparse_ranking_2019.run)
)
print(
    f"Interpolation",
    calc_aggregate([nDCG@10, RR(rel=2)@10], qrels, result[alpha].run)
)
print(
    f"RRF",
    calc_aggregate([nDCG@10, RR(rel=2)@10], qrels, result2[alpha].run)
)
print(
    f"rrf with eta_lex = 60 and eta_sem = 10",
    calc_aggregate([nDCG@10, RR(rel=2)@10], qrels, result3[alpha].run)
)

loaded 200 queries
Encoding queries for interpolation...
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


100%|██████████| 43/43 [00:01<00:00, 33.50it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 102.57it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 62.37it/s]


BM25 {nDCG@10: 0.5058310024399072, RR(rel=2)@10: 0.7024178663713547}
Interpolation {nDCG@10: 0.7158066715626032, RR(rel=2)@10: 0.901937984496124}
RRF {nDCG@10: 0.6816417471378419, RR(rel=2)@10: 0.8775193798449614}
rrf with eta_lex = 60 and eta_sem = 10 {nDCG@10: 0.698939518670405, RR(rel=2)@10: 0.841860465116279}


In [25]:
with open(
    "../testing/msmarco-test2019-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")

# define dictionary to keep track of best results for tested hyperparameters
bestHyperparams2019 = {
    'ccUnnormalized': {'precision': {'value': -1, 'alpha': 0}, 'recall': {'value': -1, 'alpha': 0}, 'fscore': {'value': -1, 'alpha': 0}, 'rr': {'value': -1, 
'alpha': 0}, 'ndcg': {'value': -1, 'alpha': 0}, 'map': {'value': -1, 'alpha': 0}},
    'ccNormalized': {'precision': {'value': -1, 'alpha': 0}, 'recall': {'value': -1, 'alpha': 0}, 'fscore': {'value': -1, 'alpha': 0}, 'rr': {'value': -1, 
'alpha': 0}, 'ndcg': {'value': -1, 'alpha': 0}, 'map': {'value': -1, 'alpha': 0}},
}


for i in tqdm(range(len(etas))):
    alpha = alphas[i]
    eta = etas[i]

    result = in_memory_index.get_scores(
        sparse_ranking_2019,
        queries,
        alpha=alpha,
        cutoff=None,
        early_stopping=False,
        normalise=False
    )
    result3 = in_memory_index.get_scores(
        sparse_ranking_2019,
        queries,
        alpha=alpha,
        cutoff=None,
        early_stopping=False,
        normalise=True
    )
    qrels = list(read_trec_qrels("../testing/2019qrels-pass.txt"))

    interpolationRes = calc_aggregate([nDCG@10, RR(rel=2)@10,P(rel=2)@10,R(rel=2)@10, AP(rel=2)@10], qrels, result[alpha].run)
    fscoreInterpolationRes = (2*interpolationRes[P(rel=2)@10]*interpolationRes[R(rel=2)@10])/(interpolationRes[P(rel=2)@10]+interpolationRes[R(rel=2)@10])

    interpolationMinMax = calc_aggregate([nDCG@10, RR(rel=2)@10,P(rel=2)@10,R(rel=2)@10, AP(rel=2)@10], qrels, result3[alpha].run)
    fscoreInterpolationMinMax = (2*interpolationMinMax[P(rel=2)@10]*interpolationMinMax[R(rel=2)@10])/(interpolationMinMax[P(rel=2)@10]+interpolationMinMax[R(rel=2)@10])

    # update dictionary of best parameters per method
    if interpolationRes[P(rel=2)@10] > bestHyperparams2019['ccUnnormalized']['precision']['value']:
        bestHyperparams2019['ccUnnormalized']['precision']['value'] = interpolationRes[P(rel=2)@10]
        bestHyperparams2019['ccUnnormalized']['precision']['alpha'] = alpha

    if interpolationRes[R(rel=2)@10] > bestHyperparams2019['ccUnnormalized']['recall']['value']:
        bestHyperparams2019['ccUnnormalized']['recall']['value'] = interpolationRes[R(rel=2)@10]
        bestHyperparams2019['ccUnnormalized']['recall']['alpha'] = alpha

    if interpolationRes[nDCG@10] > bestHyperparams2019['ccUnnormalized']['ndcg']['value']:
        bestHyperparams2019['ccUnnormalized']['ndcg']['value'] = interpolationRes[nDCG@10]
        bestHyperparams2019['ccUnnormalized']['ndcg']['alpha'] = alpha

    if interpolationRes[RR(rel=2)@10] > bestHyperparams2019['ccUnnormalized']['rr']['value']:
        bestHyperparams2019['ccUnnormalized']['rr']['value'] = interpolationRes[RR(rel=2)@10]
        bestHyperparams2019['ccUnnormalized']['rr']['alpha'] = alpha

    if interpolationRes[AP(rel=2)@10] > bestHyperparams2019['ccUnnormalized']['map']['value']:
        bestHyperparams2019['ccUnnormalized']['map']['value'] = interpolationRes[AP(rel=2)@10]
        bestHyperparams2019['ccUnnormalized']['map']['alpha'] = alpha
    
    if fscoreInterpolationRes > bestHyperparams2019['ccUnnormalized']['fscore']['value']:
        bestHyperparams2019['ccUnnormalized']['fscore']['value'] = fscoreInterpolationRes
        bestHyperparams2019['ccUnnormalized']['fscore']['alpha'] = alpha
    
    if interpolationMinMax[P(rel=2)@10] > bestHyperparams2019['ccNormalized']['precision']['value']:
        bestHyperparams2019['ccNormalized']['precision']['value'] = interpolationMinMax[P(rel=2)@10]
        bestHyperparams2019['ccNormalized']['precision']['alpha'] = alpha

    if interpolationMinMax[R(rel=2)@10] > bestHyperparams2019['ccNormalized']['recall']['value']:
        bestHyperparams2019['ccNormalized']['recall']['value'] = interpolationMinMax[R(rel=2)@10]
        bestHyperparams2019['ccNormalized']['recall']['alpha'] = alpha

    if interpolationMinMax[nDCG@10] > bestHyperparams2019['ccNormalized']['ndcg']['value']:
        bestHyperparams2019['ccNormalized']['ndcg']['value'] = interpolationMinMax[nDCG@10]
        bestHyperparams2019['ccNormalized']['ndcg']['alpha'] = alpha

    if interpolationMinMax[RR(rel=2)@10] > bestHyperparams2019['ccNormalized']['rr']['value']:
        bestHyperparams2019['ccNormalized']['rr']['value'] = interpolationMinMax[RR(rel=2)@10]
        bestHyperparams2019['ccNormalized']['rr']['alpha'] = alpha
    
    if fscoreInterpolationMinMax > bestHyperparams2019['ccNormalized']['fscore']['value']:
        bestHyperparams2019['ccNormalized']['fscore']['value'] = fscoreInterpolationMinMax
        bestHyperparams2019['ccNormalized']['fscore']['alpha'] = alpha
    
    if interpolationMinMax[AP(rel=2)@10] > bestHyperparams2019['ccNormalized']['map']['value']:
        bestHyperparams2019['ccNormalized']['map']['value'] = interpolationMinMax[AP(rel=2)@10]
        bestHyperparams2019['ccNormalized']['map']['alpha'] = alpha


loaded 200 queries


  0%|          | 0/11 [00:00<?, ?it/s]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 44.37it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 61.58it/s]


sparse_ranking_2019 None


  9%|▉         | 1/11 [00:04<00:48,  4.81s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 62.15it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 55.65it/s]


sparse_ranking_2019 None


 18%|█▊        | 2/11 [00:09<00:41,  4.64s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 60.72it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 52.33it/s]


sparse_ranking_2019 None


 27%|██▋       | 3/11 [00:13<00:37,  4.63s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 55.19it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 63.25it/s]


sparse_ranking_2019 None


 36%|███▋      | 4/11 [00:18<00:31,  4.57s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 54.38it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 49.16it/s]


sparse_ranking_2019 None


 45%|████▌     | 5/11 [00:23<00:27,  4.64s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 53.27it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 64.71it/s]


sparse_ranking_2019 None


 55%|█████▍    | 6/11 [00:27<00:23,  4.60s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 56.65it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:01<00:00, 41.54it/s]


sparse_ranking_2019 None


 64%|██████▎   | 7/11 [00:32<00:18,  4.66s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 52.84it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 54.69it/s]


sparse_ranking_2019 None


 73%|███████▎  | 8/11 [00:37<00:14,  4.67s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 51.29it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 62.01it/s]


sparse_ranking_2019 None


 82%|████████▏ | 9/11 [00:41<00:09,  4.66s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 67.34it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 54.16it/s]


sparse_ranking_2019 None


 91%|█████████ | 10/11 [00:46<00:04,  4.63s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 60.90it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:01<00:00, 35.03it/s]


sparse_ranking_2019 None


100%|██████████| 11/11 [00:51<00:00,  4.68s/it]


In [26]:
print(bestHyperparams2019['ccNormalized'])
print(bestHyperparams2019['ccUnnormalized'])

{'precision': {'value': 0.6348837209302324, 'alpha': 0.2}, 'recall': {'value': 0.2719114158943346, 'alpha': 0.1}, 'fscore': {'value': 0.38033293287532133, 'alpha': 0.1}, 'rr': {'value': 0.9085271317829459, 'alpha': 0.3}, 'ndcg': {'value': 0.7229942609361282, 'alpha': 0.2}, 'map': {'value': 0.2130699577564767, 'alpha': 0.2}}
{'precision': {'value': 0.6279069767441859, 'alpha': 0.1}, 'recall': {'value': 0.27300662836997697, 'alpha': 0.2}, 'fscore': {'value': 0.3796945188026042, 'alpha': 0.2}, 'rr': {'value': 0.901937984496124, 'alpha': 0.2}, 'ndcg': {'value': 0.7165204423861107, 'alpha': 0.1}, 'map': {'value': 0.21189662633253065, 'alpha': 0.1}}


In [None]:
with open(
    "../testing/msmarco-test2019-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")

# define dictionary to keep track of best results for tested hyperparameters
bestHyperparams2019_rrf = {
    'rrf': {'precision': {'value': -1, 'eta1': 0, 'eta2': 0}, 
            'recall': {'value': -1, 'eta1': 0, 'eta2': 0}, 
            'fscore': {'value': -1, 'eta1': 0, 'eta2': 0}, 
            'rr': {'value': -1, 'eta1': 0, 'eta2': 0}, 
            'ndcg': {'value': -1, 'eta1': 0, 'eta2': 0}, 
            'map': {'value': -1, 'eta1': 0, 'eta2': 0}},
}

explored = set()

for i in tqdm(range(len(etas))):
    for j in range(len(etas)):
        if (etas[i],etas[j]) in explored:
            continue
        else:
            explored.add((etas[i],etas[j]))
            eta = etas[i]

        result2 = in_memory_index.get_scores_rrf(
            sparse_ranking_2019,
            queries,
            alpha=alpha,
            cutoff=None,
            early_stopping=False,
            eta = etas[i],
            eta2 = etas[j],
        )

        qrels = list(read_trec_qrels("../testing/2019qrels-pass.txt"))

        rrfRes = calc_aggregate([nDCG@10, RR(rel=2)@10,P(rel=2)@10,R(rel=2)@10,AP(rel=2)@10], qrels, result2[alpha].run)
        fscoreRRRes = (2*rrfRes[P(rel=2)@10]*rrfRes[R(rel=2)@10])/(rrfRes[P(rel=2)@10]+rrfRes[R(rel=2)@10])


        if rrfRes[P(rel=2)@10] > bestHyperparams2019_rrf['rrf']['precision']['value']:
            bestHyperparams2019_rrf['rrf']['precision']['value'] = rrfRes[P(rel=2)@10]
            bestHyperparams2019_rrf['rrf']['precision']['eta1'] = etas[i]
            bestHyperparams2019_rrf['rrf']['precision']['eta2'] = etas[j]


        if rrfRes[R(rel=2)@10] > bestHyperparams2019_rrf['rrf']['recall']['value']:
            bestHyperparams2019_rrf['rrf']['recall']['value'] = rrfRes[R(rel=2)@10]
            bestHyperparams2019_rrf['rrf']['recall']['eta1'] = etas[i]
            bestHyperparams2019_rrf['rrf']['recall']['eta2'] = etas[j]


        if rrfRes[nDCG@10] > bestHyperparams2019_rrf['rrf']['ndcg']['value']:
            bestHyperparams2019_rrf['rrf']['ndcg']['value'] = rrfRes[nDCG@10]
            bestHyperparams2019_rrf['rrf']['ndcg']['eta1'] = etas[i]
            bestHyperparams2019_rrf['rrf']['ndcg']['eta2'] = etas[j]

        if rrfRes[RR(rel=2)@10] > bestHyperparams2019_rrf['rrf']['rr']['value']:
            bestHyperparams2019_rrf['rrf']['rr']['value'] = rrfRes[RR(rel=2)@10]
            bestHyperparams2019_rrf['rrf']['rr']['eta1'] = etas[i]
            bestHyperparams2019_rrf['rrf']['rr']['eta2'] = etas[j]

        if fscoreRRRes > bestHyperparams2019_rrf['rrf']['fscore']['value']:
            bestHyperparams2019_rrf['rrf']['fscore']['value'] = fscoreRRRes
            bestHyperparams2019_rrf['rrf']['fscore']['eta1'] = etas[i]
            bestHyperparams2019_rrf['rrf']['fscore']['eta2'] = etas[j]

        if rrfRes[AP(rel=2)@10] > bestHyperparams2019_rrf['rrf']['map']['value']:
            bestHyperparams2019_rrf['rrf']['map']['value'] = rrfRes[AP(rel=2)@10]
            bestHyperparams2019_rrf['rrf']['map']['eta1'] = etas[i]
            bestHyperparams2019_rrf['rrf']['map']['eta2'] = etas[j]

loaded 200 queries


  0%|          | 0/11 [00:00<?, ?it/s]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 52.52it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 79.33it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 76.34it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 58.37it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 59.35it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 87.86it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 83.16it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 68.02it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 63.17it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 71.69it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.03it/s]
  9%|▉         | 1/11 [03:43<37:10, 223.01s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 81.74it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 83.18it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 58.32it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 72.24it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 69.26it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 87.99it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 75.37it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 86.58it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 77.17it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 82.39it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 74.79it/s]
 18%|█▊        | 2/11 [07:24<33:18, 222.03s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 66.68it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 61.53it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 86.24it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 74.96it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 76.97it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.24it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 85.33it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 72.18it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 83.98it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 76.20it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 82.58it/s]
 27%|██▋       | 3/11 [11:07<29:38, 222.32s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 83.66it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 74.17it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 61.09it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 89.20it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 70.71it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 67.68it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 62.15it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 75.77it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 68.19it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 55.50it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 70.40it/s]
 36%|███▋      | 4/11 [14:49<25:57, 222.44s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 82.03it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 69.50it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 75.33it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 54.86it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 77.44it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 79.57it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 80.14it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 68.32it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.27it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 65.28it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.28it/s]
 45%|████▌     | 5/11 [18:32<22:14, 222.41s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.28it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 68.89it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 44.60it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 68.90it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 68.45it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 89.13it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 75.40it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 81.16it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 77.62it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 77.54it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 67.68it/s]
 55%|█████▍    | 6/11 [22:15<18:33, 222.66s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 77.18it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 88.05it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.21it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 68.82it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 75.38it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 84.69it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 70.34it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 69.57it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.76it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 62.41it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 79.77it/s]
 64%|██████▎   | 7/11 [25:56<14:49, 222.27s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 80.77it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.44it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 75.21it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 77.50it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 66.26it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 83.44it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 63.70it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 71.69it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 62.75it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 71.50it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 76.55it/s]
 73%|███████▎  | 8/11 [29:37<11:05, 221.95s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.72it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 81.69it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 87.02it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 90.90it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 90.06it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 67.32it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 65.44it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 90.25it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 96.68it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 77.02it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 78.61it/s]
 82%|████████▏ | 9/11 [33:18<07:22, 221.50s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 75.93it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 87.61it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:01<00:00, 31.73it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:01<00:00, 36.05it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 71.85it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 48.99it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 55.71it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:01<00:00, 40.51it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 61.55it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 74.76it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 46.73it/s]
 91%|█████████ | 10/11 [37:09<03:44, 224.60s/it]

Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 55.93it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:01<00:00, 40.94it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 48.25it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 69.59it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 47.97it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 79.70it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 45.87it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 63.85it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 47.05it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 74.25it/s]


Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 60.93it/s]
100%|██████████| 11/11 [41:02<00:00, 223.87s/it]


In [18]:
print(bestHyperparams2019_rrf['rrf'])

{'precision': {'value': 0.6348837209302325, 'eta1': 40, 'eta2': 10}, 'recall': {'value': 0.26976473421199393, 'eta1': 40, 'eta2': 10}, 'fscore': {'value': 0.3786426368358511, 'eta1': 40, 'eta2': 10}, 'rr': {'value': 0.8976744186046514, 'eta1': 20, 'eta2': 20}, 'ndcg': {'value': 0.7151248552748896, 'eta1': 80, 'eta2': 40}, 'map': {'value': 0.20941181623477217, 'eta1': 100, 'eta2': 10}}


In [27]:
with open(
    "../testing/msmarco-test2019-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")


for key in bestHyperparams2019.keys():
    print('\n'+'TESTING RESULTS FOR BEST VALIDATION METRICS FOR {}'.format('normalized' if key == 'ccNormalized' else 'unnormalized'))
    for metric in bestHyperparams2019[key].keys():
        # print(f"best {metric} for {key} is {bestHyperparams2019[key][metric]['value']} with alpha {bestHyperparams2019[key][metric]['alpha']}")
    
        alpha = bestHyperparams2019[key][metric]['alpha']

        result = in_memory_index.get_scores(
            sparse_ranking_2019,
            queries,
            alpha=alpha*1.0,
            cutoff=None,
            early_stopping=False,
            normalise= False if key == 'ccUnnormalized' else True
        )
        # result3 = in_memory_index.get_scores(
        #     sparse_ranking_2020,
        #     queries,
        #     alpha=alpha,
        #     cutoff=None,
        #     early_stopping=False,
        #     normalise=True
        # )
        qrels = list(read_trec_qrels("../testing/2019qrels-pass.txt"))
        print('using best alpha={} for {}'.format(alpha, metric))
        interpolation2 = calc_aggregate([nDCG@10, RR(rel=2)@10,P(rel=2)@10,R(rel=2)@10,AP(rel=2)@10], qrels, result[alpha].run)
        fscoreInterpolation2 = (2*interpolation2[P(rel=2)@10]*interpolation2[R(rel=2)@10])/(interpolation2[P(rel=2)@10]+interpolation2[R(rel=2)@10])
        print(
            f"Interpolation",
            interpolation2,
            'f-score:{}'.format(fscoreInterpolation2)
        )

loaded 200 queries

TESTING RESULTS FOR BEST VALIDATION METRICS FOR unnormalized
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 52.26it/s]


sparse_ranking_2019 None
using best alpha=0.1 for precision
Interpolation {R(rel=2)@10: 0.2652624093046782, RR(rel=2)@10: 0.862015503875969, nDCG@10: 0.7165204423861107, P(rel=2)@10: 0.6279069767441859, AP(rel=2)@10: 0.21189662633253065} f-score:0.3729642329260645
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 56.27it/s]


sparse_ranking_2019 None
using best alpha=0.2 for recall
Interpolation {R(rel=2)@10: 0.27300662836997697, RR(rel=2)@10: 0.901937984496124, nDCG@10: 0.7158066715626032, P(rel=2)@10: 0.6232558139534883, AP(rel=2)@10: 0.20869274688772674} f-score:0.3796945188026042
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 52.79it/s]


sparse_ranking_2019 None
using best alpha=0.2 for fscore
Interpolation {R(rel=2)@10: 0.27300662836997697, RR(rel=2)@10: 0.901937984496124, nDCG@10: 0.7158066715626032, P(rel=2)@10: 0.6232558139534883, AP(rel=2)@10: 0.20869274688772674} f-score:0.3796945188026042
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 50.08it/s]


sparse_ranking_2019 None
using best alpha=0.2 for rr
Interpolation {R(rel=2)@10: 0.27300662836997697, RR(rel=2)@10: 0.901937984496124, nDCG@10: 0.7158066715626032, P(rel=2)@10: 0.6232558139534883, AP(rel=2)@10: 0.20869274688772674} f-score:0.3796945188026042
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 55.72it/s]


sparse_ranking_2019 None
using best alpha=0.1 for ndcg
Interpolation {R(rel=2)@10: 0.2652624093046782, RR(rel=2)@10: 0.862015503875969, nDCG@10: 0.7165204423861107, P(rel=2)@10: 0.6279069767441859, AP(rel=2)@10: 0.21189662633253065} f-score:0.3729642329260645
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 57.80it/s]


sparse_ranking_2019 None
using best alpha=0.1 for map
Interpolation {R(rel=2)@10: 0.2652624093046782, RR(rel=2)@10: 0.862015503875969, nDCG@10: 0.7165204423861107, P(rel=2)@10: 0.6279069767441859, AP(rel=2)@10: 0.21189662633253065} f-score:0.3729642329260645

TESTING RESULTS FOR BEST VALIDATION METRICS FOR normalized
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 61.56it/s]


sparse_ranking_2019 None
using best alpha=0.2 for precision
Interpolation {R(rel=2)@10: 0.27028960969387333, RR(rel=2)@10: 0.8949612403100775, nDCG@10: 0.7229942609361282, P(rel=2)@10: 0.6348837209302324, AP(rel=2)@10: 0.2130699577564767} f-score:0.37915936611368956
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 56.62it/s]


sparse_ranking_2019 None
using best alpha=0.1 for recall
Interpolation {R(rel=2)@10: 0.2719114158943346, RR(rel=2)@10: 0.843798449612403, nDCG@10: 0.717513190926385, P(rel=2)@10: 0.6325581395348836, AP(rel=2)@10: 0.21021356927994342} f-score:0.38033293287532133
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 61.76it/s]


sparse_ranking_2019 None
using best alpha=0.1 for fscore
Interpolation {R(rel=2)@10: 0.2719114158943346, RR(rel=2)@10: 0.843798449612403, nDCG@10: 0.717513190926385, P(rel=2)@10: 0.6325581395348836, AP(rel=2)@10: 0.21021356927994342} f-score:0.38033293287532133
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 66.09it/s]


sparse_ranking_2019 None
using best alpha=0.3 for rr
Interpolation {R(rel=2)@10: 0.2646355873798301, RR(rel=2)@10: 0.9085271317829459, nDCG@10: 0.7068684205361265, P(rel=2)@10: 0.6, AP(rel=2)@10: 0.20725967103552692} f-score:0.36727924398546924
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 55.66it/s]


sparse_ranking_2019 None
using best alpha=0.2 for ndcg
Interpolation {R(rel=2)@10: 0.27028960969387333, RR(rel=2)@10: 0.8949612403100775, nDCG@10: 0.7229942609361282, P(rel=2)@10: 0.6348837209302324, AP(rel=2)@10: 0.2130699577564767} f-score:0.37915936611368956
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 55.44it/s]


sparse_ranking_2019 None
using best alpha=0.2 for map
Interpolation {R(rel=2)@10: 0.27028960969387333, RR(rel=2)@10: 0.8949612403100775, nDCG@10: 0.7229942609361282, P(rel=2)@10: 0.6348837209302324, AP(rel=2)@10: 0.2130699577564767} f-score:0.37915936611368956


In [28]:
with open(
    "../testing/msmarco-test2020-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")


for key in bestHyperparams2019.keys():
    print('\n'+'TESTING RESULTS FOR BEST VALIDATION METRICS FOR {}'.format('normalized' if key == 'ccNormalized' else 'unnormalized'))
    for metric in bestHyperparams2019[key].keys():
        # print(f"best {metric} for {key} is {bestHyperparams2019[key][metric]['value']} with alpha {bestHyperparams2019[key][metric]['alpha']}")
    
        alpha = bestHyperparams2019[key][metric]['alpha']

        result = in_memory_index.get_scores(
            sparse_ranking_2020,
            queries,
            alpha=alpha*1.0,
            cutoff=None,
            early_stopping=False,
            normalise= False if key == 'ccUnnormalized' else True
        )
        # result3 = in_memory_index.get_scores(
        #     sparse_ranking_2020,
        #     queries,
        #     alpha=alpha,
        #     cutoff=None,
        #     early_stopping=False,
        #     normalise=True
        # )
        qrels = list(read_trec_qrels("../testing/2020qrels-pass.txt"))
        print('using best alpha for {}'.format(metric))
        interpolation2 = calc_aggregate([nDCG@10, RR(rel=2)@10,P(rel=2)@10,R(rel=2)@10,AP(rel=2)@10], qrels, result[alpha].run)
        fscoreInterpolation2 = (2*interpolation2[P(rel=2)@10]*interpolation2[R(rel=2)@10])/(interpolation2[P(rel=2)@10]+interpolation2[R(rel=2)@10])
        print(
            f"Interpolation",
            interpolation2,
            'f-score:{}'.format(fscoreInterpolation2)
        )

loaded 200 queries

TESTING RESULTS FOR BEST VALIDATION METRICS FOR unnormalized
Encoding queries for interpolation...


100%|██████████| 54/54 [00:01<00:00, 45.47it/s]


sparse_ranking_2020 None
using best alpha for precision
Interpolation {R(rel=2)@10: 0.391207940493183, RR(rel=2)@10: 0.814814814814815, nDCG@10: 0.6939603772252907, P(rel=2)@10: 0.5333333333333333, AP(rel=2)@10: 0.2980423955640365} f-score:0.4513465019601671
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 57.84it/s]


sparse_ranking_2020 None
using best alpha for recall
Interpolation {R(rel=2)@10: 0.37349994843573076, RR(rel=2)@10: 0.797530864197531, nDCG@10: 0.6714497182268858, P(rel=2)@10: 0.5203703703703701, AP(rel=2)@10: 0.2876461836252215} f-score:0.43486913573863895
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 59.40it/s]


sparse_ranking_2020 None
using best alpha for fscore
Interpolation {R(rel=2)@10: 0.37349994843573076, RR(rel=2)@10: 0.797530864197531, nDCG@10: 0.6714497182268858, P(rel=2)@10: 0.5203703703703701, AP(rel=2)@10: 0.2876461836252215} f-score:0.43486913573863895
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 57.87it/s]


sparse_ranking_2020 None
using best alpha for rr
Interpolation {R(rel=2)@10: 0.37349994843573076, RR(rel=2)@10: 0.797530864197531, nDCG@10: 0.6714497182268858, P(rel=2)@10: 0.5203703703703701, AP(rel=2)@10: 0.2876461836252215} f-score:0.43486913573863895
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 58.99it/s]


sparse_ranking_2020 None
using best alpha for ndcg
Interpolation {R(rel=2)@10: 0.391207940493183, RR(rel=2)@10: 0.814814814814815, nDCG@10: 0.6939603772252907, P(rel=2)@10: 0.5333333333333333, AP(rel=2)@10: 0.2980423955640365} f-score:0.4513465019601671
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 65.64it/s]


sparse_ranking_2020 None
using best alpha for map
Interpolation {R(rel=2)@10: 0.391207940493183, RR(rel=2)@10: 0.814814814814815, nDCG@10: 0.6939603772252907, P(rel=2)@10: 0.5333333333333333, AP(rel=2)@10: 0.2980423955640365} f-score:0.4513465019601671

TESTING RESULTS FOR BEST VALIDATION METRICS FOR normalized
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 61.90it/s]


sparse_ranking_2020 None
using best alpha for precision
Interpolation {R(rel=2)@10: 0.3747600495688947, RR(rel=2)@10: 0.8021604938271607, nDCG@10: 0.6727571514417632, P(rel=2)@10: 0.5203703703703704, AP(rel=2)@10: 0.2869471420830232} f-score:0.43572203882293675
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 57.73it/s]


sparse_ranking_2020 None
using best alpha for recall
Interpolation {R(rel=2)@10: 0.3833530296325536, RR(rel=2)@10: 0.8271604938271606, nDCG@10: 0.6920854730231987, P(rel=2)@10: 0.525925925925926, AP(rel=2)@10: 0.2941106059928702} f-score:0.4434619229522968
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 63.51it/s]


sparse_ranking_2020 None
using best alpha for fscore
Interpolation {R(rel=2)@10: 0.3833530296325536, RR(rel=2)@10: 0.8271604938271606, nDCG@10: 0.6920854730231987, P(rel=2)@10: 0.525925925925926, AP(rel=2)@10: 0.2941106059928702} f-score:0.4434619229522968
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 67.26it/s]


sparse_ranking_2020 None
using best alpha for rr
Interpolation {R(rel=2)@10: 0.36361577768128583, RR(rel=2)@10: 0.7949294532627867, nDCG@10: 0.652755006564619, P(rel=2)@10: 0.4999999999999999, AP(rel=2)@10: 0.2759705606429033} f-score:0.4210388312468706
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 70.04it/s]


sparse_ranking_2020 None
using best alpha for ndcg
Interpolation {R(rel=2)@10: 0.3747600495688947, RR(rel=2)@10: 0.8021604938271607, nDCG@10: 0.6727571514417632, P(rel=2)@10: 0.5203703703703704, AP(rel=2)@10: 0.2869471420830232} f-score:0.43572203882293675
Encoding queries for interpolation...


100%|██████████| 54/54 [00:00<00:00, 62.02it/s]


sparse_ranking_2020 None
using best alpha for map
Interpolation {R(rel=2)@10: 0.3747600495688947, RR(rel=2)@10: 0.8021604938271607, nDCG@10: 0.6727571514417632, P(rel=2)@10: 0.5203703703703704, AP(rel=2)@10: 0.2869471420830232} f-score:0.43572203882293675


In [31]:
with open(
    "../testing/msmarco-test2020-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")


for key in bestHyperparams2019_rrf.keys():
    print('\n'+'TESTING RESULTS FOR BEST VALIDATION METRICS FOR {}'.format('normalized' if key == 'ccNormalized' else 'unnormalized'))
    for metric in bestHyperparams2019_rrf[key].keys():
        # print(f"best {metric} for {key} is {bestHyperparams2019[key][metric]['value']} with alpha {bestHyperparams2019[key][metric]['alpha']}")
        eta1 = bestHyperparams2019_rrf[key][metric]['eta1']
        eta2 = bestHyperparams2019_rrf[key][metric]['eta2']
        alpha = 0.0
        result = in_memory_index.get_scores_rrf(
            sparse_ranking_2020,
            queries,
            alpha=alpha,
            cutoff=None,
            early_stopping=False,
            eta = eta1,
            eta2 = eta2
        )
        qrels = list(read_trec_qrels("../testing/2020qrels-pass.txt"))
        print('using best eta1={} and eta2={} for {}'.format(eta1, eta2, metric))
        interpolation2 = calc_aggregate([nDCG@10, RR(rel=2)@10,P(rel=2)@10,R(rel=2)@10,AP(rel=2)@10], qrels, result[alpha].run)
        fscoreInterpolation2 = (2*interpolation2[P(rel=2)@10]*interpolation2[R(rel=2)@10])/(interpolation2[P(rel=2)@10]+interpolation2[R(rel=2)@10])
        print(
            f"RRF",
            interpolation2,
            'f-score:{}'.format(fscoreInterpolation2)
        )

loaded 200 queries

TESTING RESULTS FOR BEST VALIDATION METRICS FOR unnormalized
Encoding queries for RRF...


100%|██████████| 54/54 [00:01<00:00, 51.73it/s]


using best alpha for precision
RRF {R(rel=2)@10: 0.3702476168178879, RR(rel=2)@10: 0.79320987654321, nDCG@10: 0.6793628703705968, P(rel=2)@10: 0.5037037037037037, AP(rel=2)@10: 0.28518417374234645} f-score:0.42678600397865596
Encoding queries for RRF...


100%|██████████| 54/54 [00:00<00:00, 66.42it/s]


using best alpha for recall
RRF {R(rel=2)@10: 0.3702476168178879, RR(rel=2)@10: 0.79320987654321, nDCG@10: 0.6793628703705968, P(rel=2)@10: 0.5037037037037037, AP(rel=2)@10: 0.28518417374234645} f-score:0.42678600397865596
Encoding queries for RRF...


100%|██████████| 54/54 [00:00<00:00, 61.62it/s]


using best alpha for fscore
RRF {R(rel=2)@10: 0.3702476168178879, RR(rel=2)@10: 0.79320987654321, nDCG@10: 0.6793628703705968, P(rel=2)@10: 0.5037037037037037, AP(rel=2)@10: 0.28518417374234645} f-score:0.42678600397865596
Encoding queries for RRF...


100%|██████████| 54/54 [00:00<00:00, 63.22it/s]


KeyboardInterrupt: 

In [None]:
with open(
    "../testing/msmarco-test2020-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")

for key in bestHyperparams2019_rrf.keys():
    result2 = in_memory_index.get_scores_rrf(
        sparse_ranking_2020,
        queries,
        alpha=alpha,
        cutoff=None,
        early_stopping=False,
        eta = eta,
        eta2 = eta2,
    )
    qrels = list(read_trec_qrels("../testing/2020qrels-pass.txt"))
    rrf2 = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, result2[alpha].run)
    fscoreRRF2 = (2*rrf2[P@10]*rrf2[R@10])/(rrf2[P@10]+rrf2[R@10])
    print(
        f"RRF",
        rrf2,
        'f-score:{}'.format(fscoreRRF2)
    )

In [None]:
with open(
    "../testing/msmarco-test2019-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")

# define dictionary to keep track of best results for tested hyperparameters
bestHyperparams2019 = {
    'ccUnnormalized': {'precision': {'value': -1, 'alpha': 0}, 'recall': {'value': -1, 'alpha': 0}, 'fscore': {'value': -1, 'alpha': 0}, 'rr': {'value': -1, 'alpha': 0}, 'ndcg': {'value': -1, 'alpha': 0}},
    'rrf': {'precision': {'value': -1, 'eta': 0}, 'recall': {'value': -1, 'eta': 0}, 'fscore': {'value': -1, 'eta': 0}, 'rr': {'value': -1, 'eta': 0}, 'ndcg': {'value': -1, 'eta': 0}},
    'ccNormalized': {'precision': {'value': -1, 'alpha': 0}, 'recall': {'value': -1, 'alpha': 0}, 'fscore': {'value': -1, 'alpha': 0}, 'rr': {'value': -1, 'alpha': 0}, 'ndcg': {'value': -1, 'alpha': 0}},
}


for i in tqdm(range(len(etas))):
    if alphas[i] == -1:
        alpha = 1
    else:
        alpha = alphas[i]
    eta = etas[i]

    result = in_memory_index.get_scores(
        sparse_ranking_2019,
        queries,
        alpha=alpha,
        cutoff=None,
        early_stopping=False,
        normalise=False
    )
    result2 = in_memory_index.get_scores_rrf(
        sparse_ranking_2019,
        queries,
        alpha=alpha,
        cutoff=None,
        early_stopping=False,
        eta = eta
    )
    result3 = in_memory_index.get_scores(
        sparse_ranking_2019,
        queries,
        alpha=alpha,
        cutoff=None,
        early_stopping=False,
        normalise=True
    )
    qrels = list(read_trec_qrels("../testing/2019qrels-pass.txt"))
    bm25Res = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, sparse_ranking_2019.run)
    fscoreBM25Res = (2*bm25Res[P@10]*bm25Res[R@10])/(bm25Res[P@10]+bm25Res[R@10])
    # print(list(bm25Res.keys())[0])
    # print(
    #     "BM25",
    #     bm25Res,
    #     'f-score:{}'.format(fscoreBM25Res)
    # )

    interpolationRes = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, result[alpha].run)
    fscoreInterpolationRes = (2*interpolationRes[P@10]*interpolationRes[R@10])/(interpolationRes[P@10]+interpolationRes[R@10])
    # print(
    #     f"Interpolation",
    #     interpolationRes,
    #     'f-score:{}'.format(fscoreInterpolationRes)
    # )

    rrfRes = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, result2[alpha].run)
    fscoreRRRes = (2*rrfRes[P@10]*rrfRes[R@10])/(rrfRes[P@10]+rrfRes[R@10])
    # print(
    #     f"RRF",
    #     rrfRes,
    #     'f-score:{}'.format(fscoreRRRes)
    # )

    interpolationMinMax = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, result3[alpha].run)
    fscoreInterpolationMinMax = (2*interpolationMinMax[P@10]*interpolationMinMax[R@10])/(interpolationMinMax[P@10]+interpolationMinMax[R@10])
    # print(
    #     f"Interpolation w/ minimax",
    #     interpolationMinMax,
    #     'f-score:{}'.format(fscoreInterpolationMinMax)
    # )

    # update dictionary of best parameters per method
    if interpolationRes[P@10] > bestHyperparams2019['ccUnnormalized']['precision']['value']:
        bestHyperparams2019['ccUnnormalized']['precision']['value'] = interpolationRes[P@10]
        bestHyperparams2019['ccUnnormalized']['precision']['alpha'] = alpha

    if interpolationRes[R@10] > bestHyperparams2019['ccUnnormalized']['recall']['value']:
        bestHyperparams2019['ccUnnormalized']['recall']['value'] = interpolationRes[R@10]
        bestHyperparams2019['ccUnnormalized']['recall']['alpha'] = alpha

    # try:
    #     if interpolationRes[nDCG@10] > bestHyperparams2019['ccUnnormalized']['ndcg']['value']:
    #         bestHyperparams2019['ccUnnormalized']['ndcg']['value'] = interpolationRes[nDCG@10]
    #         bestHyperparams2019['ccUnnormalized']['recall']['alpha'] = alpha
    # except:
    #     print(interpolationRes)
    #     print(bestHyperparams2019['ccNormalized']['ndcg']['value'])
    if interpolationRes[nDCG@10] > bestHyperparams2019['ccUnnormalized']['ndcg']['value']:
            bestHyperparams2019['ccUnnormalized']['ndcg']['value'] = interpolationRes[nDCG@10]
            bestHyperparams2019['ccUnnormalized']['recall']['alpha'] = alpha

    if interpolationRes[RR(rel=2)@10] > bestHyperparams2019['ccUnnormalized']['rr']['value']:
        bestHyperparams2019['ccUnnormalized']['rr']['value'] = interpolationRes[RR(rel=2)@10]
        bestHyperparams2019['ccUnnormalized']['recall']['alpha'] = alpha
    
    if fscoreInterpolationRes > bestHyperparams2019['ccUnnormalized']['fscore']['value']:
        bestHyperparams2019['ccUnnormalized']['fscore']['value'] = fscoreInterpolationRes
        bestHyperparams2019['ccUnnormalized']['recall']['alpha'] = alpha

    if rrfRes[P@10] > bestHyperparams2019['rrf']['precision']['value']:
        bestHyperparams2019['rrf']['precision']['value'] = rrfRes[P@10]
        bestHyperparams2019['rrf']['precision']['eta'] = eta


    if rrfRes[R@10] > bestHyperparams2019['rrf']['recall']['value']:
        bestHyperparams2019['rrf']['recall']['value'] = rrfRes[R@10]
        bestHyperparams2019['rrf']['recall']['eta'] = eta


    if rrfRes[nDCG@10] > bestHyperparams2019['rrf']['ndcg']['value']:
        bestHyperparams2019['rrf']['ndcg']['value'] = rrfRes[nDCG@10]
        bestHyperparams2019['rrf']['ndcg']['eta'] = eta

    if rrfRes[RR(rel=2)@10] > bestHyperparams2019['rrf']['rr']['value']:
        bestHyperparams2019['rrf']['rr']['value'] = rrfRes[RR(rel=2)@10]
        bestHyperparams2019['rrf']['rr']['eta'] = eta
    
    if fscoreRRRes > bestHyperparams2019['rrf']['fscore']['value']:
        bestHyperparams2019['rrf']['fscore']['value'] = fscoreRRRes
        bestHyperparams2019['rrf']['fscore']['eta'] = eta
    
    if interpolationMinMax[P@10] > bestHyperparams2019['ccNormalized']['precision']['value']:
        bestHyperparams2019['ccNormalized']['precision']['value'] = interpolationMinMax[P@10]
        bestHyperparams2019['ccNormalized']['precision']['alpha'] = alpha

    if interpolationMinMax[R@10] > bestHyperparams2019['ccNormalized']['recall']['value']:
        bestHyperparams2019['ccNormalized']['recall']['value'] = interpolationMinMax[R@10]
        bestHyperparams2019['ccNormalized']['recall']['alpha'] = alpha

    if interpolationMinMax[nDCG@10] > bestHyperparams2019['ccNormalized']['ndcg']['value']:
        bestHyperparams2019['ccNormalized']['ndcg']['value'] = interpolationMinMax[nDCG@10]
        bestHyperparams2019['ccNormalized']['ndcg']['alpha'] = alpha

    if interpolationMinMax[RR(rel=2)@10] > bestHyperparams2019['ccNormalized']['rr']['value']:
        bestHyperparams2019['ccNormalized']['rr']['value'] = interpolationMinMax[RR(rel=2)@10]
        bestHyperparams2019['ccNormalized']['rr']['alpha'] = alpha
    
    if fscoreInterpolationMinMax > bestHyperparams2019['ccNormalized']['fscore']['value']:
        bestHyperparams2019['ccNormalized']['fscore']['value'] = fscoreInterpolationMinMax
        bestHyperparams2019['ccNormalized']['fscore']['alpha'] = alpha

loaded 200 queries


  0%|          | 0/11 [00:00<?, ?it/s]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:01<00:00, 35.89it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 64.91it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 47.91it/s]


sparse_ranking_2019 None


  9%|▉         | 1/11 [00:26<04:28, 26.83s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 45.29it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 50.99it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 43.88it/s]


sparse_ranking_2019 None


 18%|█▊        | 2/11 [00:53<03:59, 26.57s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 61.01it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 53.63it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 67.74it/s]


sparse_ranking_2019 None


 27%|██▋       | 3/11 [01:18<03:29, 26.18s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 58.27it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 53.79it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 51.93it/s]


sparse_ranking_2019 None


 36%|███▋      | 4/11 [01:44<03:02, 26.06s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 48.65it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 55.87it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 50.32it/s]


sparse_ranking_2019 None


 45%|████▌     | 5/11 [02:11<02:36, 26.11s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 50.21it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 49.76it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 62.78it/s]


sparse_ranking_2019 None


 55%|█████▍    | 6/11 [02:36<02:10, 26.02s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 44.64it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 47.38it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 45.37it/s]


sparse_ranking_2019 None


 64%|██████▎   | 7/11 [03:03<01:44, 26.19s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 45.84it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 53.08it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:01<00:00, 26.74it/s]


sparse_ranking_2019 None


 73%|███████▎  | 8/11 [03:31<01:19, 26.65s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 60.08it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 57.98it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:01<00:00, 35.19it/s]


sparse_ranking_2019 None


 82%|████████▏ | 9/11 [03:57<00:53, 26.56s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 53.31it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 45.42it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:01<00:00, 39.45it/s]


sparse_ranking_2019 None


 91%|█████████ | 10/11 [04:24<00:26, 26.75s/it]

Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 52.89it/s]


sparse_ranking_2019 None
Encoding queries for RRF...


100%|██████████| 43/43 [00:00<00:00, 64.42it/s]


sparse_ranking_2019 None
Encoding queries for interpolation...


100%|██████████| 43/43 [00:00<00:00, 54.53it/s]


sparse_ranking_2019 None


100%|██████████| 11/11 [04:50<00:00, 26.39s/it]


In [None]:
print(bestHyperparams2019['ccNormalized'])
print(bestHyperparams2019['ccUnnormalized'])
print(bestHyperparams2019['rrf'])

{'precision': {'value': 0.8162790697674417, 'alpha': 0.2}, 'recall': {'value': 0.1738814669940891, 'alpha': 0.3}, 'fscore': {'value': 0.2862453288247246, 'alpha': 0.2}, 'rr': {'value': 0.9085271317829459, 'alpha': 0.3}, 'ndcg': {'value': 0.7229942609361282, 'alpha': 0.2}}
{'precision': {'value': 0.8186046511627906, 'alpha': 0.1}, 'recall': {'value': 0.17310336499562762, 'alpha': 0.2}, 'fscore': {'value': 0.28505912645948106, 'alpha': 0}, 'rr': {'value': 0.901937984496124, 'alpha': 0}, 'ndcg': {'value': 0.7165204423861108, 'alpha': 0}}
{'precision': {'value': 0.786046511627907, 'eta': 30}, 'recall': {'value': 0.16940453859382448, 'eta': 30}, 'fscore': {'value': 0.2787371400862628, 'eta': 30}, 'rr': {'value': 0.8976744186046512, 'eta': 20}, 'ndcg': {'value': 0.6941555201129092, 'eta': 30}}


In [None]:
with open(
    "../../dev/testing/msmarco-test2020-queries.tsv",
    encoding="utf-8",
    newline=""
) as fp:
    queries = {q_id: q for q_id, q in csv.reader(fp, delimiter="\t")}
print(f"loaded {len(queries)} queries")

# define dictionary to keep track of best results for tested hyperparameters
bestHyperparams2020 = {
    'ccUnnormalized': {'precision': -1, 'recall': -1, 'fscore': -1, 'rr': -1, 'ndcg': -1},
    'rrf': {'precision': -1, 'recall': -1, 'fscore': -1, 'rr': -1, 'ndcg': -1},
    'ccNormalized': {'precision': -1, 'recall': -1, 'fscore': -1, 'rr': -1, 'ndcg': -1}
}

alpha = 0.2
eta = 60
# for i in range(len(etas)):
#     if alphas[i] == -1:
#         alpha = 1
#     eta = etas[i]

result = in_memory_index.get_scores(
    sparse_ranking_2020,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    normalise=False
)
result2 = in_memory_index.get_scores_rrf(
    sparse_ranking_2020,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    eta = eta
)
result3 = in_memory_index.get_scores(
    sparse_ranking_2020,
    queries,
    alpha=alpha,
    cutoff=None,
    early_stopping=False,
    normalise=True
)
qrels = list(read_trec_qrels("../../dev/testing/2020qrels-pass.txt"))
bm25Res2 = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, sparse_ranking_2020.run)
fscoreBM25Res2 = (2*bm25Res2[P@10]*bm25Res2[R@10])/(bm25Res2[P@10]+bm25Res2[R@10])
print(
    "BM25",
    bm25Res2,
    'f-score:{}'.format(fscoreBM25Res2)
)


interpolation2 = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, result[alpha].run)
fscoreInterpolation2 = (2*interpolation2[P@10]*interpolation2[R@10])/(interpolation2[P@10]+interpolation2[R@10])
print(
    f"Interpolation",
    interpolation2,
    'f-score:{}'.format(fscoreInterpolation2)
)

rrf2 = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, result2[alpha].run)
fscoreRRF2 = (2*rrf2[P@10]*rrf2[R@10])/(rrf2[P@10]+rrf2[R@10])
print(
    f"RRF",
    rrf2,
    'f-score:{}'.format(fscoreRRF2)
)


interpolationMinmax2 = calc_aggregate([nDCG@10, RR(rel=2)@10,P@10,R@10], qrels, result3[alpha].run)
fscoreInterpolationMinMax2 = (2*interpolationMinmax2[P@10]*interpolationMinmax2[R@10])/(interpolationMinmax2[P@10]+interpolationMinmax2[R@10])
print(
    f"Interpolation w/ minimax",
    interpolationMinmax2,
    'f-score:{}'.format(fscoreInterpolationMinMax2)
)

   # update dictionary of best parameters per method
    # if interpolation2[P@10] > bestHyperparams2020['ccUnnormalized']['precision']:
    #     bestHyperparams2020['ccUnnormalized']['precision'] = interpolation2[P@10]

    # if interpolation2[R@10] > bestHyperparams2020['ccUnnormalized']['recall']:
    #     bestHyperparams2020['ccUnnormalized']['recall'] = interpolation2[R@10]

    # if interpolation2[nDCG@10] > bestHyperparams2020['ccUnnormalized']['ndcg']:
    #     bestHyperparams2020['ccUnnormalized']['ndcg'] = interpolation2[nDCG@10]

    # if interpolation2[RR(rel=2)@10] > bestHyperparams2020['ccUnnormalized']['rr']:
    #     bestHyperparams2020['ccUnnormalized']['rr'] = interpolation2[RR(rel=2)@10]
    
    # if fscoreInterpolation2 > bestHyperparams2020['ccUnnormalized']['fscore']:
    #     bestHyperparams2020['ccUnnormalized']['fscore'] = fscoreInterpolation2

    # if rrf2[P@10] > bestHyperparams2020['rrf']['precision']:
    #     bestHyperparams2020['rrf']['precision'] = rrf2[P@10]

    # if rrf2[R@10] > bestHyperparams2020['rrf']['recall']:
    #     bestHyperparams2020['rrf']['recall'] = rrf2[R@10]

    # if rrf2[nDCG@10] > bestHyperparams2020['rrf']['ndcg']:
    #     bestHyperparams2020['rrf']['ndcg'] = rrf2[nDCG@10]

    # if rrf2[RR(rel=2)@10] > bestHyperparams2020['rrf']['rr']:
    #     bestHyperparams2020['rrf']['rr'] = rrf2[RR(rel=2)@10]
    
    # if fscoreRRF2 > bestHyperparams2020['rrf']['fscore']:
    #     bestHyperparams2020['rrf']['fscore'] = fscoreRRF2
    
    # if interpolationMinmax2[P@10] > bestHyperparams2020['ccNormalized']['precision']:
    #     bestHyperparams2020['ccNormalized']['precision'] = interpolationMinmax2[P@10]

    # if interpolationMinmax2[R@10] > bestHyperparams2020['ccNormalized']['recall']:
    #     bestHyperparams2020['ccNormalized']['recall'] = interpolationMinmax2[R@10]

    # if interpolationMinmax2[nDCG@10] > bestHyperparams2020['ccNormalized']['ndcg']:
    #     bestHyperparams2020['ccNormalized']['ndcg'] = interpolationMinmax2[nDCG@10]

    # if interpolationMinmax2[RR(rel=2)@10] > bestHyperparams2020['ccNormalized']['rr']:
    #     bestHyperparams2020['ccNormalized']['rr'] = interpolationMinmax2[RR(rel=2)@10]
    
    # if fscoreInterpolationMinMax2 > bestHyperparams2020['ccNormalized']['fscore']:
    #     bestHyperparams2020['ccNormalized']['fscore'] = fscoreInterpolationMinMax2

    