In [11]:
import pyterrier as pt
import os
os.environ["JAVA_HOME"] = "/home/andrew/Java"
os.environ["JVM_PATH"] = '/home/andrew/Java/jre/lib/server/libjvm.so'
if not pt.started():
    pt.init()
import json
import pandas as pd
import numpy as np
import itertools
from ir_measures import *
from ir_measures import evaluator
import ir_datasets as irds
from collections import defaultdict

# Fusion

In [146]:
with open('/home/andrew/Documents/Data/paced/scores.json') as f:
    score_dict = json.load(f)

In [168]:
bm25 = pd.read_csv('/home/andrew/Documents/Data/paced/bm25.csv', index_col=False).rename(columns={'qid': 'query_id', 'docid': 'doc_id'})

In [149]:
all_comb = map(lambda x : [x], score_dict.keys())
all_comb.append(list(score_dict.keys()))

In [151]:
DEFAULT = 10000

In [152]:
#score_dict = {k : {_k : defaultdict(lambda : (DEFAULT, DEFAULT), _v) for _k, _v in v.items()} for k, v in score_dict.items()}

In [153]:
def get_fusion_scores(target_sets, qids, C=0):
    records = []
    if len(target_sets) == 1:
        target = target_sets[0]
        for qid in qids:
            for doc_id, (score, rank) in target[qid].items():
                records.append({
                    'query_id': str(qid),
                    'doc_id': str(doc_id),
                    'score': score,
                })
        return pd.DataFrame.from_records(records)
    for qid in qids:
        all_sets = [set(target[qid].keys()) for target in target_sets]
        candidates = all_sets[0].union(*all_sets[1:])
        for candidate in candidates:
            for target in target_sets:
                if candidate not in target[qid]:
                    target[qid][candidate] = (0, 10000)
            scores = [1 / (C + target[qid][candidate][1] + 1) for target in target_sets]
            score = np.mean(scores)
            records.append({
                'query_id': str(qid),
                'doc_id': str(candidate),
                'score': score,
            })   
    return pd.DataFrame.from_records(records, dtype)

In [154]:
from tqdm.notebook import tqdm
def get_res(targetx, qids, evaluate_func):
    all = []
    for comb in tqdm(targetx):
        name = '_'.join(comb)
        targets = [score_dict[c] for c in comb]
        print(comb)
        res = get_fusion_scores(targets, qids)
        metrics = evaluate_func.calc_aggregate(res)
        metrics['name'] = name
        all.append(metrics)
        
    return pd.DataFrame.from_records(all)

In [196]:
qrels = irds.load('msmarco-passage/trec-dl-2019/judged').qrels_iter()
qidx = pd.DataFrame(qrels).drop_duplicates('query_id').query_id.to_list()

In [198]:
for q in list(qrels):
    print(q)

In [186]:
evaluate = evaluator([nDCG(cutoff=10), R(cutoff=100), R(cutoff=1000), AP(rel=2), RR, RR(cutoff=10), P(rel=2, cutoff=10), P(rel=2, cutoff=100)], qrels)

In [187]:
single = all_comb[:8]

In [188]:
sing = []
for comb in tqdm(single):
    name = '_'.join(comb)
    targets = [score_dict[c] for c in comb]
    res = get_fusion_scores(targets, qidx)
    metrics = evaluate.calc_aggregate(res)
    print(metrics)
    metrics['name'] = name
    sing.append(metrics)
    
sing_res = pd.DataFrame.from_records(sing)

  0%|          | 0/8 [00:00<?, ?it/s]

{RR: 0.795028067361668, R@1000: 0.7361896623436666, P(rel=2)@100: 0.19837209302325579, nDCG@10: 0.4795398322478302, AP(rel=2): 0.2892885548598652, P(rel=2)@10: 0.3883720930232557, RR@10: 0.7943798449612403, R@100: 0.44216959593012517}
{RR: 0.7999811597981155, R@1000: 0.7151644083941056, P(rel=2)@100: 0.19767441860465115, nDCG@10: 0.5025129769993442, AP(rel=2): 0.3122462915682076, P(rel=2)@10: 0.42558139534883715, RR@10: 0.7965116279069767, R@100: 0.4256396605563712}
{RR: 0.7901227131092042, R@1000: 0.7542767732682121, P(rel=2)@100: 0.20860465116279067, nDCG@10: 0.5155947995973377, AP(rel=2): 0.3122628149078852, P(rel=2)@10: 0.44186046511627913, RR@10: 0.7894056847545219, R@100: 0.46365963585710324}
{RR: 0.7881239079364931, R@1000: 0.7510698325512497, P(rel=2)@100: 0.20441860465116277, nDCG@10: 0.508619963656044, AP(rel=2): 0.3093109273058641, P(rel=2)@10: 0.4232558139534884, RR@10: 0.7874677002583979, R@100: 0.4541612675508345}
{RR: 0.7594837713242525, R@1000: 0.761899003921345, P(rel=

In [189]:
sing_res

Unnamed: 0,RR,R@1000,P(rel=2)@100,nDCG@10,AP(rel=2),P(rel=2)@10,RR@10,R@100,name
0,0.795028,0.73619,0.198372,0.47954,0.289289,0.388372,0.79438,0.44217,bm25
1,0.799981,0.715164,0.197674,0.502513,0.312246,0.425581,0.796512,0.42564,dph
2,0.790123,0.754277,0.208605,0.515595,0.312263,0.44186,0.789406,0.46366,bm25_rm3
3,0.788124,0.75107,0.204419,0.50862,0.309311,0.423256,0.787468,0.454161,bm25_bo1
4,0.759484,0.761899,0.222093,0.523149,0.345362,0.455814,0.75742,0.462245,dph_rm3
5,0.755152,0.772479,0.223488,0.530888,0.349507,0.467442,0.749806,0.463462,dph_bo1
6,0.798129,0.735844,0.199767,0.480234,0.289903,0.388372,0.797481,0.445889,smd_bm25
7,0.788481,0.713555,0.19814,0.49869,0.314384,0.42093,0.784884,0.429089,smd_dph


In [169]:
bm25

Unnamed: 0,query_id,doc_id,docno,rank,score,query
0,156493,8182161,8182161,0,31.156444,do goldfish grow
1,156493,6139386,6139386,1,31.032333,do goldfish grow
2,156493,3288600,3288600,2,30.953843,do goldfish grow
3,156493,3288596,3288596,3,30.754021,do goldfish grow
4,156493,2259183,2259183,4,30.362477,do goldfish grow
...,...,...,...,...,...,...
42000,146187,4239826,4239826,995,13.695066,difference between a mcdouble and a double che...
42001,146187,4874233,4874233,996,13.695066,difference between a mcdouble and a double che...
42002,146187,6065118,6065118,997,13.695066,difference between a mcdouble and a double che...
42003,146187,7392175,7392175,998,13.695066,difference between a mcdouble and a double che...


In [170]:
bm25 = bm25[['query_id', 'doc_id', 'score']].astype({'query_id': str, 'doc_id': str})

In [165]:
# convert bm25 to dict of qids to dict of doci to scores
bm25 = bm25.groupby('query_id').apply(lambda x: x.set_index('doc_id')['score'].to_dict()).to_dict()

In [183]:
import ir_measures
for x in ir_measures.iter_calc([nDCG@10, P@5, P(rel=2)@5, Judged@10], qrels, bm25):
    print(x)

Metric(query_id='156493', measure=P@5, value=1.0)
Metric(query_id='156493', measure=nDCG@10, value=0.9634117275431489)
Metric(query_id='1110199', measure=P@5, value=0.6)
Metric(query_id='1110199', measure=nDCG@10, value=0.3706750815692151)
Metric(query_id='1063750', measure=P@5, value=0.2)
Metric(query_id='1063750', measure=nDCG@10, value=0.14281093555271182)
Metric(query_id='130510', measure=P@5, value=1.0)
Metric(query_id='130510', measure=nDCG@10, value=0.6073129006331783)
Metric(query_id='489204', measure=P@5, value=0.4)
Metric(query_id='489204', measure=nDCG@10, value=0.3233688044978225)
Metric(query_id='573724', measure=P@5, value=0.6)
Metric(query_id='573724', measure=nDCG@10, value=0.34278733477024653)
Metric(query_id='168216', measure=P@5, value=1.0)
Metric(query_id='168216', measure=nDCG@10, value=1.0)
Metric(query_id='1133167', measure=P@5, value=1.0)
Metric(query_id='1133167', measure=nDCG@10, value=0.5707912167619575)
Metric(query_id='527433', measure=P@5, value=0.4)
Metri

In [190]:
all = []
for comb in tqdm(all_comb):
    name = '_'.join(comb)
    targets = [score_dict[c] for c in comb]
    print(comb)
    res = get_fusion_scores(targets, qidx)
    metrics = evaluate.calc_aggregate(res)
    metrics['name'] = name
    all.append(metrics)
    
all_res = pd.DataFrame.from_records(all)

  0%|          | 0/255 [00:00<?, ?it/s]

['bm25']
['dph']
['bm25_rm3']
['bm25_bo1']
['dph_rm3']
['dph_bo1']
['smd_bm25']
['smd_dph']
['bm25', 'dph']
['bm25', 'bm25_rm3']
['bm25', 'bm25_bo1']
['bm25', 'dph_rm3']
['bm25', 'dph_bo1']
['bm25', 'smd_bm25']
['bm25', 'smd_dph']
['dph', 'bm25_rm3']
['dph', 'bm25_bo1']
['dph', 'dph_rm3']
['dph', 'dph_bo1']
['dph', 'smd_bm25']
['dph', 'smd_dph']
['bm25_rm3', 'bm25_bo1']
['bm25_rm3', 'dph_rm3']
['bm25_rm3', 'dph_bo1']
['bm25_rm3', 'smd_bm25']
['bm25_rm3', 'smd_dph']
['bm25_bo1', 'dph_rm3']
['bm25_bo1', 'dph_bo1']
['bm25_bo1', 'smd_bm25']
['bm25_bo1', 'smd_dph']
['dph_rm3', 'dph_bo1']
['dph_rm3', 'smd_bm25']
['dph_rm3', 'smd_dph']
['dph_bo1', 'smd_bm25']
['dph_bo1', 'smd_dph']
['smd_bm25', 'smd_dph']
['bm25', 'dph', 'bm25_rm3']
['bm25', 'dph', 'bm25_bo1']
['bm25', 'dph', 'dph_rm3']
['bm25', 'dph', 'dph_bo1']
['bm25', 'dph', 'smd_bm25']
['bm25', 'dph', 'smd_dph']
['bm25', 'bm25_rm3', 'bm25_bo1']
['bm25', 'bm25_rm3', 'dph_rm3']
['bm25', 'bm25_rm3', 'dph_bo1']
['bm25', 'bm25_rm3', 'smd_bm25

In [191]:
all_res.to_csv('/home/andrew/Documents/Data/paced/all_res.tsv', sep='\t', index=False)

In [193]:
all_res.head(20)

Unnamed: 0,RR,R@1000,P(rel=2)@100,nDCG@10,AP(rel=2),P(rel=2)@10,RR@10,R@100,name
0,0.795028,0.73619,0.198372,0.47954,0.289289,0.388372,0.79438,0.44217,bm25
1,0.799981,0.715164,0.197674,0.502513,0.312246,0.425581,0.796512,0.42564,dph
2,0.790123,0.754277,0.208605,0.515595,0.312263,0.44186,0.789406,0.46366,bm25_rm3
3,0.788124,0.75107,0.204419,0.50862,0.309311,0.423256,0.787468,0.454161,bm25_bo1
4,0.759484,0.761899,0.222093,0.523149,0.345362,0.455814,0.75742,0.462245,dph_rm3
5,0.755152,0.772479,0.223488,0.530888,0.349507,0.467442,0.749806,0.463462,dph_bo1
6,0.798129,0.735844,0.199767,0.480234,0.289903,0.388372,0.797481,0.445889,smd_bm25
7,0.788481,0.713555,0.19814,0.49869,0.314384,0.42093,0.784884,0.429089,smd_dph
8,0.819929,0.73485,0.204651,0.500367,0.304606,0.425581,0.819897,0.449876,bm25_dph
9,0.78233,0.756277,0.205814,0.494619,0.299851,0.423256,0.824289,0.459397,bm25_bm25_rm3


# Standard Eval

In [12]:
import os

In [13]:
DIR = '/home/andrew/Documents/Data/paced/members/'

In [14]:
files = [f for f in os.listdir(DIR)]

In [15]:
qrels = pd.DataFrame(irds.load('msmarco-passage/trec-dl-2019/judged').qrels_iter())
#qidx = pd.DataFrame(qrels).drop_duplicates('query_id').query_id.to_list()

In [16]:
evaluate = evaluator([nDCG(cutoff=10), R(cutoff=100), R(cutoff=1000), AP(rel=2), RR, RR(cutoff=10), P(rel=2, cutoff=10), P(rel=2, cutoff=100)], qrels)

In [17]:
all_res = {}
for f in files:
    tmp = []
    res = pd.read_csv(os.path.join(DIR, f), index_col=False)
    for row in res.itertuples():
        tmp.append(
            {
                'query_id': str(row.qid),
                'doc_id': str(row.docno),
                'score' : row.score
            }
        )
    all_res[f] = pd.DataFrame.from_records(tmp)

In [18]:
all_metric = []
for f in files:
    res = all_res[f]
    print(f)
    print(res.head())
    metrics = evaluate.calc_aggregate(res)
    metrics['name'] = f
    all_metric.append(metrics)
    
all_metric = pd.DataFrame.from_records(all_metric)

bo1_dir_LM.csv
  query_id   doc_id      score
0  1037798  3620983  33.590197
1  1037798  8760873  29.805606
2  1037798  8760864  28.535555
3  1037798  8760867  27.323485
4  1037798  3641634  26.766174
smd_bm25.csv
  query_id   doc_id      score
0  1037798  8760864  26.007429
1  1037798  8760867  25.904425
2  1037798  3641634  25.410048
3  1037798  3620983  24.271134
4  1037798  4788864  24.007855
rm3_DPH.csv
  query_id   doc_id      score
0  1037798  3641634  17.549590
1  1037798  8760864  16.607452
2  1037798  3620983  13.873839
3  1037798  8760867  13.870762
4  1037798  8760873  12.866632
smd_dir_LM.csv
  query_id   doc_id      score
0  1037798  3641634  16.182809
1  1037798  8760864  15.746913
2  1037798  3620983  15.611668
3  1037798  8760867  15.226994
4  1037798  8760873  14.638440
DPH.csv
  query_id   doc_id      score
0   156493  2411918  14.820482
1   156493  6139386  14.592342
2   156493  8182166  14.150628
3   156493  1277727  13.628203
4   156493   885944  13.615840
kl_DPH.

In [19]:
all_metric

Unnamed: 0,P(rel=2)@100,AP(rel=2),R@1000,R@100,nDCG@10,RR,RR@10,P(rel=2)@10,name
0,0.162558,0.257223,0.68396,0.366888,0.461009,0.69968,0.695349,0.397674,bo1_dir_LM.csv
1,0.203023,0.310669,0.735046,0.463474,0.522755,0.805261,0.805233,0.444186,smd_bm25.csv
2,0.222093,0.344307,0.761899,0.462245,0.523149,0.759464,0.75742,0.455814,rm3_DPH.csv
3,0.205581,0.311458,0.740987,0.458714,0.509298,0.792534,0.792534,0.448837,smd_dir_LM.csv
4,0.197674,0.308977,0.715164,0.42564,0.502513,0.799962,0.796512,0.425581,DPH.csv
5,0.222791,0.34741,0.769047,0.461981,0.529974,0.76735,0.761849,0.465116,kl_DPH.csv
6,0.19814,0.311069,0.713555,0.429089,0.49869,0.788462,0.784884,0.42093,smd_DPH.csv
7,0.163721,0.25151,0.666246,0.348112,0.443779,0.696516,0.692276,0.388372,rm3_dir_LM.csv
8,0.216977,0.339016,0.77242,0.481293,0.530915,0.782787,0.777907,0.448837,rm3_bm25.csv
9,0.223488,0.348516,0.772479,0.463462,0.530888,0.755133,0.749806,0.467442,bo1_DPH.csv


In [21]:
all_metric.to_csv('/home/andrew/Documents/Data/paced/members.csv', index=False)