# Create the Retrieval Effectiveness Evaluation Table

In [14]:
import pandas as pd


# Read the evaluation results; the file is parsed as JSON Lines
# (one JSON record per line).
df = pd.read_json(
    '../../best-and-worst-case-evaluation.jsonl',
    orient='records',
    lines=True,
)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,name,map,recip_rank,P_10,ndcg_cut_10,map +,map -,map p-value,map reject,map p-value corrected,...,P_10 +,P_10 -,P_10 p-value,P_10 reject,P_10 p-value corrected,ndcg_cut_10 +,ndcg_cut_10 -,ndcg_cut_10 p-value,ndcg_cut_10 reject,ndcg_cut_10 p-value corrected
0,t5 (original),0.512556,0.943798,0.832558,0.726904,,,,False,,...,,,,False,,,,,False,
1,t5 (best_case),0.588901,0.959302,0.890698,0.754914,41.0,0.0,0.0,True,0.0,...,15.0,0.0,0.000358,True,0.001073,23.0,11.0,0.002245,True,0.006734
2,t5 (worst_case),0.475261,0.908915,0.74186,0.669099,0.0,41.0,1.0231e-06,True,3.0693e-06,...,0.0,22.0,2e-06,True,7e-06,0.0,27.0,2e-06,True,6e-06
3,electra (original),0.535566,0.947674,0.830233,0.730119,,,,False,,...,,,,False,,,,,False,
4,electra (best_case),0.577848,0.96124,0.855814,0.742733,41.0,0.0,3e-10,True,9e-10,...,10.0,0.0,0.001462,True,0.004385,20.0,10.0,0.018893,False,0.056679
5,electra (worst_case),0.514689,0.932171,0.795349,0.710839,0.0,41.0,1.0762e-06,True,3.2287e-06,...,0.0,12.0,0.000562,True,0.001687,0.0,16.0,0.001353,True,0.004058


In [25]:
def score(measure, approach, data=None):
    """Format one effectiveness score as a LaTeX table cell.

    Selects the single row whose ``name`` column equals ``approach`` and
    renders the value of column ``measure`` with three decimals. A
    superscript asterisk is appended when the row's ``f'{measure} reject'``
    flag is set and ``approach`` does not contain ``'original'``; otherwise
    an invisible ``\\phantom`` asterisk of the same width is appended so that
    all cells keep the same width.

    Args:
        measure: Name of the score column, e.g. ``'ndcg_cut_10'``. The column
            ``f'{measure} reject'`` must exist as well.
        approach: Exact value to match in the ``name`` column,
            e.g. ``'t5 (best_case)'``.
        data: DataFrame to read from. Defaults to the notebook-level ``df``,
            which keeps existing two-argument calls working.

    Returns:
        The formatted score followed by the (visible or phantom) marker.

    Raises:
        ValueError: If ``approach`` does not match exactly one row.
        KeyError: If ``measure`` or its ``reject`` column is missing.
    """
    frame = df if data is None else data
    matches = frame[frame['name'] == approach]
    if len(matches) != 1:
        raise ValueError(f'Expected one match, got {len(matches)}')
    row = matches.iloc[0].to_dict()

    rejected = row[f'{measure} reject']
    # NaN is truthy in Python, so a missing flag would otherwise be reported
    # as a significant change; treat missing values as "not significant".
    significant = (
        'original' not in approach
        and not pd.isna(rejected)
        and bool(rejected)
    )

    marker = '$^{*}$' if significant else '$\\phantom{^{*}}$'
    return '{:.3f}'.format(row[measure]) + marker


def table_line(model):
    """Build the cells of one table row for ``model``.

    For each measure (``ndcg_cut_10`` first, then ``P_10``) the worst-case,
    original, and best-case scores are formatted via ``score`` in that order.
    The six formatted cells are joined with `` & `` and followed by six
    ``---`` placeholder cells.

    Args:
        model: Name prefix used in the ``name`` column, e.g. ``'t5'``.

    Returns:
        The ``&``-separated cell string (without a leading row label and
        without the trailing ``\\\\``).
    """
    cells = [
        score(metric, f'{model} ({variant})')
        for metric in ('ndcg_cut_10', 'P_10')
        for variant in ('worst_case', 'original', 'best_case')
    ]
    scored_part = ' & '.join(cells)
    return scored_part + ' & --- & --- & --- & --- & --- & ---'

def table():
    """Return the LaTeX source of the retrieval-effectiveness table.

    The result is a complete ``table*`` environment with a 13-column
    ``tabular*``: one label column followed by two groups of six columns
    headed "TREC DL 19" and "TREC DL 20". Each group is split into nDCG@10
    and Precision@10, each with Worst / Ori. / Best sub-columns.

    Two body rows are emitted, labelled "Electra" and "MonoT5"; their cells
    come from ``table_line('electra')`` and ``table_line('t5')``.

    The markup uses ``\\toprule``, ``\\midrule``, ``\\cmidrule`` and
    ``\\bottomrule`` (presumably from the booktabs package -- confirm that the
    target document loads it).

    Returns:
        str: The LaTeX code, starting and ending with a newline.
    """
    return '''
\\begin{table*}[tb]
\\setlength{\\tabcolsep}{1.7pt}
\\renewcommand{\\arraystretch}{1.}
\\caption{The retrieval effectiveness when adversarial attacks are applied to only non-relevant documents (worst case), to no documents (original case), or to only relevant documents (best case). We report nDCG@10 and Precision@10. The asterisk $^{*}$ marks Bonferroni corrected significant changes to the original case without adversarial attacks.}
\\footnotesize
\\label{table-retrieval-effectiveness}
\\begin{tabular*}{\\textwidth}{@{}lcccccc@{\\quad}cccccc@{}}

    \\toprule
    
    & \\multicolumn{6}{@{}c@{}}{\\textbf{TREC DL 19}} & \\multicolumn{6}{@{}c@{}}{\\textbf{TREC DL 20}} \\\\
    
    \\cmidrule(r{20pt}){2-7} \\cmidrule(){8-13}
    & \\multicolumn{3}{@{}c@{}}{\\textbf{nDCG@10}} & \\multicolumn{3}{@{}c@{}}{\\textbf{Precision@10\\phantom{mm}}} & \\multicolumn{3}{@{}c@{}}{\\textbf{nDCG@10}} & \\multicolumn{3}{@{}c@{}}{\\textbf{Precision@10}} \\\\

    \\cmidrule(r{4pt}){2-4} \\cmidrule(r{20pt}){5-7} \\cmidrule(r{4pt}){8-10} \\cmidrule{11-13} 

    & Worst & Ori.\\ & Best & Worst & Ori.\\ & Best & Worst & Ori.\\ & Best & Worst & Ori.\\ & Best \\\\
    \\midrule

    Electra & ''' + table_line('electra') + ''' \\\\
    MonoT5 & ''' + table_line('t5') + ''' \\\\

    \\bottomrule
\\end{tabular*}
\\end{table*}
 
'''

# print() writes the string itself, so the LaTeX appears with real newlines
# and single backslashes and can be copied straight into a document.
print(table())


\begin{table*}[tb]
\setlength{\tabcolsep}{1.7pt}
\renewcommand{\arraystretch}{1.}
\caption{The retrieval effectiveness when adversarial attacks are applied to only non-relevant documents (worst case), to no documents (original case), or to only relevant documents (best case). We report nDCG@10 and Precision@10. The asterisk $^{*}$ marks Bonferroni corrected significant changes to the original case without adversarial attacks.}
\footnotesize
\label{table-retrieval-effectiveness}
\begin{tabular*}{\textwidth}{@{}lcccccc@{\quad}cccccc@{}}

    \toprule
    
    & \multicolumn{6}{@{}c@{}}{\textbf{TREC DL 19}} & \multicolumn{6}{@{}c@{}}{\textbf{TREC DL 20}} \\
    
    \cmidrule(r{20pt}){2-7} \cmidrule(){8-13}
    & \multicolumn{3}{@{}c@{}}{\textbf{nDCG@10}} & \multicolumn{3}{@{}c@{}}{\textbf{Precision@10\phantom{mm}}} & \multicolumn{3}{@{}c@{}}{\textbf{nDCG@10}} & \multicolumn{3}{@{}c@{}}{\textbf{Precision@10}} \\

    \cmidrule(r{4pt}){2-4} \cmidrule(r{20pt}){5-7} \cmidrule(r{4pt}){8-10} 