# Fairness Metrics Implementations on Fair Ranking Track 2020 Dataset for Reranking Task 

In [17]:
import pandas as pd
import numpy as np
from bookgender.config import data_dir
from bookgender.nbutils import *
import plotnine as p9
import plotnine.data as p9d
from itertools import product
import math
import matplotlib as plt

In [18]:
import bookgender.metric_utils.groupinfo as gi
import bookgender.metric_utils.position as pos

In [19]:
import bookgender.metric_utils.load_fairtrec20 as lf20
#import bookgender.metrics.load_fairtrec as lf

In [20]:
from tqdm.auto import tqdm
tqdm.pandas()

In [21]:
import seaborn as sns

## Loading Data

In [22]:
reranked_path = 'data/trec2020-fair-archive/submission_parquet/reranking/'
authorinfo_path = 'data/trec2020-fair-archive/submission_parquet/author_annot_soft.parquet'
ratings_path = 'data/trec2020-fair-archive/submission_parquet/document_relevance.parquet'

In [23]:
recs = lf20.load_recs(reranked_path)
#selected_recs =

In [24]:
recs['participant'].nunique()

23

In [25]:
recs = recs.loc[recs['participant'].isin(['Deltr-gammas', 'LM-rel-groups', 'MacEwan-base', 'NLE_META_99_1'
                                         'UW_Kr_r0g100c0', 'UoGTrBComFu', 'umd_relfair_ltr'])]

### Soft Association (items are associated with multiple groups)

In [26]:
authors = lf20.load_authors(authorinfo_path)
fairtrec = lf20.process_recs(recs, authors, ratings_path)

In [27]:
fairtrec

Unnamed: 0,sequence,qid,system,item,rank,rating,Advanced,Developing,DocLevel,Unknown
0,0.0,9087,Deltr-gammas,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,1,0,0.0,1.0,Developing,0.0
1,0.1,9087,Deltr-gammas,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,1,0,0.0,1.0,Developing,0.0
2,0.2,9087,Deltr-gammas,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,1,0,0.0,1.0,Developing,0.0
3,0.3,9087,Deltr-gammas,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,1,0,0.0,1.0,Developing,0.0
4,0.4,9087,Deltr-gammas,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,1,0,0.0,1.0,Developing,0.0
...,...,...,...,...,...,...,...,...,...,...
1623745,199.95,12887,umd_relfair_ltr,3739bc58c2f162830c8e9a8db1007c05a9f2c290,6,0,0.0,1.0,Developing,0.0
1623746,199.96,12887,umd_relfair_ltr,3739bc58c2f162830c8e9a8db1007c05a9f2c290,6,0,0.0,1.0,Developing,0.0
1623747,199.97,12887,umd_relfair_ltr,3739bc58c2f162830c8e9a8db1007c05a9f2c290,6,0,0.0,1.0,Developing,0.0
1623748,199.98,12887,umd_relfair_ltr,3739bc58c2f162830c8e9a8db1007c05a9f2c290,6,0,0.0,1.0,Developing,0.0


In [28]:
test_rates = lf20.load_test(authors, ratings_path)
test_rates.head()

Unnamed: 0,qid,item,rating,Advanced,Developing,DocLevel,Unknown
0,9087,4a1472c575488ed9acee6c5481f68b95a0c34092,0,0.0,1.0,Developing,0.0
1,9087,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,0,0.0,1.0,Developing,0.0
2,9087,cbc33164f9db97698901f130f399f2898d856267,0,0.571429,0.428571,Mixed,0.0
3,9087,34ba4f3d2a38b88153308f2a7f12651b440f5812,0,1.0,0.0,Advanced,0.0
4,74172,883495c182b26e83557c57d82e9e730e3faa1806,0,1.0,0.0,Advanced,0.0


### Dataset Stats

Number of participant provided algorithms.

In [29]:
fairtrec['system'].nunique()

5

In [30]:
fairtrec['system'].unique()

array(['Deltr-gammas', 'LM-rel-groups', 'MacEwan-base', 'UoGTrBComFu',
       'umd_relfair_ltr'], dtype=object)

Number of items in each demographic groups.

In [31]:
fairtrec.groupby('DocLevel')['item'].count()

DocLevel
Advanced      1224000
Developing     220500
Mixed           98250
Unknown         81000
Name: item, dtype: int64

Number of items

In [32]:
fairtrec['item'].nunique()

2112

Number of sequences per query

In [33]:
fairtrec.groupby('qid')['sequence'].nunique()

qid
95        150
612       150
657       150
771       150
822       150
         ... 
119231    150
123512    150
126740    150
128050    150
128604    150
Name: sequence, Length: 195, dtype: int64

### Group distribution in dataset

In [34]:
G = lf20.G(authors)
G

Advanced      3499.220238
Developing     660.779762
Unknown        210.000000
dtype: float64

#### Target population

In [35]:
group = gi.GroupInfo(G, 'Advanced', 'Developing', 'Unknown', 'DocLevel')
group.tgt_p_binomial

0.15884128891941393

## Metric Implementations [soft group association]

### Loading the metrics

#### Excluding IAA and Sign and Joachim metrics since they don't work for soft group association

In [36]:
from bookgender.fair_metrics.Run_metrics_IR import metric_analysis as ma
MA = ma(fairtrec, test_rates, group, IAA = False, DRR = False)

#### Default Settings

In [37]:
default_results= MA.run_default_setting(listsize=10)
default_results

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=956.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=956.0), HTML(value='')))






Unnamed: 0,system,Metric,value,ranked_size
0,Deltr-gammas,AWRF_equal,0.413282,10
1,LM-rel-groups,AWRF_equal,0.401035,10
2,MacEwan-base,AWRF_equal,0.410881,10
3,UoGTrBComFu,AWRF_equal,0.381742,10
4,umd_relfair_ltr,AWRF_equal,0.412495,10
5,Deltr-gammas,FAIR,0.722141,10
6,LM-rel-groups,FAIR,0.69525,10
7,MacEwan-base,FAIR,0.666328,10
8,UoGTrBComFu,FAIR,0.709711,10
9,umd_relfair_ltr,FAIR,0.684414,10


In [38]:
default_results.to_parquet('results/SIGIR22/rerank_soft_default_up.parquet')

## Sensitivity Analysis

### Ranked List Size Changing

In [28]:
rankedlist_results=pd.DataFrame()
for listsize in [10, 20, 30, 40, 50, 100]:
    results = MA.run_default_setting(listsize)
    rankedlist_results=rankedlist_results.append(results)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))






In [29]:
rankedlist_results

Unnamed: 0,system,Metric,value,ranked_size
0,Deltr-gammas,AWRF_equal,0.414093,10
1,LM-rel-groups,AWRF_equal,0.401319,10
2,MacEwan-base,AWRF_equal,0.411123,10
3,UoGTrBComFu,AWRF_equal,0.381682,10
4,umd_relfair_ltr,AWRF_equal,0.411747,10
...,...,...,...,...
25,Deltr-gammas,EER,2.116917,100
26,LM-rel-groups,EER,0.996484,100
27,MacEwan-base,EER,1.118969,100
28,UoGTrBComFu,EER,1.117784,100


In [22]:
rankedlist_results.to_parquet('results/SIGIR22/fairrerank_rankedlist_sens_size_up.parquet')

### Position Weight Changing

In [39]:
position_results=pd.DataFrame()
for position in [pos.geometric, pos.cascade, pos.logarithmic, pos.rbp]:
    results = MA.run_sensitivity_analysis(position_weight=position)
    position_results=position_results.append(results)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=956.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=941.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=956.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=941.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=956.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=941.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=956.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=941.0), HTML(value='')))




In [40]:
position_result = position_results[['system', 'Metric', 'value', 'pos_weight']]

In [41]:
position_result

Unnamed: 0,system,Metric,value,pos_weight
0,Deltr-gammas,AWRF,0.211584,geometric
1,LM-rel-groups,AWRF,0.236611,geometric
2,MacEwan-base,AWRF,0.238134,geometric
3,UoGTrBComFu,AWRF,0.248278,geometric
4,umd_relfair_ltr,AWRF,0.265781,geometric
...,...,...,...,...
15,Deltr-gammas,EER,3.816530,rbp
16,LM-rel-groups,EER,2.531330,rbp
17,MacEwan-base,EER,2.701125,rbp
18,UoGTrBComFu,EER,2.781621,rbp


In [42]:
position_result.to_parquet('results/SIGIR22/fairrerank_sensitive_position_up.parquet')

### Parameter Changing (Stopping Probability and Patience Parameter)

#### Cascade and RBP use patience parameter
#### Cascade and Geometrics user stopping probability

In [34]:
stop_results=pd.DataFrame()
for position in [pos.geometric, pos.cascade]:
    for stop in range(1, 10, 1):
        results = MA.run_sensitivity_analysis(position_weight = position, arg='stop', arg_val=stop/10)
        stop_results=stop_results.append(results)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




In [35]:
stop_results

Unnamed: 0,system,Metric,value,stop,pos_weight,ranked_size
0,Deltr-gammas,AWRF,0.179012,0.1,geometric,10
1,LM-rel-groups,AWRF,0.187474,0.1,geometric,10
2,MacEwan-base,AWRF,0.186874,0.1,geometric,10
3,UoGTrBComFu,AWRF,0.183553,0.1,geometric,10
4,umd_relfair_ltr,AWRF,0.192241,0.1,geometric,10
...,...,...,...,...,...,...
15,Deltr-gammas,EER,1.440507,0.9,cascade,10
16,LM-rel-groups,EER,0.536356,0.9,cascade,10
17,MacEwan-base,EER,0.628106,0.9,cascade,10
18,UoGTrBComFu,EER,0.611277,0.9,cascade,10


In [36]:
stop_results.to_parquet('results/SIGIR22/fairrerank_sensitivity_stop_up.parquet')

In [37]:
patience_results=pd.DataFrame()
for position in [pos.rbp, pos.cascade]:
    for patience in range(1, 10, 1):
        results = MA.run_sensitivity_analysis(position_weight=position, arg='patience', arg_val=patience/10)
        patience_results=patience_results.append(results)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=975.0), HTML(value='')))




In [38]:
patience_results.to_parquet('results/SIGIR22/fairrerank_sensitivity_patience_up.parquet')

## Hard Association (each item is associated with a particular group)

We aggregate the membership score to generate hard group association for each item. Below is an example of implemneting the metrics in hard group membership.

In [27]:
authors = lf20.load_authors('data/trec2020-fair-archive/submission_parquet/author_annot_hard.parquet')
fairtrec_hard = lf20.process_recs(recs, authors)

In [28]:
test_rates_hard = lf20.load_test(authors)
test_rates_hard.head()

Unnamed: 0,qid,item,rating,Advanced,Developing,DocLevel,Unknown
0,9087,4a1472c575488ed9acee6c5481f68b95a0c34092,0,0,1,Developing,0
1,9087,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,0,0,1,Developing,0
2,9087,cbc33164f9db97698901f130f399f2898d856267,0,1,0,Mixed,0
3,9087,34ba4f3d2a38b88153308f2a7f12651b440f5812,0,1,0,Advanced,0
4,74172,883495c182b26e83557c57d82e9e730e3faa1806,0,1,0,Advanced,0


In [31]:
G = lf20.G(authors)
G

Advanced      3531
Developing     629
Unknown        210
dtype: int64

In [32]:
group = gi.GroupInfo(G, 'Advanced', 'Developing', 'Unknown', 'DocLevel')
group.tgt_p_binomial

0.15120192307692307

In [33]:
MA_2 = ma(fairtrec_hard, test_rates_hard, group, IAA = False)

In [34]:
default_results= MA_2.run_default_setting()
default_results

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4485.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4485.0), HTML(value='')))






Unnamed: 0,system,Metric,value
0,Deltr-gammas,AWRF_equal,0.425943
1,LM-rel-groups,AWRF_equal,0.410796
2,LM-rel-year-100,AWRF_equal,0.432742
3,LM-relev-year,AWRF_equal,0.429665
4,LM-relevance,AWRF_equal,0.395315
...,...,...,...
202,UoGTrBComPro,logRUR,-5.585742
203,UoGTrBComRel,logRUR,-5.587713
204,UoGTrBRel,logRUR,-5.577152
205,UoGTrComRel,logRUR,-5.603341


In [35]:
default_results.to_parquet('results/fairtrec20/rerank_hard_default.parquet')