In [1]:
import pandas as pd
import numpy as np
from bookgender.config import data_dir
from bookgender.nbutils import *
import plotnine as p9
import plotnine.data as p9d
from itertools import product
import math
import matplotlib as plt

import bookgender.fair_metrics.singh_joachims as sj
import bookgender.fair_metrics.sapiezynski as sp
import bookgender.fair_metrics.biega as bg
import bookgender.fair_metrics.diaz as dz
import bookgender.fair_metrics.zehlike as zh

In [None]:
import bookgender.metric_utils.groupinfo as gi
import bookgender.metric_utils.position as pos

In [4]:
import bookgender.metric_utils.load_fairtrec20 as lf20

In [5]:
from tqdm.auto import tqdm
tqdm.pandas()

# Soft Association (Each item can be associated with multiple groups)

## Load Data

### Retrieval: each run retrieves 100-item rankings from the corpus in response to a query

In [6]:
# Input locations inside the trec2020-fair-archive submission dump
# (all paths are relative to the notebook's working directory).
ratings_path = 'data/trec2020-fair-archive/submission_parquet/document_relevance.parquet'
authorinfo_path = 'data/trec2020-fair-archive/submission_parquet/author_annot_soft.parquet'
retrieved_path = 'data/trec2020-fair-archive/submission_parquet/retrieval/'

In [7]:
# Load the submitted rankings and the soft author annotations, then let
# process_recs combine them with the relevance file at ratings_path into
# the single frame (`fairtrec`) that the metrics are computed on.
recs = lf20.load_recs(retrieved_path)
authors = lf20.load_authors(authorinfo_path)
fairtrec = lf20.process_recs(recs, authors, ratings_path)

In [8]:
fairtrec

Unnamed: 0,qid,sequence,system,item,rank,rating,Advanced,Developing,DocLevel,Unknown
0,9087,0.0,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0.0,1.0,Developing,0.0
1,9087,0.1,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0.0,1.0,Developing,0.0
2,9087,0.2,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0.0,1.0,Developing,0.0
3,9087,0.3,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0.0,1.0,Developing,0.0
4,9087,0.4,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0.0,1.0,Developing,0.0
...,...,...,...,...,...,...,...,...,...,...
1350745,12887,199.145,UW_t_bm25,f6f0758b228b359fa425d12d977d7250d2da0f55,15,0,0.0,1.0,Developing,0.0
1350746,12887,199.146,UW_t_bm25,f6f0758b228b359fa425d12d977d7250d2da0f55,15,0,0.0,1.0,Developing,0.0
1350747,12887,199.147,UW_t_bm25,f6f0758b228b359fa425d12d977d7250d2da0f55,15,0,0.0,1.0,Developing,0.0
1350748,12887,199.148,UW_t_bm25,f6f0758b228b359fa425d12d977d7250d2da0f55,15,0,0.0,1.0,Developing,0.0


In [12]:
# Relevance data per (qid, item) with the author group columns attached;
# this frame is passed to the metric runner alongside `fairtrec`.
test_rates = lf20.load_test(authors, ratings_path)
test_rates.head()

Unnamed: 0,qid,item,rating,Advanced,Developing,DocLevel,Unknown
0,9087,4a1472c575488ed9acee6c5481f68b95a0c34092,0,0.0,1.0,Developing,0.0
1,9087,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,0,0.0,1.0,Developing,0.0
2,9087,cbc33164f9db97698901f130f399f2898d856267,0,0.571429,0.428571,Mixed,0.0
3,9087,34ba4f3d2a38b88153308f2a7f12651b440f5812,0,1.0,0.0,Advanced,0.0
4,74172,883495c182b26e83557c57d82e9e730e3faa1806,0,1.0,0.0,Advanced,0.0


### Dataset Statistics

In [10]:
# Number of distinct items that appear in the retrieved rankings.
fairtrec['item'].nunique()

1783

In [35]:
#fairtrec.isna().sum()

In [13]:
# Number of distinct items that appear in test_rates.
test_rates['item'].nunique()

2112

In [37]:
#len(test_rates['DocLevel'].loc[test_rates['DocLevel']=='Advanced'])
#test_rates['Developing'].count()

In [36]:
#test_rates.isna().sum()

In [17]:
#fairtrec['Algorithm'].replace(['UW_Kt_r0g0c100', 'UW_Kt_r25g25c50','UW_Kt_r60g20c20',
 #                                        'UW_t_bm25', 'UW_Kt_r80g10c10'],
  #                                       ['1', '2', '3', '4', '5'], inplace=True)

In [38]:
#type(fairtrec['rank'][0])

In [40]:
# Build the group descriptor that is handed to the metric runner.
# G is computed in this cell so that the cell works on a fresh kernel:
# previously it relied on a *later* cell having already defined G, which
# raises NameError under "Restart Kernel -> Run All".
G = lf20.G(authors)
group = gi.GroupInfo(G, 'Advanced', 'Developing', 'Unknown', 'DocLevel')
# Display the binomial target proportion derived by GroupInfo.
group.tgt_p_binomial

0.15884128891941393

### Group information in dataset

In [39]:
# Per-group totals computed from the author annotations (one entry each for
# Advanced, Developing and Unknown, as shown in the output).
G = lf20.G(authors)
G

Advanced      3499.220238
Developing     660.779762
Unknown        210.000000
dtype: float64

#### Target population

In [41]:
fairtrec.columns

Index(['qid', 'sequence', 'system', 'item', 'rank', 'rating', 'Advanced',
       'Developing', 'DocLevel', 'Unknown'],
      dtype='object')

In [22]:
#sorted(fairtrec['rank'].unique())

## Metrics Implementation

### Loading Metrics
#### Excluding the IAA and Singh and Joachims metrics from the implementation, since they do not work with soft group association

In [23]:
# Metric runner for the soft-association data. IAA and DRR are switched off
# because they are not applicable to soft group membership (see heading above).
from bookgender.fair_metrics.Run_metrics_IR import metric_analysis as ma

MA = ma(fairtrec, test_rates, group, IAA=False, DRR=False)

### Default settings of each metric

In [54]:
# Run the metrics in their default configuration on the full 100-item rankings.
default_results = MA.run_default_setting(listsize=100)
default_results

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=945.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=945.0), HTML(value='')))




Unnamed: 0,system,Metric,value,ranked_size
0,UW_Kt_r0g0c100,AWRF_equal,0.464469,100
1,UW_Kt_r25g25c50,AWRF_equal,0.434657,100
2,UW_Kt_r60g20c20,AWRF_equal,0.436589,100
3,UW_Kt_r80g10c10,AWRF_equal,0.432874,100
4,UW_t_bm25,AWRF_equal,0.435783,100
5,UW_Kt_r0g0c100,FAIR,0.499824,100
6,UW_Kt_r25g25c50,FAIR,0.570933,100
7,UW_Kt_r60g20c20,FAIR,0.571244,100
8,UW_Kt_r80g10c10,FAIR,0.571979,100
9,UW_t_bm25,FAIR,0.572641,100


In [25]:
#default_results.to_parquet('results/fairtrec20/retrieval_soft_default.parquet')

In [55]:
# Write the default-setting metric values to Parquet.
default_results.to_parquet('results/SIGIR22/retrieval_soft_default_up.parquet')

## Sensitivity Analysis

### Ranked List Size Changing

In [27]:
# Re-run the default metric configuration at several ranking cut-offs.
# The per-size frames are collected in a list and concatenated once:
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and it
# re-copied the accumulated frame on every iteration.
rankedlist_frames = []
for listsize in [10, 20, 30, 40, 50, 100]:
    rankedlist_frames.append(MA.run_default_setting(listsize))
# Like append, concat keeps each frame's own index (no ignore_index).
rankedlist_results = pd.concat(rankedlist_frames)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=899.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=899.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=914.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=914.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=925.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=925.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=928.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=928.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=945.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=945.0), HTML(value='')))




In [42]:
#rankedlist_results.groupby(['system', 'Metric', 'ranked_size'])['value'].unique()

In [43]:
# Write the ranked-list-size sensitivity results to Parquet.
rankedlist_results.to_parquet('results/SIGIR22/fair_rankedlist_sens_size_up.parquet')

## Position Weight Changing

In [44]:
# Run the sensitivity analysis once per position-weighting model.
# Frames are gathered in a list and concatenated once, because
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
position_frames = []
for position in [pos.geometric, pos.cascade, pos.logarithmic, pos.rbp]:
    position_frames.append(MA.run_sensitivity_analysis(position_weight=position))
# Like append, concat keeps each frame's own index (no ignore_index).
position_results = pd.concat(position_frames)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




In [47]:
# Keep only the columns that are saved for the position-weight analysis.
position_columns = ['system', 'Metric', 'value', 'pos_weight']
position_result = position_results[position_columns]
position_result

Unnamed: 0,system,Metric,value,pos_weight
0,UW_Kt_r0g0c100,AWRF,0.240878,geometric
1,UW_Kt_r25g25c50,AWRF,0.245832,geometric
2,UW_Kt_r60g20c20,AWRF,0.249097,geometric
3,UW_Kt_r80g10c10,AWRF,0.258523,geometric
4,UW_t_bm25,AWRF,0.263063,geometric
...,...,...,...,...
15,UW_Kt_r0g0c100,EER,1.543643,rbp
16,UW_Kt_r25g25c50,EER,2.523540,rbp
17,UW_Kt_r60g20c20,EER,2.483548,rbp
18,UW_Kt_r80g10c10,EER,2.399915,rbp


In [48]:
# Write the column-reduced position-weight results (position_result, not
# position_results) to Parquet.
position_result.to_parquet('results/SIGIR22/fair_sensitive_position_up.parquet')

## Parameter Changes (stopping probability and patience parameter)

In [49]:
#MA.run_sensitivity_analysis(position_weight = pos.geometric, arg='stop', arg_val=0.1)

In [50]:
# Sweep the 'stop' parameter over 0.1 .. 0.9 for the geometric and cascade
# position models. Frames are gathered in a list and concatenated once,
# because DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
stop_frames = []
for position in [pos.geometric, pos.cascade]:
    for stop in range(1, 10):
        stop_frames.append(
            MA.run_sensitivity_analysis(position_weight=position, arg='stop', arg_val=stop / 10)
        )
# Like append, concat keeps each frame's own index (no ignore_index).
stop_results = pd.concat(stop_frames)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




In [23]:
stop_results

Unnamed: 0,system,Metric,value,stop,pos_weight
0,UW_Kt_r0g0c100,AWRF,0.225565,0.1,geometric
1,UW_Kt_r25g25c50,AWRF,0.202644,0.1,geometric
2,UW_Kt_r60g20c20,AWRF,0.205109,0.1,geometric
3,UW_Kt_r80g10c10,AWRF,0.206864,0.1,geometric
4,UW_t_bm25,AWRF,0.207883,0.1,geometric
...,...,...,...,...,...
15,UW_Kt_r0g0c100,EER,0.474063,0.9,cascade
16,UW_Kt_r25g25c50,EER,0.779119,0.9,cascade
17,UW_Kt_r60g20c20,EER,0.629964,0.9,cascade
18,UW_Kt_r80g10c10,EER,0.624245,0.9,cascade


In [51]:
# Sweep the 'patience' parameter over 0.1 .. 0.9 for the rbp and cascade
# position models. Frames are gathered in a list and concatenated once,
# because DataFrame.append was deprecated in pandas 1.4 and removed in 2.0.
patience_frames = []
for position in [pos.rbp, pos.cascade]:
    for patience in range(1, 10):
        patience_frames.append(
            MA.run_sensitivity_analysis(position_weight=position, arg='patience', arg_val=patience / 10)
        )
# Like append, concat keeps each frame's own index (no ignore_index).
patience_results = pd.concat(patience_frames)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=837.0), HTML(value='')))




In [52]:
patience_results

Unnamed: 0,system,Metric,value,patience,pos_weight,ranked_size
0,UW_Kt_r0g0c100,AWRF,0.244575,0.1,rbp,10
1,UW_Kt_r25g25c50,AWRF,0.273748,0.1,rbp,10
2,UW_Kt_r60g20c20,AWRF,0.275269,0.1,rbp,10
3,UW_Kt_r80g10c10,AWRF,0.289499,0.1,rbp,10
4,UW_t_bm25,AWRF,0.291519,0.1,rbp,10
...,...,...,...,...,...,...
15,UW_Kt_r0g0c100,EER,3.590863,0.9,cascade,10
16,UW_Kt_r25g25c50,EER,5.260052,0.9,cascade,10
17,UW_Kt_r60g20c20,EER,5.191910,0.9,cascade,10
18,UW_Kt_r80g10c10,EER,5.163059,0.9,cascade,10


In [53]:
# Write the stop-parameter and patience-parameter sweeps to Parquet.
stop_results.to_parquet('results/SIGIR22/fairret_sensitivity_stop_up.parquet')
patience_results.to_parquet('results/SIGIR22/fairret_sensitivity_patience_up.parquet')

## Hard association (each item is associated with a particular group)

We aggregate the membership scores to generate a hard group association for each item. Below is an example of implementing the metrics with hard group membership.

In [17]:
# Switch to the hard author annotations and rebuild the ranking frame.
# NOTE: this rebinds `authors`, so every later cell that reads `authors`
# sees the hard annotations, not the soft ones loaded earlier.
# NOTE(review): unlike the soft-association call, no ratings path is passed to
# process_recs here - presumably it has a default; confirm.
authors = lf20.load_authors('data/trec2020-fair-archive/submission_parquet/author_annot_hard.parquet')
fairtrec_hard = lf20.process_recs(recs, authors)
fairtrec_hard

Unnamed: 0,qid,sequence,system,item,rank,rating,Advanced,Developing,DocLevel,Unknown
0,9087,0.0,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0,1,Developing,0
1,9087,0.1,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0,1,Developing,0
2,9087,0.2,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0,1,Developing,0
3,9087,0.3,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0,1,Developing,0
4,9087,0.4,UW_Kt_r0g0c100,4a1472c575488ed9acee6c5481f68b95a0c34092,2,0,0,1,Developing,0
...,...,...,...,...,...,...,...,...,...,...
1350745,12887,199.145,UW_Kt_r80g10c10,f6f0758b228b359fa425d12d977d7250d2da0f55,16,0,0,1,Developing,0
1350746,12887,199.146,UW_Kt_r80g10c10,f6f0758b228b359fa425d12d977d7250d2da0f55,16,0,0,1,Developing,0
1350747,12887,199.147,UW_Kt_r80g10c10,f6f0758b228b359fa425d12d977d7250d2da0f55,16,0,0,1,Developing,0
1350748,12887,199.148,UW_Kt_r80g10c10,f6f0758b228b359fa425d12d977d7250d2da0f55,16,0,0,1,Developing,0


In [18]:
# Relevance data joined with the hard author annotations.
# NOTE(review): load_test is called without the ratings path that the
# soft-association cell passes - presumably a default is used; confirm.
test_rates_hard = lf20.load_test(authors)
test_rates_hard.head()

Unnamed: 0,qid,item,rating,Advanced,Developing,DocLevel,Unknown
0,9087,4a1472c575488ed9acee6c5481f68b95a0c34092,0,0,1,Developing,0
1,9087,ff8df0745136fb183ea72b783ff38f8a7ad5ceec,0,0,1,Developing,0
2,9087,cbc33164f9db97698901f130f399f2898d856267,0,1,0,Mixed,0
3,9087,34ba4f3d2a38b88153308f2a7f12651b440f5812,0,1,0,Advanced,0
4,74172,883495c182b26e83557c57d82e9e730e3faa1806,0,1,0,Advanced,0


In [21]:
# Per-group totals for the hard annotations.
# NOTE: this rebinds the `G` used in the soft-association section.
G = lf20.G(authors)
G

Advanced      3531
Developing     629
Unknown        210
dtype: int64

In [22]:
# Group descriptor for the hard annotations.
# NOTE: this rebinds `group`; re-running soft-association cells after this
# point would pick up the hard-association group info.
group = gi.GroupInfo(G, 'Advanced', 'Developing', 'Unknown', 'DocLevel')
group.tgt_p_binomial

0.15120192307692307

In [23]:
# Metric runner for the hard-association data. Only IAA is disabled here;
# DRR is left at its default, unlike the soft-association runner.
MA2 = ma(fairtrec_hard, test_rates_hard, group, IAA=False)

In [24]:
# Run the metrics in their default configuration on the hard-association data
# (no explicit list size is passed, so the method's own default applies).
default_results_hard = MA2.run_default_setting()
default_results_hard

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=945.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=945.0), HTML(value='')))




Unnamed: 0,system,Metric,value
0,UW_Kt_r0g0c100,AWRF_equal,0.479919
1,UW_Kt_r25g25c50,AWRF_equal,0.45203
2,UW_Kt_r60g20c20,AWRF_equal,0.451646
3,UW_Kt_r80g10c10,AWRF_equal,0.446389
4,UW_t_bm25,AWRF_equal,0.447159
5,UW_Kt_r0g0c100,FAIR,0.439195
6,UW_Kt_r25g25c50,FAIR,0.526644
7,UW_Kt_r60g20c20,FAIR,0.527116
8,UW_Kt_r80g10c10,FAIR,0.527823
9,UW_t_bm25,FAIR,0.528304


In [25]:
# Write the hard-association default results to Parquet.
# NOTE(review): this goes to results/fairtrec20/, whereas the other results
# written by this notebook go to results/SIGIR22/ - confirm this is intended.
default_results_hard.to_parquet('results/fairtrec20/retrieval_hard_default.parquet')