# Fairness Metrics Implementation on GoodReads Book Dataset

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from itertools import product
import plotnine as p
import plotnine.data as p9d
import math

In [2]:
from tqdm.auto import tqdm
# Register tqdm's pandas integration (progress_apply-style methods on pandas objects).
tqdm.pandas()

In [3]:
import ujson
import glob

In [4]:
from bookgender.config import data_dir
from bookgender.nbutils import *

In [5]:
from bookgender.nbutils import init_figs, make_plot, theme_paper
from bookgender.algorithms import normalize_algo_names

## Loading the metrics

In [9]:
import bookgender.fair_metrics.singh_joachims as sj
import bookgender.fair_metrics.sapiezynski as sp
import bookgender.fair_metrics.biega as bg
import bookgender.fair_metrics.diaz as dz
import bookgender.fair_metrics.zehlike as zh

In [6]:
import bookgender.metric_utils.groupinfo as gi
import bookgender.metric_utils.position as pos
#import bookgender.metrics.load_goodreads as lg

## Loading GoodReads Data

In [7]:
import bookgender.metric_utils.load_goodreads as lg

### Loading Relevance, Recommendations & Test Set

#### Relevance

In [8]:
# Load the relevance data for the 'eval5' run through the load_goodreads helper.
# The recorded output has one row per (user, Algorithm) with a numeric score.
relev = lg.load_relevance('eval5')
relev

Unnamed: 0,user,Algorithm,Set,gender,score
0,42,user-user,GR-I,unknown,26299.126282
1,104,user-user,GR-I,unknown,2050.588836
2,201,user-user,GR-I,unknown,18403.946865
3,400,user-user,GR-I,unknown,14964.566917
4,608,user-user,GR-I,unknown,20167.016383
...,...,...,...,...,...
139995,875040,bpr,GR-I,unknown,19068.335938
139996,875144,bpr,GR-I,unknown,1255.072021
139997,875483,bpr,GR-I,unknown,-48783.281250
139998,875881,bpr,GR-I,unknown,25581.185547


#### Recommendations

using load_goodreads module:

In [9]:
# Load and process the recommendation lists for the 'eval5' run.
recs = lg.process_recs('eval5')

In [18]:
# Display the processed recommendations (item, score, user, rank, Algorithm, gender, ...).
recs

Unnamed: 0,item,score,user,rank,Algorithm,Set,gender,rating,male,female,unknown
0,108699671,11.723493,42,1,user-user,GR-I,male,0.0,1,0,0
1,103134984,11.719078,42,2,user-user,GR-I,female,0.0,0,1,0
2,103493375,11.712346,42,3,user-user,GR-I,female,0.0,0,1,0
3,101115553,11.711375,42,4,user-user,GR-I,female,1.0,0,1,0
4,101259216,11.710964,42,5,user-user,GR-I,female,0.0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...
19945071,112207201,6.767267,876024,996,bpr,GR-I,male,0.0,1,0,0
19945072,308910384,6.767073,876024,997,bpr,GR-I,female,0.0,0,1,0
19945073,418585957,6.766818,876024,998,bpr,GR-I,unknown,0.0,0,0,1
19945074,110642663,6.763775,876024,999,bpr,GR-I,unknown,0.0,0,0,1


## Dataset statistics

Unique item-gender count

In [19]:
# Number of distinct recommended items under each gender label.
items_by_gender = recs.groupby('gender')['item']
items_by_gender.nunique()

gender
female      97058
male       135839
unknown    342646
Name: item, dtype: int64

Number of recommendations per user by each algorithm

In [20]:
# How many ranked entries each (user, Algorithm) pair received.
ranks_by_user_algo = recs.groupby(['user', 'Algorithm'])['rank']
ranks_by_user_algo.count()

user    Algorithm
42      bpr          1000
        item-item    1000
        user-user    1000
        wrls         1000
104     bpr          1000
                     ... 
875881  wrls         1000
876024  bpr          1000
        item-item    1000
        user-user    1000
        wrls         1000
Name: rank, Length: 19997, dtype: int64

Number of total recommended items

In [21]:
# Count of distinct items appearing anywhere in the recommendations.
recommended_items = recs['item']
recommended_items.nunique()

575543

### Test Set

In [22]:
# Load and process the test-set ratings for the 'eval5' run.
# The recorded output carries the gender label plus one-hot male/female/unknown columns.
test_rates = lg.process_tests('eval5')
test_rates

Unnamed: 0,user,item,first_time,last_time,last_rating,nactions,rating,gender,male,female,unknown
0,42,104796095,1476577,1476577,3.0,1,1.0,unknown,0,0,1
1,42,101115553,1402798,1402798,4.0,1,1.0,female,0,1,0
2,42,442940015,1456456,1456456,,1,1.0,unknown,0,0,1
3,42,104286693,1406080,1406080,5.0,1,1.0,male,1,0,0
4,42,100094831,1420400,1420400,,1,1.0,male,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...
24995,876024,104809846,1474701,1474701,5.0,1,1.0,female,0,1,0
24996,876024,109934767,1472472,1472472,4.0,1,1.0,female,0,1,0
24997,876024,106411649,1338805,1338805,1.0,1,1.0,male,1,0,0
24998,876024,107335925,1362127,1362127,,1,1.0,male,1,0,0


In [23]:
# Number of test-set items held for each user.
test_items_by_user = test_rates.groupby(['user'])['item']
test_items_by_user.count()

user
42        5
104       5
201       5
400       5
608       5
         ..
875040    5
875144    5
875483    5
875881    5
876024    5
Name: item, Length: 5000, dtype: int64

### Group information in dataset

In [24]:
# Fetch the group totals from the load_goodreads helper; the recorded output
# lists one total each for female, male and unknown.
G = lg.G()
G

female     5    177359
Name: female, dtype: int64
male         5    282857
Name: male, dtype: int64
unknown                 5    1049445
dtype: int64
dtype: object

#### Target population

In [25]:
# Wrap the group totals in a GroupInfo object.
# NOTE(review): the string arguments look like (group labels..., attribute column name);
# confirm the expected order against GroupInfo's signature.
group = gi.GroupInfo(G, 'male', 'female', 'unknown', 'gender')
# Show the binomial target proportion. The recorded value 0.385382 matches
# female / (male + female) computed from the totals displayed for G.
group.tgt_p_binomial

5    0.385382
dtype: float64

## Implementing the metrics

In [26]:
# Build the metric-analysis driver from the recommendations, the test ratings,
# the group information and the relevance frame loaded earlier in the notebook.
from bookgender.fair_metrics.Run_metrics_RecSys import metric_analysis as ma
MA = ma(recs, test_rates, group, original_relev=relev)

### With default metric settings

In [27]:
# Run every metric with its default configuration; the recorded output has one
# row per (Algorithm, Metric) and reports ranked_size 100.
default_results = MA.run_default_setting()
default_results

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






Unnamed: 0,Algorithm,Metric,value,ranked_size
0,bpr,AWRF_equal,0.393765,100
1,item-item,AWRF_equal,0.391799,100
2,user-user,AWRF_equal,0.378053,100
3,wrls,AWRF_equal,0.366965,100
4,bpr,FAIR,0.352792,100
5,item-item,FAIR,0.426208,100
6,user-user,FAIR,0.379223,100
7,wrls,FAIR,0.45625,100
8,bpr,AWRF,0.385593,100
9,item-item,AWRF,0.4234,100


In [28]:
# Persist the default-setting metrics as Parquet.
# NOTE(review): presumably requires results/SIGIR22/ to exist already; confirm.
default_results.to_parquet('results/SIGIR22/rec_default_up.parquet')

# Sensitivity Analysis

## Ranked list size changing

In [29]:
#old_rankedlist_results=pd.read_parquet('results/rankedlist_sens.parquet')
#old_rankedlist_results

In [30]:
# Sensitivity to ranked-list size: rerun the default-setting metrics once per
# cutoff and stack the per-run frames into `rankedlist_results`.
#
# The frames are gathered in a list and concatenated once. DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0, and calling it inside a
# loop re-copies all previously accumulated rows on every iteration.
# Each run's own row labels are kept (no ignore_index), as before.
rankedlist_frames = []
for listsize in [10, 20, 30, 40, 50, 100, 500, 1000]:
    results = MA.run_default_setting(listsize)
    rankedlist_frames.append(results)
rankedlist_results = pd.concat(rankedlist_frames)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))






In [31]:
# Display the stacked results; row labels restart for every ranked_size.
rankedlist_results

Unnamed: 0,Algorithm,Metric,value,ranked_size
0,bpr,AWRF_equal,0.392382,10
1,item-item,AWRF_equal,0.391042,10
2,user-user,AWRF_equal,0.377818,10
3,wrls,AWRF_equal,0.366986,10
4,bpr,FAIR,0.491807,10
...,...,...,...,...
35,wrls,logEUR,-0.348566,1000
36,bpr,logRUR,-0.357889,1000
37,item-item,logRUR,-0.444947,1000
38,user-user,logRUR,-0.443223,1000


In [32]:
# Persist the ranked-list-size sensitivity results as Parquet.
rankedlist_results.to_parquet('results/SIGIR22/rec_rankedlist_sens_size_up.parquet')

In [2]:
#AWRF_FAIR = old_rankedlist_results.loc[old_rankedlist_results['Metric'].isin(['AWRF', 'FAIR'])]
#final_rank_metric = pd.concat([AWRF_FAIR, rankedlist_results], ignore_index=True)
#final_rank_metric

In [3]:
#final_rank_metric.to_parquet('results/rankedlist_sens_IAA.parquet')

## Position Weight changing

In [None]:
#old_position_result=pd.read_parquet('results/position_sensitivity.parquet')

In [33]:
# Sensitivity to the position-weighting model: run the analysis once for each
# weighting function from the position module and stack the per-run frames.
#
# The frames are gathered in a list and concatenated once. DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0, and calling it inside a
# loop re-copies all previously accumulated rows on every iteration.
# Each run's own row labels are kept here; a later cell resets the index.
position_frames = []
for position in [pos.geometric, pos.cascade, pos.logarithmic, pos.rbp]:
    results = MA.run_sensitivity_analysis(position_weight=position)
    position_frames.append(results)
position_results = pd.concat(position_frames)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




In [34]:
# Display the stacked results; row labels restart for every position weight.
position_results

Unnamed: 0,Algorithm,Metric,value,NaN,pos_weight,ranked_size
0,bpr,AWRF,0.385593,,geometric,100
1,item-item,AWRF,0.423400,,geometric,100
2,user-user,AWRF,0.373271,,geometric,100
3,wrls,AWRF,0.357054,,geometric,100
4,bpr,IAA,0.270639,,geometric,100
...,...,...,...,...,...,...
27,wrls,logEUR,-0.389596,,rbp,100
28,bpr,logRUR,-0.229228,,rbp,100
29,item-item,logRUR,-0.396353,,rbp,100
30,user-user,logRUR,-1.038855,,rbp,100


In [35]:
# Swap the repeated per-run row labels for a fresh 0..n-1 index, discarding
# the old labels instead of keeping them as a column, then show the frame.
position_results.reset_index(drop=True, inplace=True)
position_results

Unnamed: 0,Algorithm,Metric,value,NaN,pos_weight,ranked_size
0,bpr,AWRF,0.385593,,geometric,100
1,item-item,AWRF,0.423400,,geometric,100
2,user-user,AWRF,0.373271,,geometric,100
3,wrls,AWRF,0.357054,,geometric,100
4,bpr,IAA,0.270639,,geometric,100
...,...,...,...,...,...,...
123,wrls,logEUR,-0.389596,,rbp,100
124,bpr,logRUR,-0.229228,,rbp,100
125,item-item,logRUR,-0.396353,,rbp,100
126,user-user,logRUR,-1.038855,,rbp,100


In [36]:
# Keep only the columns that are written out for the position-weight analysis.
kept_columns = ['Algorithm', 'Metric', 'value', 'pos_weight']
position_result = position_results.loc[:, kept_columns]

In [37]:
#AWRF_FAIR = old_position_result.loc[old_position_result['Metric'].isin(['AWRF', 'FAIR'])]
#final_pos_metric = pd.concat([AWRF_FAIR, position_result], ignore_index=True)
#final_pos_metric

In [38]:
# Persist the four-column subset (`position_result`, not the full `position_results`).
position_result.to_parquet('results/SIGIR22/rec_position_sensitivity_up.parquet')

## Parameter changes (Stopping Probability and Patience Parameter)

#### Cascade and RBP use the patience parameter
#### Cascade and Geometric use the stopping probability

In [39]:
#old_stop_results=pd.read_parquet('results/sensitivity_stop.parquet')
#old_patience_results=pd.read_parquet('results/sensitivity_patience.parquet')

In [40]:
# Sensitivity to the stopping probability: for the geometric and cascade
# position weights, rerun the analysis with stop = 0.1, 0.3, 0.5, 0.7, 0.9
# and stack the per-run frames into `stop_results`.
#
# The frames are gathered in a list and concatenated once. DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0, and calling it inside a
# loop re-copies all previously accumulated rows on every iteration.
# Each run's own row labels are kept (no ignore_index), as before.
stop_frames = []
for position in [pos.geometric, pos.cascade]:
    for stop in range(1, 10, 2):
        # range(1, 10, 2) / 10 yields the odd tenths 0.1 .. 0.9
        results = MA.run_sensitivity_analysis(position_weight=position, arg='stop', arg_val=stop/10)
        stop_frames.append(results)
stop_results = pd.concat(stop_frames)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




In [41]:
# Display the stacked stopping-probability results.
stop_results

Unnamed: 0,Algorithm,Metric,value,stop,pos_weight,ranked_size
0,bpr,AWRF,0.298536,0.1,geometric,100
1,item-item,AWRF,0.277957,0.1,geometric,100
2,user-user,AWRF,0.263613,0.1,geometric,100
3,wrls,AWRF,0.266748,0.1,geometric,100
4,bpr,IAA,0.241686,0.1,geometric,100
...,...,...,...,...,...,...
27,wrls,logEUR,-0.287855,0.9,cascade,100
28,bpr,logRUR,-0.641245,0.9,cascade,100
29,item-item,logRUR,1.510909,0.9,cascade,100
30,user-user,logRUR,-1.632212,0.9,cascade,100


In [42]:
# Persist the stopping-probability sensitivity results as Parquet.
stop_results.to_parquet('results/SIGIR22/rec_sensitivity_stop_up.parquet')

AWRF_FAIR = old_stop_results.loc[old_stop_results['Metric'].isin(['AWRF', 'FAIR'])]
final_stop_metric = pd.concat([AWRF_FAIR, stop_results], ignore_index=True)
final_stop_metric

In [44]:
# Sensitivity to the patience parameter: for the RBP and cascade position
# weights, rerun the analysis with patience = 0.1, 0.2, ..., 0.9 and stack
# the per-run frames into `patience_results`.
#
# The frames are gathered in a list and concatenated once. DataFrame.append was
# deprecated in pandas 1.4 and removed in pandas 2.0, and calling it inside a
# loop re-copies all previously accumulated rows on every iteration.
# Each run's own row labels are kept (no ignore_index), as before.
patience_frames = []
for position in [pos.rbp, pos.cascade]:
    for patience in range(1, 10, 1):
        # range(1, 10) / 10 yields every tenth from 0.1 to 0.9
        results = MA.run_sensitivity_analysis(position_weight=position, arg='patience', arg_val=patience/10)
        patience_frames.append(results)
patience_results = pd.concat(patience_frames)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=19997.0), HTML(value='')))




In [45]:
# Display the stacked patience-parameter results.
patience_results

Unnamed: 0,Algorithm,Metric,value,patience,pos_weight,ranked_size
0,bpr,AWRF,0.468693,0.1,rbp,100
1,item-item,AWRF,0.509987,0.1,rbp,100
2,user-user,AWRF,0.469742,0.1,rbp,100
3,wrls,AWRF,0.459422,0.1,rbp,100
4,bpr,IAA,0.277618,0.1,rbp,100
...,...,...,...,...,...,...
27,wrls,logEUR,-0.338693,0.9,cascade,100
28,bpr,logRUR,-0.302799,0.9,cascade,100
29,item-item,logRUR,-0.286585,0.9,cascade,100
30,user-user,logRUR,-0.458082,0.9,cascade,100


In [46]:
# Persist the patience-parameter sensitivity results as Parquet.
patience_results.to_parquet('results/SIGIR22/rec_sensitivity_patience_up.parquet')

AWRF_FAIR = old_patience_results.loc[old_patience_results['Metric'].isin(['AWRF', 'FAIR'])]
final_pat_metric = pd.concat([AWRF_FAIR, patience_results], ignore_index=True)
final_pat_metric

In [None]:
#final_stop_metric.to_parquet('results/sensitivity_stop_IAA.parquet')
#final_pat_metric.to_parquet('results/sensitivity_patience_IAA.parquet')