In [2]:
# imports
import json
import numpy as np

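### Datasets and the thresholds for each prediction file
Each list gives one threshold per prediction file (indices 0–4); copy one of the lists into `thresholds` in the cell below. <br>
tafeng: [5,5,5,5,5], [7,7,7,7,7], [11,11,11,11,11] <br>
dunnhumby: [7,7,7,7,7], [11,11,10,11,11], [19,19,18,19,19] <br>
instacart: [11,11,11,11,11], [16,16,16,17,17], [26,26,27,28,26]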

In [16]:
# Fairness evaluation of next-basket predictions.
# For every prediction file the test users are split into a "minority" group
# (fewer purchased items in their history than the file's threshold) and a
# "majority" group (everyone else). Recall/precision/F-score at 10 and 20 are
# averaged per group, and the group gap is reported as a difference (SPD) and
# as a ratio (DI), averaged over all prediction files.

# define dataset and which threshold to use for each prediction file
dataset = "dunnhumby"
print("dataset: ", dataset)
thresholds = [7,7,7,7,7]
# one threshold per prediction file, so the number of files follows from the list
num_prediction_files = len(thresholds)


def _f_score(precision, recall):
    """Return the harmonic mean of precision and recall.

    Returns 0.0 when both inputs are zero instead of raising ZeroDivisionError.
    """
    total = precision + recall
    if total == 0:
        return 0.0
    return 2*(precision*recall)/total


# open future and history files
with open("jsondata/" + dataset + "_future.json") as f:
    future = json.load(f)
with open("jsondata/" + dataset + "_history.json") as f:
    history = json.load(f)

# running totals over all prediction files: index 0 is @10, index 1 is @20
SPD = [0, 0]
DI = [0, 0]
# determine SPD and DI for each prediction file
for i in range(num_prediction_files):
    print("prediction file number: ", i)

    # context manager so the file handle is closed after every iteration
    with open("methods/pred/" + dataset + "_attention_pred" + str(i) + ".json") as f:
        predictions = json.load(f)

    # collect the test users who purchased fewer than 'threshold' items
    minority_users = set()
    for key in history:
        # only test users (those with a prediction) are considered
        if key not in predictions:
            continue
        # the first and last entries of a history are skipped; the original
        # note describes them as the [-1] baskets -- TODO confirm the file format
        items = sum(len(basket) for basket in history[key][1:-1])
        if items < thresholds[i]:
            minority_users.add(key)

    # some constants needed multiple times
    test_size = len(predictions)
    minority_size = len(minority_users)
    # every predicted user that is not in the minority set is accumulated into
    # the majority sums below, so the majority size is derived from test_size
    majority_size = test_size - minority_size

    # print percentage as sanity check
    percent = minority_size / test_size
    print("minority user percentage for predictions: ", percent)

    # per-group running sums: [recall@10, recall@20, precision@10, precision@20]
    minor_sums = [0, 0, 0, 0]
    major_sums = [0, 0, 0, 0]
    # loop through all predictions
    for key in predictions:
        predicted_basket = predictions[key]
        # presumably index 1 of a future entry is the ground-truth basket -- TODO confirm
        ground_truth = future[key][1]
        # number of top-k predicted items that occur in the ground truth
        hits_10 = np.count_nonzero(np.isin(predicted_basket[:10], ground_truth))
        hits_20 = np.count_nonzero(np.isin(predicted_basket[:20], ground_truth))

        group_sums = minor_sums if key in minority_users else major_sums
        group_sums[0] += hits_10 / len(ground_truth)
        group_sums[1] += hits_20 / len(ground_truth)
        group_sums[2] += hits_10 / 10
        group_sums[3] += hits_20 / 20

    # take average for each metric over all users of the group
    # NOTE: an empty group still raises ZeroDivisionError here, as before
    recall_10_minor, recall_20_minor, precision_10_minor, precision_20_minor = [
        total / minority_size for total in minor_sums
    ]
    recall_10_major, recall_20_major, precision_10_major, precision_20_major = [
        total / majority_size for total in major_sums
    ]

    # determine f-scores for each group @10 and 20
    f_score_10_minor = _f_score(precision_10_minor, recall_10_minor)
    f_score_20_minor = _f_score(precision_20_minor, recall_20_minor)
    f_score_10_major = _f_score(precision_10_major, recall_10_major)
    f_score_20_major = _f_score(precision_20_major, recall_20_major)

    # determine fairness metrics: difference (SPD) and ratio (DI) of group f-scores
    SPD[0] += f_score_10_minor - f_score_10_major
    SPD[1] += f_score_20_minor - f_score_20_major
    DI[0] += f_score_10_minor/f_score_10_major
    DI[1] += f_score_20_minor/f_score_20_major
    print("")

# print results
print("average scores over all prediction files:")
print("(SPD)f-score@10 difference: ", SPD[0]/num_prediction_files)
print("(SPD)f-score@20 difference: ", SPD[1]/num_prediction_files)
print("(DI) f-score@10: ", DI[0]/num_prediction_files)
print("(DI) f-score@20: ", DI[1]/num_prediction_files)

dataset:  dunnhumby
prediction file number:  0
minority user percentage for predictions:  0.050377274744784734

prediction file number:  1
minority user percentage for predictions:  0.05015534842432313

prediction file number:  2
minority user percentage for predictions:  0.05370616955170883

prediction file number:  3
minority user percentage for predictions:  0.050821127385707945

prediction file number:  4
minority user percentage for predictions:  0.0494895694629383

average scores over all prediction files:
(SPD)f-score@10 difference:  -0.03273933420128277
(SPD)f-score@20 difference:  -0.03544482990526627
(DI) f-score@10:  0.6089747651363522
(DI) f-score@20:  0.533635074989412
