## Apply FA*IR post-processing re-ranking (inspired by Zehlike et al.) to improve group fairness in top-K recommendations

In [86]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# === Step 1: Load training interactions to calculate item frequency ===
train_inter_df = pd.read_csv('../datasets/split_datasets/ml-1m/ml-1m.train.inter', sep='\t')

# Count item frequency in training data
item_freq = train_inter_df['item_id:token'].value_counts()
item_freq.index = item_freq.index.astype(str)
print(f"Total unique items in training: {len(item_freq)}")

Total unique items in training: 3683


In [87]:
# Sort items by frequency (high to low)
item_freq_sorted = item_freq.sort_values(ascending=False)

# Define long-tail cutoff (e.g., bottom 20%)
tail_ratio = 0.2
tail_cutoff_index = int((1 - tail_ratio) * len(item_freq_sorted))
tail_item_ids = set(item_freq_sorted.index[tail_cutoff_index:])

print(f"Defined long-tail as bottom {tail_ratio*100:.0f}% of items")
print(f"Tail threshold frequency (‚â§): {item_freq_sorted.iloc[tail_cutoff_index]}")
print(f"Number of long-tail items: {len(tail_item_ids)}")

Defined long-tail as bottom 20% of items
Tail threshold frequency (‚â§): 19
Number of long-tail items: 737


In [88]:
# Load top-50 recommendation results ===
topk_df = pd.read_csv('../rank_results/ml-1m/ml_top50.csv')  # Format: user_id, gender, topk_itemsÔºå topk_scores
user_df = pd.read_csv('../datasets/atomic_datasets/ml-1m/ml-1m.user', sep='\t')
user2gender = dict(zip(user_df['user_id:token'], user_df['gender:token']))

print(f"Loaded Top-50 recommendations for {len(topk_df)} users")

Loaded Top-50 recommendations for 6040 users


In [89]:
K = 10  # Only evaluate the top-10
total_tail_before = 0

for items in topk_df['topk_items']:
    item_list = str(items).split(',')[:K]  # only use top-10
    tail_count = sum(i in tail_item_ids for i in item_list)
    total_tail_before += tail_count

num_users = len(topk_df)
avg_tail_items = total_tail_before / num_users
tail_ratio = avg_tail_items / K

print("\n=== Long-Tail Ratio in Top-10 ===")
print(f"Average long-tail items per user: {avg_tail_items:.4f}")
print(f"Long-tail item ratio in Top-10: {tail_ratio:.2%}")


=== Long-Tail Ratio in Top-10 ===
Average long-tail items per user: 0.0025
Long-tail item ratio in Top-10: 0.02%


In [90]:
# === Step 4: Apply FA*IR-like re-ranking to increase long-tail item exposure ===
desired_tail_ratio = 0.1  # e.g., at least 1 item in Top-10 are from the tail
reranked_result = []
modified_users = 0
total_tail_after = 0

def is_tail(item_id):
    return item_id in tail_item_ids

for _, row in topk_df.iterrows():
    user_id = row['user_id']
    top_items = str(row['topk_items']).split(',')
    top10 = top_items[:K]
    
    # Case 1: already contains tail item ‚Äî keep as-is
    if any(is_tail(i) for i in top10):
        reranked = top10
    else:
        # Case 2: find first tail item in top-30
        tail_candidate = next((i for i in top_items if is_tail(i) and i not in top10), None)
        if tail_candidate:
            reranked = top10[:-1] + [tail_candidate]  # Replace 10th
            modified_users += 1
        else:
            reranked = top10  # no tail found in top-30, leave unchanged        

    tail_count = sum(1 for i in reranked if is_tail(i))
    total_tail_after += tail_count

    total_tail_after += sum(i in tail_item_ids for i in reranked)

    reranked_result.append({
        'user_id': user_id,
        'gender': user2gender.get(user_id, 'UNK'),
        'topk_items': ','.join(reranked)
    })


In [91]:
fair_top10_df = pd.DataFrame(reranked_result)
display(fair_top10_df.head())
avg_tail = total_tail_after / len(fair_top10_df)

print("\n=== Minimal FA*IR Re-ranking Summary ===")
print(f"Total users processed: {len(fair_top10_df)}")
print(f"Users modified (tail added): {modified_users}")
print(f"Average long-tail items in Top-10: {avg_tail:.4f}")
print(f"Long-tail ratio in Top-10: {avg_tail / K:.2%}")

Unnamed: 0,user_id,gender,topk_items
0,1,F,59534364919158820813114318594
1,2,M,20281183590341852716103183496081393
2,3,M,121026011961198127048035615802716110
3,4,M,119626012101198858121412404805412028
4,5,M,299726922908285823332599295922323952318



=== Minimal FA*IR Re-ranking Summary ===
Total users processed: 6040
Users modified (tail added): 20
Average long-tail items in Top-10: 0.0116
Long-tail ratio in Top-10: 0.12%


In [92]:
# === Step 5: Save and report results ===
fair_top10_df.to_csv('../rank_results/ml-1m/ml_top10_fair.csv', index=False)
print("Saved re-ranked results to: ../rank_results/ml-1m/ml_top10_fair.csv")

Saved re-ranked results to: ../rank_results/ml-1m/ml_top10_fair.csv


# Apply post-processing calibration (inspired by Steck)to improve fairness in recommendation exposure

In [93]:
# === Step 1: Load user and interaction data ===
# User file should contain gender information
user_df = pd.read_csv('../datasets/atomic_datasets/ml-1m/ml-1m.user', sep='\t')  # contains 'user_id:token', 'gender:token'
inter_df = pd.read_csv('../datasets/split_datasets/ml-1m/ml-1m.train.inter', sep='\t')  # contains 'user_id:token', 'item_id:token'

print(f"Loaded {len(inter_df)} training interactions.")
print(f"Unique users: {inter_df['user_id:token'].nunique()}, Unique items: {inter_df['item_id:token'].nunique()}")

Loaded 805443 training interactions.
Unique users: 6040, Unique items: 3683


In [94]:
# === Step 2: Merge user gender into interactions ===
inter_df = inter_df[inter_df['label:float'] == 1.0]
inter_df = inter_df.merge(user_df[['user_id:token', 'gender:token']], on='user_id:token', how='left')
display(inter_df.head())
print("\n‚úÖ Gender successfully merged. Gender distribution:")
print(inter_df['gender:token'].value_counts())

Unnamed: 0,user_id:token,item_id:token,timestamp:float,label:float,gender:token
0,1791,3949,974700700.0,1.0,M
1,1791,1084,974702340.0,1.0,M
2,1791,3897,974701200.0,1.0,M
3,1791,3741,974702400.0,1.0,M
4,1791,39,974701400.0,1.0,M



‚úÖ Gender successfully merged. Gender distribution:
gender:token
M    505428
F    168129
Name: count, dtype: int64


In [95]:
# Step 4: Count number of positive interactions per item for each gender
item_gender_count = inter_df.groupby(['item_id:token', 'gender:token']).size().unstack(fill_value=0)
display(item_gender_count.head())

gender:token,F,M
item_id:token,Unnamed: 1_level_1,Unnamed: 2_level_1
1,457,1135
2,115,307
3,82,182
4,49,39
5,74,101


In [96]:
# Step 5: Normalize female/male interactions by global group size
global_female_total = inter_df[inter_df['gender:token'] == 'F'].shape[0]
global_male_total = inter_df[inter_df['gender:token'] == 'M'].shape[0]

# Normalize to relative exposure per gender group
item_gender_count['F_norm'] = item_gender_count['F'] / global_female_total
item_gender_count['M_norm'] = item_gender_count['M'] / global_male_total

# Compute female_bias_ratio: how much female > male normalized preference
item_gender_count['female_bias_ratio'] = item_gender_count['F_norm'] / (item_gender_count['F_norm'] + item_gender_count['M_norm'])
display(item_gender_count.head())

gender:token,F,M,F_norm,M_norm,female_bias_ratio
item_id:token,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1,457,1135,0.002718,0.002246,0.547598
2,115,307,0.000684,0.000607,0.529655
3,82,182,0.000488,0.00036,0.57527
4,49,39,0.000291,7.7e-05,0.790664
5,74,101,0.00044,0.0002,0.687749


In [97]:
# Classification logic: now based on relative bias after normalization
def classify_bias(ratio):
    if ratio >= 0.68:
        return 'female'
    elif ratio <= 0.33:
        return 'male'
    else:
        return 'neutral'

item_gender_count['group'] = item_gender_count['female_bias_ratio'].apply(classify_bias)
display(item_gender_count.head())

gender:token,F,M,F_norm,M_norm,female_bias_ratio,group
item_id:token,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1,457,1135,0.002718,0.002246,0.547598,neutral
2,115,307,0.000684,0.000607,0.529655,neutral
3,82,182,0.000488,0.00036,0.57527,neutral
4,49,39,0.000291,7.7e-05,0.790664,female
5,74,101,0.00044,0.0002,0.687749,female


In [98]:
import pandas as pd
from collections import defaultdict
from collections import Counter

# === Step 1: Load data ===
topk_df = pd.read_csv('../rank_results/ml-1m/ml_top50.csv')  # user_id, topk_itemsÔºåtopk_scores
user_df = pd.read_csv('../datasets/atomic_datasets/ml-1m/ml-1m.user', sep='\t')
user2gender = dict(zip(user_df['user_id:token'], user_df['gender:token']))

In [99]:
# === Step 2: Prepare item2group mapping ===
item2group = item_gender_count['group'].to_dict()
group_counts = Counter(item2group.values())
total_items = sum(group_counts.values())
print("=== Item Gender Group Distribution ===")
for group, count in group_counts.items():
    ratio = count / total_items
    print(f"{group.capitalize():<8} ‚Üí {count:>5} items ({ratio:.3%})")

=== Item Gender Group Distribution ===
Neutral  ‚Üí  2313 items (64.197%)
Female   ‚Üí   558 items (15.487%)
Male     ‚Üí   732 items (20.316%)


In [100]:
# === Step 3: Set gender-specific exposure targets ===
female_target = {'female': 0.3, 'male': 0.1, 'neutral': 0.6}
male_target = {'female': 0.1, 'male': 0.4, 'neutral': 0.5}
default_target = {'female': 0.16, 'male': 0.20, 'neutral': 0.64}

In [101]:
K = 10
calibrated_result = []
modified_users = 0

# === Step 4: Re-rank with gender-based calibration ===
for _, row in topk_df.iterrows():
    user_id = row['user_id']
    top_items = str(row['topk_items']).split(',')
    gender = user2gender.get(user_id, 'UNK')

    # Choose target based on user gender
    if gender == 'F':
        target_distribution = female_target
    elif gender == 'M':
        target_distribution = male_target
    else:
        target_distribution = default_target

    # Compute desired number of items per group
    group_target_count = {g: int(K * r) for g, r in target_distribution.items()}

    selected_items = []
    group_count = defaultdict(int)

    # Traverse top-50 in original order
    for item in top_items:
        g = item2group.get(int(item), 'neutral')
        if group_count[g] < group_target_count[g]:
            selected_items.append(item)
            group_count[g] += 1
        if len(selected_items) == K:
            break

    # Fill remaining with original order if needed
    if len(selected_items) < K:
        for item in top_items:
            if item not in selected_items:
                selected_items.append(item)
            if len(selected_items) == K:
                break

    if selected_items != top_items[:K]:
        modified_users += 1

    calibrated_result.append({
        'user_id': user_id,
        'gender': gender,
        'topk_items': ','.join(selected_items)
    })

   

In [102]:
# === Step 5: Report summary ===
fair_top10_df = pd.DataFrame(calibrated_result)
display(fair_top10_df.head())
print("\n‚úÖ Gender-aware Steck-style calibration complete")
print(f"Total users: {len(fair_top10_df)}")
print(f"Users re-ranked (modified): {modified_users}") 

Unnamed: 0,user_id,gender,topk_items
0,1,F,595343649191588485319142081
1,2,M,202811835903418527123336541610318349
2,3,M,1210260119611981270122248035615802716
3,4,M,1196260121011988581222370312141240480
4,5,M,299726922908285823331041259929592232395



‚úÖ Gender-aware Steck-style calibration complete
Total users: 6040
Users re-ranked (modified): 4096


In [103]:
fair_top10_df.to_csv('../rank_results/ml-1m/ml_top10_calibrated.csv', index=False)
print("Saved re-ranked results to: ../rank_results/ml-1m/ml_all_user_top10_calibrated.csv")

Saved re-ranked results to: ../rank_results/ml-1m/ml_all_user_top10_calibrated.csv


# Apply Biega et al. (2018) Equity of Attention post-processing for fair exposure

In [116]:
import pandas as pd
import numpy as np
from collections import Counter, defaultdict

# === Step 1: Load data ===
topk_df = pd.read_csv('../rank_results/ml-1m/ml_top50.csv')  # user_id,gender, topk_itemsÔºåtopk_scores
# Convert string-formatted lists into actual lists
topk_df['topk_items'] = topk_df['topk_items'].apply(eval)
topk_df['topk_scores'] = topk_df['topk_scores'].apply(eval)

In [117]:
# === Step 2: Build historical exposure count ===
# Flatten all recommended items and count their occurrences
all_items = topk_df['topk_items'].explode().tolist()
historical_exposure = Counter(all_items)  # {item_id: frequency in top-Ks}
print(dict(list(historical_exposure.items())[:10]))

{595: 477, 34: 1813, 364: 369, 919: 1266, 1: 2223, 588: 589, 2081: 460, 3114: 1302, 318: 2341, 594: 261}


In [118]:
# === Step 3: Build relevance lookup table ===
# Create a nested dict: relevance_scores[user_id][item_id] = score
relevance_scores = {}
for _, row in topk_df.iterrows():
    user_id = row['user_id']
    items = row['topk_items']
    scores = row['topk_scores']
    relevance_scores[user_id] = {item: score for item, score in zip(items, scores)}

In [121]:
def re_rank_equity_of_attention(user_id, candidates, relevance_scores, historical_exposure, topk=10, lambda_tradeoff=0.5):
    """
    Re-rank top-K items for a user by balancing relevance and fairness (attention equity).

    Parameters:
        user_id (int): The ID of the user.
        candidates (list): The original top-K recommended items.
        relevance_scores (dict of dict): User-item relevance scores.
        historical_exposure (dict): Item exposure frequency across all users.
        K (int): Number of items to re-rank.
        lambda_tradeoff (float): Trade-off between relevance and fairness.

    Returns:
        list: Re-ranked list of K items.
    """
    selected = []
    candidate_pool = set(candidates)

    for _ in range(topk):
        best_item = None
        best_score = -float('inf')

        for item in candidate_pool:
            relevance = relevance_scores[user_id].get(item, 0)
            exposure = historical_exposure.get(item, 0) + 1  # Add 1 to avoid division by zero
            score = relevance - lambda_tradeoff * np.log(exposure)  # Fairness penalty via log

            if score > best_score:
                best_score = score
                best_item = item

        selected.append(best_item)
        candidate_pool.remove(best_item)
        historical_exposure[best_item] += 1  # Update exposure after selection

    return selected

In [122]:
# === Step 6: Apply to all users ===
equity_result = []

for idx, row in topk_df.iterrows():
    user_id = row['user_id']
    gender = user2gender.get(user_id, 'UNK')  # Ëé∑ÂèñÊÄßÂà´ÔºåËã•Êó†ÂàôÊ†áËÆ∞‰∏∫ UNK
    top_items = row['topk_items']
    re_ranked = re_rank_equity_of_attention(user_id, top_items, relevance_scores, historical_exposure, topk=10, lambda_tradeoff=0.3)

    equity_result.append({
        'user_id': user_id,
        'gender': gender,
        'topk_items': ','.join(map(str, re_ranked))  # ËΩ¨‰∏∫ÈÄóÂè∑ÂàÜÈöîÂ≠óÁ¨¶‰∏≤
    })

In [123]:
# === Step 7: Save result ===
equity_top10_df = pd.DataFrame(equity_result)
display(equity_top10_df.head())

Unnamed: 0,user_id,gender,topk_items
0,1,F,59536453159434128220815881035919
1,2,M,1616471511408350118359034183492501
2,3,M,20001210260119659210361291122021741198
3,4,M,119626012101198121485812405419241200
4,5,M,23372332318233329081916260026925621885


In [124]:
equity_top10_df.to_csv('../rank_results/ml-1m/ml_top10_equity.csv', index=False)
print("‚úÖ Fair top-10 re-ranking complete and saved to 'ml_top10_equity.csv'")

‚úÖ Fair top-10 re-ranking complete and saved to 'ml_top10_equity.csv'


In [125]:
# === Step 8: Compare with original top-10 to count modified users ===
topk_df['original_top10'] = topk_df['topk_items'].apply(lambda x: x[:10])
equity_top10_df['original_top10'] = topk_df['original_top10']

equity_top10_df['modified'] = equity_top10_df.apply(lambda row: row['topk_items']!= row['original_top10'], axis=1)
display(equity_top10_df.head())
num_modified_users = equity_top10_df['modified'].sum()

print(f"üîÑ Users re-ranked (changed top-10): {num_modified_users} out of {len(equity_top10_df)} users")

Unnamed: 0,user_id,gender,topk_items,original_top10,modified
0,1,F,59536453159434128220815881035919,"(595, 34, 364, 919, 1, 588, 2081, 3114, 318, 594)",True
1,2,M,1616471511408350118359034183492501,"(2028, 1183, 590, 3418, 527, 1610, 318, 349, 6...",True
2,3,M,20001210260119659210361291122021741198,"(1210, 260, 1196, 1198, 1270, 480, 356, 1580, ...",True
3,4,M,119626012101198121485812405419241200,"(1196, 260, 1210, 1198, 858, 1214, 1240, 480, ...",True
4,5,M,23372332318233329081916260026925621885,"(2997, 2692, 2908, 2858, 2333, 2599, 2959, 223...",True


üîÑ Users re-ranked (changed top-10): 6040 out of 6040 users
