## Apply FA*IR post-processing re-ranking (inspired by Zehlike et al.) to improve group fairness in top-K recommendations

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# === Step 1: Load training interactions to calculate item frequency ===
train_inter_df = pd.read_csv('../datasets/split_datasets/lastfm-nl/lastfm-nl.train.inter', sep='\t')

# Count item frequency in training data
item_freq = train_inter_df['artist_id:token'].value_counts()
item_freq.index = item_freq.index.astype(str)
print(f"Total unique items in training: {len(item_freq)}")

Total unique items in training: 32926


In [2]:
# Sort items by frequency (high to low)
item_freq_sorted = item_freq.sort_values(ascending=False)

# Define long-tail cutoff (e.g., bottom 20%)
tail_ratio = 0.2
tail_cutoff_index = int((1 - tail_ratio) * len(item_freq_sorted))
tail_item_ids = set(item_freq_sorted.index[tail_cutoff_index:])

print(f"Defined long-tail as bottom {tail_ratio*100:.0f}% of items")
print(f"Tail threshold frequency (≤): {item_freq_sorted.iloc[tail_cutoff_index]}")
print(f"Number of long-tail items: {len(tail_item_ids)}")

Defined long-tail as bottom 20% of items
Tail threshold frequency (≤): 1
Number of long-tail items: 6586


In [3]:
# Load top-50 recommendation results ===
topk_df = pd.read_csv('../rank_results/lastfm-nl/lastfm_top50.csv')  # Format: user_id, gender, topk_items， topk_scores
print(f"Loaded Top-50 recommendations for {len(topk_df)} users")

Loaded Top-50 recommendations for 8792 users


In [4]:
K = 10  # Only evaluate the top-10
total_tail_before = 0

for items in topk_df['topk_items']:
    item_list = str(items).split(',')[:K]  # only use top-10
    tail_count = sum(i in tail_item_ids for i in item_list)
    total_tail_before += tail_count

num_users = len(topk_df)
avg_tail_items = total_tail_before / num_users
tail_ratio = avg_tail_items / K

print("\n=== Long-Tail Ratio in Top-10 ===")
print(f"Average long-tail items per user: {avg_tail_items:.4f}")
print(f"Long-tail item ratio in Top-10: {tail_ratio:.2%}")


=== Long-Tail Ratio in Top-10 ===
Average long-tail items per user: 0.0656
Long-tail item ratio in Top-10: 0.66%


In [5]:
# === Step 4: Apply FA*IR-like re-ranking to increase long-tail item exposure ===
desired_tail_ratio = 0.1  # e.g., at least 1 item in Top-10 are from the tail
reranked_result = []
modified_users = 0
total_tail_after = 0

def is_tail(artist_id):
    return artist_id in tail_item_ids

for _, row in topk_df.iterrows():
    user_id = row['user_id']
    top_items = str(row['topk_items']).split(',')
    top10 = top_items[:K]
    
    # Case 1: already contains tail item — keep as-is
    if any(is_tail(i) for i in top10):
        reranked = top10
    else:
        # Case 2: find first tail item in top-30
        tail_candidate = next((i for i in top_items if is_tail(i) and i not in top10), None)
        if tail_candidate:
            reranked = top10[:-1] + [tail_candidate]  # Replace 10th
            modified_users += 1
        else:
            reranked = top10  # no tail found in top-30, leave unchanged        

    tail_count = sum(1 for i in reranked if is_tail(i))
    total_tail_after += tail_count

    total_tail_after += sum(i in tail_item_ids for i in reranked)

    reranked_result.append({
        'user_id': user_id,
        'gender': row['gender'],
        'topk_items': ','.join(reranked)
    })


In [6]:
fair_top10_df = pd.DataFrame(reranked_result)
display(fair_top10_df.head())
avg_tail = total_tail_after / len(fair_top10_df)

print("\n=== Minimal FA*IR Re-ranking Summary ===")
print(f"Total users processed: {len(fair_top10_df)}")
print(f"Users modified (tail added): {modified_users}")
print(f"Average long-tail items in Top-10: {avg_tail:.4f}")
print(f"Long-tail ratio in Top-10: {avg_tail / K:.2%}")

Unnamed: 0,user_id,gender,topk_items
0,0004de6c3c32daa599bd03a37ce2356d5e7cc23f,M,"31745282-b1ea-4d62-939f-226b14d68e7c,7fa7fc04-..."
1,0007e26aafcfc0b6dcb87d7041583fbb7cced88a,F,"cc197bad-dc9c-440d-a5b5-d52ba2e14234,b10bbbfc-..."
2,0009f70b52e48eca23a446d8a6f8bd2663691c54,F,"14b22b4b-06d5-4b82-8284-29d29b58945f,a505bb48-..."
3,001078f2f557a1afd9b9618144fc0b442481fd13,M,"cc197bad-dc9c-440d-a5b5-d52ba2e14234,b10bbbfc-..."
4,00292157f9c4e4b11464e886f27fff42f1ff442d,M,"9a709693-b4f8-4da9-8cc1-038c911a61be,14b22b4b-..."



=== Minimal FA*IR Re-ranking Summary ===
Total users processed: 8792
Users modified (tail added): 147
Average long-tail items in Top-10: 0.1647
Long-tail ratio in Top-10: 1.65%


In [7]:
# === Step 5: Save and report results ===
fair_top10_df.to_csv('../rank_results/lastfm-nl/lastfm_top10_fair.csv', index=False)
print("Saved re-ranked results to: ../rank_results/ml-1m/ml_top10_fair.csv")

Saved re-ranked results to: ../rank_results/ml-1m/ml_top10_fair.csv


# Apply post-processing calibration (inspired by Steck)to improve fairness in recommendation exposure

In [8]:
# === Step 1: Load user and interaction data ===
# User file should contain gender information
user_df = pd.read_csv('datasets/lastfm-nl/lastfm-nl.user', sep='\t')  # contains 'user_id:token', 'gender:token'
inter_df = pd.read_csv('../datasets/split_datasets/lastfm-nl/lastfm-nl.train.inter', sep='\t')  # contains 'user_id:token','artist_id:token' 'label:float'

print(f"Loaded {len(inter_df)} training interactions.")
print(f"Unique users: {inter_df['user_id:token'].nunique()}, Unique items: {inter_df['artist_id:token'].nunique()}")

Loaded 356414 training interactions.
Unique users: 8792, Unique items: 32926


In [9]:
# === Step 2: Merge user gender into interactions ===
inter_df = inter_df[inter_df['label:float'] == 1.0]
inter_df = inter_df.merge(user_df[['user_id:token', 'gender:token']], on='user_id:token', how='left')
display(inter_df.head())
print("\n✅ Gender successfully merged. Gender distribution:")
print(inter_df['gender:token'].value_counts())

Unnamed: 0,user_id:token,artist_id:token,label:float,gender:token
0,7db20cb306f5c6ee0a7da4b2eabbb12f80a4577d,c7e90641-f441-4801-8e4a-d09e10f452b8,1.0,M
1,7db20cb306f5c6ee0a7da4b2eabbb12f80a4577d,41489644-58f8-47e7-a581-e24d5659baeb,1.0,M
2,7db20cb306f5c6ee0a7da4b2eabbb12f80a4577d,609e7afd-3552-4102-9501-7611858ea320,1.0,M
3,7db20cb306f5c6ee0a7da4b2eabbb12f80a4577d,5251b5a0-3e3b-4d07-a152-585009575310,1.0,M
4,7db20cb306f5c6ee0a7da4b2eabbb12f80a4577d,5f6ab597-f57a-40da-be9e-adad48708203,1.0,M



✅ Gender successfully merged. Gender distribution:
gender:token
M    285760
F     68880
Name: count, dtype: int64


In [10]:
# Step 4: Count number of positive interactions per item for each gender
item_gender_count = inter_df.groupby(['artist_id:token', 'gender:token']).size().unstack(fill_value=0)
display(item_gender_count.head())

gender:token,F,M
artist_id:token,Unnamed: 1_level_1,Unnamed: 2_level_1
0001cd84-2a11-4699-8d6b-0abf969c5f06,0,3
00034ede-a1f1-4219-be39-02f36853373e,0,2
0004537a-4b12-43eb-a023-04009e738d2e,0,7
00048fb7-31f0-40e5-b240-ae8174d59147,3,4
0006c824-595a-45af-9374-238ce585fa3a,0,2


In [11]:
# Step 5: Normalize female/male interactions by global group size
global_female_total = inter_df[inter_df['gender:token'] == 'F'].shape[0]
global_male_total = inter_df[inter_df['gender:token'] == 'M'].shape[0]

# Normalize to relative exposure per gender group
item_gender_count['F_norm'] = item_gender_count['F'] / global_female_total
item_gender_count['M_norm'] = item_gender_count['M'] / global_male_total

# Compute female_bias_ratio: how much female > male normalized preference
item_gender_count['female_bias_ratio'] = item_gender_count['F_norm'] / (item_gender_count['F_norm'] + item_gender_count['M_norm'])
display(item_gender_count.head())

gender:token,F,M,F_norm,M_norm,female_bias_ratio
artist_id:token,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0001cd84-2a11-4699-8d6b-0abf969c5f06,0,3,0.0,1e-05,0.0
00034ede-a1f1-4219-be39-02f36853373e,0,2,0.0,7e-06,0.0
0004537a-4b12-43eb-a023-04009e738d2e,0,7,0.0,2.4e-05,0.0
00048fb7-31f0-40e5-b240-ae8174d59147,3,4,4.4e-05,1.4e-05,0.75678
0006c824-595a-45af-9374-238ce585fa3a,0,2,0.0,7e-06,0.0


In [12]:
# Classification logic: now based on relative bias after normalization
def classify_bias(ratio):
    if ratio >= 0.68:
        return 'female'
    elif ratio <= 0.33:
        return 'male'
    else:
        return 'neutral'

item_gender_count['group'] = item_gender_count['female_bias_ratio'].apply(classify_bias)
display(item_gender_count.head())

gender:token,F,M,F_norm,M_norm,female_bias_ratio,group
artist_id:token,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
0001cd84-2a11-4699-8d6b-0abf969c5f06,0,3,0.0,1e-05,0.0,male
00034ede-a1f1-4219-be39-02f36853373e,0,2,0.0,7e-06,0.0,male
0004537a-4b12-43eb-a023-04009e738d2e,0,7,0.0,2.4e-05,0.0,male
00048fb7-31f0-40e5-b240-ae8174d59147,3,4,4.4e-05,1.4e-05,0.75678,female
0006c824-595a-45af-9374-238ce585fa3a,0,2,0.0,7e-06,0.0,male


In [13]:
import pandas as pd
from collections import Counter, defaultdict

# === Step 1: Load data ===
topk_df = pd.read_csv('../rank_results/lastfm-nl/lastfm_top50.csv')  # user_id, gender, topk_items，topk_scores

In [14]:
# === Step 2: Prepare item2group mapping ===
item2group = item_gender_count['group'].to_dict()
group_counts = Counter(item2group.values())
total_items = sum(group_counts.values())
print("=== Item Gender Group Distribution ===")
for group, count in group_counts.items():
    ratio = count / total_items
    print(f"{group.capitalize():<8} → {count:>5} items ({ratio:.3%})")

=== Item Gender Group Distribution ===
Male     → 21506 items (65.370%)
Female   →  5869 items (17.839%)
Neutral  →  5524 items (16.791%)


In [15]:
K = 10
all_counter = Counter()
female_counter = Counter()
male_counter = Counter()
total_female_users = 0
total_male_users = 0

for _, row in topk_df.iterrows():
    uid = row['user_id']
    gender = row['gender']
    items = str(row['topk_items']).split(',')[:K]
    groups = [item2group.get(i) for i in items]

    all_counter.update(groups)
    
    if gender == 'F':
        female_counter.update(groups)
        total_female_users += 1
    elif gender == 'M':
        male_counter.update(groups)
        total_male_users += 1

In [16]:
def normalize(counter, total_users, k):
    total_recommendations = total_users * k
    return {g: counter[g] / total_recommendations for g in ['female', 'male', 'neutral']}

all_current = normalize(all_counter, len(topk_df), K)
female_current = normalize(female_counter, total_female_users, K)
male_current = normalize(male_counter, total_male_users, K)

print("=== Current Exposure (All Users) ===")
print(all_current)

print("\n=== Current Exposure (Female Users) ===")
print(female_current)

print("\n=== Current Exposure (Male Users) ===")
print(male_current)

=== Current Exposure (All Users) ===
{'female': 0.042641037306642406, 'male': 0.17349863512283895, 'neutral': 0.7838489535941765}

=== Current Exposure (Female Users) ===
{'female': 0.10080784766301212, 'male': 0.05499134448932487, 'neutral': 0.844200807847663}

=== Current Exposure (Male Users) ===
{'female': 0.028360957642725598, 'male': 0.20259243518912026, 'neutral': 0.7690324408556453}


In [17]:
# === Step 3: Set gender-specific exposure targets ===
female_target = {'female': 0.3, 'male': 0.1, 'neutral': 0.6}
male_target = {'female': 0.1, 'male': 0.3, 'neutral': 0.6}

In [18]:
K = 10
calibrated_result = []
modified_users = 0

# === Step 4: Re-rank with gender-based calibration ===
for _, row in topk_df.iterrows():
    user_id = row['user_id']
    top_items = str(row['topk_items']).split(',')
    gender = row['gender']

    # Choose target based on user gender
    if gender == 'M':
        target_distribution = male_target
    else:
        target_distribution = female_target

    # Compute desired number of items per group
    group_target_count = {g: int(K * r) for g, r in target_distribution.items()}

    selected_items = []
    group_count = defaultdict(int)

    # Traverse top-50 in original order
    for item in top_items:
        g = item2group.get(item, 'neutral')  # Default to 'neutral' if not found
        if group_count[g] < group_target_count[g]:
            selected_items.append(item)
            group_count[g] += 1
        if len(selected_items) == K:
            break

    # Fill remaining with original order if needed
    if len(selected_items) < K:
        for item in top_items:
            if item not in selected_items:
                selected_items.append(item)
            if len(selected_items) == K:
                break

    if selected_items != top_items[:K]:
        modified_users += 1

    calibrated_result.append({
        'user_id': user_id,
        'gender': gender,
        'topk_items': ','.join(selected_items)
    })

   

In [19]:
# === Step 5: Report summary ===
fair_top10_df = pd.DataFrame(calibrated_result)
display(fair_top10_df.head())
print("\n✅ Gender-aware Steck-style calibration complete")
print(f"Total users: {len(fair_top10_df)}")
print(f"Users re-ranked (modified): {modified_users}") 

Unnamed: 0,user_id,gender,topk_items
0,0004de6c3c32daa599bd03a37ce2356d5e7cc23f,M,"31745282-b1ea-4d62-939f-226b14d68e7c,7fa7fc04-..."
1,0007e26aafcfc0b6dcb87d7041583fbb7cced88a,F,"cc197bad-dc9c-440d-a5b5-d52ba2e14234,b10bbbfc-..."
2,0009f70b52e48eca23a446d8a6f8bd2663691c54,F,"14b22b4b-06d5-4b82-8284-29d29b58945f,a505bb48-..."
3,001078f2f557a1afd9b9618144fc0b442481fd13,M,"cc197bad-dc9c-440d-a5b5-d52ba2e14234,b10bbbfc-..."
4,00292157f9c4e4b11464e886f27fff42f1ff442d,M,"9a709693-b4f8-4da9-8cc1-038c911a61be,14b22b4b-..."



✅ Gender-aware Steck-style calibration complete
Total users: 8792
Users re-ranked (modified): 8205


In [20]:
fair_top10_df.to_csv('../rank_results/lastfm-nl/lastfm_top10_calibrated.csv', index=False)
print("Saved re-ranked results to: ../rank_results/lastfm-nl/lastfm_top10_calibrated.csv")

Saved re-ranked results to: ../rank_results/lastfm-nl/lastfm_top10_calibrated.csv


# Apply Biega et al. (2018) Equity of Attention post-processing for fair exposure

In [21]:
import pandas as pd
import numpy as np
from collections import Counter, defaultdict

# === Step 1: Load data ===
topk_df = pd.read_csv('../rank_results/lastfm-nl/lastfm_top50.csv')  # user_id,gender, topk_items，topk_scores
# Convert comma-separated strings to proper lists
topk_df['topk_items'] = topk_df['topk_items'].astype(str).str.split(',')
topk_df['topk_scores'] = topk_df['topk_scores'].astype(str).str.split(',').apply(lambda x: [float(s) for s in x])

In [22]:
print(topk_df[['topk_items', 'topk_scores']].head())
print(topk_df['topk_items'].apply(len).value_counts().head())

                                          topk_items  \
0  [31745282-b1ea-4d62-939f-226b14d68e7c, 7fa7fc0...   
1  [cc197bad-dc9c-440d-a5b5-d52ba2e14234, b10bbbf...   
2  [14b22b4b-06d5-4b82-8284-29d29b58945f, a505bb4...   
3  [cc197bad-dc9c-440d-a5b5-d52ba2e14234, b10bbbf...   
4  [9a709693-b4f8-4da9-8cc1-038c911a61be, 14b22b4...   

                                         topk_scores  
0  [8.1162, 8.0042, 7.8219, 7.7762, 7.7741, 7.610...  
1  [10.2432, 9.7123, 9.6197, 9.4391, 9.0732, 9.00...  
2  [8.0561, 7.9366, 7.5794, 7.5463, 7.3653, 7.300...  
3  [10.5511, 10.355, 10.0145, 9.8123, 9.7214, 9.5...  
4  [7.7429, 7.6171, 7.5318, 7.5233, 7.4354, 7.339...  
topk_items
50    8792
Name: count, dtype: int64


In [23]:
# === Step 2: Build historical exposure count ===
# Flatten all recommended items and count their occurrences
all_items = topk_df['topk_items'].explode().tolist()
historical_exposure = Counter(all_items)  # {item_id: frequency in top-Ks}
print(dict(list(historical_exposure.items())[:10]))

{'31745282-b1ea-4d62-939f-226b14d68e7c': 1025, '7fa7fc04-1011-4876-8095-ecd232edea87': 517, '5b687684-ad34-4a9f-b425-0e7aa81fbd38': 549, '319b1175-ced9-438f-986b-9239c3edd92d': 698, 'f57e14e4-b030-467c-b202-539453f504ec': 741, 'e631bb92-3e2b-43e3-a2cb-b605e2fb53bd': 634, '00a9f935-ba93-4fc8-a33a-993abe9c936b': 790, '17167af8-c1da-45cc-bba2-9d23f068b7a3': 550, 'efaefde1-e09b-4d49-9d8e-b1304d2ece8d': 343, '8000598a-5edb-401c-8e6d-36b167feaf38': 328}


In [24]:
# === Step 3: Build relevance lookup table ===
# Create a nested dict: relevance_scores[user_id][item_id] = score
relevance_scores = {}
for _, row in topk_df.iterrows():
    user_id = row['user_id']
    items = row['topk_items']
    scores = row['topk_scores']
    relevance_scores[user_id] = dict(zip(items, scores))

In [25]:
def re_rank_equity_of_attention(user_id, candidates, relevance_scores, historical_exposure, topk=10, lambda_tradeoff=0.5):
    """
    Re-rank top-K items for a user by balancing relevance and fairness (attention equity).

    Parameters:
        user_id (int): The ID of the user.
        candidates (list): The original top-K recommended items.
        relevance_scores (dict of dict): User-item relevance scores.
        historical_exposure (dict): Item exposure frequency across all users.
        K (int): Number of items to re-rank.
        lambda_tradeoff (float): Trade-off between relevance and fairness.

    Returns:
        list: Re-ranked list of K items.
    """
    selected = []
    candidate_pool = set(candidates)

    for _ in range(topk):
        best_item = None
        best_score = -float('inf')

        for item in candidate_pool:
            relevance = relevance_scores[user_id].get(item, 0)
            exposure = historical_exposure.get(item, 0) + 1  # Add 1 to avoid division by zero
            score = relevance - lambda_tradeoff * np.log(exposure)  # Fairness penalty via log

            if score > best_score:
                best_score = score
                best_item = item

        selected.append(best_item)
        candidate_pool.remove(best_item)
        historical_exposure[best_item] += 1  # Update exposure after selection

    return selected

In [26]:
# === Step 6: Apply to all users ===
equity_result = []
modified_users = 0

for idx, row in topk_df.iterrows():
    user_id = row['user_id']  
    top_items = row['topk_items']
    re_ranked = re_rank_equity_of_attention(user_id, top_items, relevance_scores, historical_exposure, topk=10, lambda_tradeoff=0.3)
    if re_ranked != top_items[:10]:
        modified_users += 1

    equity_result.append({
        'user_id': user_id,
        'gender': row['gender'],
        'topk_items': ','.join(map(str, re_ranked))  # 转为逗号分隔字符串
    })

In [27]:
# === Step 7: Save result ===
equity_top10_df = pd.DataFrame(equity_result)
display(equity_top10_df.head())

print("\n✅  Equity-aware Biega-style rerank complete")
print(f"Total users: {len(fair_top10_df)}")
print(f"Users re-ranked (modified): {modified_users}") 

Unnamed: 0,user_id,gender,topk_items
0,0004de6c3c32daa599bd03a37ce2356d5e7cc23f,M,"7fa7fc04-1011-4876-8095-ecd232edea87,31745282-..."
1,0007e26aafcfc0b6dcb87d7041583fbb7cced88a,F,"cc197bad-dc9c-440d-a5b5-d52ba2e14234,b10bbbfc-..."
2,0009f70b52e48eca23a446d8a6f8bd2663691c54,F,"a505bb48-ad65-4af4-ae47-29149715bff9,14b22b4b-..."
3,001078f2f557a1afd9b9618144fc0b442481fd13,M,"cc197bad-dc9c-440d-a5b5-d52ba2e14234,b10bbbfc-..."
4,00292157f9c4e4b11464e886f27fff42f1ff442d,M,"9a709693-b4f8-4da9-8cc1-038c911a61be,9ddce51c-..."



✅  Equity-aware Biega-style rerank complete
Total users: 8792
Users re-ranked (modified): 8736


In [28]:
equity_top10_df.to_csv('../rank_results/lastfm-nl/lastfm_top10_equity.csv', index=False)
print("✅ Fair top-10 re-ranking complete and saved to 'lastfm_top10_equity.csv'")

✅ Fair top-10 re-ranking complete and saved to 'lastfm_top10_equity.csv'
