In [29]:
# ml-1m

In [27]:
# Apply fairness-aware data re-label (inspired by Kamiran et al.) on ml-1m dataset for RecBole
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Plot defaults used by any figure drawn later in the notebook.
sns.set(style="whitegrid")
plt.rcParams['figure.figsize'] = (10, 6)

# 1. Read the three tab-separated RecBole inputs:
#    - user profiles (user_id:token and gender:token are used below)
#    - the labelled training interactions
#    - the original atomic interactions, which carry rating:float
user_df = pd.read_csv('../datasets/atomic_datasets/ml-1m/ml-1m.user', sep='\t')
inter_df_label = pd.read_csv('../datasets/split_datasets/ml-1m/ml-1m.train.inter', sep='\t')
inter_df_atomic = pd.read_csv('../datasets/atomic_datasets/ml-1m/ml-1m.inter', sep='\t')

# 2. Attach the rating to every training interaction.
#    A left join keeps every training row, even if no rating is found for it.
pair_keys = ['user_id:token', 'item_id:token']
inter_df = inter_df_label.merge(
    inter_df_atomic[pair_keys + ['rating:float']],
    how='left',
    on=pair_keys,
)

# 3. Attach the user's gender and expose it under the shorter name 'gender'.
merged_df = inter_df.merge(
    user_df[['user_id:token', 'gender:token']],
    how='left',
    on='user_id:token',
).rename(columns={'gender:token': 'gender'})

display(merged_df.head())

Unnamed: 0,user_id:token,item_id:token,timestamp:float,label:float,rating:float,gender
0,1791,3949,974700700.0,1.0,4,M
1,1791,1084,974702340.0,1.0,3,M
2,1791,1805,974761150.0,0.0,2,M
3,1791,3897,974701200.0,1.0,4,M
4,1791,3741,974702400.0,1.0,4,M


In [28]:
# Number of distinct users per gender: keep one row per (user, gender) pair so
# that a user is counted once regardless of how many interactions they have.
user_gender_map = merged_df.drop_duplicates(
    subset=['user_id:token', 'gender']
)[['user_id:token', 'gender']]
gender_total = user_gender_map['gender'].value_counts().to_dict()
display(gender_total)

{'M': 4331, 'F': 1709}

In [29]:
# Number of interactions per gender. This counts every row regardless of its
# label; the per-gender label == 1 counts are computed separately.
print(f"Total interactions: {merged_df.shape[0]}")
label_counts = merged_df['gender'].value_counts().to_dict()
display(label_counts)

# Share of label == 1 over all interactions; used as the common positive rate
# that both gender groups are relabelled towards.
target_ratio = merged_df['label:float'].mean()
print(f"\nGlobal target positive ratio (label==1): {target_ratio:.4f}")

Total interactions: 805443


{'M': 606823, 'F': 198620}


Global target positive ratio (label==1): 0.8363


In [30]:
# Current number of positive (label == 1) interactions per gender.
label_1_counts = merged_df[merged_df['label:float'] == 1]['gender'].value_counts().to_dict()
current_male_1 = label_1_counts.get('M', 0)
current_female_1 = label_1_counts.get('F', 0)

# Number of positives each gender would have at the global positive rate.
# int() truncates towards zero. A gender that is absent from the data gets a
# target of 0 (default 0, not 1, so no phantom interaction is invented).
target_male_1 = int(target_ratio * label_counts.get('M', 0))
target_female_1 = int(target_ratio * label_counts.get('F', 0))

# How many labels have to change in each group (never negative).
# These are named n_flip_* on purpose: the notebook also defines functions
# called flip_male_0_to_1 / flip_female_1_to_0, and reusing those names for
# integers would make one definition silently overwrite the other depending
# on which cell was executed last.
n_flip_male_0_to_1 = max(0, target_male_1 - current_male_1)
n_flip_female_1_to_0 = max(0, current_female_1 - target_female_1)
print(f"  Target male label=1 count: {target_male_1}, current: {current_male_1}, {n_flip_male_0_to_1} to flip")
print(f"  Target female label=1 count: {target_female_1}, current: {current_female_1}, {n_flip_female_1_to_0} to flip")

  Target male label=1 count: 507459, current: 505428, 2031 to flip
  Target female label=1 count: 166097, current: 168129, 2032 to flip


In [31]:
# Apply relabeling
# Flip female label=1 → 0
def _flip_labels(df, gender, from_label, to_label, ratings, n, random_state=42):
    """Relabel up to ``n`` rows of one gender group in place; return the count.

    Rating buckets are visited in the order given by ``ratings``. Within a
    bucket, rows of ``gender`` whose 'label:float' equals ``from_label`` are
    sampled without replacement and set to ``to_label``. The next bucket is
    used only if the current one could not supply all remaining flips.

    Rows are addressed by index label, so ``df`` is expected to have a unique
    index (true for the frame produced by ``DataFrame.merge``).
    """
    remaining = n
    flipped = 0
    for rating in ratings:
        if remaining <= 0:
            break
        # Rows already flipped no longer carry from_label, so they can never
        # be selected twice; no separate bookkeeping of flipped rows is needed.
        mask = (
            (df['gender'] == gender)
            & (df['label:float'] == from_label)
            & (df['rating:float'] == rating)
        )
        take = min(remaining, int(mask.sum()))
        if take == 0:
            continue
        chosen = df.loc[mask, 'label:float'].sample(n=take, random_state=random_state).index
        df.loc[chosen, 'label:float'] = to_label
        flipped += len(chosen)
        remaining -= len(chosen)
    return flipped


# Flip female label=1 → 0
def flip_female_1_to_0(n, df=None, random_state=42):
    """Turn up to ``n`` positive female interactions into negatives, in place.

    Candidates are taken from rating 3 first, then 4, then 5.

    Args:
        n: number of labels to flip; values <= 0 flip nothing.
        df: frame with 'gender', 'label:float' and 'rating:float' columns.
            Defaults to the notebook-level ``merged_df``.
        random_state: seed passed to pandas ``sample`` for reproducibility.

    Prints how many labels were actually flipped (may be fewer than ``n`` if
    there are not enough candidates).
    """
    target = merged_df if df is None else df
    flipped = _flip_labels(target, 'F', 1, 0, [3, 4, 5], n, random_state)
    print(f"Flipped {flipped} female label=1 → 0")


# Flip male label=0 → 1
def flip_male_0_to_1(n, df=None, random_state=42):
    """Turn up to ``n`` negative male interactions into positives, in place.

    Candidates are taken from rating 2 first, then 1.

    Args:
        n: number of labels to flip; values <= 0 flip nothing.
        df: frame with 'gender', 'label:float' and 'rating:float' columns.
            Defaults to the notebook-level ``merged_df``.
        random_state: seed passed to pandas ``sample`` for reproducibility.

    Prints how many labels were actually flipped (may be fewer than ``n`` if
    there are not enough candidates).
    """
    target = merged_df if df is None else df
    flipped = _flip_labels(target, 'M', 0, 1, [2, 1], n, random_state)
    print(f"Flipped {flipped} male label=0 → 1")
    
# Female positives are only ever reduced, and male positives only ever
# increased; a group already on the right side of its target is left alone.
if target_female_1 < current_female_1:
    n = current_female_1 - target_female_1
    flip_female_1_to_0(n)

if target_male_1 > current_male_1:
    n = target_male_1 - current_male_1
    flip_male_0_to_1(n)

# Share of each label value within every gender group after relabelling
# (rows: gender, columns: label value).
label_share = merged_df.groupby('gender')['label:float'].value_counts(normalize=True)
result = label_share.unstack()
print("\nFinal label==1 ratio by gender:")
print(result)

Flipped 2032 female label=1 → 0
Flipped 2031 male label=0 → 1

Final label==1 ratio by gender:
label:float       0.0       1.0
gender                         
F            0.163745  0.836255
M            0.163745  0.836255


In [33]:
# 8. Save the relabelled training interactions
import os

save_path = 'datasets/ml-1m/ml-1m.train.inter'
os.makedirs(os.path.dirname(save_path), exist_ok=True)

# Write only these four columns, tab-separated and without the pandas index;
# the rating and gender helper columns are not written.
output_columns = ['user_id:token', 'item_id:token', 'timestamp:float', 'label:float']
merged_df[output_columns].to_csv(save_path, sep='\t', index=False)
print(f"✅ Fair resampled dataset saved to {save_path}")

✅ Fair resampled dataset saved to datasets/ml-1m/ml-1m.train.inter


In [34]:
import shutil

# The validation and test splits are copied byte-for-byte next to the saved
# training file; only the training split is modified by this notebook.
src_dir = '../datasets/split_datasets/ml-1m'
dst_dir = 'datasets/ml-1m'
os.makedirs(dst_dir, exist_ok=True)

for split in ('valid', 'test'):
    file_name = f'ml-1m.{split}.inter'
    src_path = src_dir + '/' + file_name
    dst_path = os.path.join(dst_dir, file_name)

    # Copy the file
    shutil.copyfile(src_path, dst_path)
    print(f"✅ File copied to {dst_path}")

✅ File copied to datasets/ml-1m/ml-1m.valid.inter
✅ File copied to datasets/ml-1m/ml-1m.test.inter


In [35]:
from recbole.quick_start import run_recbole

# Run RecBole with the settings from ml-1m-relabel.yaml (the file is resolved
# relative to the notebook's working directory). Because the call is the last
# expression of the cell, its return value is displayed as the cell output.
# NOTE(review): the config is expected to point data_path at datasets/ml-1m so
# that the relabelled train split saved earlier is the one being used — confirm
# in the YAML file.
run_recbole(config_file_list=['ml-1m-relabel.yaml'])

08 Jun 20:23    INFO  ['/Users/huangjiaqing/Desktop/Recommender Systems/RScode/.venv/lib/python3.10/site-packages/ipykernel_launcher.py', '-f', '/Users/huangjiaqing/Library/Jupyter/runtime/kernel-102a3971-cd4d-40b3-8b2c-adf2c0a83ff3.json']
08 Jun 20:23    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 42
state = INFO
reproducibility = True
data_path = datasets/ml-1m
checkpoint_dir = ../checkpoint_saved/ml-1m/
show_progress = True
save_dataset = True
dataset_save_path = None
save_dataloaders = True
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 100
train_batch_size = 1024
learner = adam
learning_rate = 0.0005
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user',

Train     0:   0%|                                                          | 0/787 [00:00<?, ?it/s]:   1%|▍                                                 | 6/787 [00:00<00:13, 57.46it/s]:   3%|█▌                                              | 25/787 [00:00<00:05, 132.98it/s]:   6%|██▋                                             | 45/787 [00:00<00:04, 160.39it/s]:   8%|████                                            | 66/787 [00:00<00:04, 177.70it/s]:  11%|█████▎                                          | 87/787 [00:00<00:03, 187.09it/s]:  14%|██████▍                                        | 108/787 [00:00<00:03, 192.36it/s]:  16%|███████▋                                       | 128/787 [00:00<00:03, 190.92it/s]:  19%|████████▉                                      | 149/787 [00:00<00:03, 195.14it/s]:  22%|██████████▏                                    | 171/787 [00:00<00:03, 199.91it/s]:  24%|███████████▍                                   | 192/787 [00:01<00:02, 200.67it/s]:  27%|███

{'best_valid_score': 0.0773,
 'valid_score_bigger': True,
 'best_valid_result': OrderedDict([('precision@10', 0.0579),
              ('recall@10', 0.073),
              ('hit@10', 0.4474),
              ('ndcg@10', 0.0773),
              ('itemcoverage@10', 0.3976),
              ('averagepopularity@10', 1276.8976),
              ('shannonentropy@10', 0.004),
              ('giniindex@10', 0.9162),
              ('tailpercentage@10', 0.0002)]),
 'test_result': OrderedDict([('precision@10', 0.0569),
              ('recall@10', 0.0713),
              ('hit@10', 0.4411),
              ('ndcg@10', 0.0736),
              ('itemcoverage@10', 0.3976),
              ('averagepopularity@10', 1276.8976),
              ('shannonentropy@10', 0.004),
              ('giniindex@10', 0.9162),
              ('tailpercentage@10', 0.0002)])}

In [37]:
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

from recbole.quick_start import load_data_and_model
from recbole.utils.case_study import full_sort_topk

# Load the trained model together with its config, dataset and data loaders.
config, model, dataset, train_data, valid_data, test_data = load_data_and_model(
    model_file='../checkpoint_saved/ml-1m/BPR-Jun-08-2025_20-23-10.pth'
)

# Step 1: Enumerate all internal user IDs
all_uids = list(range(dataset.user_num))

# Step 2: Keep only users that have an entry in the test loader's history table.
# NOTE(review): a None entry is taken to mean "user has no interactions in the
# test set" — confirm against RecBole's full-sort evaluation data loader.
valid_uids = [uid for uid in tqdm(all_uids) if test_data.uid2history_item[uid] is not None]

# Step 3: Convert to a NumPy array (the form passed to full_sort_topk below)
uid_series = np.array(valid_uids)

# Step 4: Score all items for every kept user and take the 10 best
topk_scores, topk_index = full_sort_topk(uid_series, model, test_data, k=10, device=config['device'])

# Step 5: Convert internal item / user IDs back to the external tokens
external_item_lists = [dataset.id2token(dataset.iid_field, row.cpu().tolist()) for row in topk_index]
external_user_list = [dataset.id2token(dataset.uid_field, [uid])[0] for uid in uid_series]

# Step 6: One row per user; the top-k item tokens are stored as a single
# comma-separated string, best item first.
df = pd.DataFrame({
    'user_id': external_user_list,
    'topk_items': [','.join(items) for items in external_item_lists]
})
display(df.head())

# Create the output directory first so that the export also works on a fresh
# checkout (the other save cells of this notebook do the same).
output_path = 'outputs/ml_all_user_top10.csv'
os.makedirs(os.path.dirname(output_path), exist_ok=True)
df.to_csv(output_path, index=False)
print("save ml_all_user_top10 successfully")

08 Jun 20:33    INFO  
General Hyper Parameters:
gpu_id = 0
use_gpu = True
seed = 42
state = INFO
reproducibility = True
data_path = datasets/ml-1m
checkpoint_dir = ../checkpoint_saved/ml-1m/
show_progress = True
save_dataset = True
dataset_save_path = None
save_dataloaders = True
dataloaders_save_path = None
log_wandb = True

Training Hyper Parameters:
epochs = 100
train_batch_size = 1024
learner = adam
learning_rate = 0.0005
train_neg_sample_args = {'distribution': 'uniform', 'sample_num': 1, 'alpha': 1.0, 'dynamic': False, 'candidate_num': 0}
eval_step = 1
stopping_step = 10
clip_grad_norm = None
weight_decay = 0.0
loss_decimal_place = 4

Evaluation Hyper Parameters:
eval_args = {'split': {'RS': [0.8, 0.1, 0.1]}, 'order': 'RO', 'group_by': 'user', 'mode': {'valid': 'full', 'test': 'full'}}
repeatable = True
metrics = ['Precision', 'Recall', 'Hit', 'NDCG', 'ItemCoverage', 'AveragePopularity', 'ShannonEntropy', 'GiniIndex', 'TailPercentage']
topk = [10]
valid_metric = NDCG@10
valid_me

Unnamed: 0,user_id,topk_items
0,1791,1621248368375029616171089121329121228
1,2496,245413342664121413273471264813879242455
2,2718,53935613458812652424364172111
3,4089,1961242035783448196817212003378529183753
4,1484,1621732296161722892997508001213778


save ml_all_user_top10 successfully


In [38]:
import pandas as pd
import numpy as np
from collections import defaultdict

# Step 1: Load the saved recommendations and build a user -> gender lookup
topk_df = pd.read_csv('outputs/ml_all_user_top10.csv')
user_df = pd.read_csv('../datasets/atomic_datasets/ml-1m/ml-1m.user', sep='\t')
user2gender = {
    user: gender
    for user, gender in zip(user_df['user_id:token'], user_df['gender:token'])
}

# Step 2: Load the test-set ground truth, keeping positive feedback only
# (rows with label == 1), as one set of item IDs per user
test_df = pd.read_csv('datasets/ml-1m/ml-1m.test.inter', sep='\t')
test_df = test_df.loc[test_df['label:float'] == 1.0]
user2ground_truth = (
    test_df
    .groupby('user_id:token')['item_id:token']
    .agg(set)
    .to_dict()
)

# Step 3: NDCG@k helper (called with k=10 below)
def ndcg_at_k(preds, true_items, k=10):
    """Binary-relevance NDCG of a ranked list, truncated at ``k``.

    Args:
        preds: ranked sequence of recommended items, best first.
        true_items: collection of relevant items (membership is tested with ``in``).
        k: cut-off; only the first ``k`` predictions are scored.

    Returns:
        DCG of the first ``k`` predictions divided by the ideal DCG, i.e. the
        DCG obtained if the first ``min(len(true_items), k)`` positions were
        all hits. Returns 0.0 when the ideal DCG is zero.
    """
    dcg = 0.0
    # A hit at 0-based position i is discounted by log2(i + 2); enumerating
    # from 2 yields that discount argument directly.
    for discount_arg, item in enumerate(preds[:k], start=2):
        if item in true_items:
            dcg += 1.0 / np.log2(discount_arg)

    n_ideal = min(len(true_items), k)
    idcg = sum(1.0 / np.log2(discount_arg) for discount_arg in range(2, n_ideal + 2))

    if idcg > 0:
        return dcg / idcg
    return 0.0

# Step 4: Per-user NDCG@10, collected overall and per gender
ndcg_male, ndcg_female, ndcg_all = [], [], []

for uid, raw_items in zip(topk_df['user_id'], topk_df['topk_items']):
    true_items = user2ground_truth.get(uid, set())
    gender = user2gender.get(uid, None)

    # Skip users with an unknown gender or without positive test items.
    if gender not in ('M', 'F') or not true_items:
        continue

    if isinstance(raw_items, str):
        # The column holds comma-separated item IDs. Parse it explicitly
        # instead of calling eval() on text read from a file: eval would run
        # arbitrary code and also breaks on a single ID or on leading zeros.
        # IDs are converted to int so they compare equal to the ground-truth
        # IDs. NOTE(review): this assumes numeric item IDs, as in ml-1m.
        pred_items = [int(tok) for tok in raw_items.split(',') if tok.strip()]
    else:
        pred_items = raw_items

    ndcg = ndcg_at_k(pred_items, true_items, k=10)
    (ndcg_male if gender == 'M' else ndcg_female).append(ndcg)
    ndcg_all.append(ndcg)

# Step 5: Report the mean NDCG@10 and the number of users in each group
print(f'NDCG@10 (All):    {np.mean(ndcg_all):.4f} over {len(ndcg_all)} users')
print(f'NDCG@10 (Male):   {np.mean(ndcg_male):.4f} over {len(ndcg_male)} users')
print(f'NDCG@10 (Female): {np.mean(ndcg_female):.4f} over {len(ndcg_female)} users')

NDCG@10 (All):    0.0727 over 6012 users
NDCG@10 (Male):   0.0739 over 4312 users
NDCG@10 (Female): 0.0696 over 1700 users
