In [1]:
import pandas as pd
import os
import fairdiverse
import yaml
import numpy as np
from datetime import date
import json

In [2]:
cd fairdiverse

/media/github/school/master1/artifical-intelligence/recommender-systems/BSARec/FairDiverse/fairdiverse


In [3]:
!mkdir -p recommendation/dataset

In [4]:
def print_evaluation_results(model_name, dataset_name):
    if dataset_name !="":
        today = date.today()
        today_format = f"{today.year}-{today.month}-{today.day}"

        # read evaluation file
        evaluation_file = f"recommendation/log/{today_format}_{model_name}_{dataset_name}/test_result.json"

    else:
        evaluation_file = f"recommendation/log/{model_name}/test_result.json"

    with open(evaluation_file, "r", encoding="utf-8") as f:
        metrics = json.load(f)
    # format metrics as table for visualisation
    table = {}
    for metric_key, value in metrics.items():
        metric, k = metric_key.split("@")
        if metric not in table:
            table[metric] = {}
        table[metric][f"@{k}"] = value

    df = pd.DataFrame(table).T
    df = df[sorted(df.columns, key=lambda x: int(x[1:]))]

    print(df)

# 🧰 FairDiverse Tutorial
---

## **1. Add New Dataset 📁**
---

### Step 1: ⬇️ Download the Dataset from LastFM
Download link: [LastFM Dataset](https://files.grouplens.org/datasets/hetrec2011/hetrec2011-lastfm-2k.zip) OR execute cell below

#### What if the Dataset is Not in RecBole Format?
Follow the steps [here](https://recbole.io/docs/user_guide/usage/running_new_dataset.html) in order to convert your data files to RecBole format which uses atomic files. 

In [5]:
!wget https://files.grouplens.org/datasets/hetrec2011/hetrec2011-lastfm-2k.zip -O recommendation/dataset/LastFM.zip -nc
!mkdir -p recommendation/dataset/LastFM
!unzip -n recommendation/dataset/LastFM.zip -d recommendation/dataset/LastFM
!rm recommendation/dataset/LastFM.zip

--2025-06-16 14:12:47--  https://files.grouplens.org/datasets/hetrec2011/hetrec2011-lastfm-2k.zip
Resolving files.grouplens.org (files.grouplens.org)... 128.101.65.152
Connecting to files.grouplens.org (files.grouplens.org)|128.101.65.152|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2589075 (2.5M) [application/zip]
Saving to: ‘recommendation/dataset/LastFM.zip’


2025-06-16 14:12:48 (2.59 MB/s) - ‘recommendation/dataset/LastFM.zip’ saved [2589075/2589075]

Archive:  recommendation/dataset/LastFM.zip
  inflating: recommendation/dataset/LastFM/user_taggedartists-timestamps.dat  
  inflating: recommendation/dataset/LastFM/artists.dat  
  inflating: recommendation/dataset/LastFM/user_artists.dat  



#### 🎬 MovieLens Dataset
In this notebook we will use the MovieLens Dataset as an example.

GroupLens Research has collected and made available rating data sets from the MovieLens web site (https://movielens.org). This dataset describes 5-star rating and free-text tagging activity from MovieLens, a movie recommendation service.

**Download the MovieLens dataset from RecBole: [MovieLens Dataset (RecBole processed)](https://drive.google.com/file/d/1G7_XhdSi1BhIvRETg0nN0O5tuOvbEs65/view?usp=drive_link)**


In [6]:
dataset_name = "LastFM"

#### **Step 2:** Place the dataset files under `~/recommendation/dataset/LastFM`

We rename the downloaded files to match the RecBole format. The directory structure should look like this:

```text
fairdiverse
└── recommendation
        └── dataset
            └── LastFM
                ├── artists.dat -> LastFM.item
                ├── user_artists.dat -> LastFM.inter
                ├── user_taggedartists-timestamps.dat -> LastFM.user

In [7]:
!mv recommendation/dataset/LastFM/artists.dat recommendation/dataset/LastFM/LastFM.item
!mv recommendation/dataset/LastFM/user_artists.dat recommendation/dataset/LastFM/LastFM.inter
!mv recommendation/dataset/LastFM/user_taggedartists-timestamps.dat recommendation/dataset/LastFM/LastFM.user

In [8]:
data_path = f"recommendation/dataset/{dataset_name}"
os.makedirs(data_path, exist_ok=True)
# move the dataset files in the folder

#### Dataset Content 📁

---
**User Data**

The file user_taggedartists-timestamps.dat comprising the attributes of the user tagged artists.

Each record/line in the file has the following fields: 
 
- `userID`: the id of the users.
- `artistID`: the id of the artists.
- `tagID`: the id of the tags.
- `timestamp`: the timestamp of the user interaction.

---

In [9]:
user_path = os.path.join(data_path, f"{dataset_name}.user")
user_data = pd.read_csv(user_path,delimiter='\t')

print(f"Data Sample")
print(user_data.head())

Data Sample
   userID  artistID  tagID      timestamp
0       2        52     13  1238536800000
1       2        52     15  1238536800000
2       2        52     18  1238536800000
3       2        52     21  1238536800000
4       2        52     41  1238536800000


In [11]:
num_users = user_data["userID"].nunique()
print(f"Total Users: {num_users}")

Total Users: 1892


---
**Item Data**

The file artists.dat comprising the attributes of the artists.

Each record/line in the file has the following fields: 
 
- `artistID`: the id of the artists.
- `name`: the name of the artists.
- `url`: the url of the artists.
- `pictureURL`: the picture url of the artists.

---

In [30]:
item_path = os.path.join(data_path, f"{dataset_name}.item")
item_data = pd.read_csv(item_path, delimiter='\t', encoding='latin-1')
item_data.rename(columns={'id': 'artistID'}, inplace=True)

# Add the first tagID from the user data at the end of the item data where the artistID matches
item_data = item_data.merge(user_data[['artistID', 'tagID']], on='artistID', how='left')
# Convert tagID to int
item_data['tagID'] = item_data['tagID'].dropna().astype(int)

print(item_data.head())

   artistID          name                                    url  \
0         1  MALICE MIZER  http://www.last.fm/music/MALICE+MIZER   
1         1  MALICE MIZER  http://www.last.fm/music/MALICE+MIZER   
2         1  MALICE MIZER  http://www.last.fm/music/MALICE+MIZER   
3         1  MALICE MIZER  http://www.last.fm/music/MALICE+MIZER   
4         1  MALICE MIZER  http://www.last.fm/music/MALICE+MIZER   

                                        pictureURL   tagID  
0  http://userserve-ak.last.fm/serve/252/10808.jpg   552.0  
1  http://userserve-ak.last.fm/serve/252/10808.jpg  1219.0  
2  http://userserve-ak.last.fm/serve/252/10808.jpg   139.0  
3  http://userserve-ak.last.fm/serve/252/10808.jpg   141.0  
4  http://userserve-ak.last.fm/serve/252/10808.jpg  2850.0  


In [31]:
num_items = item_data["artistID"].nunique()
print(f"Total Items: {num_items}")

Total Items: 17632


In [32]:
# save item_data
item_data.to_csv(item_path,sep='\t', index=False)

---
**Interaction Data**

The file user_artists.dat comprising the weight of the user interaction with the artists.

Each record/line in the file has the following fields: 

userID	artistID	weight
- `userID`: the id of the users.
- `artistID`: the id of the artists.
- `weight`: the weight of the user interaction with the artist.
---

In [33]:
interaction_path = os.path.join(data_path, f"{dataset_name}.inter")
interaction_data = pd.read_csv(interaction_path,delimiter='\t')

# remove from interaction data items which were dropped
interaction_data = interaction_data[interaction_data['artistID'].isin(item_data['artistID'])]

# Add the weight from the user data to the interaction data, at the end of a row if userID and artistID match
interaction_data = interaction_data.merge(user_data, on=['userID', 'artistID'], how='left')
# Drop tagID column
interaction_data = interaction_data[['userID', 'artistID', 'weight', 'timestamp']]
# Drop NaN values in the timestamp column
interaction_data = interaction_data.dropna(subset=['timestamp'])
# Convert timestamp to integer
interaction_data['timestamp'] = interaction_data['timestamp'].astype(int)

print(interaction_data.head())

   userID  artistID  weight      timestamp
1       2        52   11690  1238536800000
2       2        52   11690  1238536800000
3       2        52   11690  1238536800000
4       2        52   11690  1238536800000
5       2        52   11690  1238536800000


In [34]:
print("Number of items interacted with: ", len(interaction_data["artistID"].unique()))
print("Number of users who performed an interaction: ", len(interaction_data["userID"].unique()))

Number of items interacted with:  6854
Number of users who performed an interaction:  1824


In [35]:
# Distirbution of ratings
interaction_data.groupby("weight").size().reset_index()

Unnamed: 0,weight,0
0,1,126
1,2,142
2,3,80
3,4,58
4,5,67
...,...,...
3899,203165,6
3900,227829,4
3901,320725,2
3902,324663,29


In [36]:
# save interaction_data
interaction_data.to_csv(interaction_path, sep='\t', index=False)

#### **Step 3:** Create a configuration file for the dataset under `~/recommendation/properties/dataset/ml-100k.yaml`

```yaml
{
    user_id: user_id:token, # column name of the user ID
    item_id: item_id:token, # column name of the item ID, in this case we recommend movies
    group_id: first_class:token, # column name of the groups to be considered for fairness, in this case we consider the genres of the movie
    label_id: rating:float, # column name for the label, indicating the interest of the user in the item
    timestamp: timestamp:float, # column name for the timestamp of when the interaction happened
    text_id: movie_title:token_seq, # column name for the text ID of the item (e.g. movie name, book title)
    label_threshold: 3, # if label exceed the value will be regarded as 1, otherwise, it will be accounted into 0 --> we consider a positive recommendation if a user rated a movie with a value higher than 3
    item_domain: movie, # description of the dataset domain (e.g. movie, music, jobs etc.)

   item_val: 5, # keep items which have at least this number of interactions
   user_val: 5, # keep users who have at least this number of interactions
   group_val: 5, # keep groups which have at least this number of interactions
   group_aggregation_threshold: 15, ##If the number of items owned by a group is less than this value, those groups will be merged into a single group called the 'infrequent group'. For example, Fantasy, War, Musician, ... will be merged into one group called 'infrequent group', as the number of items belonging to this group is under the threshold.
   sample_size: 1.0, ###Sample ratio of the whole dataset to form a new subset dataset for training.
   valid_ratio: 0.1, ### Samples to be used for validation
   test_ratio: 0.1, ### Samples to be used for test
   reprocess: True, ##do you need to re-process the dataset according to your personalized requirements
   sample_num: 300, # needs to be higher than the max number of positive samples per user
   history_length: 20, # length of historical interactions of a user - [item_1, item_2, item_3, ...] to be considered
}
```

In [37]:
config_data = {
    "user_id": "userID",
    "item_id": "artistID",
    "group_id": "tagID",
    "label_id": "weight",
    "timestamp": "timestamp",
    "text_id": "name",
    "label_threshold": 3,
    "item_domain": "music",


    "item_val": 5,
    "user_val": 5,
    "group_val": 5,
    "group_aggregation_threshold": 15,
    "sample_size": 1.0,
    "valid_ratio": 0.1,
    "test_ratio": 0.1,
    "reprocess": True,
    "sample_num": 350,
    "history_length": 20,
}

with open(f"./recommendation/properties/dataset/{dataset_name}.yaml", "w+") as file:
    yaml.dump(config_data, file, sort_keys=False)

In [38]:
# add dataset as a choice in main.py
with open("main.py", "r") as f:
    content = f.read()
content = content.replace("choices=[\"steam\", \"clueweb09\", \"compas\"]", f"choices=[\"steam\", \"clueweb09\", \"compas\", \"{dataset_name}\"]")
with open("main.py", "w") as f:
    f.write(content)

## **2. Base Recommender System**

---

To check that the set-up of the new dataset works well, let's train a base recommender system!

#### **Step 1:** Define your training configuration file: `~/recommendation/train-base-model.yaml`

You can change parameters specific to each model in the following configuration file: `recommendation/properties/models/<model_name>.yaml`

```yaml
{
   ############base model#########################
   model: SASRec, # define the model to train
   data_type: 'sequential', #[point, pair, sequential] # define the data_type needed by the model during training SASRec is a sequnetial recommender system, expecting the data_type to be 'sequential'
   #############################################################

   ##Should the preprocessing be redone based on the new parameters instead of using the cached files in ~/recommendation/process_dataset######
   reprocess: True,
   ###############################################

  ####fair-rank model settings --> set all to False as we want to only train the base model without any fairness/diversity intervention
   fair-rank: False, ##if you want to run a fair-rank module on the base models, you should set the value as True

  # LLM recommendation setting
   use_llm: False,

  #############log name, it will store the evaluation result in ~log/your_log_name/
   log_name: "SASRec_ml-100k",
  #################################################

   ###########################training parameters################################
   device: cpu,
   epoch: 20,
   batch_size: 64,
   learning_rate: 0.001,
   ###########################################################################


   ###################################evaluation parameters: overwrite from ~/properties/evaluation.yaml######################################
   mmf_eval_ratio: 0.5,
   decimals: 4,
   eval_step: 5,
   eval_type: 'ranking',
   watch_metric: 'mmf@20',
   topk: [ 5,10,20 ], # if you choose the ranking settings, you can choose your top-k list
   store_scores: True, #If set true, the all relevance scores will be stored in the ~/log/your_name/ for post-processing
   fairness_metrics: ['MinMaxRatio', "MMF", "GINI", "Entropy"],
   fairness_type: "Exposure", # ["Exposure", "Utility"], where Exposure only computes the exposure of item group while utility computes the ranking score of item groups
   ###########################################################################
}
```

In [39]:
# Experiment with the baselines models provided by FairDiverse
base_model_name = "SASRec"
config_base = {
    # ############ base model #########################
    "model": f"{base_model_name}",
    "data_type": "sequential",

    # Should preprocessing be redone (ignore cache)?
    "reprocess": True,

    # Fair-rank settings !!! Don't change - needs to be set to False for running the base model !!!
    "fair-rank": False,  # run fair-rank module or not

    # LLM recommendation setting !!! Don't change - needs to be set to False for running the base model !!!
    "use_llm": False,

    # Log name (results will be stored in ~/log/{log_name}/)
    "log_name": f"{base_model_name}_{dataset_name}",

    # ############# training parameters #################
    "device": "cpu",
    "epoch": 20,
    "batch_size": 64,
    "learning_rate": 0.001,

    # ############# evaluation parameters #################
    "mmf_eval_ratio": 0.5,
    "decimals": 4,
    "eval_step": 5,
    "eval_type": "ranking",
    "watch_metric": "mmf@20",
    "topk": [5, 10, 20],
    "store_scores": True,
    "fairness_metrics": ["MinMaxRatio", "MMF", "GINI", "Entropy"],
    "fairness_type": "Exposure"  # ["Exposure", "Utility"]
}

with open(f"./recommendation/train-base-model.yaml", "w") as file:
    yaml.dump(config_base, file, sort_keys=False)

#### **Step 2: Run the Base Recommender System**

In [40]:
! python "main.py" --task recommendation --stage "in-processing" --dataset "{dataset_name}" --train_config_file "train-base-model.yaml"

your training config...
{'model': 'SASRec', 'data_type': 'sequential', 'reprocess': True, 'fair-rank': False, 'use_llm': False, 'log_name': 'SASRec_LastFM', 'device': 'cpu', 'epoch': 20, 'batch_size': 64, 'learning_rate': 0.001, 'mmf_eval_ratio': 0.5, 'decimals': 4, 'eval_step': 5, 'eval_type': 'ranking', 'watch_metric': 'mmf@20', 'topk': [5, 10, 20], 'store_scores': True, 'fairness_metrics': ['MinMaxRatio', 'MMF', 'GINI', 'Entropy'], 'fairness_type': 'Exposure', 'dataset': 'LastFM', 'stage': 'in-processing', 'task': 'recommendation'}
your args: Namespace(task='recommendation', stage='in-processing', dataset='LastFM', train_config_file='train-base-model.yaml')
process config:
{'item_val': 5, 'user_val': 5, 'group_val': 5, 'group_aggregation_threshold': 15, 'sample_size': 1.0, 'valid_ratio': 0.1, 'test_ratio': 0.1, 'reprocess': True, 'sample_num': 350, 'history_length': 20, 'user_id': 'userID', 'item_id': 'artistID', 'group_id': 'tagID', 'label_id': 'weight', 'timestamp': 'timestamp', '

#### **Output files**
---

**Processed Dataset Structure**

The following files are generated during preprocessing and saved under `processed_dataset/ml-100k/`:

```text
fairdiverse
└── recommendation
    └──processed_dataset/
        └── ml-100k/
            ├── iid2pid.json              # Mapping from item ID to provider/group ID
            ├── iid2text.json             # Mapping from item ID to textual representation (e.g., title)
            ├── movie_lens.test.CTR       # Test set for click-through rate (CTR) evaluation
            ├── movie_lens.test.ranking   # Test set for ranking evaluation
            ├── movie_lens.train          # Training set
            ├── movie_lens.valid.CTR      # Validation set for CTR evaluation
            ├── movie_lens.valid.ranking  # Validation set for ranking evaluation
            └── process_config.yaml       # Configuration used during preprocessing
```
**Log Output Directory Structure**

After training, the following files are saved under the `log/` directory:
```text
fairdiverse
└── recommendation
    └──log/
        └── 2025-5-20_SASRec_ml-100k/
            ├── best_model.pth         # Saved PyTorch model weights
            ├── config.yaml            # Configuration used for training
            ├── ranking_scores.npz     # Numpy array of ranking scores
            └── test_result.json       # Evaluation metrics 

**Evaluation Results 📈**

---

In [None]:
print_evaluation_results(base_model_name, dataset_name)

## **3. Run Post-processing Model**

---

#### **3.1 With Input from FairDiverse**

---

Run the post-processing model on-top of the base recommender system that we have trained in Section 2. 

#### **Step 1:** Create a configuration file for running a post-processing intervention under 
You can change parameters specific to each model in the following configuration file: `recommendation/properties/models/<model_name>.yaml` 
```yaml
{
   ###############the ranking score stored path for the post-processing##################
   ranking_store_path: "ml-100k", 
   #######################################################################################

   ### !!! Don't change - needs to be set to False as we don't run a post-processing intervention !!!
   model: "CPFair",
   log_name: "CPFair_ml-100k",

   #########################Evaluation parameters#########################################
   topk: [5, 10, 20],
   fairness_metrics: ['MinMaxRatio', "MMF", "GINI", "Entropy"],
   fairness_type: "Exposure", # ["Exposure", "Utility"], where Exposure only computes the exposure of item group while utility computes the ranking score of item groups
   #####################################################################################
}
```

In [None]:
postprocessing_model_name = "CPFair"
today = date.today()
today_format = f"{today.year}-{today.month}-{today.day}"

config_model = {
    "ranking_store_path": f"{today_format}_{base_model_name}_{dataset_name}",  # Path to the ranking score file (required for post-processing)

    # Change to any of the supported post-processing methods in Fairdiverse
    "model": f"{postprocessing_model_name}",
     "fair-rank": True,

    "log_name": f"{postprocessing_model_name}_{dataset_name}", # path to save the evaluation and the output

    # Evaluation parameters
    "topk": [5, 10, 20],
    "fairness_metrics": ["MinMaxRatio", "MMF", "GINI", "Entropy"],
    "fairness_type": "Exposure"  # "Exposure" computes exposure of item group; "Utility" computes score differences
}

with open(f"./recommendation/postprocessing_with_fairdiverse.yaml", "w") as file:
    yaml.dump(config_model, file, sort_keys=False)

**Step 2: Run the post-processing model**

In [None]:
! python "main.py" --task recommendation --stage "post-processing" --dataset "{dataset_name}" --train_config_file "postprocessing_with_fairdiverse.yaml"

**Evaluation Results📈**

---

### NDCG as a Measure of Utility Loss

Here, **Normalized Discounted Cumulative Gain (NDCG)** is used to quantify the **loss in utility** resulting from the post-processing intervention.

Specifically, it compares the ranking produced by **CP-Fair** with the original ranking of the **base model** (e.g., *SASRec*).

The formula is:

$$
\text{Mean\_NDCG@k} = \frac{1}{|U|} \sum_{u \in U} \frac{DCG_u}{IDCG_u}
$$

Where:
-  *U* is the set of users,
- **DCG** is computed based on the ranking produced by the post-processing intervention (e.g. CP-Fair),
- **Ideal DCG** is computed based on the original ranking produced by the base model (e.g. SASRec).

An NDCG closer to 1 indicates minimal loss in utility due to the intervention.

### Mean Utility Loss

The **mean utility loss at rank k** across all users is defined as:

$$
U_{loss@k} = \frac{1}{|U|} \sum_{u \in U} \left[ \frac{1}{k} \left( \sum_{i=1}^{k} \text{score}_{base} {(u,i)} - \sum_{i=1}^{k} \text{score}_{post} {(u,i)} \right) \right]
$$

Where:
- *U* is the set of users,
- $ \text{score}_{base} {(u,i)} $  is the score assigned to the *i-th* item in the **base model's** top-*k* ranking for user *u*,
- $ \text{score}_{post} {(u,i)} $ is the score of the *i-th* item in the **post-processing model's** top-*k* ranking for user *u*.

This metric captures the **average per-item utility loss over all users**, reflecting how much the re-ranking procedure deviates from the base model in terms of utility.


In [None]:
# evaluation results of post-processing model
print_evaluation_results(postprocessing_model_name, dataset_name)

In [None]:
# evaluation results of the base model
print_evaluation_results(base_model_name, dataset_name)

#### ✅ CP-Fair improves fairness and diversity metrics over the base model SASRec, with only a small drop in NDCG and utility loss.

#### **3.2 Withoout Input from FairDiverse**

---


To simulate a scenario where you did not use FairDiverse to generate the required files let's rename the already generated folder.

In [None]:
os.rename("recommendation/processed_dataset/ml-100k", "recommendation/processed_dataset/ml-100k_fairdiverse")

**Expected Data Format**

If you want to use a model not supported by FairDiverse and run the evaluation metrics you need to have the following files:

(1) `iid2pid.json` - Mapping from item ID to provider/group ID 

(2) `ranking_scores.npz` - Numpy array of ranking scores

Otherwise use one of the Base Models or In-processing models supported by FairDiverse to generate those files.

In [None]:
# (1) Example of expected format for iid2pid.json file -- item_id:group_id
iid2pid = {"1488": "0", "42": "1", "2508": "2", "1084": "3", "1182": "0", "1468": "4", "2087": "3", "153": "0"}

In [None]:
# (2) Example of expected format for ranking_scores.npz file -- sparse matrix of users x items and the corresponding score

n_users = 50
n_items = 100
user_item_matrix = np.random.rand(n_users, n_items)
print(user_item_matrix)
print("Shape:", user_item_matrix.shape)

#### **Step 1:** Create the files needed for running the post-processing method.

To simulate a scenarion where you did not use FairDiverse to create the files, take the files generated by the previous base model (e.g. SASRec) and follow the steps below.

#### **Step 2:** Place `ranking_scores.npz` under `~/recommendation/log/ml-100k`

```text
fairdiverse
└── recommendation
    └──log/
        └── ml-100k/
            ├── ranking_scores.npz     # Numpy array of ranking scores

```
#### **Step 3:** Place `iid2pid.json` under `~/recommendation/processed_dataset/ml-100k`
```text
fairdiverse
└── recommendation
    └──processed_dataset/
        └── ml-100k/
            ├── iid2pid.json    # Mapping from item ID to provider/group ID
```

#### **Step 4:** Save data configuration file `process_config.yaml` under `~/recommendation/processed_dataset/ml-100k`
```text
fairdiverse
└── recommendation
    └──processed_dataset/
        └── ml-100k/
            ├── process_config.yaml   
```

In [None]:
os.makedirs(f"recommendation/log/{dataset_name}", exist_ok=True)
os.makedirs(f"recommendation/processed_dataset/{dataset_name}", exist_ok=True)

In [None]:
# run this if you did not use FairDiverse to create the score file
num_users = 822 # n rows of the matrix
num_items = 1345 # n columns of the matrix
num_groups = 14 # this should correspond to the unique values from iid2pid.json
config_data["item_num"] = num_items
config_data["user_num"] = num_users
config_data["group_num"] = num_groups

print(config_data)
with open(f"recommendation/processed_dataset/{dataset_name}/process_config.yaml", "w") as file:
    yaml.dump(config_data, file, sort_keys=False)

#### **Step 5:** Create a configuration file for running a post-processing intervention under 
You can change parameters specific to each model in the following configuration file: `recommendation/properties/models/<model_name>.yaml` 
```yaml
{
   ###############the ranking score stored path for the post-processing##################
   ranking_store_path: "ml-100k", 
   #######################################################################################

   ### !!! Don't change - needs to be set to False as we don't run a post-processing intervention !!!
   model: "CPFair",
   log_name: "CPFair_ml-100k",

   #########################Evaluation parameters#########################################
   topk: [5, 10, 20],
   fairness_metrics: ['MinMaxRatio', "MMF", "GINI", "Entropy"],
   fairness_type: "Exposure", # ["Exposure", "Utility"], where Exposure only computes the exposure of item group while utility computes the ranking score of item groups
   #####################################################################################
}
```

In [None]:
postprocessing_model_name = "CPFair"

config_model = {
    "ranking_store_path": f"{dataset_name}",  # Path to the ranking score file (required for post-processing)

    # Change to any of the supported post-processing methods in Fairdiverse
    "model": f"{postprocessing_model_name}",
    "fair-rank": True,

    "log_name": f"{postprocessing_model_name}_without_fairdiverse_{dataset_name}", # path to save the evaluation and the output

    # Evaluation parameters
    "topk": [5, 10, 20],
    "fairness_metrics": ["MinMaxRatio", "MMF", "GINI", "Entropy"],
    "fairness_type": "Exposure"  # "Exposure" computes exposure of item group; "Utility" computes score differences
}

with open(f"./recommendation/postprocessing_without_fairdiverse.yaml", "w") as file:
    yaml.dump(config_model, file, sort_keys=False)

#### **Step 6:** Run the post-processing model

In [None]:
! python "main.py" --task recommendation --stage "post-processing" --dataset "{dataset_name}" --train_config_file "postprocessing_without_fairdiverse.yaml"

##### **Evaluation Results 📈** 

---

### NDCG as a Measure of Utility Loss

Here, **Normalized Discounted Cumulative Gain (NDCG)** is used to quantify the **loss in utility** resulting from the post-processing intervention.

Specifically, it compares the ranking produced by **CP-Fair** with the original ranking of the **base model** (e.g., *SASRec*).

The formula is:

$$
\text{Mean\_NDCG@k} = \frac{1}{|U|} \sum_{u \in U} \frac{DCG_u}{IDCG_u}
$$

Where:
-  *U* is the set of users,
- **DCG** is computed based on the ranking produced by the post-processing intervention (e.g. CP-Fair),
- **Ideal DCG** is computed based on the original ranking produced by the base model (e.g. SASRec).

An NDCG closer to 1 indicates minimal loss in utility due to the intervention.

### Mean Utility Loss

The **mean utility loss at rank k** across all users is defined as:

$$
U_{loss@k} = \frac{1}{|U|} \sum_{u \in U} \left[ \frac{1}{k} \left( \sum_{i=1}^{k} \text{score}_{base} {(u,i)} - \sum_{i=1}^{k} \text{score}_{post} {(u,i)} \right) \right]
$$

Where:
- *U* is the set of users,
- $ \text{score}_{base} {(u,i)} $  is the score assigned to the *i-th* item in the **base model's** top-*k* ranking for user *u*,
- $ \text{score}_{post} {(u,i)} $ is the score of the *i-th* item in the **post-processing model's** top-*k* ranking for user *u*.

This metric captures the **average per-item utility loss over all users**, reflecting how much the re-ranking procedure deviates from the base model in terms of utility.


In [None]:
print_evaluation_results(postprocessing_model_name, dataset_name)

In [None]:
print_evaluation_results(base_model_name, dataset_name)

## **4. Run Evaluation 📈**

---

If you want to use a model not supported by FairDiverse and run the evaluation metrics you need to have the following files:

(1) `iid2pid.json` - Mapping from item ID to provider/group ID 

(2) `ranking_scores.npz` - Numpy array of ranking scores

To simulate a scenario where you did not use FairDiverse to generate the required files let's rename the already generated folder for the base model, and run again only the evaluation.


In [None]:
# no need to run if you did this in Section 3.1
# os.rename("recommendation/processed_dataset/ml-100k", "recommendation/processed_dataset/ml-100k_fairdiverse")

In [None]:
# (1) Example of expected format for iid2pid.json file -- item_id:group_id
iid2pid = {"1488": "0", "42": "1", "2508": "2", "1084": "3", "1182": "0", "1468": "4", "2087": "3", "153": "0"}

In [None]:
# (2) Example of expected format for ranking_scores.npz file -- sparse matrix of users x items and the corresponding score

users = 50
items = 100
user_item_matrix = np.random.rand(users, items)
print(user_item_matrix)
print("Shape:", user_item_matrix.shape)

#### **Step 1:** Create the files needed for running the evaluation

To simulate a scenarion where you did not use FairDiverse to create the files, take the files generated by the previous base model (e.g. SASRec) and follow the steps below.
#### **Step 2:** Place `ranking_scores.npz` under `~/recommendation/log/SASRec_ml-100k`

```text
fairdiverse
└── recommendation
    └──log/
        └── ml-100k/
            ├── ranking_scores.npz     # Numpy array of ranking scores

```
#### **Step 3:** Place `iid2pid.json` under `~/recommendation/processed_dataset/ml-100k`
```text
fairdiverse
└── recommendation
    └──processed_dataset/
        └── ml-100k/
            ├── iid2pid.json    # Mapping from item ID to provider/group ID
```


#### **Step 4:** Save data configuration file `process_config.yaml` under `~/recommendation/processed_dataset/ml-100k`
```text
fairdiverse
└── recommendation
    └──processed_dataset/
        └── ml-100k/
            ├── process_config.yaml   
```

In [None]:
os.makedirs(f"recommendation/log/{base_model_name}_{dataset_name}", exist_ok=True)
os.makedirs(f"recommendation/processed_dataset/{dataset_name}", exist_ok=True)

In [None]:
# run this if you did not use FairDiverse to create the score file
num_users = 822 # n rows of the matrix
num_items = 1345 # n columns of the matrix
num_groups = 14 # this should correspond to the unique values from iid2pid.json
config_data["item_num"] = num_items
config_data["user_num"] = num_users
config_data["group_num"] = num_groups

print(config_data)
with open(f"recommendation/processed_dataset/{dataset_name}/process_config.yaml", "w") as file:
    yaml.dump(config_data, file, sort_keys=False)

#### **Step 5:** Create a configuration file for the evaluation
```yaml
{
   ###############the ranking score stored path for the post-processing##################
   ranking_store_path: "SASRec_ml-100k", 
   #######################################################################################

   ### !!! Don't change - needs to be set to False as we don't run a post-processing intervention !!!
   model: False,
   use_llm: False,
   ###############eval output path##################
   log_name: "eval_ml-100k",

   #########################Evaluation parameters#########################################
   topk: [5, 10, 20],
   fairness_metrics: ['MinMaxRatio', "MMF", "GINI", "Entropy"],
   fairness_type: "Exposure", # ["Exposure", "Utility"], where Exposure only computes the exposure of item group while utility computes the ranking score of item groups
   #####################################################################################
}

In [None]:
config_eval = {
    "ranking_store_path": f"{base_model_name}_{dataset_name}",  # Path to the ranking score file (required for post-processing)

    # Do not change — no post-processing model used, and no base model used as we want to just perform evaluation
    "fair-rank": False,
    # output file for evaluation results
    "log_name": f"eval_{base_model_name}_without_fairdiverse_{dataset_name}", # path to save the evaluation

    # Evaluation parameters
    "topk": [5, 10, 20],
    "fairness_metrics": ["MinMaxRatio", "MMF", "GINI", "Entropy"],
    "fairness_type": "Exposure"  # "Exposure" computes exposure of item group; "Utility" computes score differences
}


with open(f"./recommendation/evaluation.yaml", "w") as file:
    yaml.dump(config_eval, file, sort_keys=False)

#### **Step 6:** Run the evaluation

In [None]:
! python "main.py" --task recommendation --stage "post-processing" --dataset "{dataset_name}" --train_config_file "evaluation.yaml"

##### **Evaluation Results📈**

---

In [None]:
print_evaluation_results(f"eval_{base_model_name}_without_fairdiverse", dataset_name)

## **5. Add Post-processing Model**
---

RAIF is a reranking approach based on MILP that selects N items from K candidates for each user. Its fairness objective aims to balance the overall exposure among different item groups. Now we introduce a new optimization objective—minimizing the disparity in average exposure across groups—and refer to this enhanced method as RAIFPro. These are the steps for adding RAIFPro to FairDiverse. You could also add your own post-processing model in the similar way.


**Step 1:** Create a python file inside: `fairdiverse/recommendation/rerank_model` which should have the name of the model (e.g. RAIFPro.py)

In [None]:
open(os.path.join("recommendation/rerank_model", "RAIFPro.py"), 'a').close()

**Step 2:** Implement a class which inherits `Abstract_Reranker` with the name of the model (e.g. RAIFPro). You can use the common parameters within the `Abstract_Reranker` class.

Input:

    relevance: numpy.ndarray, shape (num_users, num_items)
        A 2D array where each row corresponds to a user and contains item relevance scores.
    topk: int
        The number of top-ranked items to select per user.


Output: 

    rerank_list: list of list of int, shape (num_users, size)
        A list where each entry contains exactly `size` selected items for a user.

In [None]:
%%writefile recommendation/rerank_model/RAIFPro.py

import numpy as np
from .Abstract_Reranker import Abstract_Reranker
from gurobipy import Model, GRB, quicksum

r"""
RAIFPro changes the RAIF's optimization objective to minimizing the disparity in average exposure across groups.

"""

def get_results(num_users, size, topk, solution, topk_items):
    """
    Converts the solution matrix into selected item lists for multiple users.

    Parameters:
    ----------
    num_users: int
        The number of users.
    size: int
        The expected number of items per user in the final rerank list.
    topk: int
        The number of candidate items per user.
    solution: numpy.ndarray, shape (num_users, topk)
        A matrix indicating the final selected items.
    topk_items: list of list of int, shape (num_users, topk)
        A list where each entry contains candidate item IDs corresponding to a user.

    Returns:
    -------
    rerank: list of list of int, shape (num_users, size)
        A list where each entry contains exactly `size` selected items for a user.
    """

    rerank = []
    for i in range(num_users):

        rerank_user = []
        for j in range(topk):
            if solution[i, j] > 0.5:
                rerank_user.append(topk_items[i][j])

        assert len(rerank_user) == size
        rerank.append([int(x) for x in rerank_user])

    return rerank

def load_ranking_matrices(relevance, topk):
    """
    Generates ranking matrices by selecting the top-k relevant items for each user.

    Parameters:
    ----------
    relevance: numpy.ndarray, shape (num_users, num_items)
        A 2D array where each row corresponds to a user and contains item relevance scores.
    topk: int
        The number of top-ranked items to select per user.

    Returns:
    -------
    topk_items: numpy.ndarray, shape (num_users, topk)
        A 2D array where each row contains the indices of the top-k items for the corresponding user.
    topk_scores: numpy.ndarray, shape (num_users, topk)
        A 2D array where each row contains the relevance scores of the selected top-k items.
    num_users: int
        The total number of users.

    """

    num_users, num_items = relevance.shape

    topk_items = np.zeros((num_users, topk), dtype=int)
    topk_scores = np.zeros((num_users, topk))

    for user_idx in range(num_users):
        # Get the indices of the items sorted by their relevance score in descending order
        sorted_indices = np.argsort(relevance[user_idx])[::-1]

        # Select the top k indices and corresponding scores
        topk_items[user_idx] = sorted_indices[:topk]
        topk_scores[user_idx] = relevance[user_idx, sorted_indices[:topk]]

    return topk_items, topk_scores, num_users

def read_item_index(total_users, topk, no_item_groups, item_group_map, topk_items):
    """
    Creates a binary indicator matrix that maps items to their respective item groups.

    Parameters:
    ----------
    total_users: int
        The total number of users.
    topk: int
        The number of candidate items per user.
    no_item_groups: int
        The total number of item groups.
    item_group_map: dict
        A dictionary mapping item indices to their corresponding group IDs.
    topk_items: list of list of int, shape (total_users, topk)
        A list where each entry contains candidate item IDs corresponding to a user.

    Returns:
    -------
    Ihelp: numpy.ndarray, shape (total_users, topk, no_item_groups)
        A binary 3D array where `Ihelp[uid][lid][k] = 1` if the `lid`-th item for user `uid`
        belongs to item group `k`, otherwise `0`.
    """

    Ihelp = np.zeros((total_users, topk, no_item_groups))
    for uid in range(total_users):
        for lid in range(topk):
            for k in range(no_item_groups):
                top_ = topk_items[uid][lid]
                if top_ in item_group_map.keys():
                    if item_group_map[topk_items[uid][lid]] == k:
                        Ihelp[uid][lid][k] = 1

    return Ihelp

def fairness_optimisation(total_users, alpha, size, topk, group_num, Ihelp, topk_scores, mean):
    """
    Solves a fairness-aware ranking optimization problem using Gurobi.

    Parameters:
    ----------
    total_users: int
        The total number of users.
    alpha: float
        The fairness regularization parameter. A higher alpha increases fairness consideration.
    size: int
        The number of items to be selected per user.
    topk: int
        The number of candidate items per user.
    group_num: int
        The number of item groups.
    Ihelp: numpy.ndarray, shape (total_users, topk, group_num)
        A binary indicator matrix.
    topk_scores: numpy.ndarray, shape (total_users, topk)
        A 2D relevance score matrix.
    mean: list
        The ideal exposure across item groups.

    Returns:
    -------
    solution: numpy.ndarray, shape (num_users, topk)
        A matrix indicating the final selected items.

    """

    print(f"Running RAIFPro, {format(alpha, 'f')}")
    # V1: No. of users
    # V2: No. of top items (topk)
    # V4: no. of item groups
    V1, V2, V4 = range(total_users), range(topk), range(group_num)

    # initiate model
    model = Model()

    W = model.addVars(V1, V2, vtype=GRB.BINARY)
    item_group = model.addVars(V4, vtype=GRB.CONTINUOUS)
    item_fair = model.addVar(vtype=GRB.CONTINUOUS)
    abs_diff = model.addVars(V4, lb=0, name="abs_diff")

    model.setObjective(quicksum(topk_scores[i][j] * W[i, j] for i in V1 for j in V2) - alpha * item_fair, GRB.MAXIMIZE)

    for i in V1:
        model.addConstr(quicksum(W[i, j] for j in V2) == size)

    for k in V4:
        model.addConstr(item_group[k] == quicksum(W[i, j] * Ihelp[i][j][k] for i in V1 for j in V2))

    for k in V4:
        model.addConstr(abs_diff[k] >= item_group[k] - mean[k])
        model.addConstr(abs_diff[k] >= -(item_group[k] - mean[k]))

    model.addConstr(item_fair == quicksum(abs_diff[k] for k in V4))


    # optimizing
    model.optimize()
    if model.status == GRB.OPTIMAL:
        solution = model.getAttr('x', W)
        #fairness = model.getAttr('x', item_group)


    return solution

def ideal(matrix, num_users, k):

    group_count = [sum(row[i] for row in matrix) for i in range(len(matrix[0]))]
    total = sum(group_count)
    exposure = num_users * k
    distribution = [x / total * exposure for x in group_count]

    return distribution

class RAIFPro(Abstract_Reranker):
    def __init__(self, config, weights = None):
        super().__init__(config, weights)


    def rerank(self, ranking_score, k):
        ## its parameters
        topk = self.config['candidate']
        alpha = self.config['alpha']

        topk_items, topk_scores, num_users = load_ranking_matrices(ranking_score, topk)

        #ideal exposure across groups
        mean = ideal(self.M, num_users, k)

        Ihelp = read_item_index(total_users=num_users, topk=topk, no_item_groups=self.group_num, item_group_map=self.iid2pid, topk_items=topk_items)
        solution = fairness_optimisation(num_users, alpha, k, topk, self.group_num, Ihelp, topk_scores, mean)
        rerank_list = get_results(num_users, k, topk, solution, topk_items)

        return rerank_list


**Step 3:** Create the configuration file under `FairDiverse-master/fairdiverse/recommendation/properties/models/` with the name of the model.yaml (e.g. RAIFPro.yaml)

In [None]:
import yaml
congif_raifpro = {
    "alpha": 0.2,      #the weight parameter of item fairness term
    "candidate": 100   #the number of item candidates
}

# Define the file path
file_path = "recommendation/properties/models/RAIFPro.yaml"


# Write the dictionary to the YAML file
with open(file_path, 'w') as file:
    yaml.dump(congif_raifpro, file, default_flow_style=True)


**Step 4:** Create a running configuration file `postprocessing_new_model.yaml`.

In [None]:
model_name = "RAIFPro"

today = date.today()
today_format = f"{today.year}-{today.month}-{today.day}"

config_new_model = {
    "ranking_store_path": f"{today_format}_{base_model_name}_{dataset_name}",  # Path to the ranking score file (required for post-processing)

    # Change to any of the supported post-processing methods in Fairdiverse
    "model": f"{model_name}",
    "fair-rank": True,
    "log_name": f"{model_name}_{dataset_name}", # path to save the evaluation

    # Evaluation parameters
    "topk": [5, 10, 20],
    "fairness_metrics": ["MinMaxRatio", "MMF", "GINI", "Entropy"],
    "fairness_type": "Exposure"  # "Exposure" computes exposure of item group; "Utility" computes score differences
}

with open(f"recommendation/postprocessing_new_model.yaml", "w") as file:
    yaml.dump(config_new_model, file, sort_keys=False)

**Step 5:** Import your custom model package in the corresponding file `fairdiverse/recommendation/rerank_model/__init__.py`. 

In [None]:
# Define the file path
file_path = "recommendation/rerank_model/__init__.py"

# Read the current contents
with open(file_path, "r") as file:
    lines = file.readlines()

# Define the new line to append
new_import = "\nfrom .RAIFPro import RAIFPro\n"

# Append only if it's not already present
if new_import not in lines:
    lines.append(new_import)

# Write back to the file
with open(file_path, "w") as file:
    file.writelines(lines)

**Step 6:** Define the model in the script `FairDiverse-master/fairdiverse/recommendation/reranker.py`

In [None]:
# Path to the target Python file
file_path = "recommendation/reranker.py"

# Read the file
with open(file_path, 'r') as f:
    lines = f.readlines()

# Step 1: Update import line
for i, line in enumerate(lines):
    if "from .rerank_model import" in line:
        if "RAIFPro" not in line:
            lines[i] = line.strip() + ", RAIFPro\n"
        break  # Only modify the first matching import line

# Step 2: Add the RAIFPro elif clause
new_elif_block = [
    "elif config['model'] == 'RAIFPro':\n",
    "    Reranker = RAIFPro(config)\n"
]

# Insert just before the existing "else:" clause inside the rerank method
for i, line in enumerate(lines):
    if "else:" in line and "raise NotImplementedError" in lines[i + 1]:
        indent = " " * (len(line) - len(line.lstrip()))
        # Make sure to adjust indentation to match
        lines[i:i] = [indent + l for l in new_elif_block]
        break

# Save the modified file
with open(file_path, 'w') as f:
    f.writelines(lines)

print("RAIFPro support added successfully.")


**Step 7:** To solve RAIFPro, we need install and set Gurobi.

1. install packages in environment


In [None]:
!pip install mip
!pip install gurobipy


2. install Gurobi license "Named-User Academic" to your laptop
https://portal.gurobi.com/iam/licenses/request






In [None]:
# grbgetkey xxxxxxxxxx

**Step 8:** Run RAIFPro for fairness-aware reranking.

In [None]:
! python "main.py" --task recommendation --stage "post-processing" --dataset "steam" --train_config_file "postprocessing_new_model.yaml"

##### **Evaluation Results📈**

---

In [None]:
# evaluation on fairness re-ranking algorithm applied on <base_model>
print_evaluation_results(model_name, dataset_name)

In [None]:
print_evaluation_results(base_model_name, dataset_name)