In [None]:
import os,sys,inspect
# Put the parent of this notebook's directory at the front of sys.path;
# presumably this is what lets the project-local imports below resolve.
currentdir = os.path.abspath(inspect.getfile(inspect.currentframe()))
currentdir = os.path.dirname(currentdir)
parentdir = os.path.split(currentdir)[0]
sys.path[:0] = [parentdir]

from utils import read_line_number, extract_args

In [None]:
from reader.BaseReader import worker_init_func
from task.TopK import init_ranking_report, calculate_ranking_metric

def get_userwise_group(model, group_feature, fairness_control):
    """
    Collect the fairness group of every user that has records in the eval dataset.

    @input:
    - model: object exposing `reader.get_eval_dataset()` and `wrap_batch(batch)`
    - group_feature: accepted for interface compatibility; not read by this function
    - fairness_control: object whose `group_dict` maps a user id to its group
    @output:
    - user_groups: {uid: group}, in the order users are yielded by the loader

    NOTE(review): unlike get_userwise_performance, this function does not call
    `model.reader.set_phase`, so it iterates whatever phase the reader is
    currently in -- confirm that this is intended by the caller.
    """
    dataset = model.reader.get_eval_dataset()
    # One user per batch, in dataset order.
    loader = DataLoader(dataset, batch_size = 1, shuffle = False, pin_memory = False,
                        num_workers = dataset.n_worker, worker_init_fn = worker_init_func)
    user_groups = {}
    with torch.no_grad():
        for batch in loader:
            # Batches carrying a "no_item" key are skipped entirely.
            if "no_item" in batch:
                continue
            wrapped = model.wrap_batch(batch)
            uid = wrapped["user_UserID"].reshape(-1).detach().cpu().numpy()[0]
            user_groups[uid] = fairness_control.group_dict[uid]
    return user_groups

def get_userwise_performance(model, at_k_list, phase = 'test'):
    """
    Compute a ranking report separately for every user of the given phase.

    @input:
    - model: object exposing `reader`, `wrap_batch(batch)` and
             `forward(feed_dict, return_prob = True)`; the forward output must
             contain "probs" and "neg_probs"
    - at_k_list: cut-offs forwarded to init_ranking_report / calculate_ranking_metric
    - phase: phase name handed to `model.reader.set_phase`
    @output:
    - user_results: {uid: report}; users whose batch carries "no_item" are absent
    """
    model.reader.set_phase(phase)
    dataset = model.reader.get_eval_dataset()
    # One user per batch, in dataset order.
    loader = DataLoader(dataset, batch_size = 1, shuffle = False, pin_memory = False,
                        num_workers = dataset.n_worker, worker_init_fn = worker_init_func)
    user_results = {}
    with torch.no_grad():
        for batch in loader:
            if "no_item" in batch:
                continue
            # Score this user's positive and negative items.
            feed_dict = model.wrap_batch(batch)
            out_dict = model.forward(feed_dict, return_prob = True)
            pos_preds = out_dict["probs"]
            neg_preds = out_dict["neg_probs"]
            if pos_preds.is_cuda:
                pos_preds, neg_preds = pos_preds.detach().cpu(), neg_preds.detach().cpu()
            # The return value of calculate_ranking_metric is ignored, so the
            # metrics are presumably written into `report` in place.
            report = init_ranking_report(at_k_list)
            calculate_ranking_metric(pos_preds.view(-1), neg_preds.view(-1), at_k_list, report)
            uid = feed_dict["user_UserID"].reshape(-1).detach().cpu().numpy()[0]
            user_results[uid] = report
    return user_results

In [None]:
from reader.RecDataReader import RecDataReader
from model.fair_rec.FairUserGroupPerformance import FairUserGroupPerformance
from model.baselines import *
from model.fed_rec import *
import os
import torch

# Experiment name -> name of the model definition used to look up its class.
model_name_list = dict(MF = 'MF', FedMF = 'FedMF', FairMF = 'MF', F2MF = 'FedMF')
# GPU index to use; a negative value forces CPU. `device` ends up as a
# torch device string either way.
device = -1
if device < 0 or not torch.cuda.is_available():
    device = "cpu"
else:
    os.environ["CUDA_VISIBLE_DEVICES"] = str(device)
    torch.cuda.set_device(device)
    device = f"cuda:{device}"
# params = {'at_k_list': [10,50], 'eval_sample_p': 1.0}

In [None]:
import numpy as np
from reader.RecDataReader import sample_negative
class EvalDataReader(RecDataReader):
    """RecDataReader variant whose per-user feed dict is built for evaluation."""

    def get_user_feed_dict(self, uid, phase, n_neg = -1):
        """
        Build the feed dict of one user for the given phase.

        @input:
        - uid: key into `self.user_hist` and `self.pos_range[phase]`
        - phase: key into `self.pos_range`
        - n_neg: forwarded unchanged to sample_negative
        @output:
        - {"no_item": True} when the user has no history at all; otherwise a
          dict with "resp", "user_*" entries and, when the phase slice is
          non-empty, "item_*" and "negi_*" entries. An empty phase slice adds
          "no_item": True instead of the item entries.
        """
        history = self.user_hist[uid]
        if len(history) == 0:
            return {"no_item": True}
        items, responses, _times = zip(*history)
        start, end = self.pos_range[phase][uid]
        # ItemID features of the user's whole history are handed to the sampler
        # (presumably as the set of items to exclude -- confirm in sample_negative).
        history_ids = [self.get_item_feature(iid, "ItemID") for iid in items]
        neg_items = sample_negative(history_ids, self.n_items, n_neg = n_neg)
        phase_items = items[start:end]
        user_data = {"resp": np.array(responses[start:end])}
        user_data.update(("user_" + k, np.array(v)) for k, v in self.get_user_meta(uid).items())
        if len(phase_items) == 0:
            user_data["no_item"] = True
            return user_data
        for k, v in self.get_item_list_meta(phase_items).items():
            user_data["item_" + k] = np.array(v)
        for k, v in self.get_item_list_meta(neg_items, from_idx = True).items():
            user_data["negi_" + k] = np.array(v)
        return user_data

## Run the Fairness Evaluation

In [None]:
# Run configuration shared by the cells below.
# group_feature: user attribute used to form fairness groups; it is also
# interpolated into the log file names in `best_setting` and into the result path.
group_feature = 'activity'
# phase: evaluation phase passed to get_userwise_performance.
phase = 'test'
# modelName: must be a key of both `model_name_list` and `best_setting[data_key]`.
modelName = 'FairMF'

# data_key: dataset directory under ROOT and top-level key of `best_setting`.
# NOTE(review): `best_setting` below has no 'amz_Books' entry, so selecting it
# would raise a KeyError in the evaluation cell.
data_key = 'ml-1m'
# data_key = 'amz_Movies_and_TV'
# data_key = 'amz_Books'

from data.preprocess import ROOT
# Fairness trade-off values for which a training log exists (kept as strings
# so that they appear in the file names exactly as written by the training runs).
_FAIR_LAMBDAS = ['-0.7', '-0.5', '-0.3', '-0.1', '0.1', '0.3', '0.5', '0.7', '0.9']

def _fair_log_paths(model_tag, lr, reg, group, sigma = None):
    """
    List the training-log paths (relative to ROOT + data_key) of one fair model,
    one path per value in _FAIR_LAMBDAS.

    @input:
    - model_tag: model name as it appears in the log file name ('FairMF', 'FairFedMF')
    - lr, reg: learning rate / regularisation as strings, exactly as in the file name
    - group: fairness group feature appended as `_g{group}`
    - sigma: when given, a `_sigma{sigma}` part is inserted before the group part
    """
    sigma_part = '' if sigma is None else f'_sigma{sigma}'
    return [f'/logs/f2rec_train_and_eval_{model_tag}_lr{lr}_reg{reg}_losspairwisebpr_lambda{lbd}{sigma_part}_g{group}.log'
            for lbd in _FAIR_LAMBDAS]

# {data_key: {modelName: [log path relative to ROOT + data_key]}}
# Every fair model now follows `group_feature`; the amz_Movies_and_TV FairMF
# entries used to hard-code 'gactivity' and silently ignored the setting.
best_setting = {
    'ml-1m': {
        # baseline: '/logs/f2rec_train_and_eval_MF_lr0.0001_reg0.1_losspairwisebpr.log'
        'MF': [],
        # baseline: '/logs/f2rec_train_and_eval_FedMF_lr0.003_reg0.1_losspairwisebpr_local1_fedavg.log'
        'FedMF': [],
        'FairMF': _fair_log_paths('FairMF', '0.00003', '0.1', group_feature),
        # alternative run with noise: sigma = '0.01'
        'F2MF': _fair_log_paths('FairFedMF', '0.003', '0.1', group_feature, sigma = '0'),
    },
    'amz_Movies_and_TV': {
        # baseline: '/logs/f2rec_train_and_eval_MF_lr0.00003_reg1.0_losspairwisebpr.log'
        'MF': [],
        # baseline: '/logs/f2rec_train_and_eval_FedMF_lr0.003_reg1.0_losspairwisebpr_local1_fedavg.log'
        'FedMF': [],
        'FairMF': _fair_log_paths('FairMF', '0.00001', '1.0', group_feature),
        # alternative run with noise: sigma = '0.001'
        'F2MF': _fair_log_paths('FairFedMF', '0.003', '1.0', group_feature, sigma = '0'),
    },
}

In [None]:
import os
import datetime
from torch.utils.data import DataLoader
import torch
import numpy as np

measures = ['HR','P','RECALL','F1','NDCG']
k_list = [1,5,10,20,50]
# Metric names looked up in each user's report, e.g. 'HR@1', ..., 'NDCG@50', 'AUC'.
metrics = [f'{m}@{k}' for m in measures for k in k_list] + ['AUC']
result_file_path = ROOT + data_key + "/results/fairness_" + group_feature + "_" + modelName + "_" + phase + ".csv"
# Create the results directory up front so that open(..., 'w') cannot fail on a fresh checkout.
os.makedirs(os.path.dirname(result_file_path), exist_ok = True)

modelDef = model_name_list[modelName]
log_path_list = best_setting[data_key][modelName]
# The file is tab-separated: two header lines (user ids, user groups) followed
# by one line per (log, metric) holding that metric for every user.
with open(result_file_path, 'w') as fout:
    count = 0  # number of logs processed so far; 0 means reader/header are not set up yet
    for log_path in log_path_list:
        # args
        try:
            args = extract_args(ROOT + data_key + log_path)
        except Exception as err:
            # Missing or unreadable logs are skipped, but say which one and why
            # instead of hiding the error (and no longer trap KeyboardInterrupt).
            print(f'skip {log_path}: {err!r}')
            continue
        print(args)
        # reader: built once from the first usable log and shared by all models
        if count == 0:
            reader = EvalDataReader(args)
            reader.n_neg = -1
            reader.n_neg_val = -1
            reader.n_neg_test = -1
            # fairness calculator
            args.fair_group_feature = group_feature
            fairness_controller = FairUserGroupPerformance(args, reader)
        # model: `modelDef` names both the module and the class inside it
        modelClass = getattr(globals()[modelDef], modelDef)
        model = modelClass(args, reader, device)
        model.load_from_checkpoint(args.model_path, with_optimizer = False)
        model = model.to(device)
        model.device = device
        # header
        if count == 0:
            # NOTE(review): the reader phase is only set inside
            # get_userwise_performance, so these users come from the reader's
            # current phase, not necessarily `phase` -- confirm this is intended.
            uG = get_userwise_group(model, group_feature, fairness_controller)
            fout.write('\t'.join(['model','fair_group','fair_lambda','metric'] + [str(uid) for uid in uG.keys()]) + '\n')
            fout.write('\t'.join(['all','-','-','group'] + [str(g) for g in uG.values()]) + '\n')
        count += 1
        # evaluation: users without a report in this phase are written as '0'
        user_results = get_userwise_performance(model, k_list, phase)
        for m in metrics:
            fout.write('\t'.join([modelName,args.fair_group_feature,str(args.fair_lambda),m] + 
                                 [str(user_results[uid][m]) if uid in user_results else '0' for uid in uG]) + '\n')

In [None]:
# Read the tab-separated result file column-wise:
# result_dict[j] collects the j-th field of every line, starting with the header line.
result_file_path = ROOT + data_key + "/results/fairness_" + group_feature + "_" + modelName + "_" + phase + ".csv"
with open(result_file_path, 'r') as fin:
    header = fin.readline().strip().split('\t')
    result_dict = {col_idx: [name] for col_idx, name in enumerate(header)}
    for raw_line in fin:
        for col_idx, cell in enumerate(raw_line.strip().split('\t')):
            result_dict[col_idx].append(cell)

In [None]:
import pandas as pd
# One DataFrame row per file column (rows 0-3 are the 'model', 'fair_group',
# 'fair_lambda', 'metric' label columns, later rows are users); one DataFrame
# column per file line (0 = user ids, 1 = user groups, 2+ = metric lines).
df = pd.DataFrame.from_dict(result_dict, orient = 'index')
df.head(10)

In [None]:
import numpy as np
# Distinct user groups: column 1 holds the group line, rows 4+ are the users.
groups = np.unique(df[1].iloc[4:].values)

In [None]:
import numpy as np
from tqdm import tqdm
# {group: {"model-fair_group-fair_lambda-metric": mean value over the group's users}}
group_metrics = {}
for G in tqdm(groups):
    subset = df.loc[df[1] == G]
    per_label = {}
    for col in range(2, len(df.columns)):
        # The first four rows of a metric column spell out what it measures.
        label = '-'.join(df[col].iloc[:4].tolist())
        per_label[label] = np.mean(list(map(float, subset[col].values)))
    group_metrics[G] = per_label
print(group_metrics)

In [None]:
def get_diff(groupwise_performance, rho = 1):
    """
    Mean pairwise performance gap between groups.

    @input:
    - groupwise_performance: sequence with one performance value per group
    - rho: exponent applied to each absolute pairwise difference
    @output:
    - mean of |v_i - v_j| ** rho over all unordered pairs i < j;
      0.0 when there are fewer than two groups (no pair, hence no gap),
      instead of the NaN + RuntimeWarning produced by averaging an empty list
    """
    from itertools import combinations
    gaps = [abs(v0 - v1) ** rho for v0, v1 in combinations(groupwise_performance, 2)]
    if not gaps:
        return 0.0
    return np.mean(gaps)

# For every metric line, print the disparity across groups next to the raw group means.
for label in group_metrics[groups[0]]:
    groupwise_performance = [group_metrics[G][label] for G in groups]
    disparity = get_diff(groupwise_performance)
    print(f"{label}\t: {disparity}\t {groupwise_performance}")

## Plots

### 1. Metrics over Lambda

In [None]:
import numpy as np
from tqdm import tqdm
# Distinct user groups (column 1, user rows only) and the distinct lambda
# values found in the 'fair_lambda' row of the metric columns, ascending.
groups = np.unique(df[1].iloc[4:].values)
sorted_lambda = sorted({float(v) for v in df.iloc[2, 2:]})
print(f"lambda:{sorted_lambda}")

In [None]:
lambda_row = df.iloc[2]   # fair_lambda of every metric column
metric_row = df.iloc[3]   # metric name of every metric column
# {metric: {group: [mean value per lambda, ordered as sorted_lambda]}}
metric_results = {m: {G: np.zeros(len(sorted_lambda)) for G in groups} for m in metrics}
group_size = {}
for G in groups:
    subset = df.loc[df[1] == G]
    group_size[G] = len(subset)
    # Single pass over the columns; label columns ('metric', 'group') never match a metric name.
    for i, metric_name in enumerate(metric_row):
        if metric_name in metric_results:
            lbd = float(lambda_row[i])
            metric_results[metric_name][G][sorted_lambda.index(lbd)] = np.mean([float(v) for v in subset[i]])
# 'All' is the size-weighted average of the group curves. It has to be
# normalised by the number of users, not by len(df): df also contains the
# four label rows, which made the overall curve slightly too small.
n_users = sum(group_size.values())
for m, group_results in metric_results.items():
    overall = np.zeros(len(sorted_lambda))
    for G, C in group_size.items():
        overall += group_results[G] * C
    if n_users > 0:
        overall /= n_users
    group_results['All'] = overall
metric_results

In [None]:
import matplotlib.pyplot as plt
import numpy as np
def plot_multiple_line(stats, features, x_ticks = [], ncol = 2, row_height = 4, no_title = False, no_xticks = False,
                       ylabel = 'y', xlabel = 'x', legend_title = '', legend_appear_at = 0):
    '''
    Draw one subplot per feature, each holding one line per key.

    @input:
    - stats: {field_name: {key: [values]}}
    - features: [field_name], the fields of stats to plot, in order
    - x_ticks: x coordinates shared by all lines; when empty, 1..len(values) is used
    - ncol: number of subplots in each row
    - row_height: figure height per subplot row (ignored when there is a single feature)
    - no_title: skip the per-subplot title (the field name)
    - no_xticks: remove the x tick marks
    - ylabel, xlabel: axis labels applied to every subplot
    - legend_title: title of the legend box
    - legend_appear_at: index of the only subplot that gets a legend; an index
      matching no subplot (e.g. -1) means no legend is drawn
    '''
    assert ncol > 0
    n_panels = len(features)
    n_rows = (n_panels - 1) // ncol + 1
    if n_panels == 1:
        fig_height = 12 // ncol
    else:
        fig_height = row_height * n_rows
    plt.figure(figsize = (16, fig_height))
    for panel_idx, field in enumerate(features):
        with_legend = panel_idx == legend_appear_at
        plt.subplot(n_rows, ncol, panel_idx + 1)
        # Track the value range over all lines to set tight y limits afterwards.
        lowest, highest = float('inf'), float('-inf')
        for key, series in stats[field].items():
            xs = x_ticks if len(x_ticks) > 0 else np.arange(1, len(series) + 1)
            lowest = min(lowest, min(series))
            highest = max(highest, max(series))
            # Lines are only labelled in the subplot that shows the legend.
            line_kwargs = {'label': key} if with_legend else {}
            plt.plot(xs, series, **line_kwargs)
        plt.ylabel(ylabel)
        plt.xlabel(xlabel)
        if not no_title:
            plt.title(field)
        if no_xticks:
            plt.xticks([])
        # 5% padding on both sides; the 1e-4 keeps the range non-degenerate for flat lines.
        margin = (1e-4 + highest - lowest) * 0.05
        plt.ylim(lowest - margin, highest + margin)
        if with_legend:
            plt.legend(title = legend_title, loc = 'center right')
    plt.show()

In [None]:
%matplotlib inline
plt.rcParams['font.size'] = 18
# One figure per metric, without titles and x ticks. legend_appear_at = -1
# matches no subplot, so no legend is drawn.
# Other selections used before: list(metric_results.keys()),
# ['P@10','RECALL@10', 'AUC'], ['F1@50'].
for m_name in ['F1','RECALL']:
    for k in [1,10,50]:
        m = f'{m_name}@{k}'
        selected_metrics = [m]
        plot_multiple_line(metric_results, selected_metrics, x_ticks = sorted_lambda,
                           no_title = True, no_xticks = True,
                           ncol = 3, ylabel = '', xlabel = '',
                           legend_title = 'activity', legend_appear_at = -1)

### 2. Performance Distribution

In [None]:
import numpy as np
from tqdm import tqdm
# Column of `df` whose per-user values are plotted below. It must be an integer
# column label >= 2 (columns 0 and 1 hold the user ids and groups).
# The previous `{'model_name': }` was a syntax error and could not index `df`.
# TODO(review): 2 (the first metric line) is a placeholder -- pick the
# model / lambda / metric line that should actually be shown.
pick_column = 2
# "model-fair_group-fair_lambda-metric" description of the selected column.
label = '-'.join([df[pick_column].iloc[i] for i in range(4)])
# {group: {label: [strictly positive values of the group's users]}}
group_metrics = {G: {} for G in groups}
for G in tqdm(groups):
    subset = df.loc[df[1] == G]
    group_metrics[G][label] = [float(v) for v in subset[pick_column].values if float(v) > 0]

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import seaborn as sns

# Per-group density of the selected metric with a rug of the individual users.
# sns.distplot is deprecated; kdeplot + rugplot is the documented replacement
# for distplot(hist=False, rug=True). The colour is fixed per group so that the
# density curve and its rug still match.
fig, ax = plt.subplots()
palette = sns.color_palette(n_colors = len(groups))
for G, color in zip(groups, palette):
    values = group_metrics[G][label]
    sns.kdeplot(values, color = color, label = G, ax = ax)
    sns.rugplot(values, color = color, ax = ax)
ax.set_title(label)
ax.legend()
plt.show()