In [None]:
import os,sys,inspect
# Put the parent of the notebook's working directory on sys.path so that the
# project-level `data` package imported below can be resolved.
# The kernel working directory is used instead of
# inspect.getfile(inspect.currentframe()): inside a notebook the frame's file
# is a per-cell pseudo file name (recent ipykernel versions report a temporary
# path for it), so it is not a reliable pointer to the notebook folder.
# NOTE(review): assumes the kernel is started in the notebook's own folder.
currentdir = os.path.abspath(os.getcwd())
parentdir = os.path.dirname(currentdir)
# Skip the insert when the path is already first, so re-running the cell does
# not keep stacking duplicates at the front of sys.path.
if sys.path[:1] != [parentdir]:
    sys.path.insert(0,parentdir)

# Dataset selector: sub-folder (with trailing slash) appended to ROOT.
data_key = 'ml-1m/'
# data_key = 'amz_Movies_and_TV/'
# data_key = 'amz_Electronics/'
from data.preprocess import ROOT
# Base folder used by the cells below for logs and result files.
target_path = ROOT + data_key

### 1. Extract Results from Logs

In [None]:
from utils import extract_results

# Folder with the logs of the selected dataset.
log_path = f"{target_path}logs/"

# Argument names handed to extract_results as its second parameter
# (presumably the settings that distinguish one run from another; verify
# against utils.extract_results).
control_args = [
    'lr',
    'l2_coef',
    'fair_group_feature',
    'fair_lambda',
    'fair_noise_sigma',
    'n_local_step',
]

results = extract_results(log_path, control_args, "f2rec_train_and_eval")
print(results.keys())

In [None]:
import json

# Show the first entry of `results` as an indented JSON sample.
print("Example:")
first_result = list(results.values())[0]
print(json.dumps(first_result, indent=4))

In [None]:
import numpy as np

# Metric column names: every ranking metric at every cutoff (grouped by
# cutoff), followed by the metrics that have no cutoff.
metrics = []
for cutoff in [1, 5, 10, 20, 50]:
    metrics.extend(f"{name}@{cutoff}" for name in ['HR', 'RECALL', 'P', 'F1', 'NDCG'])
metrics.extend(["AUC", "MR", "MRR"])

# Replace each recorded metric entry by its mean, in place; metrics that a
# result entry does not contain are left untouched.
for res_dict in results.values():
    for m_name in metrics:
        if m_name in res_dict:
            res_dict[m_name] = np.mean(res_dict[m_name])

In [None]:
import pandas as pd

# One row per key of `results` (the keys become the index); the fields of each
# entry become the columns.
df = pd.DataFrame.from_dict(
    results,
    orient="index",
)
df

In [None]:
import os
import datetime

# Make sure the results folder exists before writing into it.
# makedirs(exist_ok=True) replaces the earlier bare `except:` around os.mkdir,
# which printed "Dir exists" for *any* failure (missing parent folder,
# permission error, ...) and then let the CSV export fail later. Real errors
# now surface here, and missing parent folders are created as well.
results_dir = target_path + "results/"
if os.path.isdir(results_dir):
    print("Dir exists: '" + results_dir + "'")
os.makedirs(results_dir, exist_ok=True)

# Tab-separated export; the DataFrame index is written as the first column.
result_file_path = results_dir + "recommendation_performance.csv"
# Timestamped alternative that keeps every export:
# result_file_path = target_path + "results/" + datetime.datetime.now().strftime('%Y%m%d_%H%M%S') + ".csv"
df.to_csv(result_file_path, sep = '\t')

## Plots

In [None]:
# Reload the exported table so the plotting section can run from the file.
result_file_path = target_path + "results/recommendation_performance.csv"

# 'fair_group_feature' is cast to str because the selection cell further down
# filters on string values, including the literal 'nan'
# (presumably how a missing group feature appears after the cast -- confirm).
df = pd.read_csv(result_file_path, sep='\t').astype({'fair_group_feature': str})
df

### 1. Lambda plot

In [None]:
def _column_matches(column, value, rtol = 1e-9):
    '''
    Element-wise match of a DataFrame column against a target value.

    A float target on a float column is compared with a tight relative
    tolerance, because values parsed back from a CSV/log file may differ from
    the literal typed in the notebook in the last bits (e.g. 0.1 + 0.2 vs 0.3).
    Every other combination (strings, ints, ...) uses plain equality.

    @input:
    - column: pandas Series
    - value: scalar to look for
    - rtol: relative tolerance for the float comparison
    @output:
    - mask: boolean mask with one entry per row of column
    '''
    dtype = column.dtype
    if isinstance(value, float) and isinstance(dtype, np.dtype) and dtype.kind == 'f':
        # atol = 0 keeps the comparison purely relative, so very small
        # hyper-parameters (e.g. learning rates) are not merged by accident.
        return np.isclose(column.to_numpy(), value, rtol = rtol, atol = 0.0)
    return column == value

def _select_model_rows(df, model_name, params):
    '''
    Rows of df whose 'model_name' equals model_name and whose columns match
    every {column: value} pair of params.
    '''
    subset = df[df['model_name'] == model_name]
    for k,v in params.items():
        subset = subset[_column_matches(subset[k], v)]
    return subset

def get_fair_model_lambda_comparison(df, model_name, params, selected_lambda):
    '''
    Metric values of one model setting for a list of fair_lambda values.

    @input:
    - df: result table; needs the columns 'model_name', 'fair_lambda', every
          key of params and every name in the module-level list `metrics`
    - model_name: value to match in the 'model_name' column
    - params: {column: value}, all of them must match
    - selected_lambda: fair_lambda values, in the order they should be reported
    @output:
    - lambda_result: {metric: [value]}, each array aligned with selected_lambda
    @raise:
    - KeyError when a requested lambda has no matching row
    '''
    compare_feature = 'fair_lambda'
    subset = _select_model_rows(df, model_name, params)
    lambda_column = subset[compare_feature]
    selected_rows, missing = [], []
    for lbd in selected_lambda:
        hits = np.flatnonzero(np.asarray(_column_matches(lambda_column, lbd)))
        if len(hits) == 0:
            missing.append(lbd)
        else:
            # When several rows share a lambda the last one is used.
            selected_rows.append(hits[-1])
    if missing:
        raise KeyError(f"no '{model_name}' row with {params} has {compare_feature} in {missing}; "
                       f"available values: {lambda_column.tolist()}")
    return {m: np.array(subset[m])[selected_rows] for m in metrics}

def get_single_model_performance(df, model_name, params):
    '''
    Metric values of the first row matching one model setting.

    @input:
    - df: result table; needs the columns 'model_name', every key of params
          and every name in the module-level list `metrics`
    - model_name: value to match in the 'model_name' column
    - params: {column: value}, all of them must match
    @output:
    - model_result: {metric: value}; if several rows match, the first is used
    @raise:
    - IndexError when no row matches
    '''
    subset = _select_model_rows(df, model_name, params)
    if len(subset) == 0:
        raise IndexError(f"no '{model_name}' row matches {params}")
    return {m: np.array(subset[m])[0] for m in metrics}

In [None]:
# fair_lambda values to compare, in the order they are passed to the plot.
selected_lambda = [-0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 0.9]
# selected_lambda = [-0.1,0.1,0.3,0.5,0.7,0.9]

'''
ML1M
'''
# Reference models: rows whose fair_group_feature is the string 'nan'.
mf_result = get_single_model_performance(
    df, 'MF',
    {'lr': 0.0001, 'l2_coef': 0.1, 'fair_group_feature': 'nan'},
)
fedmf_result = get_single_model_performance(
    df, 'FedMF',
    {'lr': 0.003, 'l2_coef': 0.1, 'fair_group_feature': 'nan', 'n_local_step': 1},
)

# Models with fair_group_feature 'activity', one value per selected lambda.
fairmf_result = get_fair_model_lambda_comparison(
    df, 'MF',
    {'lr': 0.00003, 'l2_coef': 0.1, 'fair_group_feature': 'activity'},
    selected_lambda,
)
# f3mf_result = get_fair_model_lambda_comparison(df, 'FedMF',
#         {'lr': 0.003, 'l2_coef': 0.1, 'fair_group_feature': 'activity', 'fair_noise_sigma': 0}, selected_lambda)
f2mf_result = get_fair_model_lambda_comparison(
    df, 'FedMF',
    {'lr': 0.003, 'l2_coef': 0.1, 'fair_group_feature': 'activity', 'fair_noise_sigma': 0.01},
    selected_lambda,
)

'''
Movies
'''
# Alternative settings, kept disabled; swap with the ML1M section above to use them.
# mf_result = get_single_model_performance(df, 'MF', {'lr': 0.0001, 'l2_coef': 0.1, 'fair_group_feature': 'nan'})
# fedmf_result = get_single_model_performance(df, 'FedMF', {'lr': 0.003, 'l2_coef': 0.1, 'fair_group_feature': 'nan', 'n_local_step': 1})
# fairmf_result = get_fair_model_lambda_comparison(df, 'MF',
#         {'lr': 0.00001, 'l2_coef': 1.0, 'fair_group_feature': 'activity'}, selected_lambda)
# # f3mf_result = get_fair_model_lambda_comparison(df, 'FedMF',
# #         {'lr': 0.003, 'l2_coef': 1.0, 'fair_group_feature': 'activity', 'fair_noise_sigma': 0}, selected_lambda)
# f2mf_result = get_fair_model_lambda_comparison(df, 'FedMF',
#         {'lr': 0.003, 'l2_coef': 1.0, 'fair_group_feature': 'activity', 'fair_noise_sigma': 0.001}, selected_lambda)

# Curves drawn over lambda, keyed by the label used in the plots.
fair_model_results = {
    'FairMF': fairmf_result,
    'F2MF': f2mf_result,
}

In [None]:
from utils import plot_recommendation_over_lambda
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 14})

# Metrics to plot; each one gets its own call to the plotting helper.
selected_metrics = ['RECALL@10','F1@10','NDCG@10','RECALL@50','F1@50','NDCG@50']

# Colour per model label ('F3MF' and 'F2MF' share one colour).
model_colors = {
    'MF': '#a01010',
    'FairMF': '#f04040',
    'FedMF': '#1010a0',
    'F3MF': '#4040f0',
    'F2MF': '#4040f0',
}

for metric_name in selected_metrics:
    # The reference-model dict is rebuilt for every call, so each call
    # receives its own fresh object.
    plot_recommendation_over_lambda(
        fair_model_results,
        selected_lambda,
        [metric_name],
        row_height = 4,
        ncol = 3,
        other_model_results = {'MF': mf_result, 'FedMF': fedmf_result},
        legend_appear_at = 0,
        colors = model_colors,
    )