## Notebook for summarizing results

In [None]:
import os
# Switch to the parent directory so the relative result folders used further
# down are resolved from there. Re-running this cell moves up one more level.
os.chdir(os.pardir)

import numpy as np
import pandas as pd
import json

import logging
from pathlib import Path

In [None]:
class Config:
    """Static settings selecting which methods, cut-offs and metrics are summarized."""

    # Method identifiers that pass the method filter; any other method is skipped.
    METHODS = [
        "ease",
        "fslr",
        "bnslim",
        "bnslim_admm",
        "fairmf",
        "fda",
    ]

    # Cut-offs to keep, stored as strings because they are compared against
    # the keys of the result dictionaries.
    K_VALUES = ["10"]

    # Accuracy metrics reported for every experiment type.
    ACCURACY_METRICS = ["ndcg", "recall"]

    # Fairness metrics reported for each experiment type.
    FAIRNESS_METRICS = {
        "consumer": ["c-equity", "u-parity"],
        "provider": ["bdv", "apcr"],
    }

In [None]:
def get_k_value(k: str) -> bool:
    """Tell whether cut-off ``k`` is listed in ``Config.K_VALUES``.

    Despite the ``get_`` prefix this is a predicate: it returns ``True`` when
    ``k`` is one of the configured cut-offs and ``False`` otherwise.
    """
    allowed_cutoffs = Config.K_VALUES
    return k in allowed_cutoffs

def get_rec_methods(method: str) -> bool:
    """Tell whether ``method`` is listed in ``Config.METHODS``.

    Despite the ``get_`` prefix this is a predicate: it returns ``True`` when
    ``method`` is one of the configured methods and ``False`` otherwise.
    """
    known_methods = Config.METHODS
    return method in known_methods

def format_value(value: float) -> str:
    """Render ``value`` in fixed-point notation with three decimal places."""
    return format(value, ".3f")

def get_metrics_for_experiment_type(experiment_type: str):
    """Return the accuracy metric names followed by the fairness metric names.

    Args:
        experiment_type: a key of ``Config.FAIRNESS_METRICS``.

    Returns:
        A new list: ``Config.ACCURACY_METRICS`` plus the fairness metrics
        registered for ``experiment_type``.

    Raises:
        ValueError: if ``experiment_type`` is not a key of
            ``Config.FAIRNESS_METRICS``.
    """
    fairness_names = Config.FAIRNESS_METRICS.get(experiment_type)
    if fairness_names is None:
        raise ValueError("Invalid experiment type. Choose 'provider' or 'consumer'.")
    return Config.ACCURACY_METRICS + fairness_names

def report_metrics(metrics, data):
    """Flatten nested metric results into a list of table rows.

    Args:
        metrics: iterable of metric names; each must be a key of ``data``.
        data: mapping ``metric -> method -> k -> value`` where ``value`` is
            either a bare number or a mapping with ``'mean'`` and ``'std'``.

    Returns:
        A list of dicts with the keys ``'Metric'``, ``'Method'``, ``'K'`` and
        ``'Score'``. Bare numbers are rendered as ``"0.123"``; mean/std
        mappings as ``"0.123 (0.045)"``. Methods and cut-offs rejected by
        ``get_rec_methods`` / ``get_k_value`` are skipped.

    Raises:
        KeyError: if a requested metric is missing from ``data``.
    """
    metric_data = []
    for metric in metrics:
        for method, values in data[metric].items():
            if not get_rec_methods(method):
                continue
            for k, v in values.items():
                if not get_k_value(k):
                    continue
                # Whole numbers decoded from JSON arrive as int, not float;
                # treat both as bare scores instead of indexing into them.
                if isinstance(v, (int, float)):
                    score = format_value(v)
                else:
                    score = f"{format_value(v['mean'])} ({format_value(v['std'])})"
                metric_data.append({'Metric': metric, 'Method': method, 'K': k, 'Score': score})
    return metric_data

def compute_trades(data, experiment_type, ALPHA=0.2):
    """Combine NDCG and one fairness metric into a single trade-off score.

    For every method and cut-off accepted by ``get_rec_methods`` /
    ``get_k_value`` the score is
    ``ALPHA * (1 - ndcg) + (1 - ALPHA) * fairness``, where ``fairness`` is
    read from ``"c-equity"`` for consumer experiments and from ``"bdv"`` for
    provider experiments.

    Args:
        data: mapping ``metric -> method -> k -> value``; a value is either a
            bare number or a dict carrying a ``'mean'`` entry.
        experiment_type: ``"consumer"`` or ``"provider"``.
        ALPHA: weight given to the NDCG term.

    Returns:
        ``{method: {k: [score]}}``. Combinations with a missing NDCG mean or
        a missing fairness value are logged as warnings and left out.

    Raises:
        ValueError: if ``experiment_type`` is neither of the two known types.
    """
    if experiment_type not in ("consumer", "provider"):
        raise ValueError("Invalid experiment type. Choose 'provider' or 'consumer'.")

    def _mean_of(entry):
        # Dict entries expose their value under 'mean'; anything else is the value itself.
        if isinstance(entry, dict):
            return entry.get('mean')
        return entry

    # The fairness metric entering the trade-off depends on the experiment type.
    if experiment_type == "consumer":
        fairness_metric = "c-equity"
    else:
        fairness_metric = "bdv"

    trade_off_scores = {}

    for method, ndcg_values in data.get("ndcg", {}).items():
        if not get_rec_methods(method):
            continue

        for k, ndcg_v in ndcg_values.items():
            if not get_k_value(k):
                continue

            ndcg_mean = _mean_of(ndcg_v)
            if ndcg_mean is None:
                logging.warning(f"Missing 'mean' in ndcg for method {method} and k {k}")
                continue

            fairness_v = None
            if fairness_metric in data:
                fairness_by_k = data[fairness_metric].get(method, {})
                if k in fairness_by_k:
                    fairness_v = _mean_of(fairness_by_k[k])

            if fairness_v is None:
                logging.warning(f"Fairness value not found for method {method} and k {k}")
                continue

            trade_off_value = ALPHA * (1 - ndcg_mean) + (1 - ALPHA) * fairness_v
            scores_by_k = trade_off_scores.setdefault(method, {})
            scores_by_k.setdefault(k, []).append(trade_off_value)

    return trade_off_scores

def aggregate_trade_off_scores(trade_off_all_scores):
    """Reduce every list of trade-off scores to its mean and standard deviation.

    Args:
        trade_off_all_scores: mapping ``method -> k -> list of scores``.

    Returns:
        ``{method: {k: {'mean': ..., 'std': ...}}}`` computed with
        ``np.mean`` and ``np.std`` using their default arguments.
    """
    summary = {}
    for method, scores_by_k in trade_off_all_scores.items():
        per_k = {}
        for k, scores in scores_by_k.items():
            per_k[k] = {'mean': np.mean(scores), 'std': np.std(scores)}
        summary[method] = per_k
    return summary

def aggregate_metrics(metrics, data_list):
    """Aggregate each metric over several result dictionaries (one per seed).

    The first element of ``data_list`` serves as the template: its methods
    and cut-offs (filtered through ``get_rec_methods`` / ``get_k_value``)
    define which entries are aggregated, and every other element is expected
    to contain the same entries.

    Args:
        metrics: iterable of metric names to aggregate.
        data_list: list of mappings ``metric -> method -> k -> value`` where
            ``value`` is a bare number or a mapping with a ``'mean'`` entry.

    Returns:
        ``{metric: {method: {k: {'mean': ..., 'std': ...}}}}`` computed with
        ``np.mean`` / ``np.std`` over the per-seed values. When ``data_list``
        is empty every metric maps to an empty dict.

    Raises:
        KeyError: if a metric is missing from the first element, or an entry
            of the template is missing from a later element.
    """
    def _seed_value(entry):
        # Decide per value rather than once from the first seed, and accept
        # int as well as float: JSON whole numbers decode to int.
        return entry if isinstance(entry, (int, float)) else entry['mean']

    if not data_list:
        # Nothing to aggregate; keep the output shape instead of failing on data_list[0].
        return {metric: {} for metric in metrics}

    template = data_list[0]
    aggregated_data = {}
    for metric in metrics:
        aggregated_data[metric] = {}
        for method, k_entries in template[metric].items():
            if not get_rec_methods(method):
                continue
            aggregated_data[metric][method] = {}
            for k in k_entries:
                if not get_k_value(k):
                    continue
                means = [_seed_value(data[metric][method][k]) for data in data_list]
                aggregated_data[metric][method][k] = {'mean': np.mean(means), 'std': np.std(means)}
    return aggregated_data

def aggregate_fit_times(data_list):
    """Summarize the fit time of each method across several result dictionaries.

    Args:
        data_list: iterable of mappings that each contain a ``"fit_time"``
            mapping ``method -> fit time``.

    Returns:
        ``{method: {'mean': ..., 'std': ...}}`` computed with ``np.mean`` and
        ``np.std`` over all fit times recorded for that method.

    Raises:
        KeyError: if an element has no ``"fit_time"`` entry.
    """
    collected = {}
    for run in data_list:
        for method, fit_time in run["fit_time"].items():
            collected.setdefault(method, []).append(fit_time)

    summary = {}
    for method, samples in collected.items():
        summary[method] = {'mean': np.mean(samples), 'std': np.std(samples)}
    return summary

def process_data(folder_path: str, experiment_type: str):
    """Load every ``results.json`` below ``folder_path`` and build summary tables.

    Each immediate sub-directory of ``folder_path`` is expected to hold one
    ``results.json``. Files that cannot be opened, decoded or parsed are
    logged and skipped.

    Args:
        folder_path: directory whose sub-directories contain the results.
        experiment_type: key of ``Config.FAIRNESS_METRICS`` selecting which
            fairness metrics are reported.

    Returns:
        A dict with the DataFrames ``'accuracy'``, ``'fairness'`` and
        ``'fit_times'``, or an empty dict when no results file could be
        loaded.

    Raises:
        OSError: if ``folder_path`` cannot be listed (for example because it
            does not exist).
        KeyError: if ``experiment_type`` is not a key of
            ``Config.FAIRNESS_METRICS`` (only reached once at least one
            results file has been loaded).
    """
    folder_path = Path(folder_path)
    data_list = []

    # iterdir() yields entries in arbitrary order. Sorting makes the first
    # loaded file (used as the template by aggregate_metrics) and the row
    # order of the tables reproducible between runs and machines.
    for folder in sorted(folder_path.iterdir()):
        if not folder.is_dir():
            continue
        seed_path = folder / "results.json"
        try:
            # Explicit encoding: do not depend on the platform default.
            with open(seed_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, UnicodeDecodeError, json.JSONDecodeError) as e:
            logging.error(f"Error processing file {seed_path}: {e}")
        else:
            data_list.append(data)

    if not data_list:
        return {}

    # separate accuracy and fairness metrics
    accuracy_metrics = Config.ACCURACY_METRICS
    fairness_metrics = Config.FAIRNESS_METRICS[experiment_type]

    aggregated_data = aggregate_metrics(accuracy_metrics + fairness_metrics, data_list)
    aggregated_fit_times = aggregate_fit_times(data_list)

    metrics_dfs = {
        'accuracy': pd.DataFrame(report_metrics(accuracy_metrics, aggregated_data)),
        'fairness': pd.DataFrame(report_metrics(fairness_metrics, aggregated_data)),
        'fit_times': pd.DataFrame([{
            'Method': method,
            'Average Fit Time': f"{format_value(times['mean'])} ({format_value(times['std'])})"
        } for method, times in aggregated_fit_times.items()])
    }

    return metrics_dfs

In [None]:
# Example usage: keep exactly one of the calls below active to choose the
# result folder and the experiment type to summarize.
# data = process_data("ml-1m", "consumer")
data = process_data("lastfm-1k", "consumer")
# data = process_data("coco/all", "provider")
# data = process_data("coco/subset", "provider")
# data = process_data("goodreads", "provider")

# Print one table per section; a section that is missing from the result
# (process_data returned an empty dict) is shown as an empty DataFrame.
for metric in ("accuracy", "fairness", "fit_times"):
    heading = metric.replace('_', ' ').capitalize()
    table = data.get(metric, pd.DataFrame())
    print(f"\n{heading}:\n", table)