# Evaluation of the algorithms
- We now have the matrix factorization model, whose ratings we treat as the true ratings,
- together with the groups and the predicted top items for each group and each algorithm.

- We can now evaluate the performance of the algorithms.

In [None]:
import os
import sys
from typing import List
import numpy as np
import pandas as pd
from tqdm import tqdm

from dataclasses import dataclass, field

from scipy.stats.stats import pearsonr
from sklearn.metrics import mean_absolute_error

sys.path.append(os.path.join(sys.path[0], '..'))

from evaluation.evaluation_utils import calculate_dcg, RatingsRetriever, load_mf_matrices, load_groups, get_top_dict, get_group_size_from_name


In [None]:

@dataclass
class WeightedMetrics:
    """Per-group metric samples for one algorithm on one group type.

    Every list holds one value per evaluated group (appended by
    ``calculate_metrics``); ``to_avg_dict`` reduces each list to its mean.
    """

    # Metrics derived from the members' summed ratings of the recommended items.
    avg_ratings: List[float] = field(default_factory=list)
    corr_ratings: List[float] = field(default_factory=list)
    mae_ratings: List[float] = field(default_factory=list)

    # The same three metrics derived from the members' nDCG values.
    avg_ndcgs_ratings: List[float] = field(default_factory=list)
    corr_ndcgs_ratings: List[float] = field(default_factory=list)
    mae_ndcgs_ratings: List[float] = field(default_factory=list)

    # Labels assigned by the caller after the metrics have been collected.
    alg_name: str = field(default=None)
    group_name: str = field(default=None)

    def to_avg_dict(self):
        """Return the two labels plus the mean of every metric list.

        The keys are the field names themselves, in declaration order of the
        metrics, so the result can be used directly as a DataFrame row.
        """
        metric_names = (
            'avg_ratings',
            'corr_ratings',
            'mae_ratings',
            'avg_ndcgs_ratings',
            'corr_ndcgs_ratings',
            'mae_ndcgs_ratings',
        )
        summary = {
            'alg_name': self.alg_name,
            'group_name': self.group_name,
        }
        # Looking each list up by its own name guarantees that a key can never
        # be filled from a different metric's samples.
        for metric_name in metric_names:
            summary[metric_name] = np.mean(getattr(self, metric_name))
        return summary


def calculate_metrics(groups, results, weights, ratings_retriever, idcg_top_k):
    """Collect rating- and nDCG-based metrics for every group of one result set.

    ``groups.values``, ``results`` and ``weights`` are walked in lockstep, so
    the i-th entry of each describes the same group: its member ids, the items
    recommended to it, and the weights its members are compared against.

    For each group the per-member rating totals and per-member nDCG values are
    computed, each is normalized to sum to one, and the normalized vector is
    compared with the group's weights via Pearson correlation and MAE.

    Returns a ``WeightedMetrics`` holding one sample per group in every list;
    ``alg_name`` and ``group_name`` are left for the caller to fill in.
    """
    collected = WeightedMetrics()

    per_group_inputs = list(zip(groups.values, results, weights))
    for members, recommended_items, member_weights in tqdm(per_group_inputs):
        # Ratings are reduced along axis 1 below, i.e. one total per row.
        # presumably rows are members and columns are items - verify against
        # RatingsRetriever.get_ratings
        member_item_ratings = ratings_retriever.get_ratings(members, recommended_items)
        rating_totals = np.sum(member_item_ratings, axis=1)

        # nDCG per member: DCG of the recommended list over the member's ideal DCG.
        member_dcgs = np.apply_along_axis(calculate_dcg, 1, member_item_ratings)
        member_idcgs = np.array(
            [ratings_retriever.get_user_IDCG(member, idcg_top_k) for member in members]
        )
        member_ndcgs = member_dcgs / member_idcgs

        # Shares of the group total, so they are comparable with the weights.
        rating_shares = rating_totals / np.sum(rating_totals)
        ndcg_shares = member_ndcgs / np.sum(member_ndcgs)

        metric_families = (
            (
                rating_totals,
                rating_shares,
                collected.avg_ratings,
                collected.corr_ratings,
                collected.mae_ratings,
            ),
            (
                member_ndcgs,
                ndcg_shares,
                collected.avg_ndcgs_ratings,
                collected.corr_ndcgs_ratings,
                collected.mae_ndcgs_ratings,
            ),
        )
        for raw_values, shares, avg_out, corr_out, mae_out in metric_families:
            avg_out.append(float(np.mean(raw_values)))
            corr_out.append(pearsonr(shares, member_weights)[0])
            mae_out.append(mean_absolute_error(shares, member_weights))

    return collected


def process_results(mf_path, groups_path, groups_weights_path, results_path, idcg_top_k):
    """Evaluate every stored result file and return the averaged metrics.

    ``results_path`` is expected to contain one sub-directory per group type;
    each file inside such a directory is loaded with ``np.load`` and treated as
    the recommendations of one algorithm, named after the part of the file
    name before the first ``.``.

    Args:
        mf_path: location handed to ``load_mf_matrices``.
        groups_path: location handed to ``load_groups``; the returned object is
            indexed by group-type name.
        groups_weights_path: directory holding ``group_weights_<size>.csv``.
        results_path: directory with the per-group-type result directories.
        idcg_top_k: forwarded to ``calculate_metrics`` for the ideal DCG.

    Returns:
        A DataFrame with one row per (group type, algorithm) built from
        ``WeightedMetrics.to_avg_dict`` plus a ``group_size`` column.
    """
    u_features, i_features = load_mf_matrices(mf_path)
    ratings_retriever = RatingsRetriever(u_features, i_features)
    groups = load_groups(groups_path)

    all_metrics = []
    for group_name in sorted(os.listdir(results_path)):
        group_dir = os.path.join(results_path, group_name)
        # Only directories hold results; stray files next to them are ignored.
        if not os.path.isdir(group_dir):
            continue

        # Weights are stored once per group size, not per group type.
        group_size = get_group_size_from_name(group_name)
        weights = np.loadtxt(
            os.path.join(groups_weights_path, f'group_weights_{group_size}.csv'),
            delimiter=',',
        )

        print(f'--- processing group: {group_name}')
        # os.listdir returns entries in arbitrary order; sorting (as done for
        # the group directories above) keeps the output rows reproducible.
        for result_file in sorted(os.listdir(group_dir)):
            result = np.load(os.path.join(group_dir, result_file))

            metrics = calculate_metrics(
                groups[group_name],
                result,
                weights=weights,
                ratings_retriever=ratings_retriever,
                idcg_top_k=idcg_top_k,
            )
            metrics.alg_name = result_file.split('.')[0]
            metrics.group_name = group_name
            all_metrics.append(metrics)

    avg_results = pd.DataFrame([metrics.to_avg_dict() for metrics in all_metrics])
    avg_results['group_size'] = avg_results['group_name'].apply(get_group_size_from_name)
    return avg_results


In [None]:
# Ranking direction per metric column, handed to get_top_dict when the LaTeX
# table decides which algorithm to highlight.
# presumably 'max' means "higher is better" and 'min' "lower is better" -
# confirm against get_top_dict
sort_type_dict = dict(
    avg_ratings='max',
    corr_ratings='max',
    mae_ratings='min',
    avg_ndcgs_ratings='max',
    corr_ndcgs_ratings='max',
    mae_ndcgs_ratings='min',
)

In [None]:
def get_latex_table_data(metrics: pd.Series, all_other_metrics: pd.DataFrame):
    """Format one algorithm's six metric values as LaTeX table cells.

    Args:
        metrics: one row of the averaged-results table; it is read both by
            attribute (``metrics.alg_name``) and by column name.
        all_other_metrics: the rows of all algorithms for the same group type,
            passed to ``get_top_dict`` to obtain a ranking per metric.

    Returns:
        The six values, formatted with two decimals and joined by `` & ``.
        The algorithm at ranking index 0 for a metric is wrapped in
        ``\\textbf{}``, the one at index 1 in ``\\underline{}``.
    """
    res_order = ['avg_ratings', 'corr_ratings', 'mae_ratings', 'avg_ndcgs_ratings', 'corr_ndcgs_ratings', 'mae_ndcgs_ratings']
    # presumably lists algorithm names best-first per metric, using the
    # directions in sort_type_dict - confirm against get_top_dict
    top_dict = get_top_dict(all_other_metrics, sort_type_dict)

    cells = []
    for res_name in res_order:
        position = top_dict[res_name].tolist().index(metrics.alg_name)
        value = f'{metrics[res_name]:.2f}'
        if position == 0:
            cells.append(f'\\textbf{{{value}}}')
        elif position == 1:
            cells.append(f'\\underline{{{value}}}')
        else:
            cells.append(value)
    return ' & '.join(cells)

def create_latex_table(avg_results, eval_path, dataset_name):
    """Render the averaged metrics as a LaTeX ``tabular`` and write it to disk.

    For each group size in (4, 6, 8) a header and one row per algorithm are
    emitted; every row holds the cells for the ``se=1`` group type followed by
    the cells for the ``se=4`` group type of that size.

    Args:
        avg_results: table with ``group_name`` and ``alg_name`` columns plus the
            metric columns consumed by ``get_latex_table_data``.
        eval_path: output directory; created if it does not exist.
        dataset_name: prefix of the output file
            ``<dataset_name>_weighted_results.tex``.

    Raises:
        IndexError: if a listed algorithm has no row for a listed group type.
    """
    map_alg_name = {
        'avg_uniform': 'AVG-U',
        'avg': 'AVG',
        'dhondt_do': 'DHondtDO',
        'ep_fuzz_dhondt': 'EP-Fuzz-DA',
    }
    alg_order = ['avg_uniform', 'avg', 'dhondt_do', 'ep_fuzz_dhondt']

    # LaTeX commands are written as raw strings: in a normal literal "\m",
    # "\h" and "\e" are invalid escape sequences that Python warns about.
    output_lines = [r'\begin{tabular}{ c | c c c | c c c || c c c | c c c}']
    for i, group_size in enumerate([4, 6, 8]):
        group_order = [f'prs_{group_size}_se=1_noc=1000', f'prs_{group_size}_se=4_noc=1000']
        size_note = f', group size s={group_size}'

        output_lines.append('')
        if i != 0:
            # Empty spacer row between the blocks of consecutive group sizes.
            output_lines.append(r'\multicolumn{12}{c}{} \\')
        output_lines.append(
            r'\multicolumn{1}{c}{} & \multicolumn{6}{c}{PRS(M=1)' + size_note
            + r'} & \multicolumn{6}{c}{PRS(M=4)' + size_note + r'} \\'
        )
        output_lines.append(
            r'\multicolumn{1}{c}{} & \multicolumn{3}{c}{AR} & \multicolumn{3}{c}{nDCG}'
            r' & \multicolumn{3}{c}{AR} & \multicolumn{3}{c}{nDCG} \\'
        )
        output_lines.append(
            r'& mean & corr & MAE & mean & corr & MAE & mean & corr & MAE & mean & corr & MAE \\'
        )
        output_lines.append(r'\hline')

        for alg in alg_order:
            alg_texts = []
            for group in group_order:
                all_metrics_for_group = avg_results[avg_results['group_name'] == group]
                # First (expected to be the only) row of this algorithm.
                specific_results = all_metrics_for_group[all_metrics_for_group['alg_name'] == alg].iloc[0]
                alg_texts.append(get_latex_table_data(specific_results, all_metrics_for_group))

            output_lines.append(' & '.join([map_alg_name[alg], *alg_texts]) + r' \\')
    output_lines.append('')
    output_lines.append(r'\end{tabular}')

    # make sure the directory exists
    os.makedirs(eval_path, exist_ok=True)
    results_file_path = os.path.join(eval_path, f'{dataset_name}_weighted_results.tex')
    with open(results_file_path, 'w') as f:
        f.writelines(line + '\n' for line in output_lines)

In [None]:
# Datasets to evaluate; entries that are commented out are skipped.
datasets = [
    'kgrec',
    # 'movie_lens',
    # 'spotify',
    # 'netflix'
]

# For every dataset: compute the averaged metrics of all stored results and
# save them as <dataset>_weighted_results.csv in the evaluation directory.
for dataset in datasets:
    data_dir = f'../datasets/{dataset}/'
    
    # Input locations, all relative to the dataset directory.
    mf_path = os.path.join(data_dir, 'mf')
    groups_path = os.path.join(data_dir, 'groups')
    groups_weights_path = os.path.join(groups_path, 'weights')
    results_path = os.path.join(data_dir, 'experiment_results', 'weighted')

    # Output location for the CSV written below.
    eval_path = os.path.join(data_dir, 'evaluation_results')

    # Forwarded through process_results to the ideal-DCG lookup used for nDCG.
    idcg_top_k = 10

    results = process_results(mf_path, groups_path, groups_weights_path, results_path, idcg_top_k)

    os.makedirs(eval_path, exist_ok=True)
    results.to_csv(os.path.join(eval_path, f'{dataset}_weighted_results.csv'), index=False)

In [None]:
# Re-load each dataset's saved CSV of averaged metrics and render it as a
# LaTeX table in the same evaluation directory.
for dataset in datasets:
    data_dir = f'../datasets/{dataset}/'
    eval_path = os.path.join(data_dir, 'evaluation_results')
    results = pd.read_csv(os.path.join(eval_path, f'{dataset}_weighted_results.csv'), index_col=False)
    print('create_latex_table')
    create_latex_table(results, eval_path, dataset)