# This notebook contains an evaluation for the uniform scenario

Simply run it as the last step of the evaluation.

Notes:
- At this point we already have the matrix factorization model, whose ratings we treat as the true ratings,
- together with the groups and the top items predicted for each group by each algorithm.

- With these, we can now evaluate the performance of the algorithms.

In [1]:
import os
import sys
from typing import List
import numpy as np
import pandas as pd
from tqdm import tqdm

from dataclasses import dataclass, field


sys.path.append(os.path.join(sys.path[0], '..'))

from evaluation.evaluation_utils import calculate_dcg, RatingsRetriever, load_mf_matrices, load_groups, get_top_dict, get_group_size_from_name


In [2]:
@dataclass
class UniformMetrics:
    """Metric samples gathered for one algorithm on one group type.

    Every list receives one value per evaluated group (appended by
    ``calculate_uniform_metrics``); ``to_avg_dict`` collapses each list
    to its mean.
    """

    # Statistics over the per-user sums of ratings inside a group.
    avg_ratings: List[float] = field(default_factory=list)
    min_ratings: List[float] = field(default_factory=list)
    minmax_ratings: List[float] = field(default_factory=list)
    std_ratings: List[float] = field(default_factory=list)

    # The same statistics computed over the per-user nDCG values.
    avg_ndcgs_ratings: List[float] = field(default_factory=list)
    min_ndcgs_ratings: List[float] = field(default_factory=list)
    minmax_ndcgs_ratings: List[float] = field(default_factory=list)
    std_ndcgs_ratings: List[float] = field(default_factory=list)

    # Identification of the run; None until somebody assigns it.
    alg_name: str = None
    group_name: str = None

    def to_avg_dict(self):
        """Return a flat dict with the identifiers and the mean of every metric list."""
        averaged = {
            'alg_name': self.alg_name,
            'group_name': self.group_name,
        }
        metric_names = (
            'avg_ratings',
            'min_ratings',
            'minmax_ratings',
            'std_ratings',
            'avg_ndcgs_ratings',
            'min_ndcgs_ratings',
            'minmax_ndcgs_ratings',
            'std_ndcgs_ratings',
        )
        for metric_name in metric_names:
            averaged[metric_name] = np.mean(getattr(self, metric_name))
        return averaged



def _min_over_max(values):
    """Return ``min(values) / max(values)`` as a float, or 0.0 when the maximum is 0."""
    highest = float(np.max(values))
    if highest == 0:
        return 0.0
    return float(np.min(values)) / highest


def calculate_uniform_metrics(groups, results, ratings_retriever, idcg_top_k):
    """Compute per-group rating and nDCG statistics for one algorithm.

    Args:
        groups: object exposing ``.values``; each entry is an iterable of the
            user ids forming one group.
        results: iterable aligned with ``groups.values``; each entry holds the
            items recommended to the corresponding group.
        ratings_retriever: object providing ``get_ratings(group, items)`` and
            ``get_user_IDCG(user_id, top_k)``. ``get_ratings`` is expected to
            return a 2-D array with one row per user (rows are summed and
            passed to ``calculate_dcg`` one at a time).
        idcg_top_k: cut-off forwarded to ``get_user_IDCG``.

    Returns:
        UniformMetrics whose lists contain one value per processed group.
        ``alg_name`` and ``group_name`` are left unset.
    """
    metrics = UniformMetrics()

    for group, result in tqdm(list(zip(groups.values, results))):
        group_item_ratings = ratings_retriever.get_ratings(group, result)
        # one total per user: how much this user likes the recommended list
        ratings_user_sum = np.sum(group_item_ratings, axis=1)

        dcgs = np.asarray(np.apply_along_axis(calculate_dcg, 1, group_item_ratings), dtype=float)
        idcgs = np.asarray(
            [ratings_retriever.get_user_IDCG(user_id, idcg_top_k) for user_id in group],
            dtype=float,
        )
        # A user whose ideal DCG is 0 gets nDCG 0 instead of inf/nan, so one
        # degenerate user cannot poison the averaged metrics.
        ndcgs = np.divide(dcgs, idcgs, out=np.zeros_like(dcgs), where=idcgs != 0)

        metrics.avg_ratings.append(float(np.mean(ratings_user_sum)))
        metrics.min_ratings.append(float(np.min(ratings_user_sum)))
        # computed in plain floats so integer-typed ratings do not trip a
        # ufunc casting error on the output buffer
        metrics.minmax_ratings.append(_min_over_max(ratings_user_sum))
        metrics.std_ratings.append(float(np.std(ratings_user_sum)))

        metrics.avg_ndcgs_ratings.append(float(np.mean(ndcgs)))
        metrics.min_ndcgs_ratings.append(float(np.min(ndcgs)))
        metrics.minmax_ndcgs_ratings.append(_min_over_max(ndcgs))
        metrics.std_ndcgs_ratings.append(float(np.std(ndcgs)))

    return metrics

In [5]:
def process_results(mf_path, groups_path, results_path, idcg_top_k):
    """Evaluate every stored recommendation result and return the averaged metrics.

    ``results_path`` is expected to contain one sub-directory per group type,
    each holding one ``np.load``-able file per algorithm. The algorithm name is
    the file name up to its first dot.

    Args:
        mf_path: location passed to ``load_mf_matrices``.
        groups_path: location passed to ``load_groups``; the loaded object is
            indexed by the group-type directory names.
        results_path: directory with the per-group-type result folders.
        idcg_top_k: cut-off forwarded to ``calculate_uniform_metrics``.

    Returns:
        pandas.DataFrame with one row per (group type, algorithm) built from
        ``UniformMetrics.to_avg_dict`` plus a ``group_size`` column.
    """
    # first, load the data
    u_features, i_features = load_mf_matrices(mf_path)
    ratings_retriever = RatingsRetriever(u_features, i_features)
    groups = load_groups(groups_path)

    # now, for each group type we have results for each algorithm
    results = []
    for group_name in sorted(os.listdir(results_path)):
        group_dir = os.path.join(results_path, group_name)
        # skip if not directory
        if not os.path.isdir(group_dir):
            continue

        print(f'--- processing group: {group_name}')
        # os.listdir returns entries in arbitrary order; sorting keeps the row
        # order of the resulting frame reproducible across runs and machines
        for result_file in sorted(os.listdir(group_dir)):
            result_file_path = os.path.join(group_dir, result_file)
            # nested folders cannot be fed to np.load, so pass over them
            if not os.path.isfile(result_file_path):
                continue

            result = np.load(result_file_path)
            metrics = calculate_uniform_metrics(
                groups[group_name],
                result,
                ratings_retriever,
                idcg_top_k,
            )
            metrics.alg_name = result_file.split('.')[0]
            metrics.group_name = group_name

            results.append(metrics)

    avg_results = pd.DataFrame([result.to_avg_dict() for result in results])
    avg_results['group_size'] = avg_results['group_name'].apply(get_group_size_from_name)
    return avg_results

In [6]:
# Sort mode per metric, handed to get_top_dict when ranking the algorithms.
# NOTE(review): the two std_* metrics are also marked 'max' although a smaller
# spread is usually the better outcome; they are not part of the LaTeX table
# columns, so this does not affect the table -- confirm the intent.
sort_type_dict = dict.fromkeys(
    (
        'avg_ratings',
        'min_ratings',
        'minmax_ratings',
        'std_ratings',
        'avg_ndcgs_ratings',
        'min_ndcgs_ratings',
        'minmax_ndcgs_ratings',
        'std_ndcgs_ratings',
    ),
    'max',
)

def get_latex_table_data(metrics: pd.Series, all_other_metrics: pd.DataFrame) -> str:
    """Format one algorithm's averaged metrics as LaTeX table cells.

    Args:
        metrics: a single row of the averaged-results frame (one algorithm on
            one group type); must contain ``alg_name`` and the metric columns.
        all_other_metrics: the rows of every algorithm for the same group
            type; passed to ``get_top_dict`` together with ``sort_type_dict``.
            The returned mapping is expected to give, per metric, the
            algorithm names ordered best first.

    Returns:
        The six cells (avg/min/minmax of ratings, then of nDCG) joined by
        ``' & '``. The best value is wrapped in ``\\textbf``, the second best
        in ``\\underline``, the third in ``\\textit``; all use two decimals.

    Raises:
        ValueError: if the algorithm is missing from a metric's ranking.
    """
    res_order = ['avg_ratings', 'min_ratings', 'minmax_ratings', 'avg_ndcgs_ratings', 'min_ndcgs_ratings', 'minmax_ndcgs_ratings']
    # LaTeX command used to highlight the first three places of each ranking
    rank_markup = {0: '\\textbf', 1: '\\underline', 2: '\\textit'}

    top_dict = get_top_dict(all_other_metrics, sort_type_dict)
    texts = []
    for res_name in res_order:
        position = list(top_dict[res_name]).index(metrics['alg_name'])
        value_text = f'{metrics[res_name]:.2f}'
        command = rank_markup.get(position)
        texts.append(f'{command}{{{value_text}}}' if command else value_text)
    return ' & '.join(texts)


def create_latex_table(avg_results, eval_path, dataset_name):
    """Write the uniform-scenario results of one dataset as a LaTeX tabular.

    For each group size (2, 3, 4, 6, 8) the table gets a header and one row
    per algorithm; every row lists the cells for the ``se=1`` group type
    followed by the cells for the ``se=4`` group type.

    Args:
        avg_results: DataFrame with ``group_name`` and ``alg_name`` columns
            plus the metric columns read by ``get_latex_table_data``.
        eval_path: output directory; created when missing.
        dataset_name: prefix of the output file
            ``<dataset_name>_uniform_results.tex``.

    Raises:
        IndexError: if an expected (group type, algorithm) pair has no row.
    """
    map_alg_name = {
        'avg': 'AVG',
        'fai': 'FAI',
        'lm': 'LM',
        'xpo': 'XPO',
        'npo': 'NPO',
        'gfar': 'GFAR',
        'dhondt_do': 'DHondtDO',
        'ep_fuzz_dhondt': 'EP-Fuzz-DA',
    }
    alg_order = ['avg', 'fai', 'lm', 'xpo', 'npo', 'gfar', 'dhondt_do', 'ep_fuzz_dhondt']

    # Raw strings keep the LaTeX backslashes literal; '\m', '\h' and '\e' are
    # invalid escape sequences in ordinary string literals.
    output_lines = [r'\begin{tabular}{ c | c c c | c c c || c c c | c c c}']
    for i, group_size in enumerate([2, 3, 4, 6, 8]):
        group_order = [f'prs_{group_size}_se=1_noc=1000', f'prs_{group_size}_se=4_noc=1000']
        output_lines.append('')
        if i != 0:
            # empty spacer row between two group-size sections
            output_lines.append(r'\multicolumn{12}{c}{} \\')
        output_lines.append(
            r'\multicolumn{1}{c}{} & \multicolumn{6}{c}{PRS(M=1)' + f', group size s={group_size}'
            + r'} & \multicolumn{6}{c}{PRS(M=4)' + f', group size s={group_size}' + r'} \\'
        )
        output_lines.append(
            r'\multicolumn{1}{c}{} & \multicolumn{3}{c}{AR} & \multicolumn{3}{c}{nDCG}'
            r' & \multicolumn{3}{c}{AR} & \multicolumn{3}{c}{nDCG} \\'
        )
        output_lines.append(r'& mean & min & M/M & mean & min & M/M & mean & min & M/M & mean & min & M/M \\')
        output_lines.append(r'\hline')
        for alg in alg_order:
            alg_texts = []
            for group in group_order:
                all_metrics_for_group = avg_results[avg_results['group_name'] == group]
                # single row (pandas Series) of this algorithm for this group type
                specific_results = all_metrics_for_group[all_metrics_for_group['alg_name'] == alg].iloc[0]
                alg_texts.append(get_latex_table_data(specific_results, all_metrics_for_group))

            output_lines.append(map_alg_name[alg] + ' & ' + ' & '.join(alg_texts) + r' \\')
    output_lines.append('')
    output_lines.append(r'\end{tabular}')

    # make sure the directory exists
    os.makedirs(eval_path, exist_ok=True)
    # write the lines to file in results_evaluation
    results_file_path = os.path.join(eval_path, f'{dataset_name}_uniform_results.tex')
    with open(results_file_path, 'w') as f:
        f.writelines(line + '\n' for line in output_lines)


In [7]:
# Datasets to evaluate; each one lives in ../datasets/<name>/.
datasets = ['kgrec', 'movie_lens', 'movie_lens_small', 'spotify', 'netflix']

for dataset in datasets:
    data_dir = f'../datasets/{dataset}/'

    # inputs of the evaluation
    mf_path = os.path.join(data_dir, 'mf')
    groups_path = os.path.join(data_dir, 'groups')
    results_path = os.path.join(data_dir, 'experiment_results', 'uniform')
    # where the averaged metrics are stored
    eval_path = os.path.join(data_dir, 'evaluation_results')

    # cut-off handed to the IDCG computation
    idcg_top_k = 10

    results = process_results(
        mf_path=mf_path,
        groups_path=groups_path,
        results_path=results_path,
        idcg_top_k=idcg_top_k,
    )

    # persist the averaged metrics as <dataset>_uniform_results.csv
    os.makedirs(eval_path, exist_ok=True)
    results.to_csv(
        os.path.join(eval_path, f'{dataset}_uniform_results.csv'),
        index=False,
    )

U_features shape: (1000000, 300)
I_features shape: (2262292, 300)
--- processing group: prs_2_se=1_noc=1000


100%|██████████| 1000/1000 [18:14<00:00,  1.09s/it]
100%|██████████| 1000/1000 [00:00<00:00, 6662.85it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12787.55it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12691.17it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12916.56it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12845.75it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12888.10it/s]
100%|██████████| 1000/1000 [00:00<00:00, 13011.52it/s]


--- processing group: prs_2_se=4_noc=1000


100%|██████████| 1000/1000 [18:20<00:00,  1.10s/it]
100%|██████████| 1000/1000 [00:00<00:00, 6199.13it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12453.47it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12510.30it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12678.43it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12379.11it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12653.07it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12638.55it/s]


--- processing group: prs_3_se=1_noc=1000


100%|██████████| 1000/1000 [26:59<00:00,  1.62s/it]
100%|██████████| 1000/1000 [00:00<00:00, 6607.07it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11709.20it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11840.16it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11645.64it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11591.02it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11576.05it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11897.82it/s]


--- processing group: prs_3_se=4_noc=1000


100%|██████████| 1000/1000 [27:56<00:00,  1.68s/it]
100%|██████████| 1000/1000 [00:00<00:00, 5114.59it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11956.43it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12173.18it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11972.20it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11899.71it/s]
100%|██████████| 1000/1000 [00:00<00:00, 12003.55it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11680.21it/s]


--- processing group: prs_4_se=1_noc=1000


100%|██████████| 1000/1000 [36:34<00:00,  2.19s/it]
100%|██████████| 1000/1000 [00:00<00:00, 3956.00it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11474.61it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11553.89it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11375.43it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11450.06it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11604.01it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11548.83it/s]


--- processing group: prs_4_se=4_noc=1000


100%|██████████| 1000/1000 [35:11<00:00,  2.11s/it]
100%|██████████| 1000/1000 [00:00<00:00, 5466.86it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11029.25it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11326.44it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11155.12it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11302.99it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11429.62it/s]
100%|██████████| 1000/1000 [00:00<00:00, 11316.29it/s]


--- processing group: prs_6_se=1_noc=1000


100%|██████████| 1000/1000 [53:32<00:00,  3.21s/it]
100%|██████████| 1000/1000 [00:00<00:00, 3703.91it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10252.39it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10305.54it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10183.61it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10362.06it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10496.39it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10432.87it/s]


--- processing group: prs_6_se=4_noc=1000


100%|██████████| 1000/1000 [51:52<00:00,  3.11s/it]
100%|██████████| 1000/1000 [00:00<00:00, 5135.13it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10398.90it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10400.09it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10426.98it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10317.15it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10153.83it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10049.53it/s]


--- processing group: prs_8_se=1_noc=1000


100%|██████████| 1000/1000 [1:07:55<00:00,  4.08s/it]
100%|██████████| 1000/1000 [00:00<00:00, 4267.21it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9440.66it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9422.86it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9360.93it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9423.56it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9467.01it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9217.36it/s]


--- processing group: prs_8_se=4_noc=1000


100%|██████████| 1000/1000 [1:08:09<00:00,  4.09s/it]
100%|██████████| 1000/1000 [00:00<00:00, 6061.48it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8927.55it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9091.15it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9121.48it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9097.27it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9012.74it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9299.80it/s]


In [8]:
# Turn every stored CSV of averaged metrics into its LaTeX table.
for dataset in datasets:
    data_dir = f'../datasets/{dataset}/'
    eval_path = os.path.join(data_dir, 'evaluation_results')
    results = pd.read_csv(
        os.path.join(eval_path, f'{dataset}_uniform_results.csv'),
        index_col=False,
    )
    print('create_latex_table')
    create_latex_table(avg_results=results, eval_path=eval_path, dataset_name=dataset)

create_latex_table
