# This notebook contains an evaluation for the long-term scenario

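Run it as the last step of the evaluation, after the long-term experiment results have been generated. For every dataset it writes a CSV with the averaged metrics and a LaTeX table built from that CSV.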

In [1]:
import os
import sys
from typing import List
import numpy as np
import pandas as pd
from tqdm import tqdm

from dataclasses import dataclass, field


sys.path.append(os.path.join(sys.path[0], '..'))

from evaluation.evaluation_utils import calculate_dcg, RatingsRetriever, load_mf_matrices, load_groups, get_top_dict, get_group_size_from_name


In [2]:
@dataclass
class LongtermMetrics:
    """Metric samples gathered for one algorithm on one group type.

    Every list attribute receives one value per evaluated group (they are
    appended to by ``calculate_metrics``); ``to_avg_dict`` collapses each list
    to its mean.
    """

    # statistics over the per-user sums of ratings inside a group
    avg_ratings: List[float] = field(default_factory=list)
    min_ratings: List[float] = field(default_factory=list)
    minmax_ratings: List[float] = field(default_factory=list)
    std_ratings: List[float] = field(default_factory=list)

    # the same statistics computed over the per-user nDCG values
    avg_ndcgs_ratings: List[float] = field(default_factory=list)
    min_ndcgs_ratings: List[float] = field(default_factory=list)
    minmax_ndcgs_ratings: List[float] = field(default_factory=list)
    std_ndcgs_ratings: List[float] = field(default_factory=list)

    # identification of the evaluated run; filled in by the caller
    alg_name: str = None
    group_name: str = None

    def to_avg_dict(self):
        """Return a flat dict with the run identification and the mean of every metric list."""
        summary = {
            'alg_name': self.alg_name,
            'group_name': self.group_name,
        }
        metric_names = (
            'avg_ratings',
            'min_ratings',
            'minmax_ratings',
            'std_ratings',
            'avg_ndcgs_ratings',
            'min_ndcgs_ratings',
            'minmax_ndcgs_ratings',
            'std_ndcgs_ratings',
        )
        for metric_name in metric_names:
            summary[metric_name] = np.mean(getattr(self, metric_name))
        return summary



def _min_max_ratio(values):
    """Return ``min(values) / max(values)`` as a float, or 0.0 when the maximum is 0."""
    highest = np.max(values)
    if highest == 0:
        # avoid dividing by zero; a zero maximum yields a ratio of 0
        return 0.0
    # plain true division always yields a float, regardless of the dtype of
    # ``values`` (an integer ``out`` buffer would reject the float result)
    return float(np.min(values) / highest)


def calculate_metrics(groups, results, ratings_retriever, idcg_top_k):
    """Compute per-group rating and nDCG statistics for one algorithm's results.

    Args:
        groups: object whose ``.values`` yields one group (a collection of
            user ids) per entry.
        results: per-group recommended items, iterated in step with
            ``groups.values`` (``zip`` stops at the shorter of the two).
        ratings_retriever: provides ``get_ratings(group, result)`` and
            ``get_user_IDCG(user_id, idcg_top_k)``.
        idcg_top_k: forwarded to ``get_user_IDCG`` when computing the ideal DCG.

    Returns:
        LongtermMetrics with one appended value per processed group in every
        metric list; ``alg_name`` and ``group_name`` are left unset.
    """
    metrics = LongtermMetrics()

    for group, result in tqdm(list(zip(groups.values, results))):
        # presumably one row per group member and one column per recommended
        # item -- TODO confirm against RatingsRetriever.get_ratings
        group_item_ratings = ratings_retriever.get_ratings(group, result)
        ratings_user_sum = np.sum(group_item_ratings, axis=1)

        # DCG is computed row by row and normalised by each user's ideal DCG
        dcgs = np.apply_along_axis(calculate_dcg, 1, group_item_ratings)
        idcgs = np.array([ratings_retriever.get_user_IDCG(user_id, idcg_top_k) for user_id in group])
        ndcgs = dcgs / idcgs

        metrics.avg_ratings.append(float(np.mean(ratings_user_sum)))
        metrics.min_ratings.append(float(np.min(ratings_user_sum)))
        metrics.minmax_ratings.append(_min_max_ratio(ratings_user_sum))
        metrics.std_ratings.append(float(np.std(ratings_user_sum)))

        metrics.avg_ndcgs_ratings.append(float(np.mean(ndcgs)))
        metrics.min_ndcgs_ratings.append(float(np.min(ndcgs)))
        metrics.minmax_ndcgs_ratings.append(_min_max_ratio(ndcgs))
        metrics.std_ndcgs_ratings.append(float(np.std(ndcgs)))

    return metrics

    
def process_results(mf_path, groups_path, results_path, idcg_top_k):
    """Evaluate every stored result file and return the averaged metrics as a DataFrame.

    ``results_path`` is expected to contain one sub-directory per group type;
    every regular file inside such a directory is loaded with ``np.load`` and
    treated as the results of one algorithm, named after the part of the file
    name before the first dot.

    Args:
        mf_path: location passed to ``load_mf_matrices``.
        groups_path: location passed to ``load_groups``.
        results_path: directory holding the per-group-type result directories.
        idcg_top_k: forwarded to ``calculate_metrics``.

    Returns:
        pandas.DataFrame with one row per (group type, algorithm) holding the
        output of ``LongtermMetrics.to_avg_dict`` plus a ``group_size`` column
        derived from the group name.
    """
    # first, load the data
    u_features, i_features = load_mf_matrices(mf_path)
    ratings_retriever = RatingsRetriever(u_features, i_features)
    groups = load_groups(groups_path)

    # now, for each group type we have results for each algorithm
    results = []
    for group_name in sorted(os.listdir(results_path)):
        group_dir = os.path.join(results_path, group_name)
        # skip if not directory
        if not os.path.isdir(group_dir):
            continue

        print(f'--- processing group: {group_name}')
        # sorted like the outer loop so the row order does not depend on the
        # arbitrary order in which os.listdir returns entries
        for result_file in sorted(os.listdir(group_dir)):
            result_file_path = os.path.join(group_dir, result_file)

            # skip trace directories
            if os.path.isdir(result_file_path):
                continue

            result = np.load(result_file_path)
            algorithm_name = result_file.split('.')[0]
            metrics = calculate_metrics(
                groups[group_name],
                result,
                ratings_retriever,
                idcg_top_k,
            )
            metrics.alg_name = algorithm_name
            metrics.group_name = group_name

            results.append(metrics)
    avg_results = pd.DataFrame([result.to_avg_dict() for result in results])
    avg_results['group_size'] = avg_results['group_name'].apply(get_group_size_from_name)
    return avg_results

In [3]:
# Ranking direction handed to get_top_dict for every metric column.
# NOTE(review): the std_* columns are ranked with 'max' as well; if a lower
# spread is meant to be the better outcome this direction should be revisited
# -- confirm against get_top_dict.
sort_type_dict = {
    metric_name: 'max'
    for metric_name in (
        'avg_ratings',
        'min_ratings',
        'minmax_ratings',
        'std_ratings',
        'avg_ndcgs_ratings',
        'min_ndcgs_ratings',
        'minmax_ndcgs_ratings',
        'std_ndcgs_ratings',
    )
}

def get_latex_table_data(metrics: pd.Series, all_other_metrics: pd.DataFrame) -> str:
    """Format one algorithm's averaged metrics as the cells of a LaTeX table row.

    Args:
        metrics: one row of the averaged-results frame; read through
            ``metrics[<metric name>]`` and ``metrics.alg_name``.
        all_other_metrics: the rows of every algorithm for the same group type,
            passed to ``get_top_dict`` to rank the algorithms per metric.

    Returns:
        The six formatted values joined with ``' & '``. The algorithm found at
        index 0 of a metric's ranking is wrapped in ``textbf`` and the one at
        index 1 in ``underline`` (presumably best and second best -- confirm
        against ``get_top_dict``).

    Raises:
        ValueError: if ``metrics.alg_name`` is absent from a metric's ranking.
    """
    res_order = ['avg_ratings', 'min_ratings', 'minmax_ratings', 'avg_ndcgs_ratings', 'min_ndcgs_ratings', 'minmax_ndcgs_ratings']
    top_dict = get_top_dict(all_other_metrics, sort_type_dict)
    texts = []
    for res_name in res_order:
        position = top_dict[res_name].tolist().index(metrics.alg_name)

        value = metrics[res_name]
        # values of 100 or more get one decimal place, smaller values get two
        str_value = f'{value:.2f}' if value < 100 else f'{value:.1f}'

        if position == 0:
            texts.append(f'\\textbf{{{str_value}}}')
        elif position == 1:
            texts.append(f'\\underline{{{str_value}}}')
        else:
            texts.append(str_value)
    return ' & '.join(texts)


def create_latex_table(avg_results, eval_path, dataset_name):
    """Render the averaged long-term metrics as a LaTeX tabular and write it to a file.

    For each hard-coded group size the table gets a header and one row per
    algorithm; each row holds the cells for the ``se=1`` group type followed by
    the cells for the ``se=4`` group type.

    Args:
        avg_results: DataFrame with ``group_name`` and ``alg_name`` columns plus
            the metric columns read by ``get_latex_table_data``.
        eval_path: output directory; created if it does not exist.
        dataset_name: prefix of the output file
            ``<dataset_name>_longterm_results.tex``.

    Raises:
        IndexError: if ``avg_results`` has no row for one of the expected
            (group type, algorithm) combinations.
    """
    map_alg_name = {
        'avg_uniform': 'AVG-U',
        'avg': 'AVG',
        'dhondt_do': 'DHondtDO',
        'ep_fuzz_dhondt': 'EP-Fuzz-DA',
    }
    alg_order = ['avg_uniform', 'avg', 'dhondt_do', 'ep_fuzz_dhondt']

    # Every backslash below is escaped explicitly: sequences such as '\m',
    # '\h' or '\e' are invalid escapes that Python only tolerates with a warning.
    output_lines = []
    output_lines.append('\\begin{tabular}{ c | c c c | c c c || c c c | c c c }')
    for i, group_size in enumerate([2,3,4,6,8]):
        group_order = [f'prs_{group_size}_se=1_noc=1000', f'prs_{group_size}_se=4_noc=1000']
        size_note = f', group size s={group_size}'
        output_lines.append('')
        if i != 0:
            # empty spacer row between the blocks of two group sizes
            output_lines.append('\\multicolumn{12}{c}{} \\\\')
        output_lines.append(
            '\\multicolumn{1}{c}{} & \\multicolumn{6}{c}{PRS(M=1)' + size_note
            + '} & \\multicolumn{6}{c}{PRS(M=4)' + size_note + '} \\\\'
        )
        output_lines.append('\\multicolumn{1}{c}{} & \\multicolumn{3}{c}{AR} & \\multicolumn{3}{c}{nDCG} & \\multicolumn{3}{c}{AR} & \\multicolumn{3}{c}{nDCG} \\\\')
        output_lines.append('& mean & min & M/M & mean & min & M/M & mean & min & M/M & mean & min & M/M \\\\')
        output_lines.append('\\hline')
        for alg in alg_order:
            alg_texts = []
            for group in group_order:
                all_metrics_for_group = avg_results[avg_results['group_name'] == group]
                # the first matching row is the one for this algorithm
                specific_results: pd.Series = all_metrics_for_group[all_metrics_for_group['alg_name'] == alg].iloc[0]
                ltx_table_data = get_latex_table_data(specific_results, all_metrics_for_group)
                alg_texts.append(ltx_table_data)

            output_lines.append(f'{map_alg_name[alg]} & {" & ".join(alg_texts)} \\\\')
    output_lines.append('')
    output_lines.append('\\end{tabular}')

    # make sure the directory exists
    os.makedirs(eval_path, exist_ok=True)
    # write the lines to file in results_evaluation
    results_file_path = os.path.join(eval_path, f'{dataset_name}_longterm_results.tex')
    with open(results_file_path, 'w') as f:
        f.writelines(line + '\n' for line in output_lines)


In [4]:
# Names of the dataset directories (under ../datasets/) that the cells below iterate over.
datasets = ['kgrec', 'movie_lens', 'movie_lens_small', 'spotify', 'netflix']


In [5]:

# Evaluate every dataset and store its averaged metrics as a CSV file.
for dataset in datasets:
    # all inputs and outputs of one dataset live below its own directory
    data_dir = f'../datasets/{dataset}/'
    
    mf_path = os.path.join(data_dir, 'mf')
    groups_path = os.path.join(data_dir, 'groups')
    results_path = os.path.join(data_dir, 'experiment_results', 'longterm')
    eval_path = os.path.join(data_dir, 'evaluation_results')

    # value forwarded to RatingsRetriever.get_user_IDCG via calculate_metrics
    idcg_top_k = 10

    results = process_results(mf_path, groups_path, results_path, idcg_top_k)
    
    # the CSV written here is read back by the cell that builds the LaTeX table
    os.makedirs(eval_path, exist_ok=True)
    results.to_csv(os.path.join(eval_path, f'{dataset}_longterm_results.csv'), index=False)

U_features shape: (1000000, 300)
I_features shape: (2262292, 300)
--- processing group: prs_2_se=1_noc=1000


100%|██████████| 1000/1000 [24:10<00:00,  1.45s/it]
100%|██████████| 1000/1000 [00:00<00:00, 5628.41it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10573.63it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10780.06it/s]


--- processing group: prs_2_se=4_noc=1000


100%|██████████| 1000/1000 [23:27<00:00,  1.41s/it]
100%|██████████| 1000/1000 [00:00<00:00, 8617.19it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10690.13it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10850.19it/s]


--- processing group: prs_3_se=1_noc=1000


100%|██████████| 1000/1000 [34:51<00:00,  2.09s/it]
100%|██████████| 1000/1000 [00:00<00:00, 8231.66it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10206.78it/s]
100%|██████████| 1000/1000 [00:00<00:00, 5855.17it/s]


--- processing group: prs_3_se=4_noc=1000


100%|██████████| 1000/1000 [34:18<00:00,  2.06s/it]
100%|██████████| 1000/1000 [00:00<00:00, 5232.02it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7227.25it/s]
100%|██████████| 1000/1000 [00:00<00:00, 4269.53it/s]


--- processing group: prs_4_se=1_noc=1000


100%|██████████| 1000/1000 [41:05<00:00,  2.47s/it] 
100%|██████████| 1000/1000 [00:00<00:00, 7902.57it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8291.38it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10332.10it/s]


--- processing group: prs_4_se=4_noc=1000


100%|██████████| 1000/1000 [31:55<00:00,  1.92s/it]
100%|██████████| 1000/1000 [00:00<00:00, 6301.71it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8688.03it/s]
100%|██████████| 1000/1000 [00:00<00:00, 10384.56it/s]


--- processing group: prs_6_se=1_noc=1000


100%|██████████| 1000/1000 [47:48<00:00,  2.87s/it]
100%|██████████| 1000/1000 [00:00<00:00, 6906.74it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9014.95it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9222.04it/s]


--- processing group: prs_6_se=4_noc=1000


100%|██████████| 1000/1000 [47:00<00:00,  2.82s/it]
100%|██████████| 1000/1000 [00:00<00:00, 7282.41it/s]
100%|██████████| 1000/1000 [00:00<00:00, 7677.07it/s]
100%|██████████| 1000/1000 [00:00<00:00, 9353.93it/s]


--- processing group: prs_8_se=1_noc=1000


100%|██████████| 1000/1000 [1:02:39<00:00,  3.76s/it]
100%|██████████| 1000/1000 [00:00<00:00, 4591.26it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8428.10it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8622.09it/s]


--- processing group: prs_8_se=4_noc=1000


100%|██████████| 1000/1000 [1:02:17<00:00,  3.74s/it]
100%|██████████| 1000/1000 [00:00<00:00, 5218.20it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8227.27it/s]
100%|██████████| 1000/1000 [00:00<00:00, 8613.65it/s]


In [6]:
# Build the LaTeX table of every dataset from its previously written CSV file.
for dataset in datasets:
    data_dir = f'../datasets/{dataset}/'
    eval_path = os.path.join(data_dir, 'evaluation_results')
    # index_col=False: do not use the first CSV column as the index
    results = pd.read_csv(os.path.join(eval_path, f'{dataset}_longterm_results.csv'), index_col=False)
    print('create_latex_table')
    # writes <dataset>_longterm_results.tex into eval_path
    create_latex_table(results, eval_path, dataset)

create_latex_table
