# Generate data for statistical evaluation

Generates one CSV table per grouping, each containing all categories. Only the test set's results are exported.
The data is expected to have the following columns:
* Dataset: {lastfm_10_pc|ml-10m|rtl}
* dt_group: {a(low), b(medium), c(high)}
* model: {m0-7}
* Recall, MRR, UserRecall, UserMRR: float
* eq1, eq2, eq3: bool

Requires:
* final_results.csv - a tab-separated file containing all results for all metrics, for all categories, for all groupings and for all datasets.

Returns:
* CSV files containing the specified columns, one file per grouping (e.g. Kendall's Tau, mean_dt, etc.)

In [None]:
import os

import numpy as np
import pandas as pd

In [None]:
# Location of the combined results table; it is read as a tab-separated file
# further down in this notebook.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
input_path = "/Users/nknyazev/Documents/Delft/Thesis/temporal/data/results/RQ3/final_results.csv"
# Folder that receives the exported per-grouping CSV files.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
output_folder = "/Users/nknyazev/Documents/Delft/Thesis/temporal/data/results/statistics/RQ3"

In [None]:
# Columns written to every exported file, in exactly this order.
output_columns = [
    "Dataset",
    "dt_group",
    "model",
    "Recall",
    "MRR",
    "UserRecall",
    "UserMRR",
    "eq1",
    "eq2",
    "eq3",
]

In [None]:
# Load the full results table; despite the .csv extension the file is tab-delimited.
input_df = pd.read_csv(
    filepath_or_buffer=input_path,
    delimiter="\t",
)

In [None]:
# Keep only the test-set rows; 'subset' carries no information afterwards and
# is dropped. The explicit copy gives an independent frame, so the column
# assignments below never write into a slice of input_df.
output_df = input_df[input_df["subset"] == "test"].drop(columns="subset").copy()

# For each flag column, the model ids for which the flag is True.
equation_model_ids = {
    "eq1": [1, 4, 5, 7],
    "eq2": [2, 4, 6, 7],
    "eq3": [3, 5, 6, 7],
}
# The flags must be derived before model_id is turned into a string label.
for flag_column, model_ids in equation_model_ids.items():
    # Vectorized membership test instead of a per-row Python lambda.
    output_df[flag_column] = output_df["model_id"].isin(model_ids)

# Turn the model id into the label used in the export: 'm' followed by the id.
output_df["model_id"] = output_df["model_id"].apply("m{}".format)
# Prefix the category names; values not listed here are left unchanged.
output_df["category"] = output_df["category"].replace(
    {"low": "a(low)", "middle": "b(medium)", "high": "c(high)"}
)

# Rename the raw column names to the ones used in the exported files.
output_df = output_df.rename(
    columns={
        "dataset": "Dataset",
        "category": "dt_group",
        "model_id": "model",
        "mrr": "MRR",
        "recall": "Recall",
        "u_recall": "UserRecall",
        "u_mrr": "UserMRR",
    }
)

In [None]:
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(output_folder, exist_ok=True)

# Write one '<grouping>_nonwide.csv' file per distinct value of the 'grouping'
# column, restricted to the selected output columns and without the index.
for grouping, df in output_df.groupby('grouping'):
    output_path = os.path.join(output_folder, f'{grouping}_nonwide.csv')
    df[output_columns].to_csv(output_path, index=False)