# Generate data from Table 2 to be used for statistical tests by R scripts in this folder

In [None]:
import os
import json
import collections
import re

import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pyplot as plt

# Apply seaborn's "darkgrid" theme to any figures drawn in this session.
sns.set_style("darkgrid")

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings; consider
# narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [None]:
# Number of digits shown after the decimal point when floats are displayed;
# the same value is later passed to DataFrame.round(). Despite its name this
# counts decimal places, not significant digits.
num_sign_digits = 4
pd.options.display.float_format = f"{{:,.{num_sign_digits}f}}".format

In [None]:
# Absolute, machine-specific path of the JSON results file that is read with
# json.load further down; adjust it before running the notebook elsewhere.
results_path = "/Users/nknyazev/Documents/Delft/Thesis/temporal/data/results/best_runs_201909.json"

In [None]:
# Absolute, machine-specific directory that receives the CSV written by this
# notebook.
output_root = "/Users/nknyazev/Documents/Delft/Thesis/temporal/data/results/statistics/RQ2"
# Create the directory (and any missing parents) from Python instead of the
# IPython `!mkdir -p` shell escape, so the cell also runs outside IPython, on
# platforms without a POSIX shell, and with paths that contain spaces.
os.makedirs(output_root, exist_ok=True)

In [None]:
# https://stackoverflow.com/questions/6027558/flatten-nested-python-dictionaries-compressing-keys
def flatten(d, parent_key='', sep='_'):
    """Flatten a nested mapping into a single-level dict.

    Keys of nested mappings are joined to the key of their parent with
    *sep*, so ``{"a": {"b": 1}}`` becomes ``{"a_b": 1}``.

    Args:
        d: Mapping to flatten. Keys are concatenated with ``+`` and must
            therefore be strings.
        parent_key: Prefix prepended to every key of *d*; empty at the top
            level.
        sep: Separator placed between a parent key and a child key.

    Returns:
        A new flat ``dict``; *d* itself is not modified.
    """
    # The ABC aliases in the top-level ``collections`` namespace were removed
    # in Python 3.10; ``collections.abc`` is the supported location.
    from collections.abc import MutableMapping

    items = []
    for k, v in d.items():
        new_key = parent_key + sep + k if parent_key else k
        if isinstance(v, MutableMapping):
            # Recurse, carrying the accumulated key as the new prefix.
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

def convert_types(df):
    """Cast every column of *df* to the narrowest of int, float or str.

    For each column the candidate types are tried in the order ``int``,
    ``float``, ``str`` and the first cast that succeeds is kept. A cast to
    ``int`` is rejected when the column already holds floats and the cast
    would change any value, because ``astype(int)`` truncates fractional
    floats silently instead of raising.

    Args:
        df: DataFrame whose columns are converted. It is modified in place.

    Returns:
        The same DataFrame object, for chaining.
    """
    for col in df.columns:
        original = df[col]
        for target in (int, float, str):
            try:
                converted = original.astype(target)
            except ValueError:
                # This type does not fit the column; try the next one.
                continue
            if (
                target is int
                and pd.api.types.is_float_dtype(original)
                and not (converted == original).all()
            ):
                # Lossy truncation (e.g. 0.25 -> 0): keep looking.
                continue
            df[col] = converted
            break
    return df

In [None]:
def find_subset(x):
    """Return the subset label named in *x*.

    "low" is checked first, then "medium"; any other input maps to "high".
    """
    for label in ("low", "medium"):
        if label in x:
            return label
    return "high"


def user_metric(x):
    """Tell whether *x* contains the "u_" marker."""
    return "u_" in x


def find_metric(x):
    """Return the metric key for *x*.

    The base is "recall" when *x* contains "recall" and "mrr" otherwise; it
    is prefixed with "u_" when ``user_metric(x)`` is true.
    """
    base = "recall" if "recall" in x else "mrr"
    if user_metric(x):
        return "u_" + base
    return base

In [None]:
# Load the raw results. JSON text is UTF-8, so name the encoding explicitly
# rather than relying on the platform's locale default.
with open(results_path, encoding="utf-8") as input_file:
    results = json.load(input_file)

In [None]:
# Collapse every entry into one flat record: its run parameters merged with
# its flattened results (on a key clash the result value wins).
results = {
    run_key: {**run["run_params"], **flatten(run["results"])}
    for run_key, run in results.items()
}

In [None]:
# One row per entry of `results`; the dictionary keys themselves are dropped.
df = pd.DataFrame(list(results.values()))

In [None]:
# Cast each column with convert_types (which works on `df` in place and
# returns it), then round to `num_sign_digits` decimal places.
df = convert_types(df).round(num_sign_digits)

In [None]:
# # Sort by dataset and model_id
# df = df.sort_values(["dataset", "model_id"])

In [None]:
# Columns retained for the analysis: the two identifier columns plus one
# "test_<group>_<metric>" column per metric/group pair, ordered metric by
# metric with the groups cycling fastest.
groups = ["low", "medium", "high"]
metrics = ["recall", "mrr", "u_recall", "u_mrr"]
metrics_cols_to_keep = [
    f"test_{group}_{metric}"
    for metric in metrics
    for group in groups
]
cols_to_keep = ["dataset", "model_id", *metrics_cols_to_keep]

In [None]:
# Reshape the per-run table so that each row is one (Dataset, dt_group, model)
# combination and each column is one metric.
df2 = df.copy()[cols_to_keep]
# Turn the model id into a string label prefixed with "m" (e.g. 3 -> "m3").
df2["model_id"] = df2["model_id"].apply(lambda x: "m{}".format(x))
df2 = df2.set_index(["dataset", "model_id"])\
        .sort_index()
df2.index.set_names(["Dataset", "model"], inplace=True)

# Derive, for every remaining "test_<group>_<metric>" column, its metric key
# (outer level) and its subset label (inner level).
outer_index = [find_metric(x) for x in df2.columns]
inner_index = [find_subset(x) for x in df2.columns]

# Replace the flat column names with the two-level (metric, subset) labels.
df2.columns = [outer_index, inner_index]
# Move the innermost column level (the subset) into the row index.
# NOTE(review): the default behaviour of stack() is changing in recent pandas
# releases (`future_stack`); verify row/column order after upgrading.
df2 = df2.stack()
renaming_dict = {"recall": "Recall", "mrr": "MRR", "u_recall": "UserRecall", "u_mrr": "UserMRR"}
# Give the metric columns their display names and a fixed left-to-right order.
df2 = df2.rename(renaming_dict, axis=1)[["Recall", "MRR", "UserRecall", "UserMRR"]]
df2.index.set_names(["Dataset", "model", "dt_group"], inplace=True)
df2.index = df2.index.reorder_levels(["Dataset", "dt_group", "model"])
# Sort rows by the three index levels (referenced by name); the labels are
# strings, so dt_group and model sort lexicographically here.
df2 = df2.sort_values(["Dataset", "dt_group", "model"])
# Re-impose the logical low, medium, high order on the dt_group level.
df2 = df2.reindex(["low", "medium", "high"], level=1)


In [None]:
# Boolean flag table with one row per on/off combination of the three
# equation flags (2**3 = 8 rows), in this exact order.
# NOTE(review): presumably row i describes the i-th model in sorted order --
# confirm against the model_id numbering.
equation_df = pd.DataFrame(
    [
        [0, 0, 0],
        [1, 0, 0],
        [0, 1, 0],
        [0, 0, 1],
        [1, 1, 0],
        [1, 0, 1],
        [0, 1, 1],
        [1, 1, 1],
    ],
    columns=["eq1", "eq2", "eq3"],
    dtype=bool,
)


In [None]:
# Tile the equation flag table so that it lines up row-for-row with df2, then
# adopt df2's index so the two frames can be joined.
# NOTE(review): the alignment is purely positional -- it assumes every
# consecutive run of len(equation_df) rows in df2 lists the models in the same
# order as the rows of equation_df. Confirm this holds for the data.
n_blocks, leftover = divmod(len(df2.index), len(equation_df))
if n_blocks == 0 or leftover:
    # Fail early with a descriptive message instead of the generic length
    # mismatch that the index assignment below would otherwise raise.
    raise ValueError(
        f"Cannot align equation flags: df2 has {len(df2.index)} rows, "
        f"which is not a positive multiple of {len(equation_df)}"
    )
repeated_equations = pd.concat([equation_df] * n_blocks)
repeated_equations.index = df2.index


In [None]:
# Attach the eq1/eq2/eq3 flags to the metric table; the join matches on the
# index, which repeated_equations shares with df2.
output_df = df2.join(repeated_equations)

In [None]:
# Destination of the "nonwide" CSV inside output_root.
non_wide_path = os.path.join(output_root, "nonwide.csv")
# Writing output_df directly is disabled; the file is written in the final
# cell after the dt_group labels have been rewritten.
# output_df.to_csv(non_wide_path, index=True)

In [None]:
# R at some point reorders alphabetically, which would break the intended
# low, medium, high ordering; prefixing the dt_group labels with a/b/c makes
# the alphabetical order coincide with the logical one.
a = output_df.reset_index()
a["dt_group"] = a["dt_group"].apply(
    lambda group: {"low": "a(low)", "medium": "b(medium)", "high": "c(high)"}[group]
)
a.to_csv(non_wide_path, index=False)