# Evaluation

In [None]:
import numpy as np
import pandas as pd
from tools import eval_methods

### Evaluate candidate selection and ranking

In [None]:
# Short labels for the "Approach" column of the printed tables, keyed by the
# "+"-joined candidate-source combination.
dRenameCombs = {"stations":"stns", "stations+alts":"stns+alts", "stations+places+alts":"stns+alts+plcs"}

# Options:
devtest_settings = ["test"]
cr_approaches = ["perfect_match", "partial_match", "deezy_match"]
ncand_options = [1, 3, 5]
combinations = [["stations"], ["stations", "alts"], ["stations", "places", "alts"]]

# One (p, map, retr) column triple per number of candidates, so the table
# header always matches the row length even if ncand_options is edited.
metric_columns = ["p", "map", "retr"] * len(ncand_options)

# Row-wise metrics from tools.eval_methods; each one is averaged over all rows.
metric_functions = (eval_methods.pAt, eval_methods.avgP, eval_methods.isRetrieved)

for setting in devtest_settings:
    for comb in combinations:

        print("==========================================")
        print("Split:", setting)
        print("Combination:", comb)

        # The evaluation mode depends only on the combination: the flag is
        # switched off only for the stations+places+alts combination.
        exact_station = comb != ["stations", "places", "alts"]
        annotation = "Strict" if exact_station else "Appr"

        eval_results = []
        for approach in cr_approaches:

            # Relevant dataframe columns for this approach and combination:
            relv_columns = ["cr_" + approach + "_" + c for c in comb]

            appr_results = []
            for num_candidates in ncand_options:

                # Perfect match always returns just candidates where nv=1, so
                # for nv>1 only placeholders are reported; skip loading and
                # scoring data whose results would be thrown away.
                if approach == "perfect_match" and num_candidates > 1:
                    appr_results += [np.nan] * len(metric_functions)
                    continue

                test_df = pd.read_pickle("../processed/resolution/candranking_" + approach + "_" + setting + str(num_candidates) + ".pkl")

                # Report performance (mean over rows of p, map and retr, in
                # the same order as the table columns):
                appr_results += [
                    test_df.apply(metric, axis=1, args=(approach, relv_columns, exact_station)).mean()
                    for metric in metric_functions
                ]

            # "perfect_match" is displayed as "exact"; the other approaches
            # keep the first token of their name.
            approach_renamed = approach.split("_")[0]
            if approach_renamed == "perfect":
                approach_renamed = "exact"

            eval_results.append([annotation, approach_renamed + ":" + dRenameCombs["+".join(comb)]] + appr_results)

        cr_eval_df = pd.DataFrame(eval_results, columns = ["Eval", "Approach"] + metric_columns)
        cr_eval_df = cr_eval_df.round(2)
        cr_eval_df = cr_eval_df.fillna("--")
        print(cr_eval_df.to_latex(index=False))
        print("==========================================")
        print()

### Evaluate entity resolution

In [None]:
# Read the gazetteer CSV into a dataframe; the first row is the header and the
# first column is used as the index.
gazetteer_df = pd.read_csv(
    "../processed/wikidata/gb_gazetteer.csv",
    header=0,
    index_col=0,
    low_memory=False,
)

In [None]:
# Candidate ranking methods and numbers of candidates (nv) to report on:
cr_approaches = ["deezy_match", "partial_match", "perfect_match"]
ncand_options = [1, 3, 5]

# Display names for the resolution approaches, and the order in which they
# appear as rows of each printed table:
dApproachNames = {
    "candrank_most_confident": "string confidence",
    "wikipedia_most_relevant": "wikipedia relevance",
    "semantically_most_similar": "semantic coherence",
    "our_method_all": "SVM simple",
    "our_method_comb": "SVM combined",
    "skyline": "skyline",
    "ranklib": "RankLib",
}
approachList = [
    "skyline",
    "candrank_most_confident",
    "wikipedia_most_relevant",
    "semantically_most_similar",
    "ranklib",
    "our_method_all",
    "our_method_comb",
]

# One LaTeX table is printed per (candidate ranking method, nv) pair.
for candrank_method in cr_approaches:
    for num_candidates in ncand_options:

        results_test_df = pd.read_pickle(f"../processed/resolution/resolved_{candrank_method}_test{num_candidates}.pkl")

        eval_results = []
        for topres_approach in approachList:

            exact_acc_strict = eval_methods.topres_exactmetrics(results_test_df, topres_approach, True)

            if topres_approach == "skyline":
                # Only the strict score is computed for the skyline; the
                # remaining cells stay empty.
                acc_at = (np.nan, np.nan, np.nan)
                exact_acc_approx = np.nan
            else:
                acc_at = eval_methods.topres_distancemetrics(gazetteer_df, results_test_df, topres_approach, False)
                exact_acc_approx = eval_methods.topres_exactmetrics(results_test_df, topres_approach, False)

            # Row label: display name plus the ranking method's first token
            # and the number of candidates.
            row_label = f"{dApproachNames[topres_approach]} ({candrank_method.split('_')[0]}, nv={num_candidates})"
            eval_results.append([
                row_label,
                exact_acc_strict,
                exact_acc_approx,
                acc_at[0],
                acc_at[1],
                acc_at[2],
            ])

        table_columns = ["Approach", "PStr", "PAppr", "Acc@1km", "Acc@5km", "Acc@10km"]
        tr_eval_df = pd.DataFrame(eval_results, columns=table_columns)

        print()
        # Two decimals for the scores; cells that were not computed show "-".
        tr_eval_df = tr_eval_df.round(2).fillna("-")
        print(tr_eval_df.to_latex(index=False))
        print()