# Evaluation

In [None]:
import time
import numpy as np
import pandas as pd
from pathlib import Path
from collections import OrderedDict
from tools import eval_methods, selection_methods
from tqdm.auto import tqdm
tqdm.pandas()

### Evaluate candidate selection and ranking

In [None]:
# Evaluate candidate selection and ranking.
#
# For every (split, candidate-ranking approach, number of candidates) the
# matching pickled dataframe is loaded, and for every evaluation mode (strict
# or approximate) a LaTeX table is printed with the mean of eval_methods.pAt,
# eval_methods.avgP and eval_methods.isRetrieved over all rows.

# Options:
devtest_settings = ["test"]
cr_approaches = ["deezy_match", "partial_match", "perfect_match"]
ncand_options = [1, 3, 5]
exact_options = [True, False]

# Column-suffix groups evaluated together: each suffix is appended to
# "cr_<approach>_" to select the dataframe columns handed to the metrics.
combinations = [["stations"], ["stations", "alts"], ["stations", "places", "alts"]]

# -----------------------------------
# Evaluate the different candrank experiments
for setting in devtest_settings:
    for approach in cr_approaches:
        for num_candidates in ncand_options:

            # The input file does not depend on the strict/approximate flag,
            # so it is loaded once per (setting, approach, num_candidates).
            test_df = pd.read_pickle("../processed/resolution/candranking_" + approach + "_" + setting + str(num_candidates) + ".pkl")

            # Approach names present in the dataframe, recovered from its
            # "cr_<name>_stations" columns.
            candrank_approaches = [x.replace("cr_", "").replace("_stations", "") for x in test_df if x.startswith("cr_") and x.endswith("stations")]

            for exact_station in exact_options:
                print("==========================================")
                print("Split:", setting)
                print("Approach:", approach)
                print("Num candidates:", num_candidates)
                print("Strict eval:", exact_station)

                eval_results = []
                for combination in combinations:
                    # NOTE: this loop variable must not be called `approach`:
                    # reusing that name would overwrite the outer loop variable
                    # that builds the header above and the pickle path.
                    for candrank_approach in candrank_approaches:

                        # Get relevant columns from dataframe:
                        relv_columns = ["cr_" + candrank_approach + "_" + c for c in combination]

                        # Report performance (row-wise metric, averaged over the dataframe):
                        p = test_df.apply(lambda row: eval_methods.pAt(row, candrank_approach, relv_columns, exact_station), axis=1).mean()
                        mapAt = test_df.apply(lambda row: eval_methods.avgP(row, candrank_approach, relv_columns, exact_station), axis=1).mean()
                        isRetrieved = test_df.apply(lambda row: eval_methods.isRetrieved(row, candrank_approach, relv_columns, exact_station), axis=1).mean()
                        eval_results.append([candrank_approach + ": " + "+".join(combination), p, mapAt, isRetrieved])

                # The first column header records which evaluation mode produced the table.
                annotation = "strict" if exact_station else "appr"
                cr_eval_df = pd.DataFrame(eval_results, columns = ["Approach:" + annotation, "p@" + str(num_candidates), "map@" + str(num_candidates), "retr@" + str(num_candidates)])
                print(cr_eval_df.round(2).to_latex(index=False))
                print("==========================================")
                print()

### Evaluate entity resolution

In [None]:
# Read the gazetteer CSV into a dataframe: the first row supplies the column
# names and the first column becomes the index.
gazetteer_df = pd.read_csv(
    "../processed/wikidata/gb_gazetteer.csv",
    header=0,
    index_col=0,
    low_memory=False,
)

In [None]:
# Evaluate entity resolution.
#
# For every candidate-ranking method and number of candidates, load the
# resolved test dataframe, score each resolution approach with the helpers in
# eval_methods, and print the scores as a LaTeX table.
cr_approaches = ["deezy_match", "partial_match", "perfect_match"]
ncand_options = [1, 3, 5]

# Label used in the printed table for each resolution approach.
dApproachNames = {
    "candrank_most_confident": "string confidence",
    "wikipedia_most_relevant": "wikipedia relevance",
    "semantically_most_similar": "semantic coherence",
    "our_method_all": "SVM simple",
    "our_method_comb": "SVM combined",
    "skyline": "skyline",
    "ranklib": "ranklib all features",
}

# Resolution approaches, in the order their rows appear in the table.
topres_approaches = [
    "skyline",
    "candrank_most_confident",
    "wikipedia_most_relevant",
    "semantically_most_similar",
    "ranklib",
    "our_method_all",
    "our_method_comb",
]

for candrank_method in cr_approaches:
    for num_candidates in ncand_options:
        print(candrank_method, num_candidates)

        results_path = "../processed/resolution/resolved_" + candrank_method + "_test" + str(num_candidates) + ".pkl"
        results_test_df = pd.read_pickle(results_path)

        eval_results = []
        for topres_approach in topres_approaches:
            print(topres_approach, candrank_method, "(numCands: " + str(num_candidates) + ")")

            # The strict exact-match score is computed for every approach.
            exact_acc_strict = eval_methods.topres_exactmetrics(results_test_df, topres_approach, True)

            if topres_approach == "skyline":
                # "skyline" only gets the strict score; the remaining cells
                # stay NaN and are rendered as "-" in the table.
                acc_at = (np.nan, np.nan, np.nan)
                exact_acc_approx = np.nan
            else:
                acc_at = eval_methods.topres_distancemetrics(gazetteer_df, results_test_df, topres_approach, False)
                exact_acc_approx = eval_methods.topres_exactmetrics(results_test_df, topres_approach, False)

            table_row = [dApproachNames[topres_approach], exact_acc_strict, exact_acc_approx]
            table_row.extend(acc_at[i] for i in range(3))
            eval_results.append(table_row)

        tr_eval_df = pd.DataFrame(
            eval_results,
            columns=["Approach", "PrecStr", "PrecAppr", "Acc@1km", "Acc@5km", "Acc@10km"],
        )

        print()
        # Round first, then replace missing scores with a dash for the table.
        tr_eval_df = tr_eval_df.round(2).fillna("-")
        print(tr_eval_df.to_latex(index=False))
        print()