In [None]:
import warnings
from pathlib import Path

import pandas as pd

# Project imports: relative order kept as-is because the wildcard imports may shadow each other.
from evaluation.plotting import plot_metric_for_each_model_grouped_by_size
from utility import load_rave_dataset
from rave_constants import *
from evaluation.evaluation_utils import read_all_results_in_folder, get_experiment_results_df, evaluate_k_fold, METRICS_COLUMNS
from evaluation.experiment_evaluation import *
from evaluation.latex_utils import get_latex_table_lines, TableHeaderCell

In [None]:
# Dataset version switch: "v2" selects the V2 attribute list, any other value the original list.
dataset_version = "v2"
if dataset_version == "v2":
    SELECTED_ATTRIBUTES = ALL_SELECTED_ATTRIBUTES_V2
else:
    SELECTED_ATTRIBUTES = ALL_SELECTED_ATTRIBUTES

# The dataset file name is derived from the version chosen above.
dataset_path = f"./dataset/rave_dataset_{dataset_version}.json"
rave_dataset = load_rave_dataset(dataset_path)

# Apply the attribute selection to every sample
# (presumably filters each sample in place; confirm in keep_selected_text_attributes).
for sample in rave_dataset:
    sample.keep_selected_text_attributes(SELECTED_ATTRIBUTES)

In [None]:
# Folder (relative to the notebook's working directory) that holds the stored experiment results.
results_file_path = Path("./results")
# Load everything found in that folder; the result is annotated as a dict of dicts keyed by string
# (what the keys identify is defined by read_all_results_in_folder — TODO confirm).
all_results: dict[str, dict] = read_all_results_in_folder(results_file_path)

In [None]:
# Build the raw per-experiment results frame from the dataset and the loaded result files.
experiment_results_df_raw = get_experiment_results_df(rave_dataset, all_results, SELECTED_ATTRIBUTES)
# Code names that actually occur in the results; used below to validate the hand-written ordering.
experiment_code_names = experiment_results_df_raw[ExperimentResults.EXPERIMENT_CODE_NAME].unique().tolist()
# Explicit ordering of the experiment code names: baselines first, then one group per representation.
experiment_code_names_ordered = [
    'vanilla',
    'oa',
    'bow_LogReg',
    'bow_SVM',
    'bow_XGBoost',
    'bow_1-NN',
    'bow_3-NN',
    'bow_5-NN',
    'bow_7-NN',
    'embeddings_LogReg',
    'embeddings_SVM',
    'embeddings_XGBoost',
    'embeddings_1-NN',
    'embeddings_3-NN',
    'embeddings_5-NN',
    'embeddings_7-NN',
    'tfidf_LogReg',
    'tfidf_SVM',
    'tfidf_XGBoost',
    'tfidf_1-NN',
    'tfidf_3-NN',
    'tfidf_5-NN',
    'tfidf_7-NN',
]

# pd.Categorical maps every value that is not listed in `categories` to NaN, so an experiment
# missing from the list above would silently lose its code name. Surface that instead of hiding it.
unlisted_code_names = [
    code_name
    for code_name in experiment_code_names
    if pd.notna(code_name) and code_name not in experiment_code_names_ordered
]
if unlisted_code_names:
    warnings.warn(
        "Experiment code names found in the results but missing from "
        f"experiment_code_names_ordered (they will become NaN): {unlisted_code_names}"
    )

# Convert the code-name column to an ordered categorical so it sorts in the order defined above.
experiment_results_df_raw[ExperimentResults.EXPERIMENT_CODE_NAME] = pd.Categorical(
    experiment_results_df_raw[ExperimentResults.EXPERIMENT_CODE_NAME],
    categories=experiment_code_names_ordered,
    ordered=True)
# Aggregate the raw results via evaluate_k_fold (presumably across folds — confirm in evaluation_utils).
experiment_results_df = evaluate_k_fold(experiment_results_df_raw)
experiment_results_df

In [None]:
# Classifier names shared by every feature representation; each one becomes a table column.
_CLASSIFIER_DISPLAY_NAMES = ("LogReg", "SVM", "XGBoost", "1-NN", "3-NN", "5-NN", "7-NN")


def _experiment_headers_for(representation: str) -> list:
    """Build the header cells for one table: the two baselines followed by one cell per classifier.

    The baseline cells ('vanilla', 'oa') are created with can_bold=False. Each classifier cell
    gets the experiment code name "<representation>_<classifier>" as first argument and the bare
    classifier name as second argument.
    """
    baseline_cells = [TableHeaderCell(code_name, can_bold=False) for code_name in ('vanilla', 'oa')]
    classifier_cells = [
        TableHeaderCell(f"{representation}_{classifier}", classifier)
        for classifier in _CLASSIFIER_DISPLAY_NAMES
    ]
    return baseline_cells + classifier_cells


def _micro_f1_table(headers: list, representation_title: str, table_label: str):
    """Call get_latex_table_lines for the micro F1 metric with the arguments shared by all tables.

    Reads the notebook-level `experiment_results_df` and `sorted_model_names` at call time.
    The trailing `2` and `True` are forwarded positionally; their meaning is defined by
    get_latex_table_lines.
    """
    return get_latex_table_lines(
        experiment_results_df,
        f"micro_{ExperimentEvaluator.F1_SCORE}",
        sorted_model_names,
        headers,
        "Micro F1 Score",
        representation_title,
        table_label,
        2,
        True)


# One independent list of header cells per feature representation.
bow_experiment_headers = _experiment_headers_for("bow")
embeddings_experiment_headers = _experiment_headers_for("embeddings")
tfidf_experiment_headers = _experiment_headers_for("tfidf")

# Unique model names (with size), taken in the order obtained by sorting on model name, then model size.
sorted_model_names = (
    experiment_results_df
    .sort_values(by=[ExperimentResults.MODEL_NAME, ExperimentResults.MODEL_SIZE])
    [ExperimentResults.MODEL_NAME_W_SIZE]
    .unique()
)

res_bow = _micro_f1_table(bow_experiment_headers, "BoW", "tab:bow_all_models_micro_f1")
res_tfidf = _micro_f1_table(tfidf_experiment_headers, "TF-IDF", "tab:tfidf_all_models_micro_f1")
res_embeddings = _micro_f1_table(embeddings_experiment_headers, "Embeddings", "tab:embeddings_all_models_micro_f1")

In [None]:
def get_delta_to_experiment(all_data: pd.DataFrame, base_data: pd.DataFrame, experiment_code_name: str) -> pd.DataFrame:
    """Attach the metrics of one reference experiment to `base_data` and compute the differences.

    Args:
        all_data: Frame containing the rows of every experiment, including the reference one.
        base_data: Frame whose metric values are compared against the reference experiment.
        experiment_code_name: Code name selecting the reference rows in `all_data`.

    Returns:
        `base_data` inner-joined with the reference rows on the model-name-with-size column.
        For every metric in METRICS_COLUMNS the result holds the reference value in
        "<metric>_<experiment_code_name>" and the difference (base minus reference) in
        "<metric>_<experiment_code_name>_delta". Models without a reference row are dropped
        by the inner join.
    """
    reference_suffix = f"_{experiment_code_name}"

    # Keep only the join key and the metric columns of the reference experiment.
    is_reference_row = all_data[ExperimentResults.EXPERIMENT_CODE_NAME] == experiment_code_name
    reference_columns = [ExperimentResults.MODEL_NAME_W_SIZE, *METRICS_COLUMNS]
    reference_df = all_data.loc[is_reference_row, reference_columns]

    # Base columns keep their names; clashing reference columns receive the suffix.
    merged_df = base_data.merge(
        reference_df,
        on=ExperimentResults.MODEL_NAME_W_SIZE,
        suffixes=("", reference_suffix),
    )

    deltas = {
        f"{metric}{reference_suffix}_delta": merged_df[metric] - merged_df[f"{metric}{reference_suffix}"]
        for metric in METRICS_COLUMNS
    }
    return merged_df.assign(**deltas)

In [None]:
# Rows whose oracle-attributes flag equals False. The `== False` comparison is kept as written;
# `~` would behave differently if the column is not strictly boolean.
is_non_oracle = experiment_results_df[ExperimentResults.ORACLE_ATTRIBUTES] == False
no_oa_df = experiment_results_df.loc[is_non_oracle]

# For every model, the index label of its non-oracle row with the highest micro F1 score.
idx = (
    no_oa_df
    .groupby(ExperimentResults.MODEL_NAME_W_SIZE)[f"micro_{ExperimentEvaluator.F1_SCORE}"]
    .idxmax()
)
best_no_oa_df = experiment_results_df.loc[idx]

# Add reference values and deltas against 'oa' first, then against 'vanilla'.
# NOTE(review): `differneces_df` is misspelled, but a later cell references it, so the name is kept.
differneces_df = best_no_oa_df
for baseline_code_name in ("oa", "vanilla"):
    differneces_df = get_delta_to_experiment(experiment_results_df, differneces_df, baseline_code_name)
differneces_df

In [None]:
def plot_micro_f1_score_for_each_model_all_settings(experiment_results_df: pd.DataFrame, take_max:bool):
    """Plot the micro F1 score per model through plot_metric_for_each_model_grouped_by_size.

    Args:
        experiment_results_df: Results frame handed to the plotting helper unchanged.
        take_max: Forwarded to the plotting helper; also decides whether the plot title
            reads "Highest" (True) or "Lowest" (False).
    """
    if take_max:
        extreme_label = "Highest"
    else:
        extreme_label = "Lowest"

    metric_column = f"micro_{ExperimentEvaluator.F1_SCORE}"
    plot_title = f"{extreme_label} micro F1 score achieved"

    # The trailing 2 is forwarded positionally; its meaning is defined by the plotting helper.
    plot_metric_for_each_model_grouped_by_size(
        experiment_results_df,
        metric_column,
        "Micro F1 Score (%)",
        plot_title,
        take_max,
        2,
    )

In [None]:
# Plot the micro F1 score of the rows in differneces_df (each model's best non-oracle setting),
# using the "Highest" title variant (take_max=True).
plot_micro_f1_score_for_each_model_all_settings(differneces_df, True)