In [None]:
import pandas as pd
import numpy as np
from sklearn.base import clone

ROOT_DIR = "./plot_data/experiment-1/tables/"
RESULTS_DIR = "./plot_data/experiment-1/results/"

meta_features = pd.read_csv(ROOT_DIR + 'meta_features_table.csv', index_col=0) # MinMax Scaled

reg_quantifiers_eval = pd.read_csv(ROOT_DIR + 'reg_quantifiers_evaluation_table.csv', index_col=0)
knn_quantifiers_eval = pd.read_csv(ROOT_DIR + 'knn_quantifiers_evaluation_table.csv', index_col=0)

reg_quantifiers_eval_agg = pd.read_csv(ROOT_DIR + 'reg_quantifiers_evaluation_table_agg.csv', index_col=[0,1])
knn_quantifiers_eval_agg = pd.read_csv(ROOT_DIR + 'knn_quantifiers_evaluation_table_agg.csv', index_col=[0,1])

In [2]:
def fit_regressor(model, meta_features_table, not_aggregated_evaluation_table, evaluation_table):
    meta_features_table = meta_features_table
    not_agg_evaluation_table = not_aggregated_evaluation_table
    evaluation_table = evaluation_table

    model_dict = {}
    
    X_train = meta_features_table.values
    y_train = None
    for quantifier in evaluation_table.index.levels[0].tolist():
        y_train = evaluation_table.loc[quantifier]['abs_error'].values
        model_dict[quantifier] = clone(model)
        model_dict[quantifier].fit(X_train, y_train)

    return model_dict

# Evaluate Quantifier Recommender with Leave-One-Out
def loo_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, recommender_eval_path: str = None):
    aux_recommender_evaluation_table = pd.DataFrame(columns=["predicted_error", "true_error"], index=evaluation_table.index)
    for quantifier, recommender in model_dict.items():
        recommender_ = clone(recommender)
        for dataset in evaluation_table.index.levels[1]:
            X_test = meta_features_table.loc[dataset].values
            X_test = np.array(X_test).reshape(1, -1)
            y_test = evaluation_table.loc[quantifier, dataset]['abs_error']

            X_train = meta_features_table.drop(index=dataset).values
            y_train = evaluation_table.loc[quantifier].drop(index=dataset)['abs_error'].values

            recommender_.fit(X_train, y_train)
            predicted_error = recommender_.predict(X_test)[0]

            aux_recommender_evaluation_table.loc[(quantifier, dataset)] = [predicted_error, y_test]
    
    datasets = aux_recommender_evaluation_table.index.get_level_values('dataset').unique()
    recommender_evaluation_table = pd.DataFrame(columns=["predicted_ranking", "predicted_ranking_weights", "predicted_ranking_mae",
                                                            "true_ranking", "true_ranking_weights", "true_ranking_mae"], index=datasets)
    for dataset in datasets:
        filtered_result = aux_recommender_evaluation_table.xs(dataset, level='dataset')
        
        predicted_ranking = filtered_result.sort_values(by='predicted_error').index.tolist()
        predicted_ranking_mae = [filtered_result.loc[quantifier, 'predicted_error'] for quantifier in predicted_ranking]

        errors = np.array(predicted_ranking_mae)
        denominator = np.sum(1/errors)
        predicted_ranking_weights = (1/errors)/denominator

        true_ranking = filtered_result.sort_values(by='true_error').index.tolist()
        true_ranking_mae = [filtered_result.loc[quantifier, 'true_error'] for quantifier in true_ranking]

        errors = np.array(true_ranking_mae)
        if np.any(errors == 0):
            errors = np.array([1e-6 if x == 0 else x for x in errors])
        denominator = np.sum(1/errors)
        true_ranking_weights = (1/errors)/denominator

        recommender_evaluation_table.loc[dataset] = [predicted_ranking, predicted_ranking_weights, predicted_ranking_mae,
                                                        true_ranking, true_ranking_weights, true_ranking_mae]
        
    if not recommender_eval_path is None:
        recommender_evaluation_table.to_csv(recommender_eval_path)
    
    not_agg_evaluation_table = not_agg_evaluation_table.copy(deep=True)
    not_agg_evaluation_table.sort_values(by=['quantifier', 'dataset'], inplace=True)
    not_agg_evaluation_table.reset_index(drop=True, inplace=True)
    
    return recommender_evaluation_table, not_agg_evaluation_table


In [4]:
meta_features_table

Unnamed: 0,attr_conc.mean,attr_conc.sd,attr_ent.mean,attr_ent.sd,attr_to_inst,best_node.mean,best_node.sd,can_cor.mean,can_cor.sd,cat_to_num,...,tree_imbalance.sd,tree_shape.mean,tree_shape.sd,var.mean,var.sd,var_importance.mean,var_importance.sd,w_lambda,worst_node.mean,worst_node.sd
0,0.008076,0.068451,0.002277,0.487575,0.036792,0.561303,0.002908,0.0,0.0,0.0,...,0.358205,0.012390,0.179874,0.0,0.0,0.040575,0.082875,0.0,0.595559,0.003122
1,0.018818,0.000000,0.655553,0.000214,0.001305,0.268795,0.089752,0.0,0.0,0.0,...,0.291647,0.019560,0.150003,0.0,0.0,1.000000,0.425873,0.0,0.275539,0.097913
2,0.079141,0.147877,0.536879,0.000009,0.010380,0.739677,0.151297,0.0,0.0,0.0,...,0.695297,0.302634,0.606986,0.0,0.0,0.498967,0.562337,0.0,0.215528,0.006171
3,0.041816,0.188793,0.285023,0.825212,0.058818,1.000000,0.000000,0.0,0.0,0.0,...,0.000000,1.000000,0.000000,0.0,0.0,0.055195,0.389382,0.0,0.548738,0.006181
4,0.159952,0.552636,0.515038,0.544216,0.029761,0.315149,0.265705,0.0,0.0,0.0,...,0.312985,0.013647,0.147358,0.0,0.0,0.037231,0.020176,0.0,0.285434,0.107141
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,0.057643,0.433470,0.096516,0.562013,0.044251,0.596928,0.234005,0.0,0.0,0.0,...,0.373381,0.031128,0.271067,0.0,0.0,0.033094,0.097880,0.0,0.360850,0.187647
96,0.008222,0.111015,0.070431,0.970933,0.026781,0.897692,0.020090,0.0,0.0,0.0,...,0.465097,0.089106,0.426712,0.0,0.0,0.016491,0.103040,0.0,0.956467,0.001506
97,0.050778,0.334608,0.533369,0.996965,0.001611,0.714450,0.752608,0.0,0.0,0.0,...,1.000000,0.899746,0.383326,0.0,0.0,0.498967,0.722514,0.0,0.109386,0.514661
98,0.043376,0.143995,0.659467,0.091685,0.007570,0.674358,0.138816,0.0,0.0,0.0,...,0.462460,0.021192,0.095032,0.0,0.0,0.141086,0.258016,0.0,0.541247,0.330399


In [5]:
evaluation_table

Unnamed: 0_level_0,Unnamed: 1_level_0,abs_error,run_time
quantifier,dataset,Unnamed: 2_level_1,Unnamed: 3_level_1
ACC,1043_ada_agnostic,0.054885,0.000249
ACC,1460_banana,0.457800,0.000243
ACC,1462_banknote-authentication,0.010968,0.000239
ACC,1466_cardiotocography,0.000000,0.000243
ACC,1475_first-order-theorem-proving,0.077038,0.000246
...,...,...,...
X,spambase,0.023555,0.000245
X,sylva_prior,0.006520,0.000266
X,visualizing_soil,0.002000,0.000253
X,wind,0.035743,0.000247


In [3]:
from sklearn.neighbors import KNeighborsRegressor

model = KNeighborsRegressor()
meta_features_table = meta_features
evaluation_table = reg_quantifiers_eval_agg
not_aggregated_evaluation_table = reg_quantifiers_eval

model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
_,_ = loo_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "neigh_regressor_recommendation_eval.csv")

KeyError: '1043_ada_agnostic'