In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.base import clone

# Folder holding the input tables, and folder receiving the generated results.
ROOT_DIR = "./plot_data/experiment-1/tables/"
RESULTS_DIR = "./plot_data/experiment-1/results/"

# Meta-features table (MinMax scaled); the "_alt" variant is the one in use.
# Alternative table: f"{ROOT_DIR}meta_features_table.csv" (also MinMax scaled).
meta_features = pd.read_csv(f"{ROOT_DIR}meta_features_table_alt.csv", index_col=0)

# Non-aggregated quantifier evaluations (first CSV column is the index).
reg_quantifiers_eval = pd.read_csv(f"{ROOT_DIR}reg_quantifiers_evaluation_table.csv", index_col=0)
knn_quantifiers_eval = pd.read_csv(f"{ROOT_DIR}knn_quantifiers_evaluation_table.csv", index_col=0)

# Aggregated quantifier evaluations (first two CSV columns form a MultiIndex).
reg_quantifiers_eval_agg = pd.read_csv(f"{ROOT_DIR}reg_quantifiers_evaluation_table_agg.csv", index_col=[0, 1])
knn_quantifiers_eval_agg = pd.read_csv(f"{ROOT_DIR}knn_quantifiers_evaluation_table_agg.csv", index_col=[0, 1])

In [2]:
def fit_regressor(model, meta_features_table, not_aggregated_evaluation_table, evaluation_table):
    """Fit one error regressor per quantifier on the whole meta-dataset.

    Parameters
    ----------
    model : estimator
        Unfitted template. A separate ``clone`` of it is fitted for every
        quantifier.
    meta_features_table : pandas.DataFrame
        Meta-features, one row per dataset, indexed by dataset label.
    not_aggregated_evaluation_table : pandas.DataFrame
        Not used here; kept so the signature stays parallel to ``loo_reg``.
    evaluation_table : pandas.DataFrame
        Table with a two-level (quantifier, dataset) MultiIndex and an
        ``abs_error`` column.

    Returns
    -------
    dict
        Maps each quantifier (first index level) to its fitted regressor.

    Raises
    ------
    KeyError
        If a dataset of ``evaluation_table`` has no row in
        ``meta_features_table``.
    """
    model_dict = {}

    for quantifier in evaluation_table.index.levels[0].tolist():
        # abs_error of this quantifier, indexed by dataset label.
        targets = evaluation_table.loc[quantifier]['abs_error']

        # Pick the feature rows by dataset label so that X and y are paired
        # correctly even when the two tables are stored in different orders.
        X_train = meta_features_table.loc[targets.index].values
        y_train = targets.values

        regressor = clone(model)
        regressor.fit(X_train, y_train)
        model_dict[quantifier] = regressor

    return model_dict

def _inverse_error_weights(errors, eps=1e-6):
    """Return weights proportional to 1/error, normalised to sum to 1.

    Non-positive errors are replaced by ``eps`` first, so an exact zero (or a
    negative regressor output) cannot produce inf/NaN or negative weights.
    """
    safe_errors = np.asarray(errors, dtype=float)
    safe_errors = np.where(safe_errors <= 0, eps, safe_errors)
    inverse = 1.0 / safe_errors
    return inverse / np.sum(inverse)


# Evaluate Quantifier Recommender with Leave-One-Out
def loo_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, recommender_eval_path: str = None):
    """Leave-one-dataset-out evaluation of the per-quantifier error regressors.

    For every quantifier in ``model_dict`` and every dataset in the second
    index level of ``evaluation_table``, a fresh clone of the quantifier's
    regressor is fitted on all other datasets and predicts the held-out
    dataset's ``abs_error``. Per dataset, the quantifiers are then ranked by
    predicted and by true error, and inverse-error weights are derived for
    both rankings.

    Parameters
    ----------
    model_dict : dict
        quantifier -> estimator (only used as a template; it is cloned).
    meta_features_table : pandas.DataFrame
        Meta-features indexed by dataset label.
    not_aggregated_evaluation_table : pandas.DataFrame
        Table with ``quantifier`` and ``dataset`` columns; returned sorted by
        them. The caller's object is not modified.
    evaluation_table : pandas.DataFrame
        Two-level MultiIndex whose second level is named ``dataset``, with an
        ``abs_error`` column.
    recommender_eval_path : str, optional
        When given, the recommender evaluation table is also written there
        as CSV.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(recommender_evaluation_table, not_agg_evaluation_table)``. The
        first is indexed by dataset and holds, per cell, the ranking (list),
        the weights (ndarray) and the errors (list) for the predicted and the
        true ordering.
    """
    aux_recommender_evaluation_table = pd.DataFrame(columns=["predicted_error", "true_error"], index=evaluation_table.index)
    for quantifier, recommender in model_dict.items():
        # abs_error of this quantifier, indexed by dataset label.
        targets = evaluation_table.loc[quantifier]['abs_error']
        for dataset in evaluation_table.index.levels[1]:
            X_test = np.asarray(meta_features_table.loc[dataset].values).reshape(1, -1)
            y_test = targets.loc[dataset]

            # Training fold: every other dataset. Features are selected by
            # label so they stay paired with their targets regardless of the
            # row order of the two tables.
            y_train = targets.drop(index=dataset)
            X_train = meta_features_table.loc[y_train.index].values

            # New clone per fold so no fitted state can carry over.
            fold_model = clone(recommender)
            fold_model.fit(X_train, y_train.values)
            predicted_error = fold_model.predict(X_test)[0]

            aux_recommender_evaluation_table.loc[(quantifier, dataset)] = [predicted_error, y_test]

    datasets = aux_recommender_evaluation_table.index.get_level_values('dataset').unique()
    result_columns = ["predicted_ranking", "predicted_ranking_weights", "predicted_ranking_mae",
                      "true_ranking", "true_ranking_weights", "true_ranking_mae"]

    # Collect one record per dataset and build the frame once; assigning a
    # row of sequences through .loc is fragile for list/array-valued cells.
    records = []
    for dataset in datasets:
        filtered_result = aux_recommender_evaluation_table.xs(dataset, level='dataset')

        by_predicted = filtered_result.sort_values(by='predicted_error')
        predicted_ranking = by_predicted.index.tolist()
        predicted_ranking_mae = by_predicted['predicted_error'].tolist()
        predicted_ranking_weights = _inverse_error_weights(predicted_ranking_mae)

        by_true = filtered_result.sort_values(by='true_error')
        true_ranking = by_true.index.tolist()
        true_ranking_mae = by_true['true_error'].tolist()
        true_ranking_weights = _inverse_error_weights(true_ranking_mae)

        records.append(dict(zip(result_columns, [predicted_ranking, predicted_ranking_weights, predicted_ranking_mae,
                                                 true_ranking, true_ranking_weights, true_ranking_mae])))

    recommender_evaluation_table = pd.DataFrame(records, index=datasets, columns=result_columns)

    if recommender_eval_path is not None:
        recommender_evaluation_table.to_csv(recommender_eval_path)

    # sort_values returns a new frame, so the caller's table stays untouched.
    not_agg_evaluation_table = (
        not_aggregated_evaluation_table
        .sort_values(by=['quantifier', 'dataset'])
        .reset_index(drop=True)
    )

    return recommender_evaluation_table, not_agg_evaluation_table


In [3]:
from sklearn.neighbors import KNeighborsRegressor
from ensemble_quantifier import EnsembleQuantifier

# Meta-learner for this run: k-nearest-neighbours regressor with default settings.
model = KNeighborsRegressor()

meta_features_table = meta_features
evaluation_table = reg_quantifiers_eval_agg
not_aggregated_evaluation_table = reg_quantifiers_eval

# Everything this cell produces for the KNN regressor goes under this folder.
# DataFrame.to_csv does not create missing directories, so create it first.
KNN_RESULTS_DIR = RESULTS_DIR + "neigh_reg/"
os.makedirs(KNN_RESULTS_DIR, exist_ok=True)

model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
REG_KNN_recommender_eval, REG_KNN_quantifiers_eval = loo_reg(
    model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table,
    RESULTS_DIR + "neigh_regressor_recommendation_eval.csv",
)

# Persist both tables so the ensemble evaluation can be re-run without refitting.
REG_KNN_recommender_eval.to_csv(KNN_RESULTS_DIR + "neigh_reg_recommender_eval.csv")
REG_KNN_quantifiers_eval.to_csv(KNN_RESULTS_DIR + "neigh_reg_quantifiers_eval.csv")

ensemble_qtf = EnsembleQuantifier()
ensemble_qtf.evaluation("regression", REG_KNN_recommender_eval, REG_KNN_quantifiers_eval, KNN_RESULTS_DIR + "reg_ensemble_quantifier_evaluation_table_KNN.csv")

Finished 1043_ada_agnostic
Finished 1460_banana
Finished 1462_banknote-authentication
Finished 1466_cardiotocography
Finished 1475_first-order-theorem-proving
Finished 1479_hill-valley
Finished 1485_madelon
Finished 1489_phoneme
Finished 1494_qsar-biodeg
Finished 1496_ringnorm
Finished 1497_wall-robot-navigation
Finished 1504_steel-plates-fault
Finished 1507_twonorm
Finished 1526_wall-robot-navigation
Finished 1535_volcanoes-b5
Finished 1538_volcanoes-d1
Finished 1539_volcanoes-d2
Finished 1540_volcanoes-d3
Finished 1541_volcanoes-d4
Finished 1566_hill-valley
Finished 182_satimage
Finished 23_cmc
Finished 28_optdigits
Finished 294_satellite_image
Finished 2dplanes
Finished 30_page-blocks
Finished 312_scene
Finished 375_JapaneseVowels
Finished 40474_thyroid-allbp
Finished 40475_thyroid-allhyper
Finished 40733_yeast
Finished 44_spambase
Finished 4538_GesturePhaseSegmentationProcessed
Finished 60_waveform-5000
Finished 679_rmftsa_sleepdata
Finished AedesQuinx
Finished AedesSex
Finished Ar

Unnamed: 0,quantifier,dataset,sample_size,sampling_seed,iteration,alpha,pred_prev,abs_error,run_time
0,(REG)Top-1,1043_ada_agnostic,100,623,1,0.0,0.062761,0.062761,0.057738
1,(REG)Top-1,1043_ada_agnostic,100,243,2,0.0,0.037312,0.037312,0.045248
2,(REG)Top-1,1043_ada_agnostic,100,18,3,0.0,0.105745,0.105745,0.047769
3,(REG)Top-1,1043_ada_agnostic,100,549,4,0.0,0.000045,0.000045,0.045466
4,(REG)Top-1,1043_ada_agnostic,100,447,5,0.0,0.055759,0.055759,0.045566
...,...,...,...,...,...,...,...,...,...
439995,(REG)Top-9+W,winetype,100,554,6,1.0,0.999574,0.000426,0.618714
439996,(REG)Top-9+W,winetype,100,379,7,1.0,0.999617,0.000383,0.611238
439997,(REG)Top-9+W,winetype,100,348,8,1.0,0.999074,0.000926,0.672699
439998,(REG)Top-9+W,winetype,100,840,9,1.0,0.996340,0.003660,0.612316


In [None]:
from sklearn.ensemble import RandomForestRegressor

# Meta-learner for this run: random forest. The seed is fixed so that the
# predicted rankings are reproducible from one notebook run to the next.
model = RandomForestRegressor(n_jobs=-1, random_state=42)

meta_features_table = meta_features
evaluation_table = reg_quantifiers_eval_agg
not_aggregated_evaluation_table = reg_quantifiers_eval

# Everything this cell produces for the RF regressor goes under this folder.
# DataFrame.to_csv does not create missing directories, so create it first.
RF_RESULTS_DIR = RESULTS_DIR + "rf_reg/"
os.makedirs(RF_RESULTS_DIR, exist_ok=True)

model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
REG_RF_recommender_eval, REG_RF_quantifiers_eval = loo_reg(
    model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table,
    RESULTS_DIR + "rf_regressor_recommendation_eval.csv",
)

# Persist both tables so the ensemble evaluation can be re-run without refitting.
REG_RF_recommender_eval.to_csv(RF_RESULTS_DIR + "rf_reg_recommender_eval.csv")
REG_RF_quantifiers_eval.to_csv(RF_RESULTS_DIR + "rf_reg_quantifiers_eval.csv")

ensemble_qtf = EnsembleQuantifier()
ensemble_qtf.evaluation("regression", REG_RF_recommender_eval, REG_RF_quantifiers_eval, RF_RESULTS_DIR + "reg_ensemble_quantifier_evaluation_table_RF.csv")

In [None]:
from xgboost import XGBRegressor

# Meta-learner for this run: gradient-boosted trees (XGBoost), all cores.
model = XGBRegressor(n_jobs=-1)

meta_features_table = meta_features
evaluation_table = reg_quantifiers_eval_agg
not_aggregated_evaluation_table = reg_quantifiers_eval

# Everything this cell produces for the XGBoost regressor goes under this folder.
# DataFrame.to_csv does not create missing directories, so create it first.
XGBR_RESULTS_DIR = RESULTS_DIR + "xgbr_reg/"
os.makedirs(XGBR_RESULTS_DIR, exist_ok=True)

model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
REG_XGBR_recommender_eval, REG_XGBR_quantifiers_eval = loo_reg(
    model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table,
    RESULTS_DIR + "xgbr_regressor_recommendation_eval.csv",
)

# Persist both tables so the ensemble evaluation can be re-run without refitting.
REG_XGBR_recommender_eval.to_csv(XGBR_RESULTS_DIR + "xgbr_reg_recommender_eval.csv")
REG_XGBR_quantifiers_eval.to_csv(XGBR_RESULTS_DIR + "xgbr_reg_quantifiers_eval.csv")

ensemble_qtf = EnsembleQuantifier()
ensemble_qtf.evaluation("regression", REG_XGBR_recommender_eval, REG_XGBR_quantifiers_eval, XGBR_RESULTS_DIR + "reg_ensemble_quantifier_evaluation_table_XGBR.csv")

In [None]:
from sklearn.svm import SVR

# Meta-learner for this run: support-vector regressor with default settings.
model = SVR()

meta_features_table = meta_features
evaluation_table = reg_quantifiers_eval_agg
not_aggregated_evaluation_table = reg_quantifiers_eval

# Everything this cell produces for the SVR regressor goes under this folder.
# DataFrame.to_csv does not create missing directories, so create it first.
SVR_RESULTS_DIR = RESULTS_DIR + "svr_reg/"
os.makedirs(SVR_RESULTS_DIR, exist_ok=True)

model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
REG_SVR_recommender_eval, REG_SVR_quantifiers_eval = loo_reg(
    model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table,
    RESULTS_DIR + "svr_regressor_recommendation_eval.csv",
)

# Persist both tables so the ensemble evaluation can be re-run without refitting.
REG_SVR_recommender_eval.to_csv(SVR_RESULTS_DIR + "svr_reg_recommender_eval.csv")
REG_SVR_quantifiers_eval.to_csv(SVR_RESULTS_DIR + "svr_reg_quantifiers_eval.csv")

ensemble_qtf = EnsembleQuantifier()
ensemble_qtf.evaluation("regression", REG_SVR_recommender_eval, REG_SVR_quantifiers_eval, SVR_RESULTS_DIR + "reg_ensemble_quantifier_evaluation_table_SVR.csv")

In [None]:
# import pandas as pd
# from ensemble_quantifier import EnsembleQuantifier

# ## REGRESSORS (optional: re-run only the ensemble evaluation from the CSV files saved by the cells above)

# # KNN
# REG_KNN_recommender_eval = pd.read_csv(f"./plot_data/experiment-1/results/neigh_reg/neigh_reg_recommender_eval.csv", index_col=0)
# REG_KNN_quantifiers_eval = pd.read_csv(f"./plot_data/experiment-1/results/neigh_reg/neigh_reg_quantifiers_eval.csv", index_col=0)
# ensemble_qtf = EnsembleQuantifier()
# ensemble_qtf.evaluation("regression", REG_KNN_recommender_eval, REG_KNN_quantifiers_eval, f"./plot_data/experiment-1/results/neigh_reg/reg_ensemble_quantifier_evaluation_table_KNN.csv")

# # RF
# REG_RF_recommender_eval = pd.read_csv(f"./plot_data/experiment-1/results/rf_reg/rf_reg_recommender_eval.csv", index_col=0)
# REG_RF_quantifiers_eval = pd.read_csv(f"./plot_data/experiment-1/results/rf_reg/rf_reg_quantifiers_eval.csv", index_col=0)
# ensemble_qtf = EnsembleQuantifier()
# ensemble_qtf.evaluation("regression", REG_RF_recommender_eval, REG_RF_quantifiers_eval, f"./plot_data/experiment-1/results/rf_reg/reg_ensemble_quantifier_evaluation_table_RF.csv")

# # XGBOOST
# REG_XGBR_recommender_eval = pd.read_csv(f"./plot_data/experiment-1/results/xgbr_reg/xgbr_reg_recommender_eval.csv", index_col=0)
# REG_XGBR_quantifiers_eval = pd.read_csv(f"./plot_data/experiment-1/results/xgbr_reg/xgbr_reg_quantifiers_eval.csv", index_col=0)
# ensemble_qtf = EnsembleQuantifier()
# ensemble_qtf.evaluation("regression", REG_XGBR_recommender_eval, REG_XGBR_quantifiers_eval, f"./plot_data/experiment-1/results/xgbr_reg/reg_ensemble_quantifier_evaluation_table_XGBR.csv")

# # SVR
# REG_SVR_recommender_eval = pd.read_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_recommender_eval.csv", index_col=0)
# REG_SVR_quantifiers_eval = pd.read_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_quantifiers_eval.csv", index_col=0)
# ensemble_qtf = EnsembleQuantifier()
# ensemble_qtf.evaluation("regression", REG_SVR_recommender_eval, REG_SVR_quantifiers_eval, f"./plot_data/experiment-1/results/svr_reg/reg_ensemble_quantifier_evaluation_table_SVR.csv")