# Data

In [None]:
import pandas as pd
import numpy as np
from sklearn.base import clone

# ROOT_DIR is the prefix of every input CSV read below; RESULTS_DIR is the
# prefix used by the run cells when they write recommender evaluation CSVs.
ROOT_DIR = "./plot_data/experiment-1/tables/"
RESULTS_DIR = "./plot_data/experiment-1/results/"

# meta_features = pd.read_csv(ROOT_DIR + 'meta_features_table.csv', index_col=0) # MinMax Scaled
# Meta-feature table. The first CSV column becomes the index, which the
# loo_* functions use as the dataset label (`meta_features_table.loc[dataset]`).
meta_features = pd.read_csv(ROOT_DIR + 'meta_features_table_alt.csv', index_col=0) # MinMax Scaled

# Per-run (not aggregated) quantifier evaluation tables, one for the
# regression recommenders and one for the KNN recommender.
reg_quantifiers_eval = pd.read_csv(ROOT_DIR + 'reg_quantifiers_evaluation_table.csv', index_col=0)
knn_quantifiers_eval = pd.read_csv(ROOT_DIR + 'knn_quantifiers_evaluation_table.csv', index_col=0)

# Aggregated evaluation tables. The first two CSV columns form a two-level
# index; the functions in this notebook address those levels as
# 'quantifier' and 'dataset'.
reg_quantifiers_eval_agg = pd.read_csv(ROOT_DIR + 'reg_quantifiers_evaluation_table_agg.csv', index_col=[0,1])
knn_quantifiers_eval_agg = pd.read_csv(ROOT_DIR + 'knn_quantifiers_evaluation_table_agg.csv', index_col=[0,1])

# NOTE: `arr_table` is rebound by every KNN run cell with the table returned
# by fit_knn, so this pre-computed copy is only used until one of them runs.
arr_table = pd.read_csv(ROOT_DIR + 'arr_table.csv', index_col=0)

# Util

In [None]:
def fit_regressor(model, meta_features_table, not_aggregated_evaluation_table, evaluation_table):
    """Fit one independent copy of ``model`` per quantifier.

    Args:
        model: Unfitted estimator used as a template; it is cloned for every
            quantifier and never fitted itself.
        meta_features_table: DataFrame whose values are used as the feature
            matrix for every quantifier.
        not_aggregated_evaluation_table: Unused; kept so the signature
            matches the other helpers in this notebook.
        evaluation_table: DataFrame with a two-level index whose first level
            holds the quantifier names and which has an ``'abs_error'``
            column used as the regression target.

    Returns:
        dict mapping each quantifier name to its fitted clone of ``model``.
    """
    features = meta_features_table.values
    fitted_by_quantifier = {}

    for quantifier_name in evaluation_table.index.levels[0].tolist():
        targets = evaluation_table.loc[quantifier_name]['abs_error'].values
        regressor = clone(model)
        fitted_by_quantifier[quantifier_name] = regressor
        regressor.fit(features, targets)

    return fitted_by_quantifier

def _inverse_error_weights(errors, floor=1e-6):
    """Turn a sequence of errors into weights proportional to ``1 / error``.

    Non-positive errors are replaced by ``floor`` before inverting, so an
    exact zero cannot trigger a division by zero and a negative regressor
    output cannot yield a negative weight. The returned weights sum to 1.
    """
    safe_errors = np.asarray(errors, dtype=float)
    safe_errors = np.where(safe_errors <= 0, floor, safe_errors)
    inverse = 1.0 / safe_errors
    return inverse / np.sum(inverse)


# Evaluate Quantifier Recommender with Leave-One-Out
def loo_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, recommender_eval_path: str = None):
    """Leave-one-dataset-out evaluation of the per-quantifier regressors.

    For every (quantifier, dataset) pair a fresh clone of the quantifier's
    regressor is fitted on all other datasets and asked to predict the
    ``'abs_error'`` of the held-out one. Per dataset, quantifiers are then
    ranked by predicted and by true error and given inverse-error weights.

    Args:
        model_dict: Mapping quantifier name -> estimator; each estimator is
            cloned, so the fitted state of the entries is not used.
        meta_features_table: DataFrame indexed by dataset label.
        not_aggregated_evaluation_table: DataFrame with ``'quantifier'`` and
            ``'dataset'`` columns; returned as a sorted copy.
        evaluation_table: DataFrame indexed by (quantifier, dataset) with an
            ``'abs_error'`` column; the second level must be named
            ``'dataset'``.
        recommender_eval_path: If given, the ranking table is written there
            as CSV.

    Returns:
        Tuple ``(recommender_evaluation_table, not_agg_evaluation_table)``:
        the per-dataset ranking table, and a copy of
        ``not_aggregated_evaluation_table`` sorted by quantifier and dataset
        with a fresh integer index.
    """
    aux_recommender_evaluation_table = pd.DataFrame(columns=["predicted_error", "true_error"], index=evaluation_table.index)
    for quantifier, recommender in model_dict.items():
        for dataset in evaluation_table.index.levels[1]:
            recommender_ = clone(recommender)
            X_test = np.array(meta_features_table.loc[dataset].values).reshape(1, -1)
            y_test = evaluation_table.loc[quantifier, dataset]['abs_error']

            X_train = meta_features_table.drop(index=dataset).values
            y_train = evaluation_table.loc[quantifier].drop(index=dataset)['abs_error'].values

            recommender_.fit(X_train, y_train)
            predicted_error = recommender_.predict(X_test)[0]

            aux_recommender_evaluation_table.loc[(quantifier, dataset)] = [predicted_error, y_test]

    datasets = aux_recommender_evaluation_table.index.get_level_values('dataset').unique()
    recommender_evaluation_table = pd.DataFrame(columns=["predicted_ranking", "predicted_ranking_weights", "predicted_ranking_mae",
                                                            "true_ranking", "true_ranking_weights", "true_ranking_mae"], index=datasets)
    for dataset in datasets:
        filtered_result = aux_recommender_evaluation_table.xs(dataset, level='dataset')

        predicted_ranking = filtered_result.sort_values(by='predicted_error').index.tolist()
        predicted_ranking_mae = [filtered_result.loc[quantifier, 'predicted_error'] for quantifier in predicted_ranking]
        # Predicted errors get the same floor as the true errors: a regressor
        # may output exactly 0 or a negative value for an absolute error.
        predicted_ranking_weights = _inverse_error_weights(predicted_ranking_mae)

        true_ranking = filtered_result.sort_values(by='true_error').index.tolist()
        true_ranking_mae = [filtered_result.loc[quantifier, 'true_error'] for quantifier in true_ranking]
        true_ranking_weights = _inverse_error_weights(true_ranking_mae)

        recommender_evaluation_table.loc[dataset] = [predicted_ranking, predicted_ranking_weights, predicted_ranking_mae,
                                                        true_ranking, true_ranking_weights, true_ranking_mae]

    if recommender_eval_path is not None:
        recommender_evaluation_table.to_csv(recommender_eval_path)

    # Work on a copy so the caller's table keeps its order and index.
    not_agg_evaluation_table = not_aggregated_evaluation_table.copy(deep=True)
    not_agg_evaluation_table.sort_values(by=['quantifier', 'dataset'], inplace=True)
    not_agg_evaluation_table.reset_index(drop=True, inplace=True)

    return recommender_evaluation_table, not_agg_evaluation_table


In [None]:
from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from skopt import BayesSearchCV

def _inverse_error_weights(errors, floor=1e-6):
    """Turn a sequence of errors into weights proportional to ``1 / error``.

    Non-positive errors are replaced by ``floor`` before inverting, so an
    exact zero cannot trigger a division by zero and a negative regressor
    output cannot yield a negative weight. The returned weights sum to 1.
    """
    safe_errors = np.asarray(errors, dtype=float)
    safe_errors = np.where(safe_errors <= 0, floor, safe_errors)
    inverse = 1.0 / safe_errors
    return inverse / np.sum(inverse)


# Evaluate Quantifier Recommender with Leave-One-Out
def loo_grid_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, param_grid, recommender_eval_path: str = None):
    """Leave-one-dataset-out evaluation with nested hyper-parameter search.

    Same protocol as ``loo_reg``, except that inside every outer
    leave-one-out fold the regressor is tuned with ``BayesSearchCV`` on the
    training datasets only (5-fold inner CV, 25 iterations, MAE scoring).

    Args:
        model_dict: Mapping quantifier name -> estimator; each estimator is
            cloned, so the fitted state of the entries is not used.
        meta_features_table: DataFrame indexed by dataset label.
        not_aggregated_evaluation_table: DataFrame with ``'quantifier'`` and
            ``'dataset'`` columns; returned as a sorted copy.
        evaluation_table: DataFrame indexed by (quantifier, dataset) with an
            ``'abs_error'`` column; the second level must be named
            ``'dataset'``.
        param_grid: Search space passed to ``BayesSearchCV`` as
            ``search_spaces``.
        recommender_eval_path: If given, the ranking table is written there
            as CSV.

    Returns:
        Tuple ``(recommender_evaluation_table, not_agg_evaluation_table)``:
        the per-dataset ranking table, and a copy of
        ``not_aggregated_evaluation_table`` sorted by quantifier and dataset
        with a fresh integer index.
    """
    aux_recommender_evaluation_table = pd.DataFrame(columns=["predicted_error", "true_error"], index=evaluation_table.index)
    for quantifier, recommender in model_dict.items():
        for dataset in evaluation_table.index.levels[1]:
            recommender_ = clone(recommender)
            X_test = np.array(meta_features_table.loc[dataset].values).reshape(1, -1)
            y_test = evaluation_table.loc[quantifier, dataset]['abs_error']

            X_train = meta_features_table.drop(index=dataset).values
            y_train = evaluation_table.loc[quantifier].drop(index=dataset)['abs_error'].values

            # Nested tuning: the inner CV runs inside the outer LOO fold, so
            # the held-out dataset never influences the chosen parameters.
            # (An exhaustive GridSearchCV with the same scoring and cv was
            # used here before being replaced by the Bayesian search.)
            inner_cv = KFold(n_splits=5, shuffle=True, random_state=42)
            gs = BayesSearchCV(
                estimator=recommender_,
                search_spaces=param_grid,
                n_iter=25,
                scoring="neg_mean_absolute_error",
                cv=inner_cv,
                n_jobs=-1,
                refit=True,
                verbose=0,
                random_state=42
            )
            gs.fit(X_train, y_train)

            predicted_error = gs.predict(X_test)[0]

            aux_recommender_evaluation_table.loc[(quantifier, dataset)] = [predicted_error, y_test]

            print(f"Finished {dataset}")

        print(f"Finished {quantifier}")

    datasets = aux_recommender_evaluation_table.index.get_level_values('dataset').unique()
    recommender_evaluation_table = pd.DataFrame(columns=["predicted_ranking", "predicted_ranking_weights", "predicted_ranking_mae",
                                                            "true_ranking", "true_ranking_weights", "true_ranking_mae"], index=datasets)
    for dataset in datasets:
        filtered_result = aux_recommender_evaluation_table.xs(dataset, level='dataset')

        predicted_ranking = filtered_result.sort_values(by='predicted_error').index.tolist()
        predicted_ranking_mae = [filtered_result.loc[quantifier, 'predicted_error'] for quantifier in predicted_ranking]
        # Predicted errors get the same floor as the true errors: a regressor
        # may output exactly 0 or a negative value for an absolute error.
        predicted_ranking_weights = _inverse_error_weights(predicted_ranking_mae)

        true_ranking = filtered_result.sort_values(by='true_error').index.tolist()
        true_ranking_mae = [filtered_result.loc[quantifier, 'true_error'] for quantifier in true_ranking]
        true_ranking_weights = _inverse_error_weights(true_ranking_mae)

        recommender_evaluation_table.loc[dataset] = [predicted_ranking, predicted_ranking_weights, predicted_ranking_mae,
                                                        true_ranking, true_ranking_weights, true_ranking_mae]

    if recommender_eval_path is not None:
        recommender_evaluation_table.to_csv(recommender_eval_path)

    # Work on a copy so the caller's table keeps its order and index.
    not_agg_evaluation_table = not_aggregated_evaluation_table.copy(deep=True)
    not_agg_evaluation_table.sort_values(by=['quantifier', 'dataset'], inplace=True)
    not_agg_evaluation_table.reset_index(drop=True, inplace=True)

    return recommender_evaluation_table, not_agg_evaluation_table


In [None]:
from sklearn.preprocessing import MinMaxScaler


def fit_knn(model, meta_features_table, not_aggregated_evaluation_table, evaluation_table):
    """Build the ARR table and fit the neighbour model on the meta-features.

    For every dataset and quantifier, the ARR score is the mean over all
    other quantifiers of ``inv_abs_error_i / inv_abs_error_j`` divided by
    ``1 + alpha * log10(run_time_i / run_time_j)``. ``alpha`` is fixed at 0
    here, so the run-time term evaluates to 1 for finite, positive run times.

    Args:
        model: Object with a ``fit(X)`` method; fitted in place on the
            meta-feature values.
        meta_features_table: DataFrame of meta-features, one row per dataset.
        not_aggregated_evaluation_table: Unused; kept for a uniform signature.
        evaluation_table: DataFrame whose index levels are named
            ``'quantifier'`` and ``'dataset'`` and which has
            ``'inv_abs_error'`` and ``'run_time'`` columns.

    Returns:
        Tuple ``(model, arr_table)`` where ``arr_table`` has one row per
        dataset and one column per quantifier.
    """
    flat_eval = evaluation_table.reset_index()
    dataset_names = flat_eval['dataset'].unique().tolist()
    quantifier_names = flat_eval['quantifier'].unique().tolist()

    arr_table = pd.DataFrame(columns=quantifier_names)
    alpha = 0
    n_rivals = len(quantifier_names) - 1

    for dataset_name in dataset_names:
        dataset_rows = flat_eval[flat_eval['dataset'] == dataset_name]
        arr_scores = []
        for quantifier_name in quantifier_names:
            # Split this dataset's rows into the quantifier under
            # evaluation and all of its competitors.
            is_own = dataset_rows['quantifier'] == quantifier_name
            own_rows = dataset_rows[is_own]
            rival_rows = dataset_rows[~is_own]

            accuracy_ratio = own_rows['inv_abs_error'].to_numpy() / rival_rows['inv_abs_error'].to_numpy()
            runtime_ratio = own_rows['run_time'].to_numpy() / rival_rows['run_time'].to_numpy()
            runtime_penalty = 1 + (alpha * np.log10(runtime_ratio))

            arr_scores.append(np.sum(accuracy_ratio / runtime_penalty) / n_rivals)
        arr_table.loc[dataset_name] = arr_scores

    # e.g. NearestNeighbors(n_neighbors=n_neighbors, metric='manhattan', n_jobs=-1)
    model.fit(meta_features_table.values)

    return model, arr_table



def loo_knn(model, arr_table, meta_features_table, not_aggregated_evaluation_table, evaluation_table,  recommender_eval_path: str = None):
    """Leave-one-dataset-out evaluation of the ARR nearest-neighbour recommender.

    For each dataset in ``arr_table`` the meta-features of all other datasets
    are min-max scaled, a clone of ``model`` is fitted on them, and the ARR
    vector of the held-out dataset is predicted as the inverse-distance
    weighted mean of its neighbours' ARR vectors. Quantifiers are then
    ranked (highest ARR first) for both the predicted and the true vector.

    Args:
        model: Estimator exposing ``fit`` and ``kneighbors``; it is cloned,
            so the passed instance is left untouched.
        arr_table: DataFrame of ARR scores, one row per dataset and one
            column per quantifier.
        meta_features_table: DataFrame of meta-features indexed by dataset
            label.
        not_aggregated_evaluation_table: DataFrame with ``'quantifier'`` and
            ``'dataset'`` columns; returned as a sorted copy.
        evaluation_table: Unused; kept for a uniform signature.
        recommender_eval_path: If given, the ranking table is written there
            as CSV.

    Returns:
        Tuple ``(recommender_evaluation_table, not_agg_evaluation_table)``:
        the per-dataset ranking table, and a copy of
        ``not_aggregated_evaluation_table`` sorted by quantifier and dataset
        with a fresh integer index.
    """
    recommender_evaluation_table = pd.DataFrame(columns=["predicted_ranking",
                                                            "predicted_ranking_weights",
                                                            "predicted_ranking_arr",
                                                            "true_ranking",
                                                            "true_ranking_weights",
                                                            "true_ranking_arr"], index=arr_table.index.tolist())
    transform_pipeline_ = MinMaxScaler()
    recommender_ = clone(model)
    quantifiers = arr_table.columns
    tolerance = 1e-10  # keeps the inverse distance finite for exact matches

    for dataset in arr_table.index.tolist():
        X_test = np.array(meta_features_table.loc[dataset].values).reshape(1, -1)
        y_test = arr_table.loc[dataset].values

        train_meta_features = meta_features_table.drop(index=dataset)
        X_train = train_meta_features.values
        y_train = (arr_table.drop(index=dataset)).values

        # The scaler is refitted on the training datasets of every fold so
        # the held-out dataset never influences the scaling.
        transform_pipeline_.fit(X_train)
        transformed_train = transform_pipeline_.transform(X_train)
        recommender_.fit(transformed_train, y_train)

        transformed_test = transform_pipeline_.transform(X_test)
        distances, indices = recommender_.kneighbors(transformed_test)
        distances, indices = distances[0], indices[0]

        inverse_distances = 1 / (distances + tolerance)
        weights = inverse_distances / np.sum(inverse_distances)

        new_arr_array = np.zeros(len(quantifiers), dtype=np.float64)
        for idx, w in zip(indices, weights):
            # kneighbors returns row positions within X_train, i.e. within the
            # table that no longer contains the held-out dataset. Resolving
            # them against the full meta_features_table would shift every
            # neighbour located after the held-out row by one and could even
            # select the held-out dataset itself.
            neighbour_label = train_meta_features.index[idx]
            new_arr_array += np.asarray(arr_table.loc[neighbour_label].values, dtype=np.float64) * w

        quantifier_arr_pairs = sorted(zip(quantifiers, new_arr_array), key=lambda x: x[1], reverse=True)
        predicted_ranking, predicted_arr = zip(*quantifier_arr_pairs)
        predicted_ranking_weights = np.array(predicted_arr) / np.sum(predicted_arr)

        quantifier_arr_pairs = sorted(zip(quantifiers, y_test), key=lambda x: x[1], reverse=True)
        true_ranking, true_arr = zip(*quantifier_arr_pairs)
        true_ranking_weights = np.array(true_arr) / np.sum(true_arr)

        recommender_evaluation_table.loc[dataset] = [predicted_ranking, predicted_ranking_weights, predicted_arr,
                                                        true_ranking, true_ranking_weights, true_arr]

    if recommender_eval_path is not None:
        recommender_evaluation_table.to_csv(recommender_eval_path)

    # Work on a copy so the caller's table keeps its order and index.
    not_agg_evaluation_table = not_aggregated_evaluation_table.copy(deep=True)
    not_agg_evaluation_table.sort_values(by=['quantifier', 'dataset'], inplace=True)
    not_agg_evaluation_table.reset_index(drop=True, inplace=True)

    return recommender_evaluation_table, not_agg_evaluation_table

In [None]:
import numpy as np
from sklearn.base import BaseEstimator
from sklearn.neighbors import NearestNeighbors

class ARRKNN(BaseEstimator):
    """Nearest-neighbour predictor of a multi-output target vector.

    ``fit`` stores the target rows and builds a ``NearestNeighbors`` index on
    the features; ``predict`` returns, for every query row, the
    inverse-distance weighted mean of its neighbours' target rows.
    ``tolerance`` is added to each distance before inverting.
    """

    def __init__(self, n_neighbors=5, metric="minkowski", p=2, algorithm="auto", tolerance=1e-10):
        self.n_neighbors = n_neighbors
        self.metric = metric
        self.p = p
        self.algorithm = algorithm
        self.tolerance = tolerance

    def fit(self, X, y):
        """Store ``y`` and index ``X``; returns ``self``.

        ``y`` is expected to have one row per sample, e.g.
        ``(n_samples, n_quantifiers)`` with one ARR vector per dataset.
        """
        self._y_train = np.asarray(y)

        neighbour_settings = dict(
            n_neighbors=self.n_neighbors,
            metric=self.metric,
            p=self.p,
            algorithm=self.algorithm,
        )
        self._nn = NearestNeighbors(**neighbour_settings)
        self._nn.fit(X)
        return self

    def _blend_neighbours(self, row_distances, row_indices):
        """Inverse-distance weighted mean of the neighbours' target rows."""
        raw_weights = 1.0 / (row_distances + self.tolerance)
        weights = raw_weights / np.sum(raw_weights)
        return np.sum(self._y_train[row_indices] * weights[:, None], axis=0)

    def predict(self, X):
        """Return one blended target vector per row of ``X``."""
        all_distances, all_indices = self._nn.kneighbors(X)
        blended = [
            self._blend_neighbours(row_distances, row_indices)
            for row_distances, row_indices in zip(all_distances, all_indices)
        ]
        return np.asarray(blended)  # (n_samples, n_quantifiers)


import numpy as np
import pandas as pd
from sklearn.base import clone
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import GridSearchCV, KFold

def loo_grid_knn(model, arr_table, meta_features_table, not_aggregated_evaluation_table, evaluation_table,
                 param_grid, recommender_eval_path: str = None):
    """Leave-one-dataset-out evaluation of an ARR predictor with nested grid search.

    For each dataset in ``arr_table`` the remaining datasets are min-max
    scaled, a clone of ``model`` is tuned on them with ``GridSearchCV``
    (5-fold inner CV, MAE scoring), and the best estimator predicts the ARR
    vector of the held-out dataset. Quantifiers are then ranked (highest ARR
    first) for both the predicted and the true vector.

    Args:
        model: Estimator whose ``predict`` returns one ARR vector per row
            (e.g. ``ARRKNN``); it is cloned for every fold.
        arr_table: DataFrame of ARR scores, one row per dataset and one
            column per quantifier.
        meta_features_table: DataFrame of meta-features indexed by dataset
            label; must contain every dataset of ``arr_table``.
        not_aggregated_evaluation_table: DataFrame with ``'quantifier'`` and
            ``'dataset'`` columns; returned as a sorted copy.
        evaluation_table: Unused; kept for a uniform signature.
        param_grid: Grid passed to ``GridSearchCV``.
        recommender_eval_path: If given, the ranking table is written there
            as CSV.

    Returns:
        Tuple ``(recommender_evaluation_table, not_agg_evaluation_table)``:
        the per-dataset ranking table, and a copy of
        ``not_aggregated_evaluation_table`` sorted by quantifier and dataset
        with a fresh integer index.
    """
    recommender_evaluation_table = pd.DataFrame(
        columns=["predicted_ranking",
                 "predicted_ranking_weights",
                 "predicted_ranking_arr",
                 "true_ranking",
                 "true_ranking_weights",
                 "true_ranking_arr"],
        index=arr_table.index.tolist()
    )
    quantifiers = arr_table.columns

    for dataset in arr_table.index.tolist():
        X_test = meta_features_table.loc[dataset].values.reshape(1, -1)
        y_test = arr_table.loc[dataset].values  # true ARR vector

        # Select features and targets with the same label list so that row i
        # of X_train and row i of y_train always describe the same dataset,
        # even when the two tables are stored in a different row order.
        train_labels = arr_table.index.drop(dataset)
        X_train = meta_features_table.loc[train_labels].values
        y_train = arr_table.loc[train_labels].values  # (n_train, n_quantifiers)

        # Scaling is fitted on the training part of the outer fold only.
        transform_pipeline_ = MinMaxScaler()
        transform_pipeline_.fit(X_train)
        transformed_train = transform_pipeline_.transform(X_train)
        transformed_test = transform_pipeline_.transform(X_test)

        # Nested (inner) tuning.
        inner_cv = KFold(n_splits=5, shuffle=True, random_state=42)

        gs = GridSearchCV(
            estimator=clone(model),
            param_grid=param_grid,
            scoring="neg_mean_absolute_error",  # works with multi-output: MAE is aggregated over the outputs
            cv=inner_cv,
            n_jobs=-1,
            refit=True,
            verbose=0
        )
        gs.fit(transformed_train, y_train)

        # Predicted ARR vector for the held-out dataset.
        new_arr_array = gs.best_estimator_.predict(transformed_test)[0]

        quantifier_arr_pairs = sorted(zip(quantifiers, new_arr_array), key=lambda x: x[1], reverse=True)
        predicted_ranking, predicted_arr = zip(*quantifier_arr_pairs)
        predicted_ranking_weights = np.array(predicted_arr) / np.sum(predicted_arr)

        quantifier_arr_pairs = sorted(zip(quantifiers, y_test), key=lambda x: x[1], reverse=True)
        true_ranking, true_arr = zip(*quantifier_arr_pairs)
        true_ranking_weights = np.array(true_arr) / np.sum(true_arr)

        recommender_evaluation_table.loc[dataset] = [
            predicted_ranking, predicted_ranking_weights, predicted_arr,
            true_ranking, true_ranking_weights, true_arr
        ]

    if recommender_eval_path is not None:
        recommender_evaluation_table.to_csv(recommender_eval_path)

    # Work on a copy so the caller's table keeps its order and index.
    not_agg_evaluation_table = not_aggregated_evaluation_table.copy(deep=True)
    not_agg_evaluation_table.sort_values(by=["quantifier", "dataset"], inplace=True)
    not_agg_evaluation_table.reset_index(drop=True, inplace=True)

    return recommender_evaluation_table, not_agg_evaluation_table



In [None]:
# RUN

# KNN

In [None]:
from sklearn.neighbors import NearestNeighbors
from ensemble_quantifier import EnsembleQuantifier

# KNN recommender using the single nearest dataset (Manhattan distance).
model = NearestNeighbors(n_neighbors=1, metric='manhattan', n_jobs=-1)

meta_features_table = meta_features
evaluation_table = knn_quantifiers_eval_agg
not_aggregated_evaluation_table = knn_quantifiers_eval

# Builds the ARR table from the aggregated evaluation and fits `model` on all
# meta-features; this rebinds the notebook-level `arr_table`.
knn, arr_table = fit_knn(model, meta_features_table, not_aggregated_evaluation_table, evaluation_table)

# Leave-one-out evaluation; the ranking table is also written under RESULTS_DIR.
# NOTE: this rebinds `knn_quantifiers_eval` to the sorted, re-indexed copy
# returned by loo_knn, which is what any KNN cell run afterwards receives.
knn_recommender_eval, knn_quantifiers_eval = loo_knn(knn, arr_table, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "knn_1_recommendation_eval.csv")
ensemble_qtf = EnsembleQuantifier()
# NOTE(review): the output path is hard-coded instead of built from
# RESULTS_DIR; presumably the knn_1/ directory must already exist — confirm.
ensemble_qtf.evaluation("knn", knn_recommender_eval, knn_quantifiers_eval, f"./plot_data/experiment-1/results/knn_1/knn_1_ensemble_quantifier_evaluation_table.csv")

In [None]:
from sklearn.neighbors import NearestNeighbors
from ensemble_quantifier import EnsembleQuantifier

# KNN recommender using the 3 nearest datasets (Manhattan distance).
model = NearestNeighbors(n_neighbors=3, metric='manhattan', n_jobs=-1)

meta_features_table = meta_features
evaluation_table = knn_quantifiers_eval_agg
not_aggregated_evaluation_table = knn_quantifiers_eval

# Builds the ARR table from the aggregated evaluation and fits `model` on all
# meta-features; this rebinds the notebook-level `arr_table`.
knn, arr_table = fit_knn(model, meta_features_table, not_aggregated_evaluation_table, evaluation_table)

# Leave-one-out evaluation; the ranking table is also written under RESULTS_DIR.
# NOTE: this rebinds `knn_quantifiers_eval` to the sorted, re-indexed copy
# returned by loo_knn, which is what any KNN cell run afterwards receives.
knn_recommender_eval, knn_quantifiers_eval = loo_knn(knn, arr_table, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "knn_3_recommendation_eval.csv")
ensemble_qtf = EnsembleQuantifier()
# NOTE(review): the output path is hard-coded instead of built from
# RESULTS_DIR; presumably the knn_3/ directory must already exist — confirm.
ensemble_qtf.evaluation("knn", knn_recommender_eval, knn_quantifiers_eval, f"./plot_data/experiment-1/results/knn_3/knn_3_ensemble_quantifier_evaluation_table.csv")

In [None]:
from sklearn.neighbors import NearestNeighbors
from ensemble_quantifier import EnsembleQuantifier

# KNN recommender using the 5 nearest datasets (Manhattan distance).
model = NearestNeighbors(n_neighbors=5, metric='manhattan', n_jobs=-1)

meta_features_table = meta_features
evaluation_table = knn_quantifiers_eval_agg
not_aggregated_evaluation_table = knn_quantifiers_eval

# Builds the ARR table from the aggregated evaluation and fits `model` on all
# meta-features; this rebinds the notebook-level `arr_table`.
knn, arr_table = fit_knn(model, meta_features_table, not_aggregated_evaluation_table, evaluation_table)

# Leave-one-out evaluation; the ranking table is also written under RESULTS_DIR.
# NOTE: this rebinds `knn_quantifiers_eval` to the sorted, re-indexed copy
# returned by loo_knn, which is what any KNN cell run afterwards receives.
knn_recommender_eval, knn_quantifiers_eval = loo_knn(knn, arr_table, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "knn_5_recommendation_eval.csv")
ensemble_qtf = EnsembleQuantifier()
# NOTE(review): the output path is hard-coded instead of built from
# RESULTS_DIR; presumably the knn_5/ directory must already exist — confirm.
ensemble_qtf.evaluation("knn", knn_recommender_eval, knn_quantifiers_eval, f"./plot_data/experiment-1/results/knn_5/knn_5_ensemble_quantifier_evaluation_table.csv")

In [None]:
from sklearn.neighbors import NearestNeighbors
from ensemble_quantifier import EnsembleQuantifier

# KNN recommender using the 7 nearest datasets (Manhattan distance).
model = NearestNeighbors(n_neighbors=7, metric='manhattan', n_jobs=-1)

meta_features_table = meta_features
evaluation_table = knn_quantifiers_eval_agg
not_aggregated_evaluation_table = knn_quantifiers_eval

# Builds the ARR table from the aggregated evaluation and fits `model` on all
# meta-features; this rebinds the notebook-level `arr_table`.
knn, arr_table = fit_knn(model, meta_features_table, not_aggregated_evaluation_table, evaluation_table)

# Leave-one-out evaluation; the ranking table is also written under RESULTS_DIR.
# NOTE: this rebinds `knn_quantifiers_eval` to the sorted, re-indexed copy
# returned by loo_knn, which is what any KNN cell run afterwards receives.
knn_recommender_eval, knn_quantifiers_eval = loo_knn(knn, arr_table, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "knn_7_recommendation_eval.csv")
ensemble_qtf = EnsembleQuantifier()
# NOTE(review): the output path is hard-coded instead of built from
# RESULTS_DIR; presumably the knn_7/ directory must already exist — confirm.
ensemble_qtf.evaluation("knn", knn_recommender_eval, knn_quantifiers_eval, f"./plot_data/experiment-1/results/knn_7/knn_7_ensemble_quantifier_evaluation_table.csv")

# Regressors

In [None]:
# Regression recommender: KNeighborsRegressor with default hyper-parameters.
try:
    from sklearn.neighbors import KNeighborsRegressor
    from ensemble_quantifier import EnsembleQuantifier

    model = KNeighborsRegressor()

    meta_features_table = meta_features
    evaluation_table = reg_quantifiers_eval_agg
    not_aggregated_evaluation_table = reg_quantifiers_eval

    # One regressor per quantifier. loo_reg clones and refits each entry per
    # fold, so the full-data fit done here does not feed the reported
    # predictions.
    model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
    REG_KNN_recommender_eval, REG_KNN_quantifiers_eval = loo_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "neigh_regressor_recommendation_eval.csv")

    # The ranking table was already written by loo_reg above; these two
    # writes store both returned tables under the model's own sub-directory.
    REG_KNN_recommender_eval.to_csv(f"./plot_data/experiment-1/results/neigh_reg/neigh_reg_recommender_eval.csv")
    REG_KNN_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/neigh_reg/neigh_reg_quantifiers_eval.csv")

    ensemble_qtf = EnsembleQuantifier()
    ensemble_qtf.evaluation("regression", REG_KNN_recommender_eval, REG_KNN_quantifiers_eval, f"./plot_data/experiment-1/results/neigh_reg/reg_ensemble_quantifier_evaluation_table_KNN.csv")
except Exception as e:
    # Best-effort cell: any failure is reduced to its message (no traceback)
    # so the notebook can continue with the next model.
    print(e)

In [None]:
# Regression recommender: RandomForestRegressor with default hyper-parameters.
# NOTE: EnsembleQuantifier is not imported in this cell; it relies on an
# earlier cell having imported it.
try:
    from sklearn.ensemble import RandomForestRegressor

    model=RandomForestRegressor(n_jobs=-1)

    meta_features_table = meta_features
    evaluation_table = reg_quantifiers_eval_agg
    not_aggregated_evaluation_table = reg_quantifiers_eval

    # One regressor per quantifier. loo_reg clones and refits each entry per
    # fold, so the full-data fit done here does not feed the reported
    # predictions.
    model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
    REG_RF_recommender_eval, REG_RF_quantifiers_eval = loo_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "rf_regressor_recommendation_eval.csv")

    # The ranking table was already written by loo_reg above; these two
    # writes store both returned tables under the model's own sub-directory.
    REG_RF_recommender_eval.to_csv(f"./plot_data/experiment-1/results/rf_reg/rf_reg_recommender_eval.csv")
    REG_RF_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/rf_reg/rf_reg_quantifiers_eval.csv")

    ensemble_qtf = EnsembleQuantifier()
    ensemble_qtf.evaluation("regression", REG_RF_recommender_eval, REG_RF_quantifiers_eval, f"./plot_data/experiment-1/results/rf_reg/reg_ensemble_quantifier_evaluation_table_RF.csv")
except Exception as e:
    # Best-effort cell: any failure is reduced to its message (no traceback)
    # so the notebook can continue with the next model.
    print(e)

In [None]:
# Regression recommender: XGBRegressor with default hyper-parameters.
# NOTE: EnsembleQuantifier is not imported in this cell; it relies on an
# earlier cell having imported it.
try:
    from xgboost import XGBRegressor

    model=XGBRegressor(n_jobs=-1)

    meta_features_table = meta_features
    evaluation_table = reg_quantifiers_eval_agg
    not_aggregated_evaluation_table = reg_quantifiers_eval

    # One regressor per quantifier. loo_reg clones and refits each entry per
    # fold, so the full-data fit done here does not feed the reported
    # predictions.
    model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
    REG_XGBR_recommender_eval, REG_XGBR_quantifiers_eval = loo_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "xgbr_regressor_recommendation_eval.csv")

    # The ranking table was already written by loo_reg above; these two
    # writes store both returned tables under the model's own sub-directory.
    REG_XGBR_recommender_eval.to_csv(f"./plot_data/experiment-1/results/xgbr_reg/xgbr_reg_recommender_eval.csv")
    REG_XGBR_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/xgbr_reg/xgbr_reg_quantifiers_eval.csv")

    ensemble_qtf = EnsembleQuantifier()
    ensemble_qtf.evaluation("regression", REG_XGBR_recommender_eval, REG_XGBR_quantifiers_eval, f"./plot_data/experiment-1/results/xgbr_reg/reg_ensemble_quantifier_evaluation_table_XGBR.csv")
except Exception as e:
    # Best-effort cell: any failure (including a missing xgboost install) is
    # reduced to its message so the notebook can continue.
    print(e)

In [None]:
# Regression recommender: SVR with default hyper-parameters.
# NOTE: EnsembleQuantifier is not imported in this cell; it relies on an
# earlier cell having imported it.
try:
    from sklearn.svm import SVR

    model=SVR()

    meta_features_table = meta_features
    evaluation_table = reg_quantifiers_eval_agg
    not_aggregated_evaluation_table = reg_quantifiers_eval

    # One regressor per quantifier. loo_reg clones and refits each entry per
    # fold, so the full-data fit done here does not feed the reported
    # predictions.
    model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
    REG_SVR_recommender_eval, REG_SVR_quantifiers_eval = loo_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, RESULTS_DIR + "svr_regressor_recommendation_eval.csv")

    # The ranking table was already written by loo_reg above; these two
    # writes store both returned tables under the model's own sub-directory.
    REG_SVR_recommender_eval.to_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_recommender_eval.csv")
    REG_SVR_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_quantifiers_eval.csv")

    ensemble_qtf = EnsembleQuantifier()
    ensemble_qtf.evaluation("regression", REG_SVR_recommender_eval, REG_SVR_quantifiers_eval, f"./plot_data/experiment-1/results/svr_reg/reg_ensemble_quantifier_evaluation_table_SVR.csv")
except Exception as e:
    # Best-effort cell: any failure is reduced to its message (no traceback)
    # so the notebook can continue with the next model.
    print(e)

# Fine-Tuning

In [None]:
# FINE-TUNING
# KNeighborsRegressor recommender tuned with nested Bayesian search
# (see loo_grid_reg).
from ensemble_quantifier import EnsembleQuantifier
from sklearn.neighbors import KNeighborsRegressor
from skopt.space import Integer, Real, Categorical

try:
    model=KNeighborsRegressor()

    meta_features_table = meta_features
    evaluation_table = reg_quantifiers_eval_agg
    not_aggregated_evaluation_table = reg_quantifiers_eval

    # Earlier exhaustive grid, kept for reference:
    # param_grid = {
    #     "n_neighbors": [1, 3, 5, 7, 9, 15, 25, 35],
    #     "weights": ["uniform", "distance"],
    #     "metric": ["euclidean", "manhattan", "minkowski"],
    #     "p": [1, 2],
    #     "algorithm": ["auto", "ball_tree", "kd_tree", "brute"],
    # }
    # Search space handed to BayesSearchCV by loo_grid_reg.
    param_grid = {
        "n_neighbors": Integer(1, 50),
        "weights": Categorical(["uniform", "distance"]),
    }


    # loo_grid_reg clones each entry and tunes it inside every fold, so the
    # full-data fit done by fit_regressor only provides the per-quantifier
    # templates.
    model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
    REG_NEIGH_recommender_eval, REG_NEIGH_quantifiers_eval = loo_grid_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, param_grid, RESULTS_DIR + "neigh_regressor_recommendation_eval_GRID.csv")

    REG_NEIGH_recommender_eval.to_csv(f"./plot_data/experiment-1/results/neigh_reg/neigh_reg_recommender_eval_GRID.csv")
    REG_NEIGH_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/neigh_reg/neigh_reg_quantifiers_eval_GRID.csv")

    ensemble_qtf = EnsembleQuantifier()
    ensemble_qtf.evaluation_opt("regression", REG_NEIGH_recommender_eval, REG_NEIGH_quantifiers_eval, f"./plot_data/experiment-1/results/neigh_reg/reg_ensemble_quantifier_evaluation_table_KNN_GRID.csv")
except Exception as e:
    # Best-effort cell: any failure is reduced to its message (no traceback).
    print(e)

In [None]:
# FINE-TUNING
# RandomForestRegressor recommender tuned with nested Bayesian search
# (see loo_grid_reg).
from ensemble_quantifier import EnsembleQuantifier
from sklearn.ensemble import RandomForestRegressor
from skopt.space import Integer, Real, Categorical

try:
    model=RandomForestRegressor()

    meta_features_table = meta_features
    evaluation_table = reg_quantifiers_eval_agg
    not_aggregated_evaluation_table = reg_quantifiers_eval

    # Earlier exhaustive grid, kept for reference:
    # param_grid = {
    #     "n_estimators": [100, 300, 500, 800],
    #     "max_depth": [None, 5, 10, 20, 40],
    #     "min_samples_split": [2, 5, 10, 20],
    #     "min_samples_leaf": [1, 2, 5, 10],
    #     "max_features": ["sqrt", "log2", 0.3, 0.5, 0.8],
    #     "bootstrap": [True, False],
    # }
    # Search space handed to BayesSearchCV by loo_grid_reg.
    param_grid = {
        "n_estimators": Integer(50, 600),
        "max_depth": Integer(2, 50),          # to also allow None, handle it separately
        "min_samples_leaf": Integer(1, 20),
    }


    # loo_grid_reg clones each entry and tunes it inside every fold, so the
    # full-data fit done by fit_regressor only provides the per-quantifier
    # templates.
    model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
    REG_RF_recommender_eval, REG_RF_quantifiers_eval = loo_grid_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, param_grid, RESULTS_DIR + "rf_regressor_recommendation_eval_GRID.csv")

    REG_RF_recommender_eval.to_csv(f"./plot_data/experiment-1/results/rf_reg/rf_reg_recommender_eval_GRID.csv")
    REG_RF_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/rf_reg/rf_reg_quantifiers_eval_GRID.csv")

    ensemble_qtf = EnsembleQuantifier()
    ensemble_qtf.evaluation_opt("regression", REG_RF_recommender_eval, REG_RF_quantifiers_eval, f"./plot_data/experiment-1/results/rf_reg/reg_ensemble_quantifier_evaluation_table_RF_GRID.csv")
except Exception as e:
    # Best-effort cell: any failure is reduced to its message (no traceback).
    print(e)

In [None]:
# FINE-TUNING
# SVR recommender tuned with nested Bayesian search (see loo_grid_reg).
from ensemble_quantifier import EnsembleQuantifier
from skopt.space import Integer, Real, Categorical

try:
    from sklearn.svm import SVR

    model=SVR()

    meta_features_table = meta_features
    evaluation_table = reg_quantifiers_eval_agg
    not_aggregated_evaluation_table = reg_quantifiers_eval

    # Earlier exhaustive grid, kept for reference:
    # param_grid = {
    #     "kernel": ["rbf"],
    #     "C": [0.1, 1, 10, 100, 1000],
    #     "gamma": ["scale", "auto", 0.001, 0.01, 0.1, 1],
    #     "epsilon": [0.001, 0.01, 0.1, 0.5, 1],
    # }
    # Search space handed to BayesSearchCV by loo_grid_reg.
    # NOTE(review): gamma is sampled for both kernels, although it presumably
    # has no effect when the linear kernel is drawn — confirm.
    param_grid = {
        "kernel": Categorical(["rbf", "linear"]),   # for RBF only: ["rbf"]
        "C": Real(1e-3, 1e3, prior="log-uniform"),
        "gamma": Real(1e-4, 1e0, prior="log-uniform"),
        "epsilon": Real(1e-4, 1e0, prior="log-uniform"),
    }
    

    # loo_grid_reg clones each entry and tunes it inside every fold, so the
    # full-data fit done by fit_regressor only provides the per-quantifier
    # templates.
    model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
    REG_SVR_recommender_eval, REG_SVR_quantifiers_eval = loo_grid_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, param_grid, RESULTS_DIR + "svr_regressor_recommendation_eval_GRID.csv")

    REG_SVR_recommender_eval.to_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_recommender_eval_GRID.csv")
    REG_SVR_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_quantifiers_eval_GRID.csv")

    ensemble_qtf = EnsembleQuantifier()
    # NOTE(review): the tuned KNN and RF cells call `evaluation_opt` here,
    # while this cell calls `evaluation` — confirm which one is intended.
    ensemble_qtf.evaluation("regression", REG_SVR_recommender_eval, REG_SVR_quantifiers_eval, f"./plot_data/experiment-1/results/svr_reg/reg_ensemble_quantifier_evaluation_table_SVR_GRID.csv")
except Exception as e:
    # Best-effort cell: any failure is reduced to its message (no traceback).
    print(e)

In [None]:
# # FINE-TUNNING
# from ensemble_quantifier import EnsembleQuantifier
# try:
#     from sklearn.svm import SVR

#     model=SVR()

#     meta_features_table = meta_features
#     evaluation_table = reg_quantifiers_eval_agg
#     not_aggregated_evaluation_table = reg_quantifiers_eval

#     param_grid = {
#         "kernel": ['linear'],
#         "C": [0.01, 0.1, 1, 10, 100, 1000],
#         "epsilon": [0.001, 0.01, 0.1, 0.5, 1],
#     }

#     model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
#     REG_SVR_recommender_eval, REG_SVR_quantifiers_eval = loo_grid_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, param_grid, RESULTS_DIR + "svr_regressor_recommendation_eval_GRID_2.csv")

#     REG_SVR_recommender_eval.to_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_recommender_eval_GRID_2.csv")
#     REG_SVR_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_quantifiers_eval_GRID_2.csv")

#     ensemble_qtf = EnsembleQuantifier()
#     ensemble_qtf.evaluation("regression", REG_SVR_recommender_eval, REG_SVR_quantifiers_eval, f"./plot_data/experiment-1/results/svr_reg/reg_ensemble_quantifier_evaluation_table_SVR_GRID_2.csv")
# except Exception as e:
#     print(e)

In [None]:
# # FINE-TUNNING
# from ensemble_quantifier import EnsembleQuantifier
# try:
#     from sklearn.svm import SVR

#     model=SVR()

#     meta_features_table = meta_features
#     evaluation_table = reg_quantifiers_eval_agg
#     not_aggregated_evaluation_table = reg_quantifiers_eval

#     param_grid = {
#         "kernel": ["poly"],
#         "C": [0.1, 1, 10, 100],
#         "degree": [2, 3, 4],
#         "gamma": ["scale", 0.01, 0.1],
#         "coef0": [0, 0.1, 1],
#         "epsilon": [0.01, 0.1, 0.5]
#     }

#     model_dict = fit_regressor(model=model, meta_features_table=meta_features_table, not_aggregated_evaluation_table=not_aggregated_evaluation_table, evaluation_table=evaluation_table)
#     REG_SVR_recommender_eval, REG_SVR_quantifiers_eval = loo_grid_reg(model_dict, meta_features_table, not_aggregated_evaluation_table, evaluation_table, param_grid, RESULTS_DIR + "svr_regressor_recommendation_eval_GRID_3.csv")

#     REG_SVR_recommender_eval.to_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_recommender_eval_GRID_3.csv")
#     REG_SVR_quantifiers_eval.to_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_quantifiers_eval_GRID_3.csv")

#     ensemble_qtf = EnsembleQuantifier()
#     ensemble_qtf.evaluation("regression", REG_SVR_recommender_eval, REG_SVR_quantifiers_eval, f"./plot_data/experiment-1/results/svr_reg/reg_ensemble_quantifier_evaluation_table_SVR_GRID_3.csv")
# except Exception as e:
#     print(e)

In [None]:
# import pandas as pd
# from ensemble_quantifier import EnsembleQuantifier

# ## REGRESSORES

# # KNN
# REG_KNN_recommender_eval = pd.read_csv(f"./plot_data/experiment-1/results/neigh_reg/neigh_reg_recommender_eval.csv", index_col=0)
# REG_KNN_quantifiers_eval = pd.read_csv(f"./plot_data/experiment-1/results/neigh_reg/neigh_reg_quantifiers_eval.csv", index_col=0)
# ensemble_qtf = EnsembleQuantifier()
# ensemble_qtf.evaluation("regression", REG_KNN_recommender_eval, REG_KNN_quantifiers_eval, f"./plot_data/experiment-1/results/neigh_reg/reg_ensemble_quantifier_evaluation_table_KNN.csv")

# # RF
# REG_RF_recommender_eval = pd.read_csv(f"./plot_data/experiment-1/results/rf_reg/rf_reg_recommender_eval.csv", index_col=0)
# REG_RF_quantifiers_eval = pd.read_csv(f"./plot_data/experiment-1/results/rf_reg/rf_reg_quantifiers_eval.csv", index_col=0)
# ensemble_qtf = EnsembleQuantifier()
# ensemble_qtf.evaluation("regression", REG_RF_recommender_eval, REG_RF_quantifiers_eval, f"./plot_data/experiment-1/results/rf_reg/reg_ensemble_quantifier_evaluation_table_RF.csv")

# # XGBOOST
# REG_XGBR_recommender_eval = pd.read_csv(f"./plot_data/experiment-1/results/xgbr_reg/xgbr_reg_recommender_eval.csv", index_col=0)
# REG_XGBR_quantifiers_eval = pd.read_csv(f"./plot_data/experiment-1/results/xgbr_reg/xgbr_reg_quantifiers_eval.csv", index_col=0)
# ensemble_qtf = EnsembleQuantifier()
# ensemble_qtf.evaluation("regression", REG_XGBR_recommender_eval, REG_XGBR_quantifiers_eval, f"./plot_data/experiment-1/results/xgbr_reg/reg_ensemble_quantifier_evaluation_table_XGBR.csv")

# # SVR
# REG_SVR_recommender_eval = pd.read_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_recommender_eval.csv", index_col=0)
# REG_SVR_quantifiers_eval = pd.read_csv(f"./plot_data/experiment-1/results/svr_reg/svr_reg_quantifiers_eval.csv", index_col=0)
# ensemble_qtf = EnsembleQuantifier()
# ensemble_qtf.evaluation("regression", REG_SVR_recommender_eval, REG_SVR_quantifiers_eval, f"./plot_data/experiment-1/results/svr_reg/reg_ensemble_quantifier_evaluation_table_SVR.csv")