In [None]:
from ms.metaresearch.selectors.model_free import *
from ms.metaresearch.selectors.model_based import *
from ms.metaresearch.selectors.causal import *
from ms.metaresearch.selectors.base import *
from ms.metaresearch.meta_learning import MetaLearner
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier

from ms.handler.metadata_source import TabzillaSource
from ms.metaresearch.meta_model import MetaModel
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import make_scorer, balanced_accuracy_score, f1_score, roc_auc_score

In [None]:
# Single metadata source shared by every selector and by the MetaLearner below.
md_source = TabzillaSource()

In [None]:
# Build one selector of each kind on top of the shared metadata source.
# The names in the first tuple pair positionally with the classes in the
# second; instantiation order is the same as the listing order.
(
    corr,
    f_val,
    mi,
    chi2,
    xgb,
    lasso,
    rfe,
    te,
    base,
) = (
    selector_cls(md_source=md_source)
    for selector_cls in (
        CorrelationSelector,
        FValueSelector,
        MutualInfoSelector,
        Chi2Selector,
        XGBSelector,
        LassoSelector,
        RFESelector,
        TESelector,
        BaseSelector,
    )
)

In [None]:
# Full selector set, kept for reference:
# selectors = [base, corr, f_val, mi, chi2, xgb, lasso, rfe, te]

# `xgb` is rebound to the XGBoost MetaModel in a later cell. If this cell is
# re-run after that one, the list would silently pick up the MetaModel instead
# of the selector, so fail loudly rather than run with the wrong object.
if not isinstance(xgb, XGBSelector):
    raise TypeError(
        "`xgb` no longer refers to the XGBSelector instance; "
        "re-run the selector-construction cell before this one."
    )

# Selectors actually handed to `run_models` as `selectors_handlers`.
selectors = [corr, f_val, mi, xgb, lasso, rfe]

In [None]:
# Suffix list passed to `run_models` as `feature_suffixes`.
features_suffixes = ["power"]
# Wider set of target suffixes, currently disabled:
# metrics_suffixes = ["perf_abs", "perf_rel", "diff"]
# Suffix list passed to `run_models` as `target_suffixes`.
metrics_suffixes = ["perf_abs"]

In [None]:
# Scorers handed to the MetaLearner as `model_scoring`, keyed by short name.
model_scoring = dict(
    b_acc=make_scorer(balanced_accuracy_score),
    f1=make_scorer(f1_score, average="weighted"),
    # ROC AUC is computed from class probabilities (one-vs-one, weighted).
    roc=make_scorer(
        roc_auc_score,
        response_method="predict_proba",
        average="weighted",
        max_fpr=None,
        multi_class="ovo",
    ),
)

# Scorer name handed to the MetaLearner as `opt_scoring`; it matches the
# "b_acc" key defined above.
grid_scoring = "b_acc"

In [None]:
# Logistic-regression meta-model and its hyperparameter search space.
# random_state is pinned because the "sag" / "saga" solvers in the search
# space shuffle the data; without a seed, repeated runs give different fits.
lr = MetaModel(
    name="logreg",
    display_name="Logistic Regression",
    model=LogisticRegression(random_state=42),
    params={
        "penalty": ["l2"],
        "C": [0.01, 0.1, 1, 10],
        "solver": ["newton-cholesky", "lbfgs", "sag", "saga"],
    },
)

# MLP meta-model and its hyperparameter search space.
# random_state is pinned because weight initialisation and mini-batch sampling
# are random; without a seed, repeated runs give different fits.
mlp = MetaModel(
    name="mlp",
    display_name="MLP",
    model=MLPClassifier(random_state=42),
    params={
        "hidden_layer_sizes": [(10,), (25,), (50,)],
        "activation": ["logistic", "relu", "tanh"],
        "solver": ["lbfgs", "sgd", "adam"],
        "alpha": [0.001, 0.01, 0.1],
        # Per scikit-learn docs, batch_size is ignored by "lbfgs",
        # learning_rate applies only to "sgd", and learning_rate_init only to
        # "sgd" / "adam"; those combinations are redundant but harmless.
        "batch_size": ["auto", 10, 25, 50],
        "learning_rate": ["adaptive"],
        "learning_rate_init": [0.001, 0.01, 0.1],
        "max_iter": [25, 50, 100, 200],
    },
)

# XGBoost meta-model and its hyperparameter search space.
# NOTE: this rebinds `xgb`, which an earlier cell used for the XGBSelector
# instance; from here on `xgb` is the MetaModel.
xgb = MetaModel(
    name="xgb",
    display_name="XGBoost",
    model=XGBClassifier(),
    params=dict(
        max_depth=[3, 5, 7, 9],
        learning_rate=[0.01, 0.1, 0.3],
        n_estimators=[5, 10, 50, 100],
        eval_metric=["merror", "mlogloss"],
    ),
)

# k-nearest-neighbours meta-model and its hyperparameter search space.
knn = MetaModel(
    name="knn",
    display_name="KNN",
    model=KNeighborsClassifier(),
    params=dict(
        n_neighbors=[3, 5, 7],
        weights=["uniform", "distance"],
        leaf_size=[10, 20, 30, 50],
        algorithm=["auto"],
        p=[1, 2],
    ),
)

In [None]:
# Configure the meta-learning driver.
meta_learner = MetaLearner(
    md_source=md_source,
    # Features and metrics both use the "preprocessed" folder name.
    features_folder="preprocessed",
    metrics_folder="preprocessed",
    # One scorer name for optimisation, the full scorer dict for evaluation.
    opt_scoring=grid_scoring,
    model_scoring=model_scoring,
    # Search settings. NOTE(review): opt_cv / model_cv are presumably fold
    # counts and n_trials the Optuna trial budget; confirm against MetaLearner.
    use_optuna=True,
    n_trials=50,
    opt_cv=5,
    model_cv=10,
    test_mode=False,
)

In [None]:
# Hand the four meta-models, the active selectors and the suffix lists to
# `run_models`, keeping its return value. `xgb` here is the MetaModel defined
# in the model cell, not the selector of the same name from earlier.
output = meta_learner.run_models(
    selectors_handlers=selectors,
    models=[knn, lr, xgb, mlp],
    feature_suffixes=features_suffixes,
    target_suffixes=metrics_suffixes,
    # NOTE(review): presumably existing results are kept and new ones are
    # written; confirm against MetaLearner.run_models.
    to_save=True,
    rewrite=False,
)