In [None]:
from sklearn.linear_model import LinearRegression
from Case import Case

def objective_maker_linear_regression(train_model):
    """Create the Optuna objective for a default ``LinearRegression`` model.

    Args:
        train_model: Callable that receives the freshly built estimator. Its
            return value is handed back unchanged as the objective value.

    Returns:
        A one-argument ``objective(trial)`` function.
    """
    def objective(trial):
        # ``trial`` is accepted only to satisfy the objective signature:
        # nothing is sampled from it, so every call builds the same
        # default-configured estimator.
        return train_model(LinearRegression())

    return objective

# Run LinearRegression once for every pairing of the missing-value option and
# the feature-selection option passed to Case.
for missing_value in ("drop", "fill"):
    for feature_selection in ("all", "correlation"):
        case = Case(
            LinearRegression,
            data_missing_value=missing_value,
            feature_selection=feature_selection,
            normalization=True,
        )
        # A single trial is enough: the objective samples no hyperparameters.
        case.find_params_with_optuna(
            objective_maker_linear_regression,
            number_of_trials=1,
        )
        case.report()

In [None]:
from sklearn.linear_model import LogisticRegression
from Case import Case

def objective_maker_logistic_regression(train_model):
    """Create the Optuna objective that tunes a ``LogisticRegression`` model.

    Args:
        train_model: Callable that receives the configured estimator. Its
            return value is handed back unchanged as the objective value.

    Returns:
        A one-argument ``objective(trial)`` function.
    """
    def objective(trial):
        # Only the iteration budget is searched (10 .. 100000 in steps of 10).
        iteration_budget = trial.suggest_int("max_iter", 10, 100_000, step=10)
        # n_jobs=-1 is passed as a fixed, untuned setting.
        estimator = LogisticRegression(max_iter=iteration_budget, n_jobs=-1)
        return train_model(estimator)

    return objective

# Tune LogisticRegression for every pairing of the missing-value option and
# the feature-selection option passed to Case (1000 Optuna trials each).
for missing_value in ("drop", "fill"):
    for feature_selection in ("all", "correlation"):
        case = Case(
            LogisticRegression,
            data_missing_value=missing_value,
            feature_selection=feature_selection,
            normalization=True,
        )
        case.find_params_with_optuna(
            objective_maker_logistic_regression,
            number_of_trials=1000,
        )
        case.report()

In [None]:
import xgboost as xgb
from Case import Case

def objective_maker_xgb(train_model):
    """Create the Optuna objective that tunes an ``xgb.XGBClassifier``.

    Args:
        train_model: Callable that receives the configured classifier. Its
            return value is handed back unchanged as the objective value.

    Returns:
        A one-argument ``objective(trial)`` function.
    """
    def objective(trial):
        # The booster is pinned. To search over it instead, replace this with
        # trial.suggest_categorical("booster", ["gbtree", "gblinear", "dart"]).
        booster = "gbtree"

        params = {
            "verbosity": 0,
            "objective": "binary:logistic",
            # "exact" was chosen by the original author for a small dataset.
            "tree_method": "exact",
            "booster": booster,
            # L2 regularization weight.
            "lambda": trial.suggest_float("lambda", 1e-8, 1.0, log=True),
            # L1 regularization weight.
            "alpha": trial.suggest_float("alpha", 1e-8, 1.0, log=True),
            # Row sampling ratio for the training data.
            "subsample": trial.suggest_float("subsample", 0.2, 1.0),
            # Per-tree column sampling is currently not searched:
            # "colsample_bytree": trial.suggest_float("colsample_bytree", 0.2, 1.0),
            "n_estimators": trial.suggest_int("n_estimators", 5, 100, step=1),
            # Original author's workaround ("Avoid error").
            # NOTE(review): XGBoost documents `missing` as the value to treat
            # as missing data, so feature values equal to 1 are presumably
            # handled as missing here - confirm this is intended.
            "missing": 1,
        }

        if booster in ("gbtree", "dart"):
            params.update({
                # Maximum tree depth; larger means a more complex tree.
                "max_depth": trial.suggest_int("max_depth", 1, 20, step=1),
                # Minimum child weight; larger means a more conservative tree.
                "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
                "eta": trial.suggest_float("eta", 1e-8, 1.0, log=True),
                # Controls how selective the algorithm is when splitting.
                "gamma": trial.suggest_float("gamma", 1e-8, 1.0, log=True),
                # NOTE(review): XGBoost documents grow_policy for the
                # hist/approx tree methods; with "exact" it may have no
                # effect - confirm.
                "grow_policy": trial.suggest_categorical(
                    "grow_policy", ["depthwise", "lossguide"]
                ),
            })

        # Only reachable once the booster is made tunable again.
        if booster == "dart":
            params.update({
                "sample_type": trial.suggest_categorical(
                    "sample_type", ["uniform", "weighted"]
                ),
                "normalize_type": trial.suggest_categorical(
                    "normalize_type", ["tree", "forest"]
                ),
                "rate_drop": trial.suggest_float("rate_drop", 1e-8, 1.0, log=True),
                "skip_drop": trial.suggest_float("skip_drop", 1e-8, 1.0, log=True),
            })

        return train_model(xgb.XGBClassifier(**params))

    return objective

# Tune XGBClassifier for every pairing of the missing-value option and the
# feature-selection option passed to Case (1000 Optuna trials each).
# No normalization argument is passed here, so Case's own default applies.
for missing_value in ("drop", "fill"):
    for feature_selection in ("all", "correlation"):
        case = Case(
            xgb.XGBClassifier,
            data_missing_value=missing_value,
            feature_selection=feature_selection,
        )
        case.find_params_with_optuna(
            objective_maker_xgb,
            number_of_trials=1000,
        )
        case.report()

In [None]:
from sklearn.neural_network import MLPClassifier
from Case import Case

def objective_maker_mlp_1l(train_model):
    """Create the Optuna objective that tunes a one-hidden-layer ``MLPClassifier``.

    Args:
        train_model: Callable that receives the configured classifier. Its
            return value is handed back unchanged as the objective value.

    Returns:
        A one-argument ``objective(trial)`` function.
    """
    def objective(trial):
        mlp_1l_params = {
            # The trailing comma matters: without it the parentheses are just
            # grouping and a bare int is passed instead of the one-element
            # tuple that the documented ``hidden_layer_sizes`` shape (and the
            # two-layer variant of this objective) uses.
            "hidden_layer_sizes": (trial.suggest_int("hidden_layer_sizes_l1", 1, 50, step=1),),
            "activation": trial.suggest_categorical("activation", ["identity", "logistic", "tanh", "relu"]),
            "solver": trial.suggest_categorical("solver", ["lbfgs", "sgd", "adam"]),
            # L2 penalty strength, searched on a log scale.
            "alpha": trial.suggest_float("alpha", 0.0000001, 0.1, log=True),
            "batch_size": trial.suggest_int("batch_size", 2, 512, step=2),
            "learning_rate": trial.suggest_categorical("learning_rate", ["constant", "invscaling", "adaptive"]),
            # NOTE(review): sampled on a linear scale although the range spans
            # seven orders of magnitude - confirm log=True is not wanted.
            "learning_rate_init": trial.suggest_float("learning_rate_init", 0.00000001, 0.1),
            "power_t": trial.suggest_float("power_t", 0.00000001, 0.1),
            "max_iter": trial.suggest_int("max_iter", 100, 100000, step=100),
            # Fixed, not tuned.
            "early_stopping": True,
            "n_iter_no_change": trial.suggest_int("n_iter_no_change", 10, 1000, step=10),
        }
        model = MLPClassifier(**mlp_1l_params)
        return train_model(model)
    return objective

import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): presumably meant to hide the MLP training warnings emitted
# during the search - confirm that nothing important is being masked.
warnings.filterwarnings("ignore")

# Tune the one-hidden-layer MLP for every pairing of the missing-value option
# and the feature-selection option passed to Case (1000 Optuna trials each).
for missing_value in ("drop", "fill"):
    for feature_selection in ("all", "correlation"):
        case = Case(
            MLPClassifier,
            data_missing_value=missing_value,
            feature_selection=feature_selection,
            normalization=True,
        )
        case.find_params_with_optuna(
            objective_maker_mlp_1l,
            number_of_trials=1000,
        )
        case.report()

In [None]:
from sklearn.neural_network import MLPClassifier
from Case import Case

def objective_maker_mlp_2l(train_model):
    """Create the Optuna objective that tunes a two-hidden-layer ``MLPClassifier``.

    Args:
        train_model: Callable that receives the configured classifier. Its
            return value is handed back unchanged as the objective value.

    Returns:
        A one-argument ``objective(trial)`` function.
    """
    def objective(trial):
        # Width of each hidden layer, sampled first layer then second layer.
        first_width = trial.suggest_int("hidden_layer_sizes_l1", 1, 50, step=1)
        second_width = trial.suggest_int("hidden_layer_sizes_l2", 1, 50, step=1)

        settings = {"hidden_layer_sizes": (first_width, second_width)}
        settings["activation"] = trial.suggest_categorical(
            "activation", ["identity", "logistic", "tanh", "relu"]
        )
        settings["solver"] = trial.suggest_categorical(
            "solver", ["lbfgs", "sgd", "adam"]
        )
        # Regularization strength, searched on a log scale.
        settings["alpha"] = trial.suggest_float("alpha", 1e-7, 0.1, log=True)
        settings["batch_size"] = trial.suggest_int("batch_size", 2, 512, step=2)
        settings["learning_rate"] = trial.suggest_categorical(
            "learning_rate", ["constant", "invscaling", "adaptive"]
        )
        # Both of these are sampled on a linear scale.
        settings["learning_rate_init"] = trial.suggest_float(
            "learning_rate_init", 1e-8, 0.1
        )
        settings["power_t"] = trial.suggest_float("power_t", 1e-8, 0.1)
        settings["max_iter"] = trial.suggest_int("max_iter", 100, 100000, step=100)
        # Fixed, not tuned.
        settings["early_stopping"] = True
        settings["n_iter_no_change"] = trial.suggest_int(
            "n_iter_no_change", 10, 1000, step=10
        )

        return train_model(MLPClassifier(**settings))

    return objective

import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): presumably meant to hide the MLP training warnings emitted
# during the search - confirm that nothing important is being masked.
warnings.filterwarnings("ignore")

# Tune the two-hidden-layer MLP for every pairing of the missing-value option
# and the feature-selection option passed to Case (1000 Optuna trials each).
for missing_value in ("drop", "fill"):
    for feature_selection in ("all", "correlation"):
        case = Case(
            MLPClassifier,
            data_missing_value=missing_value,
            feature_selection=feature_selection,
            normalization=True,
        )
        case.find_params_with_optuna(
            objective_maker_mlp_2l,
            number_of_trials=1000,
        )
        case.report()

In [None]:
from sklearn.tree import DecisionTreeClassifier
from Case import Case

def objective_maker_decision_tree(train_model):
    """Create the Optuna objective that tunes a ``DecisionTreeClassifier``.

    Args:
        train_model: Callable that receives the configured classifier. Its
            return value is handed back unchanged as the objective value.

    Returns:
        A one-argument ``objective(trial)`` function.
    """
    # Lower bound for the fractional min_samples_* parameters. scikit-learn
    # only accepts floats strictly greater than 0 for them, so the search
    # range must not include 0.0 itself.
    min_fraction = 1e-6

    def objective(trial):
        decision_tree_params = {
            "criterion": trial.suggest_categorical("criterion", ["gini", "entropy"]),
            "splitter": trial.suggest_categorical("splitter", ["best", "random"]),
            "max_depth": trial.suggest_int("max_depth", 1, 10, step=1),
            # Given as fractions of the number of samples, not absolute counts.
            "min_samples_split": trial.suggest_float("min_samples_split", min_fraction, 0.8),
            "min_samples_leaf": trial.suggest_float("min_samples_leaf", min_fraction, 0.5),
            "min_weight_fraction_leaf": trial.suggest_float("min_weight_fraction_leaf", 0, 0.5),
            # "auto" is no longer accepted by scikit-learn's tree estimators
            # (it was an alias of "sqrt" for this classifier). Use the same
            # choices as the random-forest objective, where None means
            # "consider all features".
            "max_features": trial.suggest_categorical("max_features", [None, "sqrt", "log2"]),
            "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 10, step=1),
            "min_impurity_decrease": trial.suggest_float("min_impurity_decrease", 0, 0.5),
        }
        model = DecisionTreeClassifier(**decision_tree_params)
        return train_model(model)
    return objective

# Tune DecisionTreeClassifier for every pairing of the missing-value option
# and the feature-selection option passed to Case (1000 Optuna trials each).
# Normalization is explicitly switched off for this model.
for missing_value in ("drop", "fill"):
    for feature_selection in ("all", "correlation"):
        case = Case(
            DecisionTreeClassifier,
            data_missing_value=missing_value,
            feature_selection=feature_selection,
            normalization=False,
        )
        case.find_params_with_optuna(
            objective_maker_decision_tree,
            number_of_trials=1000,
        )
        case.report()

In [None]:
from sklearn.ensemble import RandomForestClassifier
from Case import Case

def objective_maker_random_forest(train_model):
    """Create the Optuna objective that tunes a ``RandomForestClassifier``.

    Args:
        train_model: Callable that receives the configured classifier. Its
            return value is handed back unchanged as the objective value.

    Returns:
        A one-argument ``objective(trial)`` function.
    """
    # Lower bound for the fractional min_samples_* parameters. scikit-learn
    # only accepts floats strictly greater than 0 for them, so the search
    # range must not include 0.0 itself.
    min_fraction = 1e-6

    def objective(trial):
        random_forest_params = {
            "n_estimators": trial.suggest_int("n_estimators", 10, 1000, step=10),
            # "log_loss" is a further criterion that is currently not searched.
            "criterion": trial.suggest_categorical("criterion", ["gini", "entropy"]),
            "max_depth": trial.suggest_int("max_depth", 1, 10, step=1),
            # Given as fractions of the number of samples, not absolute counts.
            "min_samples_split": trial.suggest_float("min_samples_split", min_fraction, 0.8),
            "min_samples_leaf": trial.suggest_float("min_samples_leaf", min_fraction, 0.5),
            "min_weight_fraction_leaf": trial.suggest_float("min_weight_fraction_leaf", 0, 0.5),
            # None means "consider all features" at every split.
            "max_features": trial.suggest_categorical("max_features", [None, "sqrt", "log2"]),
            "max_leaf_nodes": trial.suggest_int("max_leaf_nodes", 2, 10, step=1),
            "min_impurity_decrease": trial.suggest_float("min_impurity_decrease", 0, 0.5),
            # Fixed, not tuned.
            "n_jobs": -1,
        }
        model = RandomForestClassifier(**random_forest_params)
        return train_model(model)
    return objective

# Tune RandomForestClassifier for every pairing of the missing-value option
# and the feature-selection option passed to Case (1000 Optuna trials each).
# Normalization is explicitly switched off for this model.
for missing_value in ("drop", "fill"):
    for feature_selection in ("all", "correlation"):
        case = Case(
            RandomForestClassifier,
            data_missing_value=missing_value,
            feature_selection=feature_selection,
            normalization=False,
        )
        case.find_params_with_optuna(
            objective_maker_random_forest,
            number_of_trials=1000,
        )
        case.report()