In [1]:
from kaggle_airbnb.config import (adaboost_search_space, catboost_search_space,
                                  logistic_regression_search_space,
                                  random_forest_search_space, xgb_search_space)
from kaggle_airbnb.tune import get_best_model, test, tune_algo


In [2]:
# Maps the algorithm name accepted by ``tune`` to the search space imported
# from ``kaggle_airbnb.config``.
search_space_dict = dict(
    logistic_regression=logistic_regression_search_space,
    random_forest=random_forest_search_space,
    ada_boost=adaboost_search_space,
    xgb=xgb_search_space,
    cat_boost=catboost_search_space,
)


def tune(algo_name, num_samples, n_cpus):
    """Run the hyper-parameter search for a single algorithm.

    Args:
        algo_name: Key of ``search_space_dict`` selecting the search space.
        num_samples: Forwarded to ``tune_algo`` as ``num_samples``.
        n_cpus: Forwarded to ``tune_algo`` as ``n_cpus``.

    Returns:
        The object returned by ``tune_algo``; the rest of this notebook hands
        it to ``get_best_model`` and calls ``.dataframe()`` on it.

    Raises:
        KeyError: If ``algo_name`` is not a key of ``search_space_dict``.
    """
    search_space = search_space_dict[algo_name]
    return tune_algo(search_space, num_samples=num_samples, n_cpus=n_cpus)


def evaluate(analysis, algo_name):
    """Pick the best model from a finished search and run ``test`` on it.

    Args:
        analysis: Search result as returned by ``tune``.
        algo_name: Algorithm name passed through to ``test``.

    Returns:
        None. The result is only reported through whatever
        ``get_best_model`` and ``test`` print.
    """
    test(get_best_model(analysis), algo_name)


def res_table(analysis):
    """Return the per-trial results table without bookkeeping columns.

    Args:
        analysis: Object exposing ``dataframe()`` that returns a pandas
            DataFrame with one row per trial (as returned by ``tune``).

    Returns:
        A new DataFrame with the bookkeeping columns listed below removed.
        The frame returned by ``analysis.dataframe()`` is not modified.
    """
    bookkeeping_columns = [
        "time_this_iter_s", "done", "timesteps_total", "episodes_total",
        "training_iteration", "experiment_id", "date", "timestamp", "pid",
        "hostname", "node_ip", "time_since_restore",
        "iterations_since_restore", "timesteps_since_restore", "trial_id",
        "config/algo_wrapper_cls", "logdir",
    ]
    # errors="ignore": a results frame that lacks some of these columns must
    # not make the whole table fail with a KeyError; only the columns that
    # are actually present get dropped. ``drop`` already returns a new frame,
    # so no explicit copy is needed.
    return analysis.dataframe().drop(columns=bookkeeping_columns, errors="ignore")


## Logistic Regression

In [3]:
%%capture
# Search the logistic-regression space with 8 samples. `analysis` is overwritten
# by every section of this notebook, so the evaluate/res_table cells of a section
# must be run right after that section's tuning cell.
analysis = tune("logistic_regression", num_samples=8, n_cpus=8)


In [4]:
# Report the best logistic-regression trial from the search above.
evaluate(analysis, "logistic_regression")


Best model parameters: {'algo_wrapper_cls': <class 'kaggle_airbnb.wrapper.LogisticRegressionWrapper'>, 'n_jobs': 2}
Best model total accuracy: 0.6729


In [5]:
# n_jobs appears in the search space only because at least one parameter has to be set for tuning.
res_table(analysis)

Unnamed: 0,acc,time_total_s,config/n_jobs
0,0.660201,1.488078,2
1,0.663545,0.559429,1
2,0.670234,1.507658,2
3,0.66689,3.12512,2
4,0.670234,0.589632,1
5,0.652843,1.524961,2
6,0.654849,0.558117,1
7,0.67291,1.460697,2


## Random Forest

In [6]:
%%capture
# Search the random-forest space with 100 samples; this overwrites `analysis`
# from the previous section.
analysis = tune("random_forest", num_samples=100, n_cpus=8)


In [7]:
# Report the best random-forest trial from the search above.
evaluate(analysis, "random_forest")


Best model parameters: {'algo_wrapper_cls': <class 'kaggle_airbnb.wrapper.RandomForestWrapper'>, 'n_estimators': 33, 'criterion': 'entropy', 'max_depth': 14, 'min_samples_split': 4, 'min_samples_leaf': 1, 'bootstrap': True, 'max_features': 'log2', 'oob_score': True, 'n_jobs': 8}
Best model total accuracy: 0.7619


In [8]:
# Per-trial results of the random-forest search, without the bookkeeping columns res_table drops.
res_table(analysis)

Unnamed: 0,acc,time_total_s,config/bootstrap,config/criterion,config/max_depth,config/max_features,config/min_samples_leaf,config/min_samples_split,config/n_estimators,config/n_jobs,config/oob_score
0,0.703679,2.332913,True,gini,12,log2,17,2,42,8,False
1,0.652174,3.985271,True,gini,2,log2,15,4,32,8,True
2,0.718395,4.483394,True,entropy,9,log2,9,4,91,8,False
3,0.732441,2.812104,True,entropy,9,auto,3,4,38,8,True
4,0.739130,4.039477,True,entropy,12,auto,6,3,74,8,False
...,...,...,...,...,...,...,...,...,...,...,...
95,0.753177,2.396820,True,gini,15,auto,4,4,30,8,True
96,0.631438,0.444834,True,entropy,3,auto,19,2,1,8,False
97,0.701003,0.997260,True,gini,5,auto,13,4,14,8,False
98,0.697659,0.620532,True,gini,12,log2,15,2,4,8,False


## AdaBoost

In [9]:
%%capture
# Search the AdaBoost space with 100 samples; this overwrites `analysis`
# from the previous section.
analysis = tune("ada_boost", num_samples=100, n_cpus=8)


In [10]:
# Report the best AdaBoost trial from the search above.
evaluate(analysis, "ada_boost")


Best model parameters: {'algo_wrapper_cls': <class 'kaggle_airbnb.wrapper.AdaBoostWrapper'>, 'n_estimators': 139, 'learning_rate': 0.5544985446364133, 'algorithm': 'SAMME.R'}
Best model total accuracy: 0.6990


In [11]:
# Per-trial results of the AdaBoost search, without the bookkeeping columns res_table drops.
res_table(analysis)

Unnamed: 0,acc,time_total_s,config/algorithm,config/learning_rate,config/n_estimators
0,0.670903,8.251392,SAMME,0.004345,107
1,0.678261,1.272887,SAMME,0.036438,5
2,0.685619,5.922128,SAMME.R,0.178752,63
3,0.677592,3.196151,SAMME,0.000116,39
4,0.684281,9.302314,SAMME.R,0.102222,100
...,...,...,...,...,...
95,0.670903,11.508797,SAMME.R,0.000063,124
96,0.674916,15.177454,SAMME.R,0.000227,166
97,0.670234,0.950255,SAMME,0.355603,9
98,0.682274,11.081594,SAMME,0.000022,145


## CatBoost

In [12]:
%%capture
# Search the CatBoost space with 100 samples; this overwrites `analysis`
# from the previous section, so the two cells below must be run after this one.
analysis = tune("cat_boost", num_samples=100, n_cpus=8)


In [None]:
# NOTE(review): this cell has no execution count and its saved output reports
# LogisticRegressionWrapper parameters rather than a CatBoost model, so the output
# looks stale. Re-run the CatBoost tuning cell and then this cell before trusting it.
evaluate(analysis, "cat_boost")


Best model parameters: {'algo_wrapper_cls': <class 'kaggle_airbnb.wrapper.LogisticRegressionWrapper'>, 'n_jobs': 1}
Best model total accuracy: 0.7057


In [None]:
# NOTE(review): the saved table for this cell has 8 rows and only a config/n_jobs
# column, which does not match a 100-sample CatBoost search; it appears to be stale
# output from a logistic-regression run. Re-run after the CatBoost tuning cell.
res_table(analysis)

Unnamed: 0,acc,time_total_s,config/n_jobs
0,0.693645,0.095044,1
1,0.69097,0.097532,1
2,0.687625,1.179293,2
3,0.688963,1.178881,2
4,0.705686,0.092283,1
5,0.68495,0.580218,2
6,0.688294,0.093739,1
7,0.692308,0.581227,2


## XGBoost

In [None]:
%%capture
# Search the XGBoost space with 100 samples; this overwrites `analysis`
# from the previous section, so the two cells below must be run after this one.
# NOTE(review): no execution count is recorded for this cell in the saved notebook.
analysis = tune("xgb", num_samples=100, n_cpus=8)


In [None]:
# NOTE(review): this cell has no execution count and its saved output reports
# LogisticRegressionWrapper parameters rather than an XGBoost model, so the output
# looks stale. Re-run the XGBoost tuning cell and then this cell before trusting it.
evaluate(analysis, "xgb")


Best model parameters: {'algo_wrapper_cls': <class 'kaggle_airbnb.wrapper.LogisticRegressionWrapper'>, 'n_jobs': 1}
Best model total accuracy: 0.7057


In [None]:
# NOTE(review): the saved table for this cell has 8 rows and only a config/n_jobs
# column, which does not match a 100-sample XGBoost search; it appears to be stale
# output from a logistic-regression run. Re-run after the XGBoost tuning cell.
res_table(analysis)

Unnamed: 0,acc,time_total_s,config/n_jobs
0,0.693645,0.095044,1
1,0.69097,0.097532,1
2,0.687625,1.179293,2
3,0.688963,1.178881,2
4,0.705686,0.092283,1
5,0.68495,0.580218,2
6,0.688294,0.093739,1
7,0.692308,0.581227,2
