In [None]:
#Classification
from mlbox.preprocessing import Reader
from mlbox.preprocessing import Drift_thresholder
from mlbox.optimisation import Optimiser
from mlbox.prediction import Predictor

# Input files: the training set first, then the test set.
paths = [
    "Data/titanic/train.csv",
    "Data/titanic/test.csv",
]
# Column to predict. It must be present in the training file only.
target_name = "Survived"

# ---------------------------------------------------------------------------
# Reading and cleaning
# ---------------------------------------------------------------------------
# CSV reader that splits fields on commas.
rd = Reader(sep=",")
# train_test_split returns a dictionary with three entries:
#   data["train"]  : training samples without the target column
#   data["test"]   : test samples without the target column
#   data["target"] : target column of the training samples
data = rd.train_test_split(paths, target_name)

# Pass the train/test dictionary through the drift thresholder
# (this is the "computing drifts" step reported in the run log).
dft = Drift_thresholder()
data = dft.fit_transform(data)

# ---------------------------------------------------------------------------
# Tuning
# ---------------------------------------------------------------------------
# Scoring names accepted for classification:
#   {"accuracy", "roc_auc", "f1", "neg_log_loss", "precision", "recall"}
opt = Optimiser(scoring="accuracy", n_folds=3)
# Baseline score before any tuning; None is given in place of a parameter
# set (presumably selecting the default pipeline -- confirm in MLBox docs).
opt.evaluate(None, data)

# Hyper-parameter search space.
#
# Keys follow the pattern "enc__param", where "enc" is one of:
#   "ne"            NA encoder
#   "ce"            categorical encoder
#   "fs"            feature selector                      [OPTIONAL]
#   "stck" + str(i) layer number i of meta-features       [OPTIONAL]
#   "est"           final estimator
# and "param" is a valid parameter of that step
# (e.g. "max_depth" when "enc" is "est").
#
# Values follow the pattern {"search": strategy, "space": list}:
#   strategy is "choice" (the default) or "uniform";
#   with "choice", list holds the candidate values to try;
#   otherwise, list is [value_min, value_max].
#
# ne__numerical_strategy accepts an integer, a float,
#   or one of {"mean", "median", "most_frequent"}.
# ce__strategy accepts one of
#   {"label_encoding", "dummification", "random_projection",
#    "entity_embedding"}.
space = {
    "ne__numerical_strategy": dict(search="choice", space=[0]),
    "ce__strategy": dict(
        search="choice",
        space=["label_encoding", "random_projection", "entity_embedding"],
    ),
    "fs__threshold": dict(search="uniform", space=[0.01, 0.3]),
    "est__max_depth": dict(search="choice", space=[3, 4, 5, 6, 7]),
}

# Tune the hyper-parameters of the whole pipeline against the scoring
# function chosen above. The search algorithm is the Tree Parzen Estimator.
#
# IMPORTANT: avoid dependent parameters, and search over a single feature
# selection strategy and a single estimator strategy at a time.
best = opt.optimise(space, data, max_evals=15)

# Fit with the best parameters, predict on the test set and write the
# results into the "save" folder.
prd = Predictor()
prd.fit_predict(best, data)


reading csv : train.csv ...
cleaning data ...
CPU time: 0.2824747562408447 seconds

reading csv : test.csv ...
cleaning data ...
CPU time: 0.0659022331237793 seconds

> Number of common features : 11

gathering and crunching for train and test datasets ...
reindexing for train and test datasets ...
dropping training duplicates ...
dropping constant variables on training set ...

> Number of categorical features: 5
> Number of numerical features: 6
> Number of training samples : 891
> Number of test samples : 418

> Top sparse features (% missing values on train set):
Cabin       77.1
Age         19.9
Embarked     0.2
dtype: float64

> Task : classification
0.0    549
1.0    342
Name: Survived, dtype: int64

encoding target ...

computing drifts ...
CPU time: 0.3208177089691162 seconds

> Top 10 drifts

('PassengerId', 1.0)
('Name', 0.9895998527491288)
('Ticket', 0.677220232385404)
('Cabin', 0.19771080102497018)
('Embarked', 0.07508027318074184)
('Age', 0.058879254044637896)
('Parch', 

  +str(self.to_path)+"/joblib'. Please clear it regularly.")



MEAN SCORE : accuracy = 0.8103254769921436
VARIANCE : 0.03534908136481175 (fold 1 = 0.7609427609427609, fold 2 = 0.8282828282828283, fold 3 = 0.8417508417508418)
CPU time: 0.998481273651123 seconds

##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                                                       
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.2022316867360934}                                              
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 3

CPU time: 0.756927490234375 seconds                                                                                    
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                                                       
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.16110942837672357}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': None, 'reg_alpha': 0.0, 'reg_lambda': 0.0,



CPU time: 0.37947726249694824 seconds


In [1]:
#Regression
import numpy as np

from mlbox.preprocessing import Reader
from mlbox.preprocessing import Drift_thresholder
from mlbox.optimisation import make_scorer
from mlbox.optimisation import Optimiser
from mlbox.prediction import Predictor

# Paths to the train set and the test set.
# NOTE(review): the directory is named "California_house", but the target
# ("SalePrice") and the columns listed in the recorded run (PoolQC, Alley,
# Fence, ...) do not look like California housing data -- confirm that the
# folder name matches the dataset it actually holds.
paths = ["Data/California_house/train.csv", "Data/California_house/test.csv"]
# Name of the feature to predict.
# This column should only be present in the train set.
target_name = "SalePrice"

# Reading and cleaning all files
# Declare a reader for csv files
rd = Reader(sep=',')
# Return a dictionary containing three entries
# dict["train"] contains training samples without target columns
# dict["test"] contains testing elements without target columns
# dict["target"] contains target columns for training samples.
data = rd.train_test_split(paths, target_name)

# Pass the train/test dictionary through the drift thresholder
# (this is the "computing drifts" step reported in the run log).
dft = Drift_thresholder()
data = dft.fit_transform(data)


# Tuning
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error, expressed in percent.

    Parameters
    ----------
    y_true : array-like
        Observed target values. They must be non-zero, otherwise the
        relative error is undefined (division by zero).
    y_pred : array-like
        Predicted target values, aligned with ``y_true``.

    Returns
    -------
    float
        ``100 * mean(|y_true - y_pred| / |y_true|)``.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    # The absolute value covers the whole ratio so that negative targets
    # add to the error instead of cancelling it out.
    return 100 * np.mean(np.abs((y_true - y_pred) / y_true))


# A named function (rather than a lambda) makes the score label in the
# optimiser logs readable. MAPE is an error, so lower is better.
mape = make_scorer(mean_absolute_percentage_error,
                   greater_is_better=False,
                   needs_proba=False)
# Declare an optimiser. You can declare your own score
# as presented here or use one in
# {"neg_mean_absolute_error", "neg_mean_squared_error",
#  "neg_mean_squared_log_error", "neg_median_absolute_error", "r2"}
opt = Optimiser(scoring=mape, n_folds=3)
# Baseline score before any tuning; None is given in place of a parameter
# set (presumably selecting the default pipeline -- confirm in MLBox docs).
opt.evaluate(None, data)

# Space of hyperparameters
# The keys must respect the following syntax : "enc__param".
#   "enc" = "ne" for na encoder
#   "enc" = "ce" for categorical encoder
#   "enc" = "fs" for feature selector [OPTIONAL]
#   "enc" = "stck"+str(i) to add layer number i of meta-features [OPTIONAL]
#   "enc" = "est" for the final estimator
#   "param" : a correct associated parameter for each step.
#   Ex: "max_depth" for "enc"="est", ...
# The values must respect the syntax: {"search":strategy,"space":list}
#   "strategy" = "choice" or "uniform". Default = "choice"
#   list : a list of values to be tested if strategy="choice".
#   Else, list = [value_min, value_max].
# Available strategies for ne_numerical_strategy are either an integer, a float
#   or in {"mean", "median", "most_frequent"}
# Available strategies for ce_strategy are:
#   {"label_encoding", "dummification", "random_projection",
#    "entity_embedding"}
space = {
    'ne__numerical_strategy': {"search": "choice",
                               "space": [0]},
    'ce__strategy': {"search": "choice",
                     "space": ["label_encoding",
                               "random_projection",
                               "entity_embedding"]},
    'fs__threshold': {"search": "uniform",
                      "space": [0.01, 0.3]},
    'est__max_depth': {"search": "choice",
                       "space": [3, 4, 5, 6, 7]},
}

# Optimises hyper-parameters of the whole Pipeline with a given scoring
# function. Algorithm used to optimize : Tree Parzen Estimator.
#
# IMPORTANT : Try to avoid dependent parameters and to set one feature
# selection strategy and one estimator strategy at a time.
best = opt.optimise(space, data, max_evals=15)

# Make prediction and save the results in save folder.
prd = Predictor()
prd.fit_predict(best, data)


reading csv : train.csv ...
cleaning data ...
CPU time: 0.24164915084838867 seconds

reading csv : test.csv ...
cleaning data ...
CPU time: 0.22175860404968262 seconds

> Number of common features : 80

gathering and crunching for train and test datasets ...
reindexing for train and test datasets ...
dropping training duplicates ...
dropping constant variables on training set ...

> Number of categorical features: 43
> Number of numerical features: 37
> Number of training samples : 1460
> Number of test samples : 1459

> Top sparse features (% missing values on train set):
PoolQC         99.5
MiscFeature    96.3
Alley          93.8
Fence          80.8
FireplaceQu    47.3
dtype: float64

> Task : regression
count      1460.000000
mean     180921.195890
std       79442.502883
min       34900.000000
25%      129975.000000
50%      163000.000000
75%      214000.000000
max      755000.000000
Name: SalePrice, dtype: float64

computing drifts ...
CPU time: 1.7929043769836426 seconds

> Top 1

  +str(self.to_path)+"/joblib'. Please clear it regularly.")



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.724280495125964
VARIANCE : 0.8209723498000103 (fold 1 = -10.46930111453782, fold 2 = -10.122939172213853, fold 3 = -8.580601198626216)
CPU time: 2.705786943435669 seconds

##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'label_encoding'}                                                                         
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.07665732192813544}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimator

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.500906967102333                                       
VARIANCE : 0.820487677489814 (fold 1 = -10.14092270764549, fold 2 = -10.019102492351015, fold 3 = -8.342695701310499)  
CPU time: 1.3862693309783936 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'label_encoding'}                                                                         
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.1384936232039936}                                              
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.59557766685531                                        
VARIANCE : 0.8549508727528865 (fold 1 = -10.357255458466955, fold 2 = -10.027940282703016, fold 3 = -8.401537259395955)
CPU time: 1.3585312366485596 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'random_projection'}                                                                      
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.28321256463502886}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 6, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.766132260124303                                       
VARIANCE : 0.6519159424487638 (fold 1 = -10.39413352524559, fold 2 = -10.036684792196992, fold 3 = -8.867578462930334) 
CPU time: 1.6405017375946045 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                                                       
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.17181211739505153}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.158118604723201                                       
VARIANCE : 0.681224002220464 (fold 1 = -9.850548518918522, fold 2 = -9.391994004593741, fold 3 = -8.231813290657339)   
CPU time: 41.40543794631958 seconds                                                                                    
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'label_encoding'}                                                                         
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.19955442673455875}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.846559725012701                                       
VARIANCE : 0.5908161480901375 (fold 1 = -10.323433830694714, fold 2 = -10.202293436147498, fold 3 = -9.013951908195896)
CPU time: 1.7802155017852783 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'label_encoding'}                                                                         
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.1922256014146186}                                              
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.558240477133085                                       
VARIANCE : 0.6831917082479363 (fold 1 = -10.005089381833606, fold 2 = -10.076685897202548, fold 3 = -8.5929461523631)  
CPU time: 1.2222223281860352 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'label_encoding'}                                                                         
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.1160479240762673}                                              
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.679822696005107                                       
VARIANCE : 0.7780084360690767 (fold 1 = -10.463597528771487, fold 2 = -9.956679213077392, fold 3 = -8.619191346166442) 
CPU time: 1.6634588241577148 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                                                       
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.20597973748165221}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.4368413493484                                         
VARIANCE : 0.6840414640714645 (fold 1 = -10.02058761539212, fold 2 = -9.813024583344742, fold 3 = -8.476911849308337)  
CPU time: 3.7829365730285645 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'random_projection'}                                                                      
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.184521373018789}                                               
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 6, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.741649478514903                                       
VARIANCE : 0.9000896989279707 (fold 1 = -10.367372720925644, fold 2 = -10.388785262048012, fold 3 = -8.468790452571058)
CPU time: 1.9313654899597168 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'label_encoding'}                                                                         
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.0667406404177658}                                              
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.553105563294004                                       
VARIANCE : 0.671755554577625 (fold 1 = -10.0635098940417, fold 2 = -9.991804581104288, fold 3 = -8.604002214736026)    
CPU time: 1.5439021587371826 seconds                                                                                   
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'random_projection'}                                                                      
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.03440789949234985}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.16678475900378                                        
VARIANCE : 0.8417502919612839 (fold 1 = -10.04577078627961, fold 2 = -9.42252767935932, fold 3 = -8.03205581137241)    
CPU time: 1.559462308883667 seconds                                                                                    
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'label_encoding'}                                                                         
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.07466803465283108}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.500906967102333                                       
VARIANCE : 0.820487677489814 (fold 1 = -10.14092270764549, fold 2 = -10.019102492351015, fold 3 = -8.342695701310499)  
CPU time: 1.385406494140625 seconds                                                                                    
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                                                       
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.1240357299059106}                                              
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 4, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.201176062295884                                       
VARIANCE : 0.7573506673051876 (fold 1 = -10.009739507096562, fold 2 = -9.40520258141498, fold 3 = -8.188586098376112)  
CPU time: 2.887395143508911 seconds                                                                                    
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                                                       
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.09860865116361203}                                             
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 6, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.37965701406232                                        
VARIANCE : 0.6593278966150706 (fold 1 = -9.859159129005144, fold 2 = -9.832457907137794, fold 3 = -8.44735400604402)   
CPU time: 3.208124876022339 seconds                                                                                    
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}                                            
>>> CA ENCODER :{'strategy': 'random_projection'}                                                                      
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.051085893936701855}                                            
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type

  positive)

  positive)

  positive)



MEAN SCORE : make_scorer(<lambda>, greater_is_better=False) = -9.411401136916899                                       
VARIANCE : 0.8146007675201752 (fold 1 = -10.143070040141323, fold 2 = -9.816187913735012, fold 3 = -8.274945456874361) 
CPU time: 2.1876697540283203 seconds                                                                                   
100%|█████████████████████████████████████████████████| 15/15 [01:09<00:00,  4.61s/trial, best loss: 9.158118604723201]


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEST HYPER-PARAMETERS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

{'ce__strategy': 'entity_embedding', 'est__max_depth': 3, 'fs__threshold': 0.17181211739505153, 'ne__nu

  positive)


CPU time: 13.644017696380615 seconds

> Feature importances dumped into directory : save

predicting...
CPU time: 0.382554292678833 seconds

> Overview on predictions : 

   SalePrice_predicted
0        125630.713123
1        167386.971644
2        189774.268843
3        192955.774974
4        187209.107133
5        177214.158715
6        173414.947740
7        167214.841382
8        189884.182751
9        127975.191708

dumping predictions into directory : save ...


<mlbox.prediction.predictor.Predictor at 0x1d66b9b4668>

E0505 16:04:04.749013 15876 ioloop.py:801] Exception in callback functools.partial(<function wrap.<locals>.null_wrapper at 0x000001D678CB3950>)
Traceback (most recent call last):
  File "C:\Users\tttra\Anaconda3\envs\tenserflow\lib\site-packages\tornado\ioloop.py", line 758, in _run_callback
    ret = callback()
  File "C:\Users\tttra\Anaconda3\envs\tenserflow\lib\site-packages\tornado\stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\tttra\Anaconda3\envs\tenserflow\lib\site-packages\ipykernel\kernelbase.py", line 306, in advance_eventloop
    eventloop(self)
  File "C:\Users\tttra\Anaconda3\envs\tenserflow\lib\site-packages\ipykernel\eventloops.py", line 238, in loop_tk
    app.tk.createfilehandler(stream.getsockopt(zmq.FD), READABLE, notifier)
AttributeError: '_tkinter.tkapp' object has no attribute 'createfilehandler'
