In [1]:
#Classification
from mlbox.preprocessing import Reader
from mlbox.preprocessing import Drift_thresholder
from mlbox.optimisation import Optimiser
from mlbox.prediction import Predictor
import csv 


# Paths to the train set and the test set (relative to this notebook's
# working directory).
paths = ["../../Data/titanic/processed/train.csv","../../Data/titanic/processed/x_test.csv"]
# Name of the feature to predict.
# This column should only be present in the train set.
target_name = "Survived"

# Reading and cleaning all files.
# Declare a reader for comma-separated csv files.
rd = Reader(sep=',')
# Returns a dictionary containing three entries:
#   data["train"]  contains the training samples without the target column
#   data["test"]   contains the test samples without the target column
#   data["target"] contains the target column for the training samples
data = rd.train_test_split(paths, target_name)

# Drift step: the logged output shows per-feature train/test drift scores being
# computed here ("computing drifts ..." / "Top 10 drifts").
# NOTE(review): presumably features above the thresholder's default drift
# threshold are dropped from both sets - confirm against the MLBox docs.
# `data` is rebound to the transformed dictionary and reused by later cells.
dft = Drift_thresholder()
data = dft.fit_transform(data)

# Tuning
# Declare an optimiser. Scoring possibilities for classification lie in:
# {"accuracy", "roc_auc", "f1", "neg_log_loss", "precision", "recall"}
# Here: accuracy, cross-validated on 3 folds.
opt = Optimiser(scoring='accuracy', n_folds=3)
# Baseline evaluation with no parameter overrides (params=None); the logged
# output reports the mean accuracy and the per-fold scores for this run.
opt.evaluate(None, data)

# Space of hyper-parameters.
# The keys must respect the following syntax: "enc__param".
#   "enc" = "ne" for the NA encoder
#   "enc" = "ce" for the categorical encoder
#   "enc" = "fs" for the feature selector [OPTIONAL]
#   "enc" = "stck"+str(i) to add layer number i of meta-features [OPTIONAL]
#   "enc" = "est" for the final estimator
#   "param": a valid parameter of the corresponding step,
#   e.g. "max_depth" for "enc"="est", ...
# The values must respect the syntax: {"search": strategy, "space": list}
#   "strategy" = "choice" or "uniform". Default = "choice"
#   list: the values to be tested if strategy="choice",
#   else list = [value_min, value_max].
# Available strategies for ne__numerical_strategy are either an integer, a float
#   or one of {"mean", "median", "most_frequent"}
# Available strategies for ce__strategy are:
#   {"label_encoding", "dummification", "random_projection", "entity_embedding"}
#
# What this particular space searches:
#   - ne__numerical_strategy: a single choice (0), i.e. effectively fixed
#   - ce__strategy: one of three categorical encodings
#   - fs__threshold: sampled uniformly in [0.01, 0.3]
#   - est__max_depth: one of 3, 4, 5, 6, 7
# NOTE(review): the reader output reports 0 categorical features, and in the
# logged trials the mean score appears to vary only with est__max_depth -
# ce__strategy (and possibly fs__threshold) may have no effect here; confirm
# before spending search budget on them.
space = {'ne__numerical_strategy': {"search": "choice", "space": [0]},
         'ce__strategy': {"search": "choice",
                          "space": ["label_encoding",
                                    "random_projection",
                                    "entity_embedding"]},
         'fs__threshold': {"search": "uniform",
                           "space": [0.01, 0.3]},
         'est__max_depth': {"search": "choice",
                            "space": [3, 4, 5, 6, 7]}

         }


#



reading csv : train.csv ...
cleaning data ...
CPU time: 0.14305758476257324 seconds

reading csv : x_test.csv ...
cleaning data ...
CPU time: 0.05038189888000488 seconds

> Number of common features : 17

gathering and crunching for train and test datasets ...
reindexing for train and test datasets ...
dropping training duplicates ...
dropping constant variables on training set ...

> Number of categorical features: 0
> Number of numerical features: 17
> Number of training samples : 596
> Number of test samples : 295

> You have no missing values on train set...

> Task : classification
0.0    355
1.0    241
Name: Survived, dtype: int64

encoding target ...

computing drifts ...
CPU time: 0.4463634490966797 seconds

> Top 10 drifts

('Age', 0.08351279474242057)
('SibSp', 0.05880442146022191)
('Fare', 0.057527600192988526)
('Sex_male', 0.05660923901575865)
('Title_Mr', 0.04619353636611545)
('PassengerId', 0.045880809243704856)
('Title_Master', 0.03666179876630493)
('Embarked_C', 0.0271

  +str(self.to_path)+"/joblib'. Please clear it regularly.")



MEAN SCORE : accuracy = 0.7852139485305315
VARIANCE : 0.020466045505150618 (fold 1 = 0.8140703517587939, fold 2 = 0.7688442211055276, fold 3 = 0.7727272727272727)
CPU time: 1.145263910293579 seconds



In [2]:
# Search the hyper-parameter `space` for the whole pipeline, using the scoring
# function configured on `opt`. Search algorithm: Tree Parzen Estimator.
# The budget is 40 evaluations (matches the 0/40 trial counter in the output).
best = opt.optimise(space, data, max_evals=40)

# Re-evaluate the pipeline with the selected parameters and report the score.
final_score = opt.evaluate(best, data)
print("Final results : ", final_score)

##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}
>>> CA ENCODER :{'strategy': 'entity_embedding'}      
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.2634696989628652}
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 4, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': None, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'silent': True, 'subsample': 0.9, 'subsample_for_bin': 200000, 'subsample_freq': 0, 'nthread': -1, 'seed': 0}
  0%|          | 0/40 [00:00<?, ?trial/s, best loss=?]




MEAN SCORE : accuracy = 0.7868551511767592            
VARIANCE : 0.047673159561935845 (fold 1 = 0.8542713567839196, fold 2 = 0.7537688442211056, fold 3 = 0.7525252525252525)
CPU time: 1.8632047176361084 seconds                  
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'label_encoding'}                                   
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.23419186577138557}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': None, 'reg_alpha': 0.0, 'reg_lambda': 0.0, 'si




MEAN SCORE : accuracy = 0.8086476151802785
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.8350939750671387 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'label_encoding'}                                   
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.2649965268974138}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': None, 'reg_alpha': 0.0, 'reg_lam




MEAN SCORE : accuracy = 0.7835135272321202                                       
VARIANCE : 0.03582359182955599 (fold 1 = 0.8341708542713567, fold 2 = 0.7587939698492462, fold 3 = 0.7575757575757576)
CPU time: 1.0156376361846924 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'label_encoding'}                                   
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.08186461819097045}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_s




MEAN SCORE : accuracy = 0.7751383178518857                                       
VARIANCE : 0.027572940786441902 (fold 1 = 0.8140703517587939, fold 2 = 0.7537688442211056, fold 3 = 0.7575757575757576)
CPU time: 1.2702183723449707 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'random_projection'}                                
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.13020289101493723}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_




MEAN SCORE : accuracy = 0.7751383178518857                                       
VARIANCE : 0.027572940786441902 (fold 1 = 0.8140703517587939, fold 2 = 0.7537688442211056, fold 3 = 0.7575757575757576)
CPU time: 1.3422694206237793 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'random_projection'}                                
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.09632553721028365}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_




MEAN SCORE : accuracy = 0.8086476151802785                                       
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.8913254737854004 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'random_projection'}                                
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.2675048530440292}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 6, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_s




MEAN SCORE : accuracy = 0.7835304468470298                                       
VARIANCE : 0.03275080694905349 (fold 1 = 0.8291457286432161, fold 2 = 0.7537688442211056, fold 3 = 0.7676767676767676)
CPU time: 1.2664132118225098 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'random_projection'}                                
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.09967492440005199}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_s




MEAN SCORE : accuracy = 0.8086476151802785                                       
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.8189256191253662 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'label_encoding'}                                   
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.20737813344518016}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 6, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_s




MEAN SCORE : accuracy = 0.7835304468470298                                       
VARIANCE : 0.03275080694905349 (fold 1 = 0.8291457286432161, fold 2 = 0.7537688442211056, fold 3 = 0.7676767676767676)
CPU time: 1.2562165260314941 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}      
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                 
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.03384116980850445}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 4, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_s




MEAN SCORE : accuracy = 0.7868551511767592                                       
VARIANCE : 0.047673159561935845 (fold 1 = 0.8542713567839196, fold 2 = 0.7537688442211056, fold 3 = 0.7525252525252525)
CPU time: 1.7143354415893555 seconds                                             
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.019309171379853912}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'rand




MEAN SCORE : accuracy = 0.8086476151802785
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.799170732498169 seconds                                               
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                  
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.26225828827273096}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 6, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': None, 'reg_alpha': 0.0, 'reg




MEAN SCORE : accuracy = 0.7835304468470298                                        
VARIANCE : 0.03275080694905349 (fold 1 = 0.8291457286432161, fold 2 = 0.7537688442211056, fold 3 = 0.7676767676767676)
CPU time: 1.6052095890045166 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                  
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.2494958717951565}         
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 4, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7868551511767592                                        
VARIANCE : 0.047673159561935845 (fold 1 = 0.8542713567839196, fold 2 = 0.7537688442211056, fold 3 = 0.7525252525252525)
CPU time: 1.429107666015625 seconds                                               
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.0630208461318841}         
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ra




MEAN SCORE : accuracy = 0.7835135272321202                                        
VARIANCE : 0.03582359182955599 (fold 1 = 0.8341708542713567, fold 2 = 0.7587939698492462, fold 3 = 0.7575757575757576)
CPU time: 1.1675705909729004 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.26119107524159674}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7751383178518857                                        
VARIANCE : 0.027572940786441902 (fold 1 = 0.8140703517587939, fold 2 = 0.7537688442211056, fold 3 = 0.7575757575757576)
CPU time: 1.4048516750335693 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'random_projection'}                                 
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.025231240582734786}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 4, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ra




MEAN SCORE : accuracy = 0.7868551511767592                                        
VARIANCE : 0.047673159561935845 (fold 1 = 0.8542713567839196, fold 2 = 0.7537688442211056, fold 3 = 0.7525252525252525)
CPU time: 0.9900834560394287 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.17214876191556502}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 6, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ra




MEAN SCORE : accuracy = 0.7835304468470298                                        
VARIANCE : 0.03275080694905349 (fold 1 = 0.8291457286432161, fold 2 = 0.7537688442211056, fold 3 = 0.7676767676767676)
CPU time: 1.2119967937469482 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'random_projection'}                                 
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.2999682814499579}         
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7835135272321202                                        
VARIANCE : 0.03582359182955599 (fold 1 = 0.8341708542713567, fold 2 = 0.7587939698492462, fold 3 = 0.7575757575757576)
CPU time: 1.1336381435394287 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.09877022475374662}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 6, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7835304468470298                                        
VARIANCE : 0.03275080694905349 (fold 1 = 0.8291457286432161, fold 2 = 0.7537688442211056, fold 3 = 0.7676767676767676)
CPU time: 1.1559176445007324 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.2525628106306941}         
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7751383178518857                                        
VARIANCE : 0.027572940786441902 (fold 1 = 0.8140703517587939, fold 2 = 0.7537688442211056, fold 3 = 0.7575757575757576)
CPU time: 1.3218951225280762 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.195305112953771}          
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ra




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 1.0931386947631836 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'random_projection'}                                 
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.010363946352361032}       
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.8693196773529053 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.18071356204936015}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.9469726085662842 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.21870440749185832}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.937861442565918 seconds                                               
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.22443571751879623}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.7981162071228027 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.29603788656479274}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': None, 'reg_alpha': 0.0, 'reg




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.9136815071105957 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.2973589103876797}         
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.854882001876831 seconds                                               
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                  
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.28959508866826644}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'random_state': None, 'reg_alpha': 0.0, 'reg




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 1.529015064239502 seconds                                               
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                  
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.1396409502681157}         
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 1.2107043266296387 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                  
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.14303433748796257}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 1.3029811382293701 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                  
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.15260503140601106}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7835135272321202                                        
VARIANCE : 0.03582359182955599 (fold 1 = 0.8341708542713567, fold 2 = 0.7587939698492462, fold 3 = 0.7575757575757576)
CPU time: 1.5359423160552979 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                  
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.12583726831581166}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 4, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7868551511767592                                        
VARIANCE : 0.047673159561935845 (fold 1 = 0.8542713567839196, fold 2 = 0.7537688442211056, fold 3 = 0.7525252525252525)
CPU time: 1.541126012802124 seconds                                               
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'entity_embedding'}                                  
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.23040968494244957}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 5, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ra




MEAN SCORE : accuracy = 0.7835135272321202                                        
VARIANCE : 0.03582359182955599 (fold 1 = 0.8341708542713567, fold 2 = 0.7587939698492462, fold 3 = 0.7575757575757576)
CPU time: 1.460597038269043 seconds                                               
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.19469461238573463}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7751383178518857                                        
VARIANCE : 0.027572940786441902 (fold 1 = 0.8140703517587939, fold 2 = 0.7537688442211056, fold 3 = 0.7575757575757576)
CPU time: 1.3950221538543701 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.1763600986589709}         
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ra




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 1.1406207084655762 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.11232803677850924}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 1.0134472846984863 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'random_projection'}                                 
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.05017519897540412}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 1.0309255123138428 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'random_projection'}                                 
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.05298954402056667}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 7, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7751383178518857                                        
VARIANCE : 0.027572940786441902 (fold 1 = 0.8140703517587939, fold 2 = 0.7537688442211056, fold 3 = 0.7575757575757576)
CPU time: 1.3507359027862549 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.08182506160892669}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 3, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ra




MEAN SCORE : accuracy = 0.8086476151802785                                        
VARIANCE : 0.03960086924490679 (fold 1 = 0.8592964824120602, fold 2 = 0.8040201005025126, fold 3 = 0.7626262626262627)
CPU time: 0.7881207466125488 seconds                                              
##################################################### testing hyper-parameters... #####################################################
>>> NA ENCODER :{'numerical_strategy': 0, 'categorical_strategy': '<NULL>'}       
>>> CA ENCODER :{'strategy': 'label_encoding'}                                    
>>> FEATURE SELECTOR :{'strategy': 'l1', 'threshold': 0.07478772329720093}        
>>> ESTIMATOR :{'strategy': 'LightGBM', 'max_depth': 4, 'boosting_type': 'gbdt', 'class_weight': None, 'colsample_bytree': 0.8, 'importance_type': 'split', 'learning_rate': 0.05, 'min_child_samples': 20, 'min_child_weight': 0.001, 'min_split_gain': 0.0, 'n_estimators': 500, 'n_jobs': -1, 'num_leaves': 31, 'objective': None, 'ran




MEAN SCORE : accuracy = 0.7868551511767592                                        
VARIANCE : 0.047673159561935845 (fold 1 = 0.8542713567839196, fold 2 = 0.7537688442211056, fold 3 = 0.7525252525252525)
CPU time: 0.9580309391021729 seconds                                              
100%|██████████| 40/40 [00:49<00:00,  1.23s/trial, best loss: -0.8086476151802785]


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BEST HYPER-PARAMETERS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

{'ce__strategy': 'label_encoding', 'est__max_depth': 3, 'fs__threshold': 0.23419186577138557, 'ne__numerical_strategy': 0}

##################################################### testing hyper-parameters... ########



In [3]:
# Fit the whole pipeline with the selected hyper-parameters and predict on the test set.
# Explicit imports replace the former `from mlbox.prediction import *`, which pulled
# every public name of that package into the notebook namespace.
# NOTE(review): the later cell that reads the predictions CSV uses `pd`; presumably it
# only resolved through the wildcard import, so pandas is imported explicitly here.
import pandas as pd
from mlbox.prediction import Predictor

# `best` is defined by an earlier cell (presumably the optimiser's best hyper-parameter
# dict reported in the log above); `data` is the train/test dictionary built in cell 1.
pred = Predictor()
# The run log shows this call dumping feature importances and predictions into `save`.
pred.fit_predict(best, data)


fitting the pipeline ...
CPU time: 0.26691675186157227 seconds

> Feature importances dumped into directory : save

predicting ...
CPU time: 0.016614913940429688 seconds

> Overview on predictions : 

        0.0       1.0  Survived_predicted
0  0.623678  0.376322                   0
1  0.014289  0.985711                   1
2  0.977290  0.022710                   0
3  0.619419  0.380581                   0
4  0.163809  0.836191                   1
5  0.979954  0.020046                   0
6  0.191114  0.808886                   1
7  0.934425  0.065575                   0
8  0.886010  0.113990                   0
9  0.904934  0.095066                   0

dumping predictions into directory : save ...


<mlbox.prediction.predictor.Predictor at 0x7fc6d7f39d90>

In [5]:
# Load the predictions dumped into `save/` and the held-out ground truth.
# pandas is imported here explicitly so this cell does not depend on `pd` having
# leaked into the namespace from another cell (it is not imported by the first cell).
import pandas as pd

# Keep only the hard class labels; the probability columns in the file are discarded.
predictions = pd.read_csv("save/Survived_predictions.csv").Survived_predicted
# NOTE(review): assumes y_test.csv rows are aligned with x_test.csv rows — TODO confirm.
y_test = pd.read_csv("../../Data/titanic/processed/y_test.csv")

In [6]:
# Plot the ROC curve of the test-set predictions and export it as a PDF.
from sklearn.metrics import roc_curve, auc
import matplotlib.pyplot as plt

# NOTE(review): `predictions` holds hard 0/1 labels (the `Survived_predicted` column),
# so the curve has a single operating point between (0, 0) and (1, 1). Passing the
# class-1 probability column instead would give a full ROC curve — confirm intent.
fpr, tpr, _ = roc_curve(y_test, predictions)
roc_auc = auc(fpr, tpr)

lw = 2
fig, ax = plt.subplots()
ax.plot(fpr, tpr, color='darkorange',
        lw=lw, label='ROC curve (area = %0.2f)' % roc_auc)
# Diagonal reference line.
ax.plot([0, 1], [0, 1], color='navy', lw=lw, linestyle='--')
ax.set_xlim([0.0, 1.0])
ax.set_ylim([0.0, 1.05])
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.set_title('Receiver operating characteristic example')
ax.legend(loc="lower right")

# Save BEFORE showing: once plt.show() has rendered the figure it is released, and a
# subsequent plt.savefig() would write an empty canvas.
fig.savefig("ML_Box_Titanic_ROC.pdf")
plt.show()