In [0]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
##!pip install automl-alex

In [1]:
import pandas as pd
import sklearn
import automl_alex
from automl_alex import ModelsReview, ModelsReviewClassifier

In [2]:
# Print the installed automl_alex version so the outputs below can be matched to a release.
print(automl_alex.__version__)

0.07.17


In [3]:
# Seed passed as random_state to train_test_split and to both model constructors below.
RANDOM_SEED = 42

# Classifier

## Data

In [4]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
# Fetch version 1 of the 'credit-g' dataset from OpenML; as_frame=True asks for pandas objects.
dataset = fetch_openml(name='credit-g', version=1, as_frame=True)
# Replace the target with its integer category codes (this overwrites dataset.target).
dataset.target = dataset.target.astype('category').cat.codes
# Preview the first five rows of the feature table.
dataset.data.head(5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [5]:
# Hold out 25% of the rows as a test set; the split is seeded with RANDOM_SEED.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)
# Display the shapes of the two feature partitions as the cell output.
X_train.shape, X_test.shape

((750, 20), (250, 20))

## Model

In [6]:
# Generic entry point: the estimator type is chosen through type_of_estimator.
model = ModelsReview(
    X_train,
    y_train,
    X_test,
    type_of_estimator='classifier',
    random_state=RANDOM_SEED,
)

# Or simply use the classifier-specific class. This assignment rebinds `model`,
# so the cells below work with the ModelsReviewClassifier instance.
model = ModelsReviewClassifier(
    X_train,
    y_train,
    X_test,
    random_state=RANDOM_SEED,
)

In [7]:
# Evaluate all available models with their default settings. The value returned
# by fit() is kept in `review` and displayed as the cell output (one row per model).
review = model.fit()
review

10%|█         | 1/10 [00:03<00:31,  3.50s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7513 std: 0.050257
 20%|██        | 2/10 [00:03<00:20,  2.56s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6947 std: 0.069592
 30%|███       | 3/10 [00:06<00:18,  2.65s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6663 std: 0.038932
 40%|████      | 4/10 [00:07<00:12,  2.14s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7541 std: 0.050257
 50%|█████     | 5/10 [00:08<00:08,  1.66s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6055 std: 0.046167
 60%|██████    | 6/10 [00:12<00:09,  2.31s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7785 std: 0.048161
 70%|███████   | 7/10 [00:14<00:07,  2.44s/it]
 Mean Score roc_auc_score on 10 Folds: 0.763 std: 0.051063
 80%|████████  | 8/10 [00:18<00:05,  2.88s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6648 std: 0.052364
 90%|█████████ | 9/10 [00:23<00:03,  3.59s/it]
 Mean Score roc_auc_score on 10 Folds: 0.8063 std: 0.044474
100%|██████████| 10/10 [00:27<00:00,  2

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,columns,cv_folds
0,0.701043,0.7513,0.050257,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...",{'early_stopping': False},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
1,0.625108,0.6947,0.069592,KNeighbors,{'n_jobs': -1},{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
2,0.627368,0.6663,0.038932,LinearSVM,"{'verbose': 0, 'random_state': 42}",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
3,0.703843,0.7541,0.050257,LinearModel,{'verbose': 0},{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
4,0.559333,0.6055,0.046167,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
5,0.730339,0.7785,0.048161,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -1}",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
6,0.711937,0.763,0.051063,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -1}",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
7,0.612436,0.6648,0.052364,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 100,...",{'early_stopping': False},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
8,0.761826,0.8063,0.044474,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
9,0.690393,0.7497,0.059307,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10


In [8]:
# Optimize all the models and see how the scores change. This takes a long time:
# the run captured below needed about 31 minutes for the 10 models.
# NOTE(review): timeout=1000 is presumably a time budget for the search; confirm
# its unit and scope (per model vs. total) against the automl_alex documentation.
# The result is bound to `review`, replacing the value returned by model.fit().
review = model.opt(timeout=1000, verbose=1)

10%|█         | 1/10 [01:43<15:34, 103.83s/it]
 LightGBM  Best Score:  0.7907
 20%|██        | 2/10 [02:31<11:36, 87.05s/it]
 KNeighbors  Best Score:  0.8105
 30%|███       | 3/10 [04:18<10:51, 93.07s/it]
 LinearSVM  Best Score:  0.7025
 40%|████      | 4/10 [06:39<10:44, 107.34s/it]
 LinearModel  Best Score:  0.7527
 50%|█████     | 5/10 [07:32<07:34, 90.96s/it]
 SGD  Best Score:  0.7279
 60%|██████    | 6/10 [11:16<08:43, 130.92s/it]
 RandomForest  Best Score:  0.8036
 70%|███████   | 7/10 [15:40<08:32, 170.84s/it]
 ExtraTrees  Best Score:  0.8035
 80%|████████  | 8/10 [21:06<07:14, 217.34s/it]
 XGBoost  Best Score:  0.7136
 90%|█████████ | 9/10 [26:58<04:17, 257.74s/it]
 CatBoost  Best Score:  0.8041
100%|██████████| 10/10 [31:20<00:00, 188.03s/it]
 MLP  Best Score:  0.7854



In [9]:
# Display the per-model configuration table stored on the model after opt().
# NOTE(review): presumably the best (top-1) configuration found for each model; confirm.
model.top1_models_cfgs

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoders,columns,cv_folds
0,0.7502,0.7907,0.040533,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...",{'early_stopping': False},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, OneHotEncoder_p...",10
1,0.7629,0.8105,0.047637,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 29, 'weights': '...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, num_dependents, OneH...",10
2,0.6894,0.7025,0.013116,LinearSVM,"{'verbose': 0, 'random_state': 42, 'tol': 0.00...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[age, num_dependents, OneHotEncoder_property_m...",10
3,0.7058,0.7527,0.046933,LinearModel,"{'verbose': 0, 'fit_intercept': False, 'C': 88...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[age, OneHotEncoder_property_magnitude_4, OneH...",10
4,0.6712,0.7279,0.056703,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[age, OneHotEncoder_property_magnitude_1, OneH...",10
5,0.754,0.8036,0.049615,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, age, num_dependents, OneHotEncoder_...",10
6,0.7576,0.8035,0.045896,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, age, num_dependents, OneHotEncoder_...",10
7,0.6724,0.7136,0.041203,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 0, '...",{'early_stopping': False},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, age, OneHotEncoder_property_magnitu...",10
8,0.7932,0.8041,0.010855,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, num_dependents, OneHotEncoder_prope...",10
9,0.7402,0.7854,0.045211,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[age, OneHotEncoder_property_magnitude_1, OneH...",10
