In [1]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
#!pip install -U -q automl-alex

In [2]:
import pandas as pd
import sklearn
import automl_alex
from automl_alex import ModelsReview, ModelsReviewClassifier

In [3]:
# Print the installed automl_alex version so the recorded outputs can be tied to a release.
print(automl_alex.__version__)

0.11.24


In [4]:
# Single seed passed as `random_state` to the train/test split and to the model review below.
RANDOM_SEED = 42

# Classifier

## Data

In [5]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Download version 1 of the OpenML 'credit-g' dataset as pandas objects.
dataset = fetch_openml(
    name='credit-g',
    version=1,
    as_frame=True,
)

# Replace the class labels with their integer category codes.
dataset.target = dataset.target.astype('category').cat.codes

# Preview the first five feature rows (head() defaults to 5).
dataset.data.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [6]:
# Hold out 25% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)

# Show the (rows, columns) shape of each feature split.
X_train.shape, X_test.shape

((750, 20), (250, 20))

## Model

In [7]:
# Build the model review once. Previously `model` was constructed with the generic
# ModelsReview and then immediately rebound to ModelsReviewClassifier, so the first
# object was created only to be discarded.
#
# The generic form, kept here for reference, is:
#   model = ModelsReview(X_train, y_train, X_test, type_of_estimator='classifier', random_state=RANDOM_SEED)
#
# or simply, using the classifier-specific class:
model = ModelsReviewClassifier(X_train, y_train, X_test, random_state=RANDOM_SEED)

In [8]:
# Let's see what the results are for all available models with default settings.
# The returned per-model results table is kept in `review`.
review = model.fit()

 10%|█         | 1/10 [00:28<04:14, 28.31s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7503 std: 0.04765
 20%|██        | 2/10 [00:28<02:39, 19.93s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6936 std: 0.078554
 30%|███       | 3/10 [00:33<01:47, 15.42s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6678 std: 0.037481
 40%|████      | 4/10 [00:35<01:08, 11.36s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7592 std: 0.047259
 50%|█████     | 5/10 [00:36<00:40,  8.12s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6429 std: 0.050253
 60%|██████    | 6/10 [00:39<00:26,  6.60s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7661 std: 0.053527
 70%|███████   | 7/10 [00:41<00:15,  5.32s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7595 std: 0.054004
 80%|████████  | 8/10 [00:55<00:15,  7.85s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6543 std: 0.04842
 90%|█████████ | 9/10 [01:02<00:07,  7.73s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7974 std: 0.048408
100%|██████████| 10/10 [01:08<00:00,  6

In [9]:
# Display the results table returned by model.fit().
review

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,columns,cv_folds
0,0.70265,0.7503,0.04765,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...",{'early_stopping': False},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
1,0.615046,0.6936,0.078554,KNeighbors,{'n_jobs': -1},{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
2,0.630319,0.6678,0.037481,LinearSVM,"{'verbose': 0, 'random_state': 42}",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
3,0.711941,0.7592,0.047259,LinearModel,{},{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
4,0.592647,0.6429,0.050253,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
5,0.712573,0.7661,0.053527,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -1}",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
6,0.705496,0.7595,0.054004,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -1}",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
7,0.60588,0.6543,0.04842,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 100,...",{'early_stopping': False},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
8,0.748992,0.7974,0.048408,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10
9,0.700609,0.7603,0.059691,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, num_dependents,...",10


In [10]:
# Let's optimize all the models and see what the result will be.
# This takes a long time: the recorded run below needed about 39 minutes for the 10 models.
# NOTE(review): the unit and scope of `timeout` are not visible here — presumably seconds;
# confirm against the automl_alex documentation.
# Note that this rebinds `review`, replacing the table returned by model.fit().
review = model.opt(timeout=1000, verbose=1)

  0%|          | 0/10 [00:00<?, ?it/s][32m[I 2020-11-23 12:43:33,722][0m A new study created in memory with name: no-name-1c94f97d-f258-44c9-83ab-03080766a5da[0m
 10%|█         | 1/10 [01:47<16:11, 107.94s/it]
 LightGBM  Best Score:  0.7644
 20%|██        | 2/10 [02:41<12:12, 91.62s/it] 
 KNeighbors  Best Score:  0.8276
 30%|███       | 3/10 [04:57<12:14, 104.90s/it]
 LinearSVM  Best Score:  0.7076
 40%|████      | 4/10 [07:41<12:16, 122.68s/it]
 LinearModel  Best Score:  0.761
 50%|█████     | 5/10 [10:09<10:50, 130.18s/it]
 SGD  Best Score:  0.735
 60%|██████    | 6/10 [14:02<10:44, 161.04s/it]
 RandomForest  Best Score:  0.7945
 70%|███████   | 7/10 [18:44<09:51, 197.27s/it]
 ExtraTrees  Best Score:  0.7902
 80%|████████  | 8/10 [24:28<08:02, 241.46s/it]
 XGBoost  Best Score:  0.6848
 90%|█████████ | 9/10 [31:12<04:50, 290.13s/it]
 CatBoost  Best Score:  0.8184
100%|██████████| 10/10 [39:12<00:00, 235.29s/it]
 MLP  Best Score:  0.7674



In [11]:
# Display the stored configurations after optimization: one row per model, whose
# model_score matches the "Best Score" logged above.
# NOTE(review): presumably the single best configuration found for each model — confirm.
model.top1_models_cfgs

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoders,columns,cv_folds
0,0.7261,0.7644,0.038315,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...",{'early_stopping': False},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(credit_amount, OneHotEncoder_savings_status_1...",10
1,0.7968,0.8276,0.03076,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 46, 'weights': '...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, OneHotEncoder_savings_status_4, One...",10
2,0.6843,0.7076,0.02332,LinearSVM,"{'verbose': 0, 'random_state': 42, 'tol': 0.00...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, credit_amount, num_dependents, OneH...",10
3,0.7368,0.761,0.024152,LinearModel,"{'fit_intercept': True, 'C': 57.59620969341546...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(OneHotEncoder_savings_status_2, OneHotEncoder...",10
4,0.7208,0.735,0.014166,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(num_dependents, OneHotEncoder_savings_status_...",10
5,0.7535,0.7945,0.041027,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
6,0.7508,0.7902,0.039376,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(credit_amount, age, OneHotEncoder_savings_sta...",10
7,0.6488,0.6848,0.035964,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 0, '...",{'early_stopping': False},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, credit_amount, age, OneHotEncoder_s...",10
8,0.7864,0.8184,0.032003,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, credit_amount, OneHotEncoder_saving...",10
9,0.7567,0.7674,0.010686,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(credit_amount, OneHotEncoder_savings_status_3...",10
