In [0]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# %pip install -q automl-alex

In [1]:
import pandas as pd
import sklearn
import automl_alex
from automl_alex import ModelsReview, ModelsReviewClassifier

In [2]:
# Show the installed automl_alex version so the results below can be tied to a release.
print(automl_alex.__version__)

0.07.03


In [3]:
# Single seed passed to both the train/test split and the model review, for repeatable runs.
RANDOM_SEED = 42

# Classifier

## Data

In [4]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Fetch version 1 of the OpenML "credit-g" dataset; as_frame=True yields pandas objects.
dataset = fetch_openml(
    name='credit-g',
    version=1,
    as_frame=True,
)

# Replace the target labels with their integer category codes.
dataset.target = dataset.target.astype('category').cat.codes

# Preview the first five feature rows (head() defaults to 5).
dataset.data.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [5]:
# Hold out 25% of the rows for testing; the split is seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)

# Show the resulting feature-matrix shapes as the cell output.
X_train.shape, X_test.shape

((750, 20), (250, 20))

## Model

In [6]:
# Build the review object once. The generic form is kept below as a comment for
# reference; constructing it as well would only create an instance that is
# immediately discarded when `model` is rebound.
# NOTE(review): the two forms are presumed equivalent (ModelsReviewClassifier being
# the classifier shortcut) — confirm against the automl_alex documentation.
#
#   model = ModelsReview(X_train, y_train, X_test, type_of_estimator='classifier', random_state=RANDOM_SEED)
#
# or simply:
model = ModelsReviewClassifier(X_train, y_train, X_test, random_state=RANDOM_SEED)

In [7]:
# Let's see what the results are for all available models with default settings.
# The value returned by fit() is kept in `review` and shown as the cell's output.
review = model.fit()
review

0%|          | 0/10 [00:00<?, ?it/s]
 Mean Score roc_auc_score on 10 Folds: 0.7466 std: 0.041375
 10%|█         | 1/10 [00:01<00:15,  1.67s/it]
 Mean Score roc_auc_score on 10 Folds: 0.5878 std: 0.050595
 20%|██        | 2/10 [00:02<00:12,  1.53s/it]
 Mean Score roc_auc_score on 10 Folds: 0.5137 std: 0.01306
 30%|███       | 3/10 [00:03<00:08,  1.24s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7819 std: 0.053523
 40%|████      | 4/10 [00:03<00:05,  1.01it/s]
 Mean Score roc_auc_score on 10 Folds: 0.4979 std: 0.02354
 50%|█████     | 5/10 [00:03<00:03,  1.38it/s]
 Mean Score roc_auc_score on 10 Folds: 0.7832 std: 0.058924
 60%|██████    | 6/10 [00:08<00:07,  1.90s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7546 std: 0.054554
 70%|███████   | 7/10 [00:12<00:07,  2.43s/it]
 Mean Score roc_auc_score on 10 Folds: 0.6463 std: 0.0539
 80%|████████  | 8/10 [00:13<00:04,  2.07s/it]
 Mean Score roc_auc_score on 10 Folds: 0.7948 std: 0.063596
 90%|█████████ | 9/10 [00:16<00:02,  2.38s/it]
 Mea

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,cv_folds
0,0.705225,0.7466,0.041375,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...",{'early_stopping': False},OneHotEncoder,10
1,0.537205,0.5878,0.050595,KNeighbors,{'n_jobs': -1},{},OneHotEncoder,10
2,0.50064,0.5137,0.01306,LinearSVM,"{'verbose': 0, 'random_state': 42}",{},OneHotEncoder,10
3,0.728377,0.7819,0.053523,LinearModel,{'verbose': 0},{},OneHotEncoder,10
4,0.47436,0.4979,0.02354,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...",{},OneHotEncoder,10
5,0.724276,0.7832,0.058924,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -1}",{},OneHotEncoder,10
6,0.700046,0.7546,0.054554,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -1}",{},OneHotEncoder,10
7,0.5924,0.6463,0.0539,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 100,...",{'early_stopping': False},OneHotEncoder,10
8,0.731204,0.7948,0.063596,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},OneHotEncoder,10
9,0.383913,0.5063,0.122387,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...",{},OneHotEncoder,10


In [8]:
# Let's optimize all the models and see what the result will be. This takes a lot of
# time: the recorded run below needed about 24 minutes for the 10 models.
# Note that `review` is rebound here, replacing the result of model.fit().
# NOTE(review): `timeout` is presumably a time budget in seconds — confirm against the automl_alex docs.
review = model.opt(timeout=1000, verbose=1)

0%|          | 0/10 [00:00<?, ?it/s]
LightGBM Best Score:0.782
 10%|█         | 1/10 [01:04<09:36, 64.05s/it]
KNeighbors Best Score:0.5764
 20%|██        | 2/10 [02:29<09:24, 70.54s/it]
LinearSVM Best Score:0.6656
 30%|███       | 3/10 [02:51<06:30, 55.78s/it]
LinearModel Best Score:0.7817
 40%|████      | 4/10 [05:22<08:26, 84.45s/it]
SGD Best Score:0.6156
 50%|█████     | 5/10 [08:23<09:27, 113.48s/it]
RandomForest Best Score:0.8311
 60%|██████    | 6/10 [09:30<06:37, 99.40s/it]
ExtraTrees Best Score:0.7957
 70%|███████   | 7/10 [13:39<07:12, 144.26s/it]
XGBoost Best Score:0.7039
 80%|████████  | 8/10 [18:00<05:58, 179.40s/it]
CatBoost Best Score:0.8106
 90%|█████████ | 9/10 [21:53<03:15, 195.59s/it]
MLP Best Score:0.6048
100%|██████████| 10/10 [24:13<00:00, 145.33s/it]


In [9]:
# Display the per-model configuration table held on `model`.
# NOTE(review): presumably the best configuration found for each model type by opt() — confirm.
model.top1_models_cfgs

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,cv_folds
0,0.782,0.782,0.0,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...",{'early_stopping': False},OneHotEncoder,5
1,0.539,0.5764,0.037354,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 43, 'weights': '...",{},OneHotEncoder,5
2,0.6001,0.6656,0.065494,LinearSVM,"{'verbose': 0, 'random_state': 42, 'tol': 0.00...",{},OneHotEncoder,10
3,0.7305,0.7817,0.051193,LinearModel,"{'verbose': 0, 'fit_intercept': True, 'C': 0.0...",{},OneHotEncoder,10
4,0.5452,0.6156,0.070448,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...",{},OneHotEncoder,10
5,0.8311,0.8311,0.0,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},OneHotEncoder,5
6,0.7433,0.7957,0.052381,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},OneHotEncoder,5
7,0.6106,0.7039,0.093301,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 0, '...",{'early_stopping': False},OneHotEncoder,10
8,0.7649,0.8106,0.045714,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},OneHotEncoder,5
9,0.5114,0.6048,0.093434,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...",{},OneHotEncoder,10
