In [0]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
# !pip install automl-alex

In [1]:
import pandas as pd
import sklearn
import automl_alex
from automl_alex import ModelsReview, ModelsReviewClassifier

In [2]:
# Show which automl_alex version this notebook was run with.
print(automl_alex.__version__)

0.05.15


In [3]:
# Single seed passed as `random_state` to the train/test split and to the model constructors.
RANDOM_SEED = 42

# Classifier

## Data

In [4]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Download version 1 of the 'credit-g' dataset from OpenML as pandas objects.
dataset = fetch_openml(name='credit-g', version=1, as_frame=True)

# Replace the class labels with their integer category codes; the encoded
# target is stored back on `dataset` because the split cell reads it from there.
target_as_category = dataset.target.astype('category')
dataset.target = target_as_category.cat.codes

# Preview the first five feature rows.
dataset.data.head(5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [5]:
# Hold out 25% of the rows as a test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)

# Display the shapes of both feature frames as a sanity check.
(X_train.shape, X_test.shape)

((750, 20), (250, 20))

## Model

In [6]:
# Generic entry point: the estimator type is selected through `type_of_estimator`.
model = ModelsReview(
    X_train, y_train, X_test,
    type_of_estimator='classifier',
    random_state=RANDOM_SEED,
)

# Or simply use the classifier-specific class. This rebinds `model`,
# so the instance created above is discarded.
model = ModelsReviewClassifier(
    X_train, y_train, X_test,
    random_state=RANDOM_SEED,
)

In [7]:
# Let's see what the results are for all available models with default settings.
# `fit()` returns the review table; ending the cell with `review` displays it.
review = model.fit()
review

100%|██████████| 10/10 [00:53<00:00,  5.35s/it]


Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,target_encoder
0,0.717053,0.7563,0.039247,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...","{'need_norm_data': False, 'early_stopping': Fa...",HelmertEncoder,JamesSteinEncoder
1,0.628857,0.7026,0.073743,KNeighbors,{'n_jobs': -1},"{'need_norm_data': True, 'scaler_name': 'Stand...",HelmertEncoder,JamesSteinEncoder
2,0.612375,0.676,0.063625,LinearSVM,"{'verbose': 0, 'random_state': 42}","{'need_norm_data': True, 'norm_data': True, 's...",HelmertEncoder,JamesSteinEncoder
3,0.703071,0.7584,0.055329,LinearModel,{'verbose': 0},"{'need_norm_data': True, 'scaler_name': 'Stand...",HelmertEncoder,JamesSteinEncoder
4,0.600601,0.6511,0.050499,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...","{'need_norm_data': True, 'norm_data': True, 's...",HelmertEncoder,JamesSteinEncoder
5,0.735086,0.781,0.045914,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -1}",{'need_norm_data': False},HelmertEncoder,JamesSteinEncoder
6,0.697041,0.7459,0.048859,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -1}",{'need_norm_data': False},HelmertEncoder,JamesSteinEncoder
7,0.607501,0.6689,0.061399,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 100,...","{'need_norm_data': False, 'early_stopping': Fa...",HelmertEncoder,JamesSteinEncoder
8,0.736037,0.7924,0.056363,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",HelmertEncoder,JamesSteinEncoder
9,0.664761,0.7324,0.067639,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...","{'need_norm_data': True, 'scaler_name': 'Stand...",HelmertEncoder,JamesSteinEncoder


In [8]:
# Let's optimize all the models and see what the result will be (this takes a long time).
# NOTE(review): `timeout` is presumably a time budget in seconds — confirm against the automl_alex docs.
# `review` is reassigned here, so the table returned by `fit()` is no longer reachable under that name.
review = model.opt(timeout=2000, verbose=1)

0%|          | 0/10 [00:00<?, ?it/s]
LightGBM Best Score:0.8059
 10%|█         | 1/10 [02:06<18:59, 126.63s/it]
KNeighbors Best Score:0.8389
 20%|██        | 2/10 [04:31<17:37, 132.17s/it]
LinearSVM Best Score:0.704
 30%|███       | 3/10 [07:43<17:30, 150.08s/it]
LinearModel Best Score:0.763
 40%|████      | 4/10 [09:30<13:41, 137.00s/it]
SGD Best Score:0.7008
 50%|█████     | 5/10 [12:05<11:52, 142.44s/it]
RandomForest Best Score:0.8353
 60%|██████    | 6/10 [14:00<08:57, 134.37s/it]
ExtraTrees Best Score:0.7901
 70%|███████   | 7/10 [21:03<11:02, 220.88s/it]
XGBoost Best Score:0.7123
 80%|████████  | 8/10 [25:39<07:55, 237.50s/it]
CatBoost Best Score:0.819
 90%|█████████ | 9/10 [29:27<03:54, 234.47s/it]
MLP Best Score:0.7568
100%|██████████| 10/10 [34:20<00:00, 206.04s/it]


In [9]:
# Display the configuration table kept on the model after optimization
# (presumably the best configuration found per model — one row per model in the output).
model.top1_models_cfgs

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,target_encoder
0,0.8059,0.8059,0.0,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...","{'need_norm_data': False, 'early_stopping': Fa...",HelmertEncoder,JamesSteinEncoder
1,0.8389,0.8389,0.0,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 140, 'weights': ...","{'need_norm_data': True, 'scaler_name': 'MinMa...",HelmertEncoder,JamesSteinEncoder
2,0.6588,0.704,0.045238,LinearSVM,"{'verbose': 0, 'random_state': 42, 'tol': 0.00...","{'need_norm_data': True, 'norm_data': True, 's...",HelmertEncoder,JamesSteinEncoder
3,0.7465,0.763,0.016508,LinearModel,"{'verbose': 0, 'fit_intercept': False, 'C': 80...","{'need_norm_data': True, 'scaler_name': 'Stand...",HelmertEncoder,JamesSteinEncoder
4,0.6556,0.7008,0.045238,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...","{'need_norm_data': True, 'norm_data': True, 's...",HelmertEncoder,JamesSteinEncoder
5,0.8353,0.8353,0.0,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{'need_norm_data': False},HelmertEncoder,JamesSteinEncoder
6,0.748,0.7901,0.042116,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{'need_norm_data': False},HelmertEncoder,JamesSteinEncoder
7,0.6404,0.7123,0.071858,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 100,...","{'need_norm_data': False, 'early_stopping': Fa...",HelmertEncoder,JamesSteinEncoder
8,0.7875,0.819,0.031534,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",HelmertEncoder,JamesSteinEncoder
9,0.6793,0.7568,0.077505,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...","{'need_norm_data': True, 'scaler_name': 'Stand...",HelmertEncoder,JamesSteinEncoder
