In [1]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
# (%pip installs into the environment of the running kernel.)
#%pip install --quiet automl_alex

In [1]:
# Imports for the walkthrough, followed by a version print so the recorded
# outputs can be tied to a specific automl_alex release.
import automl_alex
import sklearn
# Importing a package does not import its submodules. The evaluation cell
# accesses `sklearn.metrics.roc_auc_score` as an attribute of `sklearn`, so
# load the submodule explicitly instead of relying on another import having
# pulled it in as a side effect.
import sklearn.metrics
import time
from automl_alex import AutoML, AutoMLClassifier
print(automl_alex.__version__)

0.07.18


In [2]:
# Seed passed as `random_state` to both the train/test split and the AutoML model below.
RANDOM_SEED = 42

# Classifier

## Data

In [3]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import fetch_openml

# Fetch version 1 of the 'credit-g' dataset from OpenML; as_frame=True
# requests pandas containers for the features and the target.
dataset = fetch_openml(name='credit-g', version=1, as_frame=True)

# Replace the target labels with their integer category codes.
target_categories = dataset.target.astype('category')
dataset.target = target_categories.cat.codes

# Preview the first five feature rows.
dataset.data.head(5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [4]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split repeatable.
features, labels = dataset.data, dataset.target
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.25,
    random_state=RANDOM_SEED,
)

# Show the resulting shapes of the two feature frames.
X_train.shape, X_test.shape

((750, 20), (250, 20))

## Model

In [5]:
# Build the classifier once. The constructor receives the training features
# and labels plus the test features; with verbose=1 it logs the preprocessing
# it performs (see the output of this cell).
model = AutoMLClassifier(X_train, y_train, X_test, random_state=RANDOM_SEED, verbose=1)

# The generic entry point below is the longer spelling of the same thing
# (per the original "or simply" note). It is kept as a comment only: running
# both would construct a model twice and throw the first one away, because
# `model` is immediately rebound.
# model = AutoML(X_train, y_train, X_test, type_of_estimator='classifier', random_state=RANDOM_SEED)

Source X_train shape:  (750, 20) | X_test shape:  (250, 20)
##################################################
Auto detect cat features:  13
> Start preprocessing Data
> Generate cat encodet features
 +  44  Features from  HelmertEncoder
 +  54  Features from  HashingEncoder
 +  16  Features from  FrequencyEncoder
> Clean Nans in num features
> Generate interaction Num Features
 +  24  Interaction Features
> Normalization Features
##################################################
> Total Features:  142
##################################################
New X_train shape:  (750, 142) | X_test shape:  (250, 142)


In [6]:
%%time
# Run the optimisation and prediction step; the result is unpacked as the
# test-set predictions first, then the train-set predictions.
# timeout=1000 bounds the run -- presumably in seconds; TODO confirm against
# the automl_alex documentation. verbose=2 produces the progress log below.
predict_test, predict_train = model.fit_predict(timeout=1000, verbose=2)


 Opt StackingModels
One iteration takes ~ 0.6 sec
> Start Auto calibration parameters
> Start optimization with the parameters:
CV_Folds =  10
Score_CV_Folds =  5
Feature_Selection =  True
Opt_lvl =  3
Cold_start =  74.0
Early_stoping =  148.0
Metric =  roc_auc_score
Direction =  maximize
##################################################
Default model OptScore = 0.6207
Optimize: : 194it [11:40,  3.61s/it,  | Model: CatBoost | OptScore: 0.7766 | Best roc_auc_score: 0.8144 +- 0.037814]

 Predict from StackingModels
 10%|█         | 1/10 [00:11<01:39, 11.03s/it]
 Mean Score roc_auc_score on 30 Folds: 0.7965 std: 0.050695
 20%|██        | 2/10 [00:22<01:29, 11.21s/it]
 Mean Score roc_auc_score on 30 Folds: 0.7951 std: 0.044614
 30%|███       | 3/10 [00:37<01:26, 12.40s/it]
 Mean Score roc_auc_score on 30 Folds: 0.7926 std: 0.045813
 40%|████      | 4/10 [00:56<01:26, 14.40s/it]
 Mean Score roc_auc_score on 30 Folds: 0.7943 std: 0.049529
 50%|█████     | 5/10 [01:11<01:12, 14.49s/it]
 Mea

In [7]:
# Peek at the first five test-set predictions.
predict_test[:5]

array([0.41460593, 0.42930199, 0.52585414, 0.4129506 , 0.23435901])

In [8]:
# Score the held-out predictions with ROC AUC and report it rounded to 4 decimals.
test_auc = sklearn.metrics.roc_auc_score(y_test, predict_test)
print('Test AUC: ', round(test_auc, 4))

Test AUC:  0.802


In [9]:
# Display the model configurations stored on the fitted object (scores, model
# name, parameters, encoders, selected columns). The attribute name suggests
# these are the models used for stacking -- TODO confirm.
model.stack_models_cfgs

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoders,columns,cv_folds
86,0.7766,0.8144,0.037814,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, num_dependents,...",10
74,0.7739,0.8094,0.035474,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, num_dependents,...",10
170,0.7733,0.81,0.036668,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, num_dependents,...",10
77,0.7682,0.813,0.04483,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, num_dependents,...",10
69,0.768,0.8062,0.038191,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, HelmertEncoder_...",10
13,0.767,0.8126,0.045642,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[credit_amount, HelmertEncoder_purpose_0, Helm...",10
75,0.7667,0.806,0.039348,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, num_dependents,...",10
188,0.7666,0.8139,0.047309,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, num_dependents,...",10
85,0.766,0.8004,0.034388,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, num_dependents,...",10
97,0.7639,0.821,0.057089,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[HelmertEncoder, HashingEncoder, FrequencyEnco...","[duration, credit_amount, age, num_dependents,...",10
