In [1]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
#!pip install --quiet automl-alex

In [1]:
import automl_alex
import sklearn
from automl_alex import AutoML, AutoMLClassifier
from sklearn.metrics import roc_auc_score
# Print the installed automl_alex version so this run can be tied to a specific release.
print(automl_alex.__version__)

0.05.08.11


In [2]:
# Seed passed to train_test_split and to the AutoML constructors below so runs are repeatable.
RANDOM_SEED = 42

# Classifier

## Data

In [3]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Download version 1 of the 'credit-g' dataset from OpenML as pandas objects,
# then preview the first five feature rows.
dataset = fetch_openml(
    name='credit-g',
    version=1,
    as_frame=True,
)
dataset.data.head(5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [4]:
# Preview the first five raw class labels.
dataset.target.head(5)

0    good
1     bad
2    good
3    good
4     bad
Name: class, dtype: category
Categories (2, object): [good, bad]

In [5]:
# Replace the string labels with their integer category codes
# (in the outputs shown here 'good' becomes 0 and 'bad' becomes 1).
dataset.target = (
    dataset.target
    .astype('category')
    .cat.codes
)
dataset.target.head(5)

0    0
1    1
2    0
3    0
4    1
dtype: int8

In [6]:
# Hold out 25% of the rows as a test set; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)
X_train.shape, X_test.shape

((750, 20), (250, 20))

## Model

In [7]:
# Build the AutoML object from the training data, training labels and test features,
# selecting the classifier variant via `type_of_estimator`.
model = AutoML(X_train, y_train, X_test, type_of_estimator='classifier', random_state=RANDOM_SEED)

# Or simply use the classifier-specific class. This rebinds `model`, so the AutoML
# instance created above is discarded; only one of the two constructions is needed.
# NOTE(review): presumably equivalent to the call above — confirm against the automl_alex docs.
model = AutoMLClassifier(X_train, y_train, X_test, random_state=RANDOM_SEED)

In [8]:
# Run the AutoML pipeline and unpack its two return values, named here as the
# test-set predictions first and the train-set predictions second.
# NOTE(review): `timeout` is presumably in seconds — confirm against the automl_alex docs.
# The stages logged below add up to roughly 21 minutes, so expect a long-running cell.
predict_test, predict_train = model.fit_predict(timeout=1000, verbose=2)


 Step1: Opt StackingModels
One iteration takes ~ 0.5 sec
Start Auto calibration parameters
Start optimization with the parameters:
CV =10
Score_folds =3
Opt_lvl =3
Cold_start =48.0
Early_stoping =100
Metric =roc_auc_score
Direction =maximize
Start Model LightGBM Score roc_auc_score = 0.6909
112it [07:36,  4.63s/it, Best Score roc_auc_score = 0.803 +- 0.033468 Best Model: CatBoost]
 EarlyStopping Exceeded: Best Score: 0.803roc_auc_score

 Step2: Get new X_train from StackingModels
100%|██████████| 25/25 [05:25<00:00, 13.04s/it]

 StackModels Mean roc_auc_score Score Train:0.7968

 Step3: Opt MetaModels
100%|██████████| 2/2 [07:50<00:00, 235.06s/it, Model: XGBoost | Best Score roc_auc_score = 0.6604 +- 0.038579 ]

 Step4: Predict from MetaModels
100%|██████████| 10/10 [00:32<00:00,  3.23s/it]
MetaModels Mean roc_auc_score Score Train : 0.7877764334207165

 Finish!


In [11]:
# Peek at the first five test-set predictions (the output shows a float array;
# presumably scores for class 1 — TODO confirm).
predict_test[:5]

array([0.47702889, 0.58550138, 0.67426957, 0.30171942, 0.10358626])

In [12]:
# Score the held-out split: ROC AUC of the predicted scores against the true test labels,
# rounded to four decimals. Uses the explicitly imported `roc_auc_score` instead of
# reaching through `sklearn.metrics`, which a bare `import sklearn` does not guarantee
# to have loaded.
print('Test AUC: ', round(roc_auc_score(y_test, predict_test), 4))

Test AUC:0.7976


In [15]:
# Show the first five rows of the stacking-model trial table stored on the fitted model
# (per the output columns: scores, model name, parameters and encoders for each row).
model.stack_models_trails.head(5)

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,target_encoder
9,0.803,0.8365,0.033468,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",HashingEncoder,JamesSteinEncoder
94,0.8009,0.8311,0.030243,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",HashingEncoder,JamesSteinEncoder
64,0.7871,0.8333,0.046219,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",HashingEncoder,JamesSteinEncoder
85,0.7801,0.8173,0.037194,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",HashingEncoder,JamesSteinEncoder
110,0.7704,0.8117,0.04128,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",HashingEncoder,JamesSteinEncoder
