In [1]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
#!pip install --quiet -U automl_alex

In [2]:
import automl_alex
import sklearn
import time
from automl_alex import AutoML, AutoMLClassifier
# Print the installed automl_alex version so the recorded results can be tied to it.
print(automl_alex.__version__)

0.07.24


In [3]:
# Single seed shared by the train/test split and the AutoML model constructors below.
RANDOM_SEED: int = 42

# Classifier

## Data

In [4]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Download the OpenML dataset named 'credit-g' (version 1) as pandas objects.
dataset = fetch_openml(name='credit-g', version=1, as_frame=True)

# Replace the target labels with their integer category codes.
target_as_category = dataset.target.astype('category')
dataset.target = target_as_category.cat.codes

# Preview the first five feature rows.
dataset.data.head(5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [5]:
# Hold out 25% of the rows for testing; the split is seeded with RANDOM_SEED.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)
# Show both shapes as the cell output.
X_train.shape, X_test.shape

((750, 20), (250, 20))

## Model

In [6]:
# Build the AutoML classifier once. The recorded output of this cell shows that preprocessing
# runs at construction time, so constructing a second model only to overwrite the first would
# repeat that work and throw the result away.
#
# Generic form (per the original "or Simply" note, AutoMLClassifier is the shorthand for it):
# model = AutoML(X_train, y_train, X_test, type_of_estimator='classifier', random_state=RANDOM_SEED)
model = AutoMLClassifier(X_train, y_train, X_test, random_state=RANDOM_SEED, verbose=1)

Source X_train shape:  (750, 20) | X_test shape:  (250, 20)
##################################################
Auto detect cat features:  13
> Start preprocessing Data
> Generate cat encodet features
 +  55  Features from  OneHotEncoder
 +  44  Features from  HelmertEncoder
 +  54  Features from  HashingEncoder
 +  16  Features from  FrequencyEncoder
> Generate Frequency Encode num features
 +  4  Frequency Encode Num Features 
> Clean Nans in num features
> Generate interaction Num Features
 +  24  Interaction Features
> Normalization Features
##################################################
> Total Features:  201
##################################################
New X_train shape:  (750, 201) | X_test shape:  (250, 201)


In [6]:
%%time
# Run the AutoML search and unpack its two return values (test-set and train-set predictions,
# going by the variable names). timeout=1000 is presumably in seconds — TODO confirm.
predict_test, predict_train = model.fit_predict(timeout=1000, verbose=2)


 Opt BestModels
One iteration takes ~ 0.5 sec
> Start Auto calibration parameters
> Start optimization with the parameters:
CV_Folds =  10
Score_CV_Folds =  5
Feature_Selection =  True
Opt_lvl =  3
Cold_start =  90.0
Early_stoping =  180.0
Metric =  roc_auc_score
Direction =  maximize
##################################################
Default model OptScore = 0.7254
Optimize: : 174it [10:22,  3.58s/it,  | Model: CatBoost | OptScore: 0.7853 | Best roc_auc_score: 0.8274 +- 0.042062]

 Predict from Models_1
 17%|█▋        | 1/6 [00:11<00:55, 11.09s/it]
 Mean Score roc_auc_score on 20 Folds: 0.805 std: 0.044751
 33%|███▎      | 2/6 [00:24<00:46, 11.74s/it]
 Mean Score roc_auc_score on 20 Folds: 0.8027 std: 0.040799
 50%|█████     | 3/6 [01:08<01:03, 21.32s/it]
 Mean Score roc_auc_score on 20 Folds: 0.8056 std: 0.049165
 67%|██████▋   | 4/6 [01:20<00:37, 18.76s/it]
 Mean Score roc_auc_score on 20 Folds: 0.8062 std: 0.044257
 83%|████████▎ | 5/6 [01:33<00:16, 16.97s/it]
 Mean Score roc_auc_

In [7]:
# Preview the first five entries of the test-set predictions.
predict_test[:5]

array([0.38209832, 0.41377497, 0.6122313 , 0.24182635, 0.07519208])

In [8]:
# ROC AUC of the AutoML predictions against the held-out labels, rounded to 4 decimals.
print(
    'Test AUC: ',
    round(sklearn.metrics.roc_auc_score(y_test, predict_test), 4),
)

Test AUC:  0.8182


In [8]:
# NOTE(review): this cell repeats the previous cell verbatim. The recorded outputs differ
# (0.8182 above vs 0.8191 here), so predict_test was presumably regenerated between the two
# runs; keep only one of the cells once the notebook is re-run top to bottom.
print('Test AUC: ', round(sklearn.metrics.roc_auc_score(y_test, predict_test),4))

Test AUC:  0.8191


In [9]:
# Inspect the configurations held in stack_models_cfgs; the recorded output lists one row per
# model with its scores, model name, parameters, encoders, selected columns and CV folds.
model.stack_models_cfgs

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoders,columns,cv_folds
0,0.7853,0.8274,0.042062,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[OneHotEncoder_personal_status_1, OneHotEncode...",10
1,0.7824,0.8246,0.042239,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[OneHotEncoder_personal_status_1, OneHotEncode...",10
2,0.7822,0.8147,0.032549,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[OneHotEncoder_personal_status_1, OneHotEncode...",10
3,0.7816,0.8155,0.033926,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[OneHotEncoder_personal_status_1, OneHotEncode...",10
4,0.7792,0.8183,0.039136,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[OneHotEncoder_personal_status_1, OneHotEncode...",10
5,0.7771,0.8215,0.044384,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...",{'early_stopping': True},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[OneHotEncoder_personal_status_1, OneHotEncode...",10
6,0.7252,0.7763,0.051122,LinearModel,"{'verbose': 0, 'fit_intercept': False, 'C': 93...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, OneHotEncoder_p...",10
7,0.7248,0.7763,0.051536,LinearModel,"{'verbose': 0, 'fit_intercept': False, 'C': 87...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, OneHotEncoder_p...",10
8,0.7247,0.7754,0.050709,LinearModel,"{'verbose': 0, 'fit_intercept': False, 'C': 93...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","[duration, credit_amount, age, OneHotEncoder_p...",10


In [18]:
import numpy as np
import pandas as pd

# Lay the stored train predictions out side by side: the frame is built with one row per
# entry of stack_models_predicts['predict_train'], then transposed so each entry is a column.
pd.DataFrame(list(model.stack_models_predicts['predict_train'])).T

Unnamed: 0,0,1,2,3,4,5,6,7,8
0,0.281334,0.281208,0.236160,0.312978,0.199849,0.323208,0.518981,0.542966,0.481556
1,0.267068,0.258889,0.168407,0.265681,0.194058,0.183128,0.420461,0.460543,0.440335
2,0.498742,0.520057,0.503798,0.488448,0.594333,0.534053,0.892348,0.866062,0.896360
3,0.288990,0.090608,0.264710,0.217713,0.208814,0.344272,0.129078,0.134960,0.148145
4,0.380299,0.466275,0.453555,0.416142,0.364128,0.460757,0.677056,0.647713,0.669464
...,...,...,...,...,...,...,...,...,...
745,0.241503,0.245735,0.319613,0.220868,0.248250,0.266400,0.763363,0.798271,0.776417
746,0.164344,0.191915,0.180521,0.195375,0.158664,0.230133,0.195958,0.185722,0.164875
747,0.205310,0.239537,0.088954,0.248571,0.124627,0.116283,0.199748,0.194761,0.164882
748,0.351683,0.338172,0.482734,0.461362,0.262888,0.395679,0.265005,0.292905,0.285318


In [19]:
# Second-level feature matrices: one column per entry of the stored predictions
# (nine columns in the recorded output), built the same way for train and test.
X_train_predicts = pd.DataFrame(list(model.stack_models_predicts['predict_train'])).T
X_test_predicts = pd.DataFrame(list(model.stack_models_predicts['predict_test'])).T

In [20]:
# Keyword arguments presumably meant for the second-level (stacking) estimator.
# NOTE(review): nothing in the visible cells passes this dict to an estimator — confirm it is
# still needed, or wire it in with `**model_param`.
model_param = {
    'verbose': 1,
    # Reuse the notebook-wide seed instead of repeating the literal 42.
    'random_state': RANDOM_SEED,
    'max_iter': 1000,
}

In [21]:
from sklearn import ensemble, neural_network, linear_model

# NOTE(review): this MLPClassifier is replaced by the LogisticRegression assigned to model_2
# in the next cell before model_2 is ever fitted, so it has no effect on the results below.
model_2 = neural_network.MLPClassifier()

In [25]:
# Meta-model for the stack: LogisticRegression with library defaults.
# NOTE(review): the model_param dict defined earlier is not passed here — confirm whether
# that is intentional.
model_2 = linear_model.LogisticRegression()

In [26]:
# Fit the meta-model on the base models' train-set predictions.
# NOTE(review): this is only leak-free if predict_train holds out-of-fold predictions — confirm.
model_2.fit(X_train_predicts, y_train)

LogisticRegression()

In [27]:
# Keep only column 1 of predict_proba, i.e. the probability of model_2.classes_[1]
# (the label coded 1 in y_train, assuming the target has exactly the codes 0 and 1).
test_stack_predict = model_2.predict_proba(X_test_predicts)[:, 1]

In [28]:
# Compare the AutoML predictions with the LogisticRegression stack on the same held-out labels.
# Each line carries its own label so the two scores cannot be confused in the output.
print('Test AUC (AutoML): ', round(sklearn.metrics.roc_auc_score(y_test, predict_test), 4))
print('Test AUC (stacked LogisticRegression): ', round(sklearn.metrics.roc_auc_score(y_test, test_stack_predict), 4))

Test AUC:  0.8117
Test AUC:  0.8127
