In [1]:
# Google Colaboratory only: uncomment the line below to install automl_alex.
# (Inside a notebook, `%pip install ...` is the preferred form because it targets the running kernel's environment.)
#!pip install --quiet -U automl_alex

In [2]:
import time

import automl_alex
import sklearn
from automl_alex import AutoML, AutoMLClassifier
from sklearn.metrics import roc_auc_score
# Record which automl_alex release produced the outputs saved in this notebook.
print(automl_alex.__version__)

0.07.26


In [3]:
# Single seed reused for the train/test split and for the AutoML model.
RANDOM_SEED = 42

# Classifier

## Data

In [4]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
# Download the OpenML "credit-g" dataset (version 1) as pandas objects.
dataset = fetch_openml(name='credit-g', version=1, as_frame=True)

# Replace the target labels with their integer category codes.
target_codes = dataset.target.astype('category').cat.codes
dataset.target = target_codes

# Preview the first five feature rows (head() defaults to n=5).
dataset.data.head()

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [5]:
# Hold out 25% of the rows for testing; the split is seeded so it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)

# Show both feature-frame shapes as a quick sanity check.
(X_train.shape, X_test.shape)

((750, 20), (250, 20))

## Model

In [6]:
# Build the classifier through the AutoMLClassifier shortcut.
# The notebook author presents the generic entry point as the longer alternative:
#   model = AutoML(X_train, y_train, X_test, type_of_estimator='classifier', random_state=RANDOM_SEED)
# NOTE(review): X_test is passed to the constructor and is preprocessed together
# with X_train (see the shapes printed by this cell) — confirm that no encoder or
# scaler statistics are fitted on the test rows.
model = AutoMLClassifier(
    X_train,
    y_train,
    X_test,
    random_state=RANDOM_SEED,
    verbose=1,
)

Source X_train shape:(750, 20)| X_test shape:(250, 20)
##################################################
Auto detect cat features:13
> Start preprocessing Data
> Generate cat encodet features
 +55 Features fromOneHotEncoder
 +44 Features fromHelmertEncoder
 +54 Features fromHashingEncoder
 +16 Features fromFrequencyEncoder
> Generate Frequency Encode num features
 +4 Frequency Encode Num Features
> Clean Nans in num features
> Generate interaction Num Features
 +24 Interaction Features
> Normalization Features
##################################################
> Total Features:201
##################################################
New X_train shape:(750, 201)| X_test shape:(250, 201)


In [9]:
%%time
# Run the AutoML search; the two returned values are unpacked as the test-set
# predictions first and the train-set predictions second.
# timeout=1500 caps the search — presumably in seconds; confirm against the automl_alex docs.
predict_test, predict_train = model.fit_predict(timeout=1500, verbose=2)

__________________________________________________
Step 1: Model 0
__________________________________________________
100%|██████████| 1/1 [00:05<00:00,  5.52s/it]
--------------------------------------------------
Model 1
One iteration takes ~ 1.4 sec
> Start Auto calibration parameters
> Start optimization with the parameters:
CV_Folds =10
Score_CV_Folds =3
Feature_Selection =True
Opt_lvl =2
Cold_start =43.0
Early_stoping =100
Metric =roc_auc_score
Direction =maximize
##################################################
Default model OptScore = 0.7087
Optimize: : 255it [09:01,  1.66s/it,  | Model: RandomForest | OptScore: 0.7619 | Best roc_auc_score: 0.8087 +- 0.046755]
 EarlyStopping Exceeded: Best Score: 0.7619roc_auc_score
Optimize: : 255it [09:01,  2.12s/it,  | Model: RandomForest | OptScore: 0.7619 | Best roc_auc_score: 0.8087 +- 0.046755]

 Predict from Models_1
100%|██████████| 3/3 [00:01<00:00,  1.64it/s]

 > Calc predict policy on Models_1:
 | posible_repeats:89 | stack_top:10

In [12]:
# Peek at the first five test-set predictions.
predict_test[:5]

array([0.3442651 , 0.46008764, 0.52973425, 0.37271726, 0.10340613])

In [10]:
# Score the held-out predictions with ROC AUC, rounded to 4 decimals.
# roc_auc_score is called through its explicit import: a bare `import sklearn`
# does not load the `sklearn.metrics` submodule, so the previous
# `sklearn.metrics.roc_auc_score` spelling only resolved when another import
# happened to have loaded that submodule first.
test_auc = roc_auc_score(y_test, predict_test)
print('Test AUC: ', round(test_auc, 4))

Test AUC:0.8096


In [11]:
# Display the stacked-model configuration table: one row per model, with its
# scores, model name, parameters, encoders, selected columns and CV folds.
model.stack_models_cfgs

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoders,columns,cv_folds
0,0.7619,0.8087,0.046755,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
1,0.7602,0.8072,0.046989,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
2,0.7588,0.8026,0.043793,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, OneHotEncoder_other_payment_pl...",10
3,0.7559,0.8002,0.044282,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
4,0.7545,0.8014,0.046876,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
5,0.754,0.8033,0.049344,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
6,0.7534,0.8062,0.052843,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
7,0.7529,0.8044,0.051466,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, OneHotEncoder_other_payment_pl...",10
8,0.7525,0.8045,0.052013,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
9,0.7517,0.7985,0.046768,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{},"[OneHotEncoder, HelmertEncoder, HashingEncoder...","(duration, age, num_dependents, OneHotEncoder_...",10
