In [0]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
#!pip install automl-alex

In [8]:
import json

import automl_alex
import numpy as np
import pandas as pd
import sklearn
from automl_alex import BestSingleModelClassifier
from sklearn.datasets import fetch_openml
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

In [9]:
# Show the installed automl_alex version so the recorded results can be tied to a release.
print(automl_alex.__version__)

0.05.08.11


In [10]:
# Single seed reused by the train/test split and by the model constructor.
RANDOM_SEED = 42

# Classifier

## Data

In [11]:
# Fetch the `credit-g` dataset from OpenML as pandas objects (`as_frame=True`);
# the dataset version is pinned so repeated runs download the same data.
dataset = fetch_openml(name='credit-g', version=1, as_frame=True)

# Preview the first rows of the feature table.
dataset.data.head(5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [12]:
# Replace the class labels with their integer category codes.
target_as_category = dataset.target.astype('category')
dataset.target = target_as_category.cat.codes

In [13]:
# Hold out 25% of the rows for the final test score; the split is seeded
# with RANDOM_SEED so it is repeatable.
split_parts = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)
X_train, X_test, y_train, y_test = split_parts

# Display the resulting shapes as a sanity check.
X_train.shape, X_test.shape

((750, 20), (250, 20))

## Model

In [14]:
# The classifier receives the training features/labels and the test features up front;
# `y_test` is not passed in and is only used for scoring after prediction.
model = BestSingleModelClassifier(X_train, y_train, X_test, random_state=RANDOM_SEED)

In [8]:
# Run the model / hyper-parameter search and keep the per-trial history it returns.
# NOTE(review): `timeout` is presumably a budget in seconds — confirm against the automl_alex docs.
history = model.opt(
    timeout=1000,
    verbose=1,
)

One iteration takes ~ 0.5 sec
Start Auto calibration parameters
Start optimization with the parameters:
CV =10
Score_folds =2
Opt_lvl =2
Cold_start =63.0
Early_stoping =100
Metric =roc_auc_score
Direction =maximize
Start Model LightGBM Score roc_auc_score = 0.6909
115it [05:01,  2.84s/it, Best Score roc_auc_score = 0.825 +- 0.003859 Best Model: KNeighbors]

In [13]:
# Predict with the model selected by the search; predictions for the test set
# come first, followed by predictions for the training set.
predicts_test, predict_train = model.predict()

# Score against the held-out labels. `roc_auc_score` is imported explicitly in the
# import cell: a bare `import sklearn` does not guarantee that the `sklearn.metrics`
# submodule is loaded, so `sklearn.metrics.roc_auc_score` only worked as a side
# effect of other imports.
print('Test AUC: ', round(roc_auc_score(y_test, predicts_test),4))

Test AUC:0.7722


In [9]:
# Draw the library's built-in plot of the optimisation history
# (a two-axes figure in the recorded run).
model.plot_opt_history()

<Figure size 1500x500 with 2 Axes>

In [10]:
# First row of the history table; in the recorded run its `score_opt` (0.825)
# matches the best score reported in the search log.
history.head(1)

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,target_encoder
109,0.825,0.8289,0.003859,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 133, 'weights': ...","{'need_norm_data': True, 'scaler_name': 'MinMa...",HashingEncoder,TargetEncoder


In [11]:
# Inspect the wrapper settings held by the model after the search; in the recorded
# run they appear to match the `wrapper_params` of the top history row.
model.wrapper_params

{'need_norm_data': True, 'scaler_name': 'MinMaxScaler'}

In [14]:
# First ten rows of the history; the recorded output is ordered by descending `score_opt`.
history.head(10)

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,target_encoder
109,0.825,0.8289,0.003859,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 133, 'weights': ...","{'need_norm_data': True, 'scaler_name': 'MinMa...",HashingEncoder,TargetEncoder
104,0.8234,0.8285,0.005146,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 143, 'weights': ...","{'need_norm_data': True, 'scaler_name': 'MinMa...",HashingEncoder,TargetEncoder
99,0.8194,0.8233,0.003859,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 93, 'weights': '...","{'need_norm_data': True, 'scaler_name': 'Stand...",HashingEncoder,TargetEncoder
10,0.8191,0.8435,0.024443,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",HashingEncoder,TargetEncoder
106,0.819,0.819,0.0,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 80, 'weights': '...","{'need_norm_data': True, 'scaler_name': 'MinMa...",HashingEncoder,TargetEncoder
71,0.8182,0.8225,0.004288,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 92, 'weights': '...","{'need_norm_data': True, 'scaler_name': 'MinMa...",HashingEncoder,TargetEncoder
69,0.8182,0.8216,0.003431,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 46, 'weights': '...","{'need_norm_data': True, 'scaler_name': 'MinMa...",HashingEncoder,TargetEncoder
73,0.8173,0.8272,0.009863,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 149, 'weights': ...","{'need_norm_data': True, 'scaler_name': 'Stand...",HashingEncoder,TargetEncoder
77,0.8161,0.8212,0.005146,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 45, 'weights': '...","{'need_norm_data': True, 'scaler_name': 'Stand...",HashingEncoder,TargetEncoder
87,0.8147,0.819,0.004288,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 142, 'weights': ...","{'need_norm_data': True, 'scaler_name': 'Robus...",HashingEncoder,TargetEncoder
