In [1]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
#!pip install -q -U automl_alex

In [1]:
import time

import automl_alex
import pandas as pd
import sklearn
from automl_alex import DataPrepare
from automl_alex import BestSingleModel, BestSingleModelClassifier, BestSingleModelRegressor
from sklearn.datasets import fetch_openml
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
# Show the installed automl_alex version so readers can compare their
# environment with the one that produced the recorded outputs.
print('AutoML-Alex version:', automl_alex.__version__)

AutoML-Alex version: 1.2.28


In [2]:
# Single seed shared by the train/test split and the model constructors below,
# so that a fresh run uses the same random_state everywhere.
RANDOM_SEED = 42

# Classifier

## Data

In [3]:
# `fetch_openml` is imported in the notebook's top import cell together with
# the other dependencies, so this cell only loads and prepares the data.
# Dataset page: https://www.openml.org/d/179
dataset = fetch_openml(data_id=179, as_frame=True)

# Replace the target with its integer category codes so that downstream
# metrics receive numeric labels instead of the original label values.
dataset.target = dataset.target.astype('category').cat.codes

# Preview the first rows of the feature frame.
dataset.data.head(5)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capitalgain,capitalloss,hoursperweek,native-country
0,2,State-gov,77516.0,Bachelors,13.0,Never-married,Adm-clerical,Not-in-family,White,Male,1,0,2,United-States
1,3,Self-emp-not-inc,83311.0,Bachelors,13.0,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,0,United-States
2,2,Private,215646.0,HS-grad,9.0,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,2,United-States
3,3,Private,234721.0,11th,7.0,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,2,United-States
4,1,Private,338409.0,Bachelors,13.0,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,2,Cuba


In [4]:
# Hold out 20% of the rows for the final evaluation; the split is seeded so
# it is reproducible across runs.
# NOTE(review): the recorded output below shows 36631 / 12211 rows, i.e. a
# 75/25 split, which does not match test_size=0.2 -- re-run to refresh it.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.2,
    random_state=RANDOM_SEED,
)
X_train.shape, X_test.shape

((36631, 14), (12211, 14))

## BestSingleModel

In [5]:
# Configure the search over the listed candidate model names, with automatic
# parameter handling and feature selection switched on.
model = BestSingleModelClassifier(
    random_state=RANDOM_SEED,
    feature_selection=True,
    auto_parameters=True,
    models_names=['LightGBM', 'ExtraTrees', 'RandomForest', 'XGBoost'],
)

# NOTE(review): `timeout` is presumably a time budget in seconds -- confirm
# against the automl_alex documentation.
model.fit(X_train, y_train, timeout=600)

12:57:50 | > Start Fit Base Model
12:58:35 | > DATA PREPROC
12:58:35 | Source data shape: (36631, 14)
12:58:35 | ##################################################
12:58:35 | ! START preprocessing Data
12:58:35 | - Auto detect cat features: 12
12:58:35 | > Binary Features
12:58:35 | > Clean Categorical Features
12:58:36 | > Transform Categorical Features.
12:58:36 |  - Encoder: OneHotEncoder ADD features: 135
12:58:37 |  - Encoder: CountEncoder ADD features: 12
12:58:37 | > CleanOutliers
12:58:37 | Num of outlier detected: 231 in Feature education-num
12:58:37 | Proportion of outlier detected: 0.6 %
12:58:37 | Num of outlier detected: 527 in Feature fnlwgt
12:58:37 | Proportion of outlier detected: 1.4 %
12:58:37 |   No nans features
12:58:37 | > Generate interaction Num Features
12:58:37 |  ADD features: 5
12:58:37 | > Normalization Features
12:58:37 | ##################################################
12:58:37 | Final data shape: (36631, 168)
12:58:37 | Total ADD columns: 154
12:58:3

In [6]:
# Score the held-out rows with the fitted model.
# NOTE(review): presumably returns scores/probabilities rather than hard
# labels (they are fed to roc_auc_score below) -- confirm.
predicts = model.predict(X_test)

Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Finished loading model, total used 300 iterations
Load CrossValidation
Load CrossValidation


In [7]:
# ROC AUC on the held-out split, rounded to 4 decimals.
# Uses the explicitly imported `roc_auc_score` (top import cell): a bare
# `import sklearn` does not guarantee that the `sklearn.metrics` submodule
# has been loaded, so `sklearn.metrics.roc_auc_score` only worked when some
# other import had pulled it in as a side effect.
print('Test AUC: ', round(roc_auc_score(y_test, predicts),4))

Test AUC:  0.9137


In [None]:
# Inspect which model was chosen.
# NOTE(review): presumably one of the `models_names` passed above -- confirm.
model.best_model_name

In [None]:
# Inspect the parameters stored for the chosen model.
model.best_model_param

In [None]:
# Show the first five selected feature columns.
# Only meaningful when the model was built with feature_selection=True.
model.select_columns[:5]

## Save & Load

In [None]:
# Persist the fitted model under the name 'AutoML_model_1'; the same name is
# passed to `load` in the next cell.
model.save('AutoML_model_1')

In [None]:
# Start from a fresh wrapper built with the same seed ...
model_new = BestSingleModelClassifier(
    random_state=RANDOM_SEED,
)
# ... then rebind the name to whatever `load` returns for the saved model.
model_new = model_new.load('AutoML_model_1')

In [None]:
# Sanity check for the save/load round trip: score the same held-out rows with
# the reloaded model and report ROC AUC, which can be compared with the value
# printed for the original fitted model.
# Uses the explicitly imported `roc_auc_score` (top import cell) instead of
# reaching through `sklearn.metrics`, which a bare `import sklearn` does not
# guarantee to have loaded.
predicts = model_new.predict(X_test)
print('Test AUC: ', round(roc_auc_score(y_test, predicts),4))