In [1]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
#!pip install -q -U automl_alex

In [1]:
import automl_alex
import sklearn
import pandas as pd
import time
from automl_alex import DataPrepare
from automl_alex import AutoML, AutoMLClassifier, AutoMLRegressor
# Show the installed AutoML-Alex version.
installed_version = automl_alex.__version__
print('AutoML-Alex version:', installed_version)

AutoML-Alex version: 1.3.8


In [2]:
# Seed passed as `random_state` to the train/test splits and AutoML models in this notebook.
RANDOM_SEED = 42

# Classifier

## Data

In [3]:
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
# OpenML dataset 179: https://www.openml.org/d/179
dataset = fetch_openml(data_id=179, as_frame=True)

# Replace the target labels with their integer category codes.
target_as_category = dataset.target.astype('category')
dataset.target = target_as_category.cat.codes

# Preview the first five feature rows.
dataset.data.head(n=5)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capitalgain,capitalloss,hoursperweek,native-country
0,2,State-gov,77516.0,Bachelors,13.0,Never-married,Adm-clerical,Not-in-family,White,Male,1,0,2,United-States
1,3,Self-emp-not-inc,83311.0,Bachelors,13.0,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,0,United-States
2,2,Private,215646.0,HS-grad,9.0,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,2,United-States
3,3,Private,234721.0,11th,7.0,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,2,United-States
4,1,Private,338409.0,Bachelors,13.0,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,2,Cuba


In [4]:
# Hold out 25% of the rows for testing; the split is seeded for repeatability.
split_options = {'test_size': 0.25, 'random_state': RANDOM_SEED}
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    **split_options,
)
X_train.shape, X_test.shape

((36631, 14), (12211, 14))

## AutoML

In [9]:
# Time budget handed to `fit` (presumably seconds — confirm in the automl_alex docs).
fit_timeout = 900

model = AutoMLClassifier(random_state=RANDOM_SEED)
# Kept as the last expression so the cell still displays the value returned by `fit`.
model.fit(X_train, y_train, timeout=fit_timeout)

00:02:39 | > Start Fit Base Model
00:03:17 | ##################################################
00:03:17 | > Start Fit Models 2
00:03:17 | ##################################################
00:03:17 | ##################################################
00:03:18 | > Step 1: calc parameters and pruned score: get test 10 trials
00:07:02 |  One iteration ~ 22.5 sec
00:07:02 |  Possible iters ~ 8.0
00:07:02 | ! Not enough time to find the optimal parameters. 
                     Possible iters < 100. 
                     Please, Increase the 'timeout' parameter for normal optimization.
00:07:02 | --------------------------------------------------
00:07:02 |   Pruned Threshold Score: 0.9026
00:07:02 | ##################################################
00:07:02 | > Step 2: Full opt with Threshold Score Pruner
00:07:02 | ##################################################
00:07:02 | > Start optimization with the parameters:
00:07:02 | CV_Folds = 7
00:07:02 | Score_CV_Folds = 3
00:07:03 | Featu

<automl_alex.automl_alex.AutoMLClassifier at 0x7f6b5a8f98b0>

In [10]:
# Predict on the held-out test split with the fitted model.
predicts = model.predict(X_test)

In [11]:
# ROC AUC of the predictions on the test split, shown to four decimal places.
test_auc = sklearn.metrics.roc_auc_score(y_test, predicts)
print('Test AUC: ', round(test_auc, 4))

Test AUC:  0.9127


In [15]:
# Report the test ROC AUC for each `predict_model_<n>` attribute of the AutoML
# object, using one loop instead of three copy-pasted lines.
# NOTE(review): presumably these attributes are filled in by the earlier
# `model.predict(X_test)` call — confirm against automl_alex.
for model_idx in (1, 2, 3):
    model_predicts = getattr(model, f'predict_model_{model_idx}')
    model_auc = sklearn.metrics.roc_auc_score(y_test, model_predicts)
    print(f'predict_model_{model_idx} AUC: {round(model_auc, 4)}')

predict_model_1 AUC: 0.9133
predict_model_2 AUC: 0.9095
predict_model_3 AUC: 0.9094


## Save & Load

In [12]:
# Save the fitted model under the name 'AutoML_model_1'; the load cell below uses the same name.
model.save('AutoML_model_1')

Save model
Save model


In [13]:
# Create a fresh classifier, load what was saved as 'AutoML_model_1',
# and keep whatever `load` returns.
model_new = AutoMLClassifier(random_state=RANDOM_SEED).load('AutoML_model_1')

00:14:45 | Load DataPrepare
00:14:45 | Load Model
00:14:45 | Load Model
00:14:45 | Load Model
00:14:45 | Load Model
00:14:45 | Load Model
00:14:45 | Load Model
00:14:45 | Load Model
Load CrossValidation
Load model
00:14:45 | Load DataPrepare
00:14:45 | Load Model
Finished loading model, total used 300 iterations
00:14:46 | Load Model
Finished loading model, total used 300 iterations
00:14:46 | Load Model
Finished loading model, total used 300 iterations
00:14:46 | Load Model
Finished loading model, total used 300 iterations
00:14:46 | Load Model
Finished loading model, total used 300 iterations
00:14:46 | Load Model
Finished loading model, total used 300 iterations
00:14:46 | Load Model
Finished loading model, total used 300 iterations
Load CrossValidation
Load model
00:14:47 | Load AutoML


In [14]:
# Score the same test split with the reloaded model so the AUC can be
# compared with the one printed before saving.
predicts = model_new.predict(X_test)
reloaded_auc = sklearn.metrics.roc_auc_score(y_test, predicts)
print('Test AUC: ', round(reloaded_auc, 4))

Test AUC:  0.9127


# Regression

## Data

In [None]:
# OpenML dataset 543: https://www.openml.org/d/543
dataset = fetch_openml(data_id=543, as_frame=True)

# Wrap features and target as DataFrames, then hold out 15% of the rows for testing.
features_df = pd.DataFrame(dataset.data)
target_df = pd.DataFrame(dataset.target)
X_train, X_test, y_train, y_test = train_test_split(
    features_df,
    target_df,
    test_size=0.15,
    random_state=RANDOM_SEED,
)

X_train.shape, X_test.shape

In [None]:
# Preview the first five rows of the training features.
X_train.head(5)

## AutoML

In [None]:
# Time budget handed to `fit` (presumably seconds — confirm in the automl_alex docs).
fit_timeout = 900

# Note: this rebinds `model`, replacing the classifier fitted earlier.
model = AutoMLRegressor(random_state=RANDOM_SEED)
# Kept as the last expression so the cell displays the value returned by `fit`.
model.fit(X_train, y_train, timeout=fit_timeout, verbose=1)

In [None]:
# Predict on the held-out rows and report the mean squared error to four decimals.
predicts = model.predict(X_test)
test_mse = sklearn.metrics.mean_squared_error(y_test, predicts)
print('Test MSE: ', round(test_mse, 4))