In [1]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
#!pip install -U -q automl-alex

In [1]:
import pandas as pd
import sklearn
from sklearn.datasets import fetch_openml
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

import automl_alex
from automl_alex import ModelsReview, ModelsReviewClassifier, DataPrepare

In [2]:
# Show which automl_alex release produced the outputs recorded in this notebook.
print(automl_alex.__version__)

1.2.28


In [3]:
# Single seed passed as `random_state` to the train/test split, DataPrepare
# and the ModelsReview objects below.
RANDOM_SEED = 42

## Data

In [4]:
# Source data: OpenML dataset 179 — https://www.openml.org/d/179
# `as_frame=True` returns the features and target as pandas objects.
dataset = fetch_openml(data_id=179, as_frame=True)
# Replace the target labels with their integer category codes (in place on the fetched bunch).
dataset.target = dataset.target.astype('category').cat.codes
# Preview the first five feature rows.
dataset.data.head(5)

Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capitalgain,capitalloss,hoursperweek,native-country
0,2,State-gov,77516.0,Bachelors,13.0,Never-married,Adm-clerical,Not-in-family,White,Male,1,0,2,United-States
1,3,Self-emp-not-inc,83311.0,Bachelors,13.0,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,0,United-States
2,2,Private,215646.0,HS-grad,9.0,Divorced,Handlers-cleaners,Not-in-family,White,Male,0,0,2,United-States
3,3,Private,234721.0,11th,7.0,Married-civ-spouse,Handlers-cleaners,Husband,Black,Male,0,0,2,United-States
4,1,Private,338409.0,Bachelors,13.0,Married-civ-spouse,Prof-specialty,Wife,Black,Female,0,0,2,Cuba


In [5]:
# Hold out 25% of the rows as a test set; the split is seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)
X_train.shape, X_test.shape

((36631, 14), (12211, 14))

In [6]:
# Fit the preprocessing on the training split only, then apply the already
# fitted transformation to the test split.
# NOTE: X_train / X_test are overwritten here, so re-running this cell on its
# own would feed already-transformed frames back into DataPrepare — re-run the
# train/test split cell first.
de = DataPrepare(verbose=0, random_state=RANDOM_SEED)
X_train = de.fit_transform(X_train)
X_test = de.transform(X_test)
X_train.shape, X_test.shape

((36631, 154), (12211, 154))

# ModelsReview

In [7]:
# Generic entry point: the kind of task is chosen via `type_of_estimator`.
model = ModelsReview(type_of_estimator='classifier', random_state=RANDOM_SEED)

# Or use the classifier-specific class directly. This assignment replaces the
# `model` created above, so this is the instance used in the cells that follow;
# it is given ROC AUC as its metric.
# `roc_auc_score` comes from an explicit `from sklearn.metrics import ...` in the
# imports cell: a bare `import sklearn` does not by itself guarantee that the
# `sklearn.metrics` submodule has been loaded.
model = ModelsReviewClassifier(
    metric=roc_auc_score,
    random_state=RANDOM_SEED,
)

In [8]:
# Evaluate every available model with its default settings; the value
# returned by `fit` is kept in `review` and displayed in the next cell.
review = model.fit(
    X_train=X_train,
    y_train=y_train,
    X_test=X_test,
    y_test=y_test,
)

100%|██████████| 9/9 [01:37<00:00, 10.81s/it]


In [9]:
# Display the per-model summary: model name, score and fit time in seconds.
review

Unnamed: 0,Model_Name,Score,Time_Fit_Sec
0,LightGBM,0.9094,4.93
1,KNeighbors,0.8471,18.42
2,LinearModel,0.9103,1.77
3,SGD,0.7556,1.78
4,RandomForest,0.8832,2.62
5,ExtraTrees,0.8441,3.48
6,XGBoost,0.7676,26.41
7,CatBoost,0.912,17.17
8,MLP,0.9095,20.71
