In [1]:
# If you run this notebook on Google Colaboratory, uncomment the below to install automl_alex.
!pip install -U -q automl-alex

[0m

In [2]:
import pandas as pd
import sklearn
from sklearn.datasets import fetch_openml
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

import automl_alex
from automl_alex import ModelsReview, ModelsReviewClassifier, DataPrepare

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Print the version of the automl_alex package imported by this kernel.
print(automl_alex.__version__)

2023.3.11


In [4]:
# Single seed passed as `random_state` to train_test_split, DataPrepare and
# the ModelsReview* constructors in the cells below.
RANDOM_SEED = 42

## Data

In [5]:
# Source: OpenML dataset id 31 -- https://www.openml.org/d/31
dataset = fetch_openml(data_id=31, as_frame=True)

# Replace the target labels with their integer category codes.
dataset.target = (
    dataset.target
    .astype('category')
    .cat.codes
)

# Preview the first five feature rows.
dataset.data.head(n=5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [6]:
# Hold out 25% of the rows as a test split; seeded so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)
X_train.shape, X_test.shape

((750, 20), (250, 20))

In [7]:
# DataPrepare goes through fit_transform on the training split; the test split
# only goes through transform on the same instance.
# NOTE: X_train / X_test are rebound to the transformed frames here, so
# re-running this cell without re-running the split cell above would pass the
# already-transformed frames back into DataPrepare.
de = DataPrepare(random_state=RANDOM_SEED, verbose=0)
X_train = de.fit_transform(X_train)
X_test = de.transform(X_test)
X_train.shape, X_test.shape

((750, 78), (250, 78))

# ModelsReview

In [8]:
# Generic entry point: the task is selected with `type_of_estimator`.
model = ModelsReview(type_of_estimator='classifier', random_state=RANDOM_SEED)

# Or simply use the classifier-specific class. This rebinds `model`, so the
# instance below (configured with roc_auc_score as its metric) is the one the
# following cells actually use.
# `roc_auc_score` is imported explicitly at the top of the notebook instead of
# being reached through `sklearn.metrics`, which a bare `import sklearn` does
# not guarantee to have loaded.
model = ModelsReviewClassifier(
    metric=roc_auc_score,
    random_state=RANDOM_SEED,
)

In [9]:
# Fit all available models with their default settings and keep the returned
# results in `review`.
review = model.fit(X_train=X_train, y_train=y_train, X_test=X_test, y_test=y_test)

100%|██████████| 8/8 [00:02<00:00,  3.53it/s]


In [10]:
# Display the results returned by model.fit in the previous cell.
review

Unnamed: 0,Model_Name,Score,Time_Fit_Sec
0,LightGBM,0.7927,0.23
1,KNeighbors,0.6137,0.02
2,LinearModel,0.7207,0.02
3,RandomForest,0.7912,0.19
4,ExtraTrees,0.8067,0.15
5,XGBoost,0.6895,0.23
6,CatBoost,0.8004,0.95
7,MLP,0.7011,0.47
