In [0]:
# If you run this notebook on Google Colaboratory, uncomment the line below to install automl_alex.
# !pip install automl-alex

In [0]:
import automl_alex
import pandas as pd
import sklearn
from automl_alex import ModelsReview, ModelsReviewClassifier
from sklearn.datasets import fetch_openml
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split

In [5]:
# Record the installed automl_alex version so the results below can be tied to a release.
print(automl_alex.__version__)

0.05.07.5


In [0]:
# Single seed reused as `random_state` for the train/test split and the model review,
# so both draw on the same source of reproducibility.
RANDOM_SEED: int = 42

# Classifier

## Data

In [7]:
# Fetch version 1 of the 'credit-g' dataset from OpenML; as_frame=True makes
# `dataset.data` a DataFrame and `dataset.target` a Series.
dataset = fetch_openml(name='credit-g', version=1, as_frame=True)
# Preview the first rows of the feature table.
dataset.data.head(5)

Unnamed: 0,checking_status,duration,credit_history,purpose,credit_amount,savings_status,employment,installment_commitment,personal_status,other_parties,residence_since,property_magnitude,age,other_payment_plans,housing,existing_credits,job,num_dependents,own_telephone,foreign_worker
0,<0,6.0,critical/other existing credit,radio/tv,1169.0,no known savings,>=7,4.0,male single,none,4.0,real estate,67.0,none,own,2.0,skilled,1.0,yes,yes
1,0<=X<200,48.0,existing paid,radio/tv,5951.0,<100,1<=X<4,2.0,female div/dep/mar,none,2.0,real estate,22.0,none,own,1.0,skilled,1.0,none,yes
2,no checking,12.0,critical/other existing credit,education,2096.0,<100,4<=X<7,2.0,male single,none,3.0,real estate,49.0,none,own,1.0,unskilled resident,2.0,none,yes
3,<0,42.0,existing paid,furniture/equipment,7882.0,<100,4<=X<7,2.0,male single,guarantor,4.0,life insurance,45.0,none,for free,1.0,skilled,2.0,none,yes
4,<0,24.0,delayed previously,new car,4870.0,<100,1<=X<4,3.0,male single,none,4.0,no known property,53.0,none,for free,2.0,skilled,2.0,none,yes


In [8]:
# Preview the first few class labels before they are encoded.
dataset.target.head(5)

0    good
1     bad
2    good
3    good
4     bad
Name: class, dtype: category
Categories (2, object): [good, bad]

In [9]:
# Swap the string class labels for their integer category codes.
# This overwrites `dataset.target`, so later cells see the encoded labels.
dataset.target = dataset.target.astype('category').cat.codes
dataset.target.head(5)

0    0
1    1
2    0
3    0
4    1
dtype: int8

In [10]:
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    test_size=0.25,
    random_state=RANDOM_SEED,
)
(X_train.shape, X_test.shape)

((750, 20), (250, 20))

## Model

In [0]:
# Generic entry point: the estimator family is selected with `type_of_estimator`.
model = ModelsReview(X_train, y_train, X_test, type_of_estimator='classifier', random_state=RANDOM_SEED)

# Or simply use the classifier-specific class (presumably equivalent to the call
# above — confirm against the automl_alex docs). This assignment rebinds `model`,
# so the object built above is discarded and only this one is used afterwards.
model = ModelsReviewClassifier(X_train, y_train, X_test, random_state=RANDOM_SEED)

In [12]:
# Run the optimisation over the candidate models and keep the returned summary.
# NOTE(review): the unit and scope of `timeout` are not visible here (the recorded
# run took about 47 minutes for 10 models) — confirm against the automl_alex docs.
review = model.opt(
    timeout=2000,
    verbose=1,
)

100%|██████████| 10/10 [47:17<00:00, 283.71s/it, Model: MLP | Best Score roc_auc_score = 0.7203 +- 0.04331 ]


In [13]:
# Display the summary returned by `model.opt` (one row per reviewed model).
review

Unnamed: 0,score_opt,model_score,score_std,model_name,model_param,wrapper_params,cat_encoder,target_encoder
0,0.7899,0.8392,0.049314,LightGBM,"{'random_seed': 42, 'early_stopping_rounds': 5...","{'need_norm_data': False, 'early_stopping': Fa...",OneHotEncoder,JamesSteinEncoder
1,0.8199,0.8291,0.00922,KNeighbors,"{'n_jobs': -1, 'n_neighbors': 150, 'weights': ...","{'need_norm_data': True, 'scaler_name': 'MinMa...",OneHotEncoder,JamesSteinEncoder
2,0.6956,0.7419,0.046312,LinearSVM,"{'verbose': 0, 'random_state': 42, 'tol': 0.00...","{'need_norm_data': True, 'norm_data': True, 's...",OneHotEncoder,JamesSteinEncoder
3,0.807,0.8203,0.013293,LinearModel,"{'verbose': 0, 'fit_intercept': False, 'C': 0....","{'need_norm_data': True, 'scaler_name': 'Robus...",OneHotEncoder,JamesSteinEncoder
4,0.6747,0.7228,0.048086,SGD,"{'max_iter': 5000, 'verbose': 0, 'fit_intercep...","{'need_norm_data': True, 'norm_data': True, 's...",OneHotEncoder,JamesSteinEncoder
5,0.7839,0.8302,0.046312,RandomForest,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{'need_norm_data': False},OneHotEncoder,JamesSteinEncoder
6,0.807,0.837,0.030017,ExtraTrees,"{'verbose': 0, 'random_state': 42, 'n_jobs': -...",{'need_norm_data': False},OneHotEncoder,JamesSteinEncoder
7,0.6574,0.7303,0.072899,XGBoost,"{'verbosity': 0, 'early_stopping_rounds': 100,...","{'need_norm_data': False, 'early_stopping': Fa...",OneHotEncoder,JamesSteinEncoder
8,0.8002,0.8336,0.03337,CatBoost,"{'verbose': 0, 'early_stopping_rounds': 50, 't...","{'need_norm_data': False, 'early_stopping': True}",OneHotEncoder,JamesSteinEncoder
9,0.7095,0.776,0.066515,MLP,"{'verbose': 0, 'random_state': 42, 'max_iter':...","{'need_norm_data': True, 'scaler_name': 'Robus...",OneHotEncoder,JamesSteinEncoder


In [14]:
# Generate predictions from every reviewed model. The result carries
# `model_name`, `predict_test` and `predict_train` entries, as the following
# cell's table shows.
predicts = model.predict()

100%|██████████| 10/10 [01:07<00:00,  6.79s/it]


In [18]:
# Wrap the predictions in a DataFrame so the notebook renders them as a table.
pd.DataFrame(predicts)

Unnamed: 0,model_name,predict_test,predict_train
0,0_LightGBM,"[0.33710323243720536, 0.4377319236597748, 0.48...","[0.26684486309110855, 0.195029184038383, 0.441..."
1,1_KNeighbors,"[0.372, 0.326, 0.41200000000000003, 0.30466666...","[0.32666666666666666, 0.14666666666666667, 0.4..."
2,2_LinearSVM,"[0.9999999999999999, 0.9999999999999999, 0.999...","[0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 0.0, 1.0, ..."
3,3_LinearModel,"[0.5720534148766729, 0.6572557887438732, 0.800...","[0.45480057904961524, 0.2951061080952058, 0.84..."
4,4_SGD,"[0.8999999999999999, 0.9999999999999999, 0.999...","[0.0, 0.0, 1.0, 0.0, 0.0, 1.0, 0.0, 0.0, 1.0, ..."
5,5_RandomForest,"[0.37226504329004334, 0.5245888888888888, 0.68...","[0.3688333333333334, 0.17307936507936503, 0.52..."
6,6_ExtraTrees,"[0.4190824400179773, 0.542102956199289, 0.6586...","[0.35953010031099064, 0.20199736495057985, 0.4..."
7,7_XGBoost,"[0.1, 0.4, 0.7, 0.1, 0.0, 0.1, 0.0, 0.2, 0.0, ...","[0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
8,8_CatBoost,"[0.4329892924312411, 0.49063796908949664, 0.51...","[0.4593566430413601, 0.4478723861755469, 0.542..."
9,9_MLP,"[0.4285098190739281, 0.38119179843973944, 0.51...","[0.3324338860278719, 0.15738012862707415, 0.61..."


In [21]:
# Each row of 'predict_test' holds one model's predictions for the test set, so
# the Series mean is the per-sample average across models (a simple blend).
blended_test_prediction = pd.DataFrame(predicts)['predict_test'].mean()

# `roc_auc_score` is imported explicitly: `import sklearn` alone does not
# guarantee that the `sklearn.metrics` submodule has been loaded.
print('Test AUC: ', round(roc_auc_score(y_test, blended_test_prediction), 4))

Test AUC:  0.7985
