In [1]:
import sys
# Append the parent directory to the module search path *before* the
# `deeptables` imports below run.
# NOTE(review): presumably this makes the in-repo `deeptables` package
# importable when the notebook is run from the examples folder — confirm.
sys.path.append('..')
from deeptables.models import deeptable,deepnets
from deeptables.utils import consts,dt_logging,batch_trainer

# NOTE(review): `datasets` looks like a local helper package sitting next to
# this notebook (not a PyPI distribution) — confirm before relocating the file.
from datasets import utils as dsutils
# NOTE(review): several of the names imported below (train_test_split,
# roc_auc_score, roc_curve, np, RandomNormal, l2, keras) are not referenced in
# the cells visible here; they may be leftovers or used elsewhere.
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, roc_curve
import pandas as pd
import numpy as np
from tensorflow.keras.initializers import RandomNormal
from tensorflow.keras.regularizers import l2
from tensorflow import keras
import logging
import warnings

# Notebook-wide output settings: keep the cell output focused on the
# training logs and result tables.

# Silence every Python warning raised from here on.
warnings.filterwarnings(action="ignore")

# Only let ERROR-level (and above) records through the root logger setup.
logging.basicConfig(level=logging.ERROR)

# Allow pandas to render up to 500 rows before truncating a displayed frame.
pd.options.display.max_rows = 500

This means that in case of installing LightGBM from PyPI via the ``pip install lightgbm`` command, you don't need to install the gcc compiler anymore.
Instead of that, you need to install the OpenMP library, which is required for running LightGBM on the system with the Apple Clang compiler.
You can install the OpenMP library by the following command: ``brew install libomp``.


In [2]:
# Load the example dataset through the `datasets.utils` helper (imported as
# `dsutils`). The result is handed to BatchTrainer in a later cell, which uses
# the column 'x_14' as the target.
# NOTE(review): presumably the Adult census-income data, judging by the loader
# name — confirm against the helper.
data = dsutils.load_adult()

Base dir:/Users/jack/workspace/aps/deeptables/examples/datasets


In [3]:
# Model configuration shared by every candidate trained in this batch.
conf = deeptable.ModelConfig(
    cat_remain_numeric=True,
    cat_exponent=0.4,
    auto_categorization=True,
    auto_discrete=True,
)

# Each inner list is one combination of nets to train and compare.
net_combinations = [
    ['dnn_nets', 'fm_nets'],
    ['dnn_nets', 'fm_nets', 'cross_nets'],
]
# Scores requested from the trainer for every candidate.
score_names = ['AUC', 'accuracy', 'recall', 'precision', 'f1']

# Train every net combination on `data` with 'x_14' as the target column,
# using a single epoch and 2-fold cross-validation.
bt = batch_trainer.BatchTrainer(
    data,
    'x_14',
    # what to train
    dt_config=conf,
    dt_nets=net_combinations,
    dt_epochs=1,
    # how to split and score
    eval_size=0.2,
    validation_size=0.2,
    cross_validation=True,
    num_folds=2,
    metrics=score_names,
    # logging
    verbose=1,
)

# Run the batch; the returned object is queried for the leaderboard later on.
ms = bt.start()

2 class detected, {' <=50K', ' >50K'}, so inferred as a [binary classification] task
Start training DT model.['dnn_nets', 'fm_nets']
metrics:['AUC', 'accuracy', 'recall', 'precision', 'f1']
Fitting model...
Start cross validation
2 class detected, {' <=50K', ' >50K'}, so inferred as a [binary classification] task
Preparing features cost:0.036856889724731445
Imputation cost:0.10047602653503418
Categorical encoding cost:0.15257000923156738
Discretization cost:0.07267594337463379
fit_transform cost:0.38202500343322754
transform X_eval
transform_X cost:0.6763067245483398
Iterators:StratifiedKFold(n_splits=2, random_state=9527, shuffle=True)
Injected a callback [EarlyStopping]. monitor:val_AUC, patience:1, mode:max

Fold:1



[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


>>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< 
---------------------------------------------------------
inputs:
---------------------------------------------------------
['all_categorical_vars: (15)', 'input_continuous_all: (6)']
---------------------------------------------------------
embeddings:
---------------------------------------------------------
input_dims: [10, 17, 8, 16, 7, 6, 3, 42, 17, 5, 14, 5, 5, 5, 4]
output_dims: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
dropout: 0.3
---------------------------------------------------------
dense: dropout: 0
batch_normalization: False
---------------------------------------------------------
concat_embed_dense: shape: (None, 66)
---------------------------------------------------------
nets: ['dnn_nets', 'fm_nets']
---------------------------------------------------------
dnn: input_shape (None, 66), output_shape (None, 64)
fm: input_shape (None, 15, 4), output_shape (None, 1)
-----------------------------------------

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   40.5s finished



------------OOF------------ score:
{'auc': 0.8184193003848422, 'accuracy': 0.5471053439803439, 'recall': 0.9161515295609447, 'precision': 0.3390427029563585, 'f1': 0.49492657447446164}

------------CV------------ Eval score:
{'auc': 0.8308142088762362, 'accuracy': 0.5449101796407185, 'recall': 0.9308093994778068, 'precision': 0.33286647992530344, 'f1': 0.4903713892709766}
DT finished.
DT - ['dnn_nets', 'fm_nets'] - done in 42s
----------------------------------------------------------

Start training DT model.['dnn_nets', 'fm_nets', 'cross_nets']
metrics:['AUC', 'accuracy', 'recall', 'precision', 'f1']
Fitting model...
Start cross validation
2 class detected, {' <=50K', ' >50K'}, so inferred as a [binary classification] task
Preparing features cost:0.03641700744628906
Imputation cost:0.09661602973937988
Categorical encoding cost:0.15231585502624512
Discretization cost:0.059618234634399414
fit_transform cost:0.3658161163330078
transform X_eval
transform_X cost:0.7705960273742676
Iterat

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.


>>>>>>>>>>>>>>>>>>>>>> Model Desc <<<<<<<<<<<<<<<<<<<<<<< 
---------------------------------------------------------
inputs:
---------------------------------------------------------
['all_categorical_vars: (15)', 'input_continuous_all: (6)']
---------------------------------------------------------
embeddings:
---------------------------------------------------------
input_dims: [10, 17, 8, 16, 7, 6, 3, 42, 17, 5, 14, 5, 5, 5, 4]
output_dims: [4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4]
dropout: 0.3
---------------------------------------------------------
dense: dropout: 0
batch_normalization: False
---------------------------------------------------------
concat_embed_dense: shape: (None, 66)
---------------------------------------------------------
nets: ['dnn_nets', 'fm_nets', 'cross_nets']
---------------------------------------------------------
dnn: input_shape (None, 66), output_shape (None, 64)
fm: input_shape (None, 15, 4), output_shape (None, 1)
cross: input_shape (None, 6

[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:   47.2s finished



------------OOF------------ score:
{'auc': 0.8404534059313957, 'accuracy': 0.5534398034398035, 'recall': 0.9494373117768268, 'precision': 0.34618274287695777, 'f1': 0.507369134338472}

------------CV------------ Eval score:
{'auc': 0.8482474919052714, 'accuracy': 0.5403040073698756, 'recall': 0.9490861618798956, 'precision': 0.3327231121281464, 'f1': 0.4927143341240257}
DT finished.
DT - ['dnn_nets', 'fm_nets', 'cross_nets'] - done in 49s
----------------------------------------------------------



In [4]:
# Display the leaderboard for the models trained by `bt.start()` above,
# requesting up to 100 entries (`top=100`).
ms.leaderboard(top=100)

Unnamed: 0,model,type,*auc,accuracy,recall,precision,f1
0,"conf-1 - ['dnn_nets', 'fm_nets', 'cross_nets']...",cv-eval,0.848247,0.540304,0.949086,0.332723,0.492714
1,"conf-1 - ['dnn_nets', 'fm_nets', 'cross_nets']...",oof,0.840453,0.55344,0.949437,0.346183,0.507369
2,"conf-1 - ['dnn_nets', 'fm_nets'] - CV - eval",cv-eval,0.830814,0.54491,0.930809,0.332866,0.490371
3,"conf-1 - ['dnn_nets', 'fm_nets'] - CV - oof",oof,0.818419,0.547105,0.916152,0.339043,0.494927
