# *Modern Deep Learning for Tabular Data*, Chapter 10

**Meta-Optimization**

This notebook contains the complementary code discussed in Chapter 10 of *Modern Deep Learning for Tabular Data*.

External Kaggle links to datasets used in this notebook:
- [Forest Cover Type Dataset](https://www.kaggle.com/datasets/uciml/forest-cover-type-dataset)
- [Mouse Protein Expression Dataset](https://www.kaggle.com/datasets/washingtongold/mpempe)

You can download these datasets from Kaggle, or import these notebooks into Kaggle and connect them internally.

---

## Imports

In [None]:
# installing packages
!pip install hyperopt
!pip install autokeras

# data management
import numpy as np                   # for linear algebra
import pandas as pd                  # for tabular data manipulation and processing
from skimage import io               # for input/output processing

# machine learning
import sklearn                       # for data prep and classical ML
import tensorflow as tf              # for deep learning
from tensorflow import keras         # for deep learning
import keras
import keras.layers as L             # for easy NN layer access

# data visualization and graphics
import matplotlib.pyplot as plt      # for visualization fundamentals
import seaborn as sns                # for pretty visualizations
import cv2                           # for image manipulation

# misc
from tqdm.notebook import tqdm       # for progress bars
import math                          # for calculation
import sys                           # for system manipulation
import os                            # for file manipulation

# meta-optimization
import hyperopt                      # for parameter optimization
import autokeras as ak               # for AutoML

---

## HyperOpt Syntax

Attempting to minimize $(x - 1)^2$:

In [None]:
# Plot the first toy objective, (x - 1)^2, on [-5, 5] and mark the point (1, 0).
plt.figure(figsize=(10, 5))
x = np.linspace(-5, 5, 100)
y = (x - 1)**2
plt.plot(x, y, color='red')
plt.scatter([1], [0], color='red')  # (1, 0): where (x - 1)^2 reaches zero

plt.grid()
plt.show()

In [None]:
# define the search space: one variable, labelled 'x', sampled from a normal
# distribution with mean 0 and standard deviation 10
from hyperopt import hp
space = {'x':hp.normal('x', mu=0, sigma=10)}

# define objective function
def obj_func(params):
    """Return (x - 1)^2 for the sampled value stored under ``params['x']``."""
    offset = params['x'] - 1
    return offset**2

# perform minimization procedure: 500 evaluations of obj_func over `space`,
# with candidates proposed by hyperopt's TPE algorithm (tpe.suggest);
# the value returned by fmin is kept in `best`
from hyperopt import fmin, tpe
best = fmin(obj_func, space, algo=tpe.suggest, max_evals=500)

Attempting to minimize $\sin^2 x$:

In [None]:
# Plot sin^2(x): solid on [2, 4] (the interval searched in the next cell) and
# dashed/faded on the wider range [-2, 8] for context.
plt.figure(figsize=(10, 5), dpi=400)
x = np.linspace(2, 4, 1000)
y = np.sin(x)**2
plt.plot(x, y, color='red')
plt.scatter([np.pi], [0], color='red')  # marks (pi, 0)

x = np.linspace(-2, 8, 1000)
y = np.sin(x)**2
plt.plot(x, y, color='red', alpha=0.3, linestyle='--')
plt.scatter([np.pi], [0], color='red')  # same marker drawn a second time

plt.grid()
plt.show()

In [None]:
# define the search space: 'x' sampled uniformly between 2 and 4
from hyperopt import hp
space = {'x':hp.uniform('x', 2, 4)}

# define objective function
def obj_func(params):
    """Return sin(x)^2 for the sampled value stored under ``params['x']``."""
    sine = np.sin(params['x'])
    return sine**2

# perform minimization procedure three times with growing evaluation budgets
# (10, 100, 1000) and print each result so they can be compared
from hyperopt import fmin, tpe
best = fmin(obj_func, space, algo=tpe.suggest, max_evals=10)
print(best)

best = fmin(obj_func, space, algo=tpe.suggest, max_evals=100)
print(best)

best = fmin(obj_func, space, algo=tpe.suggest, max_evals=1000)
print(best)

Attempting to find the minimum of a very discontinuous function, $1/{x^2} + x^2$:

In [None]:
# Plot 1/x^2 + x^2 as two separate curves, one on each side of x = 0 (the
# sampled ranges stop short of 0), and mark the points (-1, 2) and (1, 2).
plt.figure(figsize=(10, 5), dpi=400)
x = np.linspace(-3.99214, -0.25049, 100)
y = 1/(x**2) + (x)**2
plt.plot(x, y, color='red')

x = np.linspace(0.25049, 3.99214, 100)
y = 1/(x**2) + (x)**2
plt.plot(x, y, color='red')
plt.scatter([-1, 1], [2, 2], color='red')

plt.grid()
plt.show()

In [None]:
# define the search space: 'x' sampled from a normal distribution with mean 0
# and standard deviation 10
from hyperopt import hp
space = {'x':hp.normal('x', mu=0, sigma=10)}

# define objective function
def obj_func(params):
    """Return 1/x^2 + x^2 as a hyperopt result dictionary.

    When x is exactly 0 the function is undefined, so the trial is reported
    with status 'fail' instead of a loss.
    """
    x = params['x']
    if x == 0:
        return {'status':'fail'}
    x_squared = x**2
    return {'loss':1/x_squared + x_squared, 'status':'ok'}

# perform minimization procedure: 500 TPE-guided evaluations; obj_func returns
# result dictionaries ('loss'/'status') rather than bare numbers
from hyperopt import fmin, tpe
best = fmin(obj_func, space, algo=tpe.suggest, max_evals=500)

In [None]:
best

Finding the minimum of a 'very very discontinuous' function, $1/\sqrt{x} - \sqrt{\sin(x^2)}$, which is undefined wherever $x \le 0$ or $\sin(x^2) < 0$:

In [None]:
# Plot 1/sqrt(x) - sqrt(sin(x^2)) on [0.1, 10] using 10,000 sample points.
# np.sqrt returns NaN wherever sin(x^2) is negative, so y contains NaN
# stretches (NumPy may emit a RuntimeWarning while computing them).
plt.figure(figsize=(10, 5), dpi=400)
x = np.linspace(0.1, 10, 10_000)
y = 1/(np.sqrt(x)) - np.sqrt(np.sin(x**2))
plt.plot(x, y, color='red')

plt.grid()
plt.show()

In [None]:
# define the search space: 'x' sampled uniformly between -10 and 10; this
# range deliberately includes values (e.g. negative x) where the objective
# below is undefined
from hyperopt import hp
space = {'x':hp.uniform('x', -10, 10)}

# define objective function
def obj_func(params):
    """Evaluate 1/sqrt(x) - sqrt(sin(x^2)) as a hyperopt result dictionary.

    Args:
        params: sampled point; the value is read from ``params['x']``.

    Returns:
        ``{'status': 'fail'}`` when the expression has no finite value
        (x <= 0 or sin(x^2) < 0), otherwise ``{'loss': value, 'status': 'ok'}``.
    """
    x = params['x']
    # NumPy reports an undefined sqrt / division as NaN or inf plus a
    # RuntimeWarning; silence the warning and inspect the value instead.
    with np.errstate(divide='ignore', invalid='ignore'):
        result = 1/np.sqrt(x) - np.sqrt(np.sin(x**2))
    # NaN never compares equal to anything (not even itself), so
    # `result == np.nan` is always False; test for finiteness explicitly.
    if not np.isfinite(result):
        return {'status':'fail'}
    return {'loss': result, 'status':'ok'}

# perform minimization procedure: 1000 TPE-guided evaluations over [-10, 10]
from hyperopt import fmin, tpe
best = fmin(obj_func, space, algo=tpe.suggest, max_evals=1000)

In [None]:
# Re-draw the curve and overlay, in black, the function value at the x found
# by the search (best['x']).
plt.figure(figsize=(10, 5), dpi=400)
x = np.linspace(0.1, 10, 10_000)
y = 1/(np.sqrt(x)) - np.sqrt(np.sin(x**2))
plt.plot(x, y, color='red')
plt.scatter([best['x']], 1/(np.sqrt(best['x'])) - np.sqrt(np.sin(best['x']**2)), color='black')
plt.grid()
plt.show()

## Optimizing a Model

Loading the Higgs Boson dataset.

In [None]:
import numpy as np
import pandas as pd
# Training split: drop the 'id' column, then separate the 'class' target from
# the remaining feature columns.
train_data = pd.read_csv('../input/higs-bonsons-and-background-process/train.csv').drop('id', axis=1)
X_train = train_data.drop('class', axis=1)
y_train = train_data['class']

# Validation split (read from test.csv): same column handling, but '?' entries
# are first replaced by NaN and rows containing any NaN are dropped.
# NOTE(review): the training file is not cleaned this way -- confirm it
# contains no '?' placeholders.
valid_data = pd.read_csv('../input/higs-bonsons-and-background-process/test.csv').drop('id', axis=1).replace('?', np.nan).dropna()
X_valid = valid_data.drop('class', axis=1)
y_valid = valid_data['class']

Optimizing the (classical) machine learning algorithm and its parameters.

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, AdaBoostClassifier
from sklearn.neural_network import MLPClassifier

from sklearn.metrics import f1_score

from hyperopt import hp
from hyperopt import fmin, tpe

# Search space: a single hp.choice over candidate classifiers. Each entry pairs
# an estimator class ('model') with the hyperopt expressions for its
# constructor keyword arguments ('parameters').
space = {}

# hp.quniform / hp.qnormal produce floats, and qnormal has no lower bound;
# the objective below is responsible for turning them into usable values.
# NOTE(review): several option strings depend on the installed scikit-learn
# version -- LogisticRegression's default solver may reject 'l1' and
# 'elasticnet', and 'none' / 'mse' / 'mae' are legacy spellings. Confirm
# against the scikit-learn release in use.
models = [{'model': LogisticRegression,
           'parameters':{'penalty':hp.choice('lr_penalty', ['none', 'l2', 'l1', 'elasticnet'])}},
          {'model': DecisionTreeClassifier,
           'parameters': {'criterion': hp.choice('dtr_criterion', ['gini', 'entropy']),
                          'max_depth': hp.quniform('dtr_max_depth', 1, 30, 1)}},
          {'model': RandomForestClassifier,
           'parameters': {
               'criterion': hp.choice('rfr_criterion', ['gini', 'entropy']),
               'max_depth': hp.quniform('rfr_max_depth', 1, 30, q=1),
               'n_estimators': hp.qnormal('rfr_n_estimators', 100, 30, 1)}},
          {'model': GradientBoostingClassifier,
           'parameters': 
           {'criterion': hp.choice('gbr_criterion', ['friedman_mse', 
                                                     'squared_error', 
                                                     'mse', 'mae']),
           'n_estimators': hp.qnormal('gbr_n_estimators', 100, 30, 1),
           'max_depth': hp.quniform('gbr_max_depth', 1, 30, q=1)}},
          {'model': AdaBoostClassifier,
           'parameters': {
               'n_estimators': hp.qnormal('abr_n_estimators', 50, 15, 1),
               'learning_rate': hp.uniform('abr_learning_rate', 1e-3, 10)}
          },
          {'model': MLPClassifier,
           'parameters':{'activation': hp.choice('mlp_activation', ['logistic', 'tanh', 'relu'])}
          }]

# the model itself is a hyperparameter: pick one entry of `models` per trial
space['models'] = hp.choice('models', models)

def objective(params):
    """Fit the sampled classifier and return its negated validation F1 score.

    Args:
        params: hyperopt sample; ``params['models']`` holds the estimator
            class under 'model' and its keyword arguments under 'parameters'.

    Returns:
        ``-f1_score`` on (X_valid, y_valid); negated because fmin minimizes.
    """
    chosen = params['models']
    cleanedParams = {}
    for param, value in chosen['parameters'].items():
        if param in ('n_estimators', 'max_depth'):
            # hp.quniform / hp.qnormal yield floats (and qnormal is unbounded
            # below); scikit-learn expects positive integers for both.
            value = max(1, int(value))
        cleanedParams[param] = value

    model = chosen['model'](**cleanedParams)
    model.fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order
    return -f1_score(y_valid, model.predict(X_valid))

# 100 TPE trials; the objective is the negated F1, so minimizing it maximizes F1
best = fmin(objective, space, algo=tpe.suggest, max_evals=100)

Optimizing the training parameters of a neural network.

In [None]:
from tensorflow.keras.optimizers import Adam, SGD, RMSprop, Adagrad
from sklearn.metrics import f1_score
from hyperopt import hp
from hyperopt import fmin, tpe

# Search space for the *training procedure* of a fixed network: which optimizer
# to use (with optimizer-specific arguments) and how ReduceLROnPlateau behaves.
space = {}

# Each entry pairs an optimizer class with hyperopt expressions for its
# constructor keyword arguments.
# NOTE(review): 'rms_rho' is drawn from a normal centered at 1.0, so many
# samples fall outside [0, 1) -- confirm RMSprop tolerates such values.
optimizers = [{'optimizer':SGD,
              'parameters':{
                  'learning_rate': hp.uniform('sgd_lr', 1e-5, 1),
                  'momentum': hp.uniform('sgd_mom', 0, 1),
                  'nesterov': hp.choice('sgd_nest', [False, True])
              }},
              {'optimizer':RMSprop,
              'parameters':{
                  'learning_rate': hp.uniform('rms_lr', 1e-5, 1),
                  'momentum': hp.uniform('rms_mom', 0, 1),
                  'rho': hp.normal('rms_rho', 1.0, 0.3),
                  'centered': hp.choice('rms_cent', [False, True])
              }},
              {'optimizer':Adam,
              'parameters':{
                  'learning_rate': hp.uniform('adam_lr', 1e-5, 1),
                  'beta_1': hp.uniform('adam_beta1', 0.3, 0.9999999999),
                  'beta_2': hp.uniform('adam_beta2', 0.3, 0.9999999999),
                  'amsgrad': hp.choice('amsgrad', [False, True])
              }},
              {'optimizer':Adagrad,
              'parameters':{
                  'learning_rate': hp.uniform('adagrad_lr', 1e-5, 1),
                  'initial_accumulator_value': hp.uniform('adagrad_iav', 0.0, 1.0)
              }}]
space['optimizers'] = hp.choice('optimizers', optimizers)


# keyword arguments forwarded verbatim to ReduceLROnPlateau by the objective
from keras.callbacks import ReduceLROnPlateau
space['lr_manage'] = {'factor': hp.uniform('lr_factor', 0.01, 0.95),
                      'patience': hp.quniform('lr_patience', 3, 20, q=1)}

from keras.callbacks import EarlyStopping

# loss object used by the objective to score validation predictions
# NOTE(review): from_logits=True treats inputs as raw logits, but build_NN
# below ends in a sigmoid activation -- confirm this flag is intended.
bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def build_NN(input_dim = len(X_train.columns)):
    """Build the fixed binary-classification network used in this section.

    Layout: three Dense(input_dim) ReLU layers, a BatchNormalization layer,
    three Dense(16) ReLU layers, and a single sigmoid output unit. The model
    is returned uncompiled.

    Args:
        input_dim: number of input features; the default is fixed when the
            function is defined, from the width of X_train at that moment.
    """
    # later - build control over how many branches, etc.
    model = keras.models.Sequential()
    model.add(L.Input((input_dim,)))
    for _ in range(3):
        model.add(L.Dense(input_dim, activation='relu'))
    model.add(L.BatchNormalization())
    for _ in range(3):
        model.add(L.Dense(16, activation='relu'))
    model.add(L.Dense(1, activation='sigmoid'))
    return model

def objective(params):
    """Train the fixed network with the sampled optimizer and learning-rate
    schedule, and return its binary cross-entropy on the validation set.

    Args:
        params: hyperopt sample with 'optimizers' (optimizer class plus its
            keyword arguments) and 'lr_manage' (ReduceLROnPlateau arguments).

    Returns:
        Validation binary cross-entropy as a float (lower is better).
    """
    model = build_NN()

    # Explicit numeric arrays for the validation split; labels as a column
    # vector so they match the (n, 1) shape of the predictions.
    X_val = np.asarray(X_valid).astype(np.float32)
    y_val = np.asarray(y_valid).astype(np.float32).reshape(-1, 1)

    # Both callbacks monitor 'val_loss' by default, so fit() must be given
    # validation data -- without it they never trigger.
    es = EarlyStopping(patience=5)
    rlrop = ReduceLROnPlateau(**params['lr_manage'])

    optimizer = params['optimizers']['optimizer']
    optimizer_params = params['optimizers']['parameters']
    model.compile(loss='binary_crossentropy',
                  optimizer=optimizer(**optimizer_params))
    model.fit(X_train, y_train, validation_data=(X_val, y_val),
              callbacks=[es, rlrop], epochs = 50, verbose = 0)

    # The network ends in a sigmoid, so predictions are probabilities (not
    # logits), and Keras losses are called as loss(y_true, y_pred).
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    valid_loss = loss_fn(y_val, model.predict(X_val, verbose=0)).numpy()
    return float(valid_loss)

# 100 TPE trials; each trial trains a fresh network inside `objective`
best = fmin(objective, space, algo=tpe.suggest, max_evals=100);

Optimizing the architecture of a neural network.

In [None]:
def build_NN(num_branches,
             num28repeats, num16repeats,
             join_method, numOutRepeats):
    """Build a multi-branch binary classifier over a 28-feature input.

    Every branch is a stack of ReLU Dense layers: one Dense(28), followed by
    ``num28repeats - 1`` further Dense(28) layers and ``num16repeats - 1``
    Dense(16) layers. The branch outputs are joined, passed through one
    Dense(16) plus ``numOutRepeats - 1`` more, and end in a sigmoid unit.

    Args:
        num_branches: number of parallel branches (truncated with int()).
        num28repeats, num16repeats, numOutRepeats: layer counts as described
            above; float values are accepted and truncated with int().
        join_method: 'concat' concatenates the branches; any other value adds
            them. Ignored when num_branches equals 1.

    Returns:
        An uncompiled keras Model.
    """
    inp = L.Input((28,))

    # layer widths shared by every branch (a negative repeat count yields an
    # empty list, just like an empty range)
    branch_widths = [28] + [28] * int(num28repeats-1) + [16] * int(num16repeats-1)

    out_tensors = []
    for _ in range(int(num_branches)):
        x = inp
        for width in branch_widths:
            x = L.Dense(width, activation='relu')(x)
        out_tensors.append(x)

    # merge layers need at least two inputs, so a single branch is used as-is
    if num_branches == 1:
        join = out_tensors[0]
    else:
        merge_layer = L.Concatenate() if join_method == 'concat' else L.Add()
        join = merge_layer(out_tensors)

    x = join
    for width in [16] + [16] * int(numOutRepeats-1):
        x = L.Dense(width, activation='relu')(x)
    out = L.Dense(1, activation='sigmoid')(x)
    return keras.models.Model(inputs=inp, outputs=out)

# Search space: optimizer choice and LR-schedule arguments as before, plus an
# 'architecture' entry whose keys match build_NN's parameter names so it can
# be unpacked with ** in the objective.
space = {}
space['optimizers'] = hp.choice('optimizers', optimizers)
space['lr_manage'] = {'factor': hp.uniform('lr_factor', 0.01, 0.95),
                      'patience': hp.quniform('lr_patience', 3, 20, q=1)}
# quniform(..., 1, 5, q=1) yields float values; build_NN truncates them with int()
space['architecture'] = {'num_branches': hp.quniform('num_branches', 1, 5, q=1),
                         'num28repeats': hp.quniform('num28repeats', 1, 5, q=1),
                         'num16repeats': hp.quniform('num16repeats', 1, 5, q=1),
                         'join_method': hp.choice('join_method', ['add', 'concat']),
                         'numOutRepeats': hp.quniform('numOutRepeats', 1, 5, q=1)}

def objective(params):
    """Build the sampled architecture, train it with the sampled optimizer and
    learning-rate schedule, and return its validation binary cross-entropy.

    Args:
        params: hyperopt sample with 'architecture' (keyword arguments for
            build_NN), 'optimizers' (optimizer class plus its keyword
            arguments) and 'lr_manage' (ReduceLROnPlateau arguments).

    Returns:
        Validation binary cross-entropy as a float (lower is better).
    """
    model = build_NN(**params['architecture'])

    # Explicit numeric arrays for the validation split; labels as a column
    # vector so they match the (n, 1) shape of the predictions.
    X_val = np.asarray(X_valid).astype(np.float32)
    y_val = np.asarray(y_valid).astype(np.float32).reshape(-1, 1)

    # Both callbacks monitor 'val_loss' by default, so fit() must be given
    # validation data -- without it they never trigger.
    es = EarlyStopping(patience=5)
    rlrop = ReduceLROnPlateau(**params['lr_manage'])

    optimizer = params['optimizers']['optimizer']
    optimizer_params = params['optimizers']['parameters']
    model.compile(loss='binary_crossentropy',
                  metrics=['accuracy'],
                  optimizer=optimizer(**optimizer_params))
    model.fit(X_train, y_train, validation_data=(X_val, y_val),
              callbacks=[es, rlrop], epochs = 50, verbose = 0)

    # The network ends in a sigmoid, so predictions are probabilities (not
    # logits), and Keras losses are called as loss(y_true, y_pred).
    loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=False)
    valid_loss = loss_fn(y_val, model.predict(X_val, verbose=0)).numpy()
    return float(valid_loss)

best = fmin(objective, space, algo=tpe.suggest, max_evals=100);

# fmin returns the raw sampled values keyed by label; for hp.choice entries
# that is the *index* of the selected option, and indexing the hp.choice node
# itself yields another hyperopt expression rather than 'add'/'concat'.
# space_eval substitutes `best` back into the search space and returns the
# concrete parameter values.
best_params = hyperopt.space_eval(space, best)
best_model = build_NN(**best_params['architecture'])
keras.utils.plot_model(best_model, dpi=400, show_shapes=True)

Optimizing a classical machine learning model and its parameters for regression (as opposed to classification, shown previously).

In [None]:
# The regressors and the metric are not imported before this point in the
# notebook, so bring them into scope here.
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error as mae

# Candidate regressors: each entry pairs an estimator class ('model') with the
# hyperopt expressions for its constructor keyword arguments ('parameters').
# NOTE(review): Lasso's 'normalize' argument depends on the installed
# scikit-learn version -- confirm it is still accepted.
models = [{'model': LinearRegression,
           'parameters':{}},
          {'model': Lasso,
           'parameters': {'alpha': hp.uniform('lr_alpha', 0, 5),
                          'normalize': hp.choice('lr_normalize', [True, False])}},
          {'model': DecisionTreeRegressor,
           'parameters': {'criterion': hp.choice('dtr_criterion', ['squared_error', 'friedman_mse',
                                                'absolute_error', 'poisson']),
                          'max_depth': hp.quniform('dtr_max_depth', 1, 30, 1)}},
          {'model': RandomForestRegressor,
           'parameters': {
               'criterion': hp.choice('rfr_criterion', ['squared_error', 'friedman_mse',
                                                    'absolute_error', 'poisson']),
               'max_depth': hp.quniform('rfr_max_depth', 1, 30, q=1),
               'n_estimators': hp.qnormal('rfr_n_estimators', 100, 30, 1)}},
          # The original dict listed 'criterion' twice (the second silently
          # replaced the first). The first list is the set of values for
          # GradientBoostingRegressor's `loss`; its `criterion` only accepts
          # 'friedman_mse' and 'squared_error'.
          {'model': GradientBoostingRegressor,
           'parameters': {'loss': hp.choice('gbr_loss', ['squared_error', 'absolute_error',
                                                'huber', 'quantile']),
           'n_estimators': hp.qnormal('gbr_n_estimators', 100, 30, 1),
           'criterion': hp.choice('gbr_criterion', ['squared_error', 'friedman_mse']),
           'max_depth': hp.quniform('gbr_max_depth', 1, 30, q=1)}},
          {'model': AdaBoostRegressor,
           'parameters': {'n_estimators': hp.qnormal('abr_n_estimators', 50, 15, 1),
           'loss': hp.choice('abr_loss', ['linear', 'square', 'exponential'])}},
          {'model': MLPRegressor,
           'parameters':{'activation': hp.choice('mlp_activation', ['logistic', 'tanh', 'relu'])}}]

# Rebuild the search space: without this, `space` still holds the entries of
# the previous (neural-network) search and the objective's params['models']
# lookup fails.
space = {'models': hp.choice('models', models)}

# build objective function
def objective(params):
    """Fit the sampled regressor and return its validation mean absolute error.

    Args:
        params: hyperopt sample; ``params['models']`` holds the estimator
            class under 'model' and its keyword arguments under 'parameters'.

    Returns:
        Mean absolute error on (X_valid, y_valid).
    """
    chosen = params['models']
    cleanedParams = {}
    for param, value in chosen['parameters'].items():
        if param in ('n_estimators', 'max_depth'):
            # hp.quniform / hp.qnormal yield floats (and qnormal is unbounded
            # below); scikit-learn expects positive integers for both.
            value = max(1, int(value))
        cleanedParams[param] = value

    model = chosen['model'](**cleanedParams)
    # NOTE(review): X_train / y_train / X_valid / y_valid are whatever earlier
    # cells left in scope -- confirm a regression dataset is loaded first.
    model.fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order
    return mae(y_valid, model.predict(X_valid))

best = fmin(objective, space, algo=tpe.suggest, max_evals=1000)
best

## Optimizing the Data Pipeline

Loading the Ames Housing dataset.

In [None]:
# Read the training CSV, drop every column that contains at least one missing
# value, and drop the 'Id' column.
df = pd.read_csv('https://raw.githubusercontent.com/hjhuney/Data/master/AmesHousing/train.csv')
df = df.dropna(axis=1, how='any').drop('Id', axis=1)
# features (everything except the target) and the 'SalePrice' target
x = df.drop('SalePrice', axis=1)
y = df['SalePrice']

In [None]:
# Distribution of the target (SalePrice).
# NOTE(review): sns.displot is a figure-level function that appears to create
# its own figure (sized by height/aspect), so the plt.figure call may only
# produce an extra empty figure -- confirm.
plt.figure(figsize=(20, 10), dpi=400)
p = sns.displot(y, color='blue', height=8, aspect=15/8)
plt.show()

Optimizing the categorical encodings for each categorical feature.

In [None]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error as mae

from category_encoders.ordinal import OrdinalEncoder
from category_encoders.one_hot import OneHotEncoder
from category_encoders.binary import BinaryEncoder
from category_encoders.target_encoder import TargetEncoder
from category_encoders.count import CountEncoder
from category_encoders.leave_one_out import LeaveOneOutEncoder
from category_encoders.james_stein import JamesSteinEncoder
from category_encoders.cat_boost import CatBoostEncoder

# Search space: one hp.choice per categorical column, selecting which encoder
# to apply to that column.
space = {}
# Each option is [encoder instance, flag]; the objective passes the target y
# to fit_transform only when the flag is True.
encoders = [[OrdinalEncoder(), False],
            [OneHotEncoder(), False],
            [BinaryEncoder(), True],
            [TargetEncoder(), True],
            [CountEncoder(), True],
            [LeaveOneOutEncoder(), True],
            [JamesSteinEncoder(), True],
            [CatBoostEncoder(), True]]

cat_features = []
for colIndex, colName in enumerate(x.columns):
    # find categorical variables to process
    # (a column counts as categorical when its first value is a str, or when
    # it has at most 5 distinct values)
    if type(x.iloc[0, colIndex]) == str or len(x[colName].unique()) <= 5:
        cat_features.append(colName)
        space[f'{colName}_cat_enc'] = hp.choice(f'{colName}_cat_enc', encoders)
        
# build objective function
def objective(params):
    """Encode every categorical column with its sampled encoder, fit a random
    forest, and return the validation mean absolute error.

    Args:
        params: hyperopt sample mapping '<column>_cat_enc' to an
            (encoder, needs_target) pair taken from `encoders`.

    Returns:
        Mean absolute error of a RandomForestRegressor on the held-out 20%.
    """
    # `tts` is not imported anywhere else in this notebook; bind it locally so
    # the objective does not fail with a NameError.
    from sklearn.model_selection import train_test_split as tts

    # NOTE(review): encoders are fit on all rows (with y for the flagged ones)
    # before the split, so validation targets may leak into the features --
    # consider fitting on the training rows only.
    encoded_parts = []
    for colName in cat_features:
        encoder, needs_target = params[f'{colName}_cat_enc']
        # A one-column DataFrame (rather than a bare array) lets the encoder
        # derive string column names from colName.
        column = x[[colName]]
        if needs_target:
            encoded_parts.append(encoder.fit_transform(column, y))
        else:
            encoded_parts.append(encoder.fit_transform(column))
    nonCatCols = [col for col in x.columns if (col not in cat_features)]
    # concatenate once instead of growing a DataFrame inside the loop
    x_ = pd.concat(encoded_parts + [x[nonCatCols]], axis=1)

    X_train, X_valid, y_train, y_valid = tts(x_, y, train_size = 0.8, random_state = 42)

    model = RandomForestRegressor(random_state = 42)
    model.fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order
    return mae(y_valid, model.predict(X_valid))

# 1000 TPE trials; every trial re-encodes the data and fits a new random forest
best = fmin(objective, space, algo=tpe.suggest, max_evals=1000);

Optimizing the categorical encoding for a neural network.

In [None]:
from sklearn.metrics import mean_absolute_error as mae

from category_encoders.ordinal import OrdinalEncoder
from category_encoders.one_hot import OneHotEncoder
from category_encoders.binary import BinaryEncoder
from category_encoders.target_encoder import TargetEncoder
from category_encoders.count import CountEncoder
from category_encoders.leave_one_out import LeaveOneOutEncoder
from category_encoders.james_stein import JamesSteinEncoder
from category_encoders.cat_boost import CatBoostEncoder

# Search space: one hp.choice per categorical column, selecting which encoder
# to apply to that column.
space = {}
# Each option is [encoder instance, flag]; the objective passes the target y
# to fit_transform only when the flag is True.
encoders = [[OrdinalEncoder(), False],
            [OneHotEncoder(), False],
            [BinaryEncoder(), True],
            [TargetEncoder(), True],
            [CountEncoder(), True],
            [LeaveOneOutEncoder(), True],
            [JamesSteinEncoder(), True],
            [CatBoostEncoder(), True]]

cat_features = []
for colIndex, colName in enumerate(x.columns):
    # find categorical variables to process
    # (a column counts as categorical when its first value is a str, or when
    # it has at most 5 distinct values)
    if type(x.iloc[0, colIndex]) == str or len(x[colName].unique()) <= 5:
        cat_features.append(colName)
        space[f'{colName}_cat_enc'] = hp.choice(f'{colName}_cat_enc', encoders)
        
# build objective function
def objective(params):
    """Encode every categorical column with its sampled encoder, train the
    regression network, and return the validation mean absolute error.

    Args:
        params: hyperopt sample mapping '<column>_cat_enc' to an
            (encoder, needs_target) pair taken from `encoders`.

    Returns:
        Mean absolute error of the network on the held-out 20%.
    """
    # `tts` is not imported anywhere else in this notebook; bind it locally so
    # the objective does not fail with a NameError.
    from sklearn.model_selection import train_test_split as tts

    # NOTE(review): encoders are fit on all rows (with y for the flagged ones)
    # before the split, so validation targets may leak into the features --
    # consider fitting on the training rows only.
    encoded_parts = []
    for colName in cat_features:
        encoder, needs_target = params[f'{colName}_cat_enc']
        # A one-column DataFrame (rather than a bare array) lets the encoder
        # derive string column names from colName.
        column = x[[colName]]
        if needs_target:
            encoded_parts.append(encoder.fit_transform(column, y))
        else:
            encoded_parts.append(encoder.fit_transform(column))
    nonCatCols = [col for col in x.columns if (col not in cat_features)]
    # concatenate once instead of growing a DataFrame inside the loop
    x_ = pd.concat(encoded_parts + [x[nonCatCols]], axis=1)

    X_train, X_valid, y_train, y_valid = tts(x_, y, train_size = 0.8, random_state = 42)

    model = buildRegressionNN(input_dim = len(X_train.columns))
    model.fit(X_train.astype(np.float32), y_train, epochs = 20, verbose = 0)
    # feed validation features in the same dtype used for training, and
    # flatten the (n, 1) predictions to match y_valid
    preds = model.predict(X_valid.astype(np.float32), verbose = 0).ravel()
    # scikit-learn metrics take (y_true, y_pred) in that order
    return mae(y_valid, preds)

def buildRegressionNN(input_dim):
    """Build and compile a small feed-forward regression network.

    Layout: three Dense(32) ReLU layers, three Dense(16) ReLU layers, and a
    single-unit output; compiled with the Adam optimizer and MSE loss.

    Args:
        input_dim: number of input features.

    Returns:
        A compiled keras Sequential model.
    """
    # `Sequential` is never imported as a bare name in this notebook; reach it
    # through the `keras` module, which is imported at the top.
    model = keras.models.Sequential()
    model.add(L.Input((input_dim,)))
    for width in (32, 16):
        for _ in range(3):
            model.add(L.Dense(width, activation='relu'))
    # NOTE(review): a ReLU output restricts predictions to non-negative
    # values -- confirm this is intended for the regression target.
    model.add(L.Dense(1, activation='relu'))
    model.compile(optimizer='adam', loss='mse')
    return model
    
# 1000 TPE trials; every trial re-encodes the data and trains a new network
best = fmin(objective, space, algo=tpe.suggest, max_evals=1000);

Optimizing the model and the data pipeline.

In [None]:
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error as mae

from category_encoders.ordinal import OrdinalEncoder
from category_encoders.one_hot import OneHotEncoder
from category_encoders.binary import BinaryEncoder
from category_encoders.target_encoder import TargetEncoder
from category_encoders.count import CountEncoder
from category_encoders.leave_one_out import LeaveOneOutEncoder
from category_encoders.james_stein import JamesSteinEncoder
from category_encoders.cat_boost import CatBoostEncoder

# Search space: one hp.choice per categorical column (which encoder to use)
# plus one hp.choice over the regressor class.
space = {}
# Each option is [encoder instance, flag]; the objective passes the target y
# to fit_transform only when the flag is True.
encoders = [[OrdinalEncoder(), False],
            [OneHotEncoder(), False],
            [BinaryEncoder(), True],
            [TargetEncoder(), True],
            [CountEncoder(), True],
            [LeaveOneOutEncoder(), True],
            [JamesSteinEncoder(), True],
            [CatBoostEncoder(), True]]

cat_features = []
for colIndex, colName in enumerate(x.columns):
    # find categorical variables to process
    # (a column counts as categorical when its first value is a str, or when
    # it has at most 5 distinct values)
    if type(x.iloc[0, colIndex]) == str or len(x[colName].unique()) <= 5:
        cat_features.append(colName)
        space[f'{colName}_cat_enc'] = hp.choice(f'{colName}_cat_enc', encoders)
# the options are estimator classes; the objective instantiates the chosen
# class with no arguments
space['model'] = hp.choice('model',
                           [LinearRegression, Lasso,
                            DecisionTreeRegressor, RandomForestRegressor,
                            GradientBoostingRegressor, AdaBoostRegressor,
                            MLPRegressor])
        
# build objective function
def objective(params):
    """Encode every categorical column with its sampled encoder, fit the
    sampled regressor with default settings, and return the validation MAE.

    Args:
        params: hyperopt sample mapping '<column>_cat_enc' to an
            (encoder, needs_target) pair and 'model' to an estimator class.

    Returns:
        Mean absolute error of the fitted model on the held-out 20%.
    """
    # `tts` is not imported anywhere else in this notebook; bind it locally so
    # the objective does not fail with a NameError.
    from sklearn.model_selection import train_test_split as tts

    # NOTE(review): encoders are fit on all rows (with y for the flagged ones)
    # before the split, so validation targets may leak into the features --
    # consider fitting on the training rows only.
    encoded_parts = []
    for colName in cat_features:
        encoder, needs_target = params[f'{colName}_cat_enc']
        # A one-column DataFrame (rather than a bare array) lets the encoder
        # derive string column names from colName.
        column = x[[colName]]
        if needs_target:
            encoded_parts.append(encoder.fit_transform(column, y))
        else:
            encoded_parts.append(encoder.fit_transform(column))
    nonCatCols = [col for col in x.columns if (col not in cat_features)]
    # concatenate once instead of growing a DataFrame inside the loop
    x_ = pd.concat(encoded_parts + [x[nonCatCols]], axis=1)

    X_train, X_valid, y_train, y_valid = tts(x_, y, train_size = 0.8, random_state = 42)

    model = params['model']()
    model.fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order
    return mae(y_valid, model.predict(X_valid))

# best = fmin(objective, space, algo=tpe.suggest, max_evals=1000);

Optimizing everything: the encoding mechanisms, the machine learning model, and the model's parameters.

In [None]:
from sklearn.linear_model import LinearRegression, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, AdaBoostRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_absolute_error as mae

from category_encoders.ordinal import OrdinalEncoder
from category_encoders.one_hot import OneHotEncoder
from category_encoders.binary import BinaryEncoder
from category_encoders.target_encoder import TargetEncoder
from category_encoders.count import CountEncoder
from category_encoders.leave_one_out import LeaveOneOutEncoder
from category_encoders.james_stein import JamesSteinEncoder
from category_encoders.cat_boost import CatBoostEncoder

# Search space: one hp.choice per categorical column (which encoder to use)
# plus one hp.choice over (regressor class, regressor parameters).
space = {}
# Each option is [encoder instance, flag]; the objective passes the target y
# to fit_transform only when the flag is True.
encoders = [[OrdinalEncoder(), False],
            [OneHotEncoder(), False],
            [BinaryEncoder(), True],
            [TargetEncoder(), True],
            [CountEncoder(), True],
            [LeaveOneOutEncoder(), True],
            [JamesSteinEncoder(), True],
            [CatBoostEncoder(), True]]

# Candidate regressors: each entry pairs an estimator class ('model') with the
# hyperopt expressions for its constructor keyword arguments ('parameters').
# NOTE(review): Lasso's 'normalize' argument depends on the installed
# scikit-learn version -- confirm it is still accepted.
models = [{'model': LinearRegression,
           'parameters':{}},
          {'model': Lasso,
           'parameters': {'alpha': hp.uniform('lr_alpha', 0, 5),
                          'normalize': hp.choice('lr_normalize', [True, False])}},
          {'model': DecisionTreeRegressor,
           'parameters': {'criterion': hp.choice('dtr_criterion', ['squared_error', 'friedman_mse',
                                                'absolute_error', 'poisson']),
                          'max_depth': hp.quniform('dtr_max_depth', 1, 30, 1)}},
          {'model': RandomForestRegressor,
           'parameters': {
               'criterion': hp.choice('rfr_criterion', ['squared_error', 'friedman_mse',
                                                    'absolute_error', 'poisson']),
               'max_depth': hp.quniform('rfr_max_depth', 1, 30, q=1),
               'n_estimators': hp.qnormal('rfr_n_estimators', 100, 30, 1)}},
          # The original dict listed 'criterion' twice (the second silently
          # replaced the first). The first list is the set of values for
          # GradientBoostingRegressor's `loss`; its `criterion` only accepts
          # 'friedman_mse' and 'squared_error'.
          {'model': GradientBoostingRegressor,
           'parameters': {'loss': hp.choice('gbr_loss', ['squared_error', 'absolute_error',
                                                'huber', 'quantile']),
           'n_estimators': hp.qnormal('gbr_n_estimators', 100, 30, 1),
           'criterion': hp.choice('gbr_criterion', ['squared_error', 'friedman_mse']),
           'max_depth': hp.quniform('gbr_max_depth', 1, 30, q=1)}},
          {'model': AdaBoostRegressor,
           'parameters': {'n_estimators': hp.qnormal('abr_n_estimators', 50, 15, 1),
           'loss': hp.choice('abr_loss', ['linear', 'square', 'exponential'])}},
          {'model': MLPRegressor,
           'parameters':{'activation': hp.choice('mlp_activation', ['logistic', 'tanh', 'relu'])}}]

cat_features = []
for colIndex, colName in enumerate(x.columns):
    # find categorical variables to process
    # (a column counts as categorical when its first value is a str, or when
    # it has at most 5 distinct values)
    if type(x.iloc[0, colIndex]) == str or len(x[colName].unique()) <= 5:
        cat_features.append(colName)
        space[f'{colName}_cat_enc'] = hp.choice(f'{colName}_cat_enc', encoders)
space['models'] = hp.choice('models', models)

# build objective function
def objective(params):
    """Encode every categorical column with its sampled encoder, fit the
    sampled regressor with its sampled parameters, and return the validation
    mean absolute error.

    Args:
        params: hyperopt sample mapping '<column>_cat_enc' to an
            (encoder, needs_target) pair and 'models' to a dict with the
            estimator class ('model') and its keyword arguments ('parameters').

    Returns:
        Mean absolute error of the fitted model on the held-out 20%.
    """
    # `tts` is not imported anywhere else in this notebook; bind it locally so
    # the objective does not fail with a NameError.
    from sklearn.model_selection import train_test_split as tts

    # NOTE(review): encoders are fit on all rows (with y for the flagged ones)
    # before the split, so validation targets may leak into the features --
    # consider fitting on the training rows only.
    encoded_parts = []
    for colName in cat_features:
        encoder, needs_target = params[f'{colName}_cat_enc']
        # A one-column DataFrame (rather than a bare array) lets the encoder
        # derive string column names from colName.
        column = x[[colName]]
        if needs_target:
            encoded_parts.append(encoder.fit_transform(column, y))
        else:
            encoded_parts.append(encoder.fit_transform(column))
    nonCatCols = [col for col in x.columns if (col not in cat_features)]
    # concatenate once instead of growing a DataFrame inside the loop
    x_ = pd.concat(encoded_parts + [x[nonCatCols]], axis=1)

    X_train, X_valid, y_train, y_valid = tts(x_, y, train_size = 0.8, random_state = 42)

    chosen = params['models']
    cleanedParams = {}
    for param, value in chosen['parameters'].items():
        if param in ('n_estimators', 'max_depth'):
            # hp.quniform / hp.qnormal yield floats (and qnormal is unbounded
            # below); scikit-learn expects positive integers for both.
            value = max(1, int(value))
        cleanedParams[param] = value

    model = chosen['model'](**cleanedParams)
    model.fit(X_train, y_train)
    # scikit-learn metrics take (y_true, y_pred) in that order
    return mae(y_valid, model.predict(X_valid))

# 500 TPE trials over the joint space of encoders, model, and model parameters
best = fmin(objective, space, algo=tpe.suggest, max_evals=500)

## AutoKeras

In [None]:
import autokeras as ak
# --- Classification: search on the X_train / y_train currently in scope ---
input_node = ak.StructuredDataInput()
output_node = ak.StructuredDataBlock(categorical_encoding=True)(input_node)
output_node = ak.ClassificationHead()(output_node)
clf = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=100
)
clf.fit(X_train, y_train, epochs=100) #, verbose=1)

# export the best model found once and reuse it for plotting and scoring
model = clf.export_model()
keras.utils.plot_model(model, show_shapes=True, dpi=400)

# Keras losses are called as loss(y_true, y_pred).
# NOTE(review): from_logits=False assumes the exported classification model
# outputs probabilities (sigmoid/softmax head) -- confirm via model.summary().
bce = tf.keras.losses.BinaryCrossentropy(from_logits=False)
valid_bce = bce(np.array(y_valid).reshape((len(y_valid),1)).astype(np.float16),
                model.predict(np.array(X_valid).astype(np.float16))).numpy()
# print explicitly: only a cell's last expression is echoed by the notebook
print(valid_bce)

# --- Regression: reload the housing data and search with a regression head ---
df = pd.read_csv('https://raw.githubusercontent.com/hjhuney/Data/master/AmesHousing/train.csv')
df = df.dropna(axis=1, how='any').drop('Id', axis=1)
x = df.drop('SalePrice', axis=1)
y = df['SalePrice']

# X_train, X_valid, y_train, y_valid = tts(x, y, train_size = 0.8, random_state = 42)

input_node = ak.StructuredDataInput()
output_node = ak.StructuredDataBlock(categorical_encoding=True)(input_node)
output_node = ak.RegressionHead()(output_node)
clf = ak.AutoModel(
    inputs=input_node, outputs=output_node, overwrite=True, max_trials=100
)
clf.fit(x, y, epochs=100)
keras.utils.plot_model(clf.export_model(), show_shapes=True, dpi=400)