In [1]:
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer, OneHotEncoder, StandardScaler

### DataSource

In [None]:
# class DataSource:        
    
#     def data_preprocess():

#     def data_load_split():

In [2]:
def data_load_split(filename, target=None, ignore=None, problem="Binary"):
    """
    Args:
        filename: from where to load
        target: list
        ignore: list
        
    Returns:
        X: inputs DataFrame
        y: target DataFrame
    """
    df = pd.read_csv(filename)
    
    target = target
    ignore = ignore
    inputs = sorted(set(df.columns) - set(target) - set(ignore))
    
    return df[inputs], df[target]

In [3]:
filename = "./data/titanic_train.csv"
target = ["Survived"]
ignore = ["Name", "Cabin", "Ticket"]

X, y = data_load_split(filename, target, ignore)

In [229]:
def set_problem(y, problem="Regression"):
    if problem == "Regression" or "Binary":
        y_copy = y.astype('float64')
    elif problem == "Classification":
        y_copy = y.astype('object')
    else:
        raise("E: Problem is not set")
        
    return y_copy

In [4]:
def define_problem(y):
    if y.dtypes[0] in ['int64', 'float64'] and y.nunique()[0] == 2:
        problem = "Binary"
    elif y.dtypes[0] in ['object', 'bool']:
        problem = "Classification"
    else:
        problem = "Regression"
    
    return problem

In [9]:
def data_preprocess(X, y, problem="Regression"):
    
    # Data type detection
    numerical_ix = X.select_dtypes(include=['int64', 'float64']).columns
    categorical_ix = X.select_dtypes(include=['object', 'bool']).columns
    
    # Data transform
    num_transform = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])
    cat_transform = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value="Missing")),
        ('oh_encoder', OneHotEncoder(sparse=False))
    ])
    
    transform_x = ColumnTransformer(transformers=[
        ('num', num_transform, numerical_ix),
        ('cat', cat_transform, categorical_ix)
    ])
    if problem == "Regression" or "Binary":
        transform_y = ColumnTransformer(transformers=[
            ('num', Normalizer(), y.columns)
        ])
    else:
        transform_y = ColumnTransformer(transformers=[
            ('cat', cat_transform, y.columns)
        ])
        
    return transform_x.fit_transform(X), transform_y.fit_transform(y)

In [10]:
trans_X, trans_y = data_preprocess(X, y, problem="Regression")

In [11]:
trans_X, trans_y

(array([[ 0.        , -0.46097065, -0.4852419 , ...,  0.        ,
          0.        ,  0.        ],
        [-0.21422328, -0.46097065, -0.4852419 , ...,  0.        ,
          0.        ,  0.        ],
        [-0.06201858, -0.46097065, -0.4852419 , ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 0.        , -0.46097065,  0.44524944, ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  1.78908189,  6.9586888 , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        , -0.46097065, -0.4852419 , ...,  0.        ,
          0.        ,  0.        ]]), array([[0.],
        [0.],
        [1.],
        ...,
        [1.],
        [0.],
        [0.]]))

### MLP

In [None]:
# class MLP:
    
#     def __init__():
        
#     def network_frame():
        
#     def build_block():
        
#     def auto_fit():
        
#     def get_param_grid(problem):

In [130]:
def build_model(trans_X, trans_y, hidden_layers=1, units=[16], problem='Regression'):
    
    models = []
    structure_params = []
    
    grid = [np.arange(hidden_layers)+1, units]
    for param_tuple in itertools.product(*grid):
        structure_param = {'hidden_layers': param_tuple[0],
                            'units': param_tuple[1]}
        
        # input layer
        model = keras.Sequential()
        model.add(keras.layers.Dense(16, input_shape=(trans_X.shape[1],)))
        
        # hidden layer block
        for _ in range(structure_param['hidden_layers']):
            model.add(keras.layers.Dense(structure_param['units'], activation='relu'))
        
        # output layer
        if problem == 'Regression':
            model.add(keras.layers.Dense(1))
        elif problem == 'Binary':
            model.add(keras.layers.Dense(1, activation='sigmoid'))
        else:
            model.add(keras.layers.Dense(trans_y.shape[1], activation='softmax'))
        models.append(model)
        structure_params.append(structure_param)
    
    return models, structure_params

In [169]:
structured_models, model_structures = build_model(trans_X, trans_y, hidden_layers=2, units=[16, 32], problem='Binary')

In [170]:
structured_models

[<tensorflow.python.keras.engine.sequential.Sequential at 0x140fa4190>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x13c46c590>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x141031f50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x141047c50>]

In [171]:
model_structures

[{'hidden_layers': 1, 'units': 16},
 {'hidden_layers': 1, 'units': 32},
 {'hidden_layers': 2, 'units': 16},
 {'hidden_layers': 2, 'units': 32}]

In [150]:
def create_optimizer(optimizers=['adam'], lrs=[0.01], auto=False):
    created_optimizers = []
    optimizer_informations = []
    
    optimizer_classes = {'adadelta': keras.optimizers.Adadelta, 'sgd': keras.optimizers.SGD,
                         'adam': keras.optimizers.Adam, 'adagrad': keras.optimizers.Adagrad,
                         'adamax': keras.optimizers.Adamax, 'rmsprop': keras.optimizers.RMSprop}
    
    if auto:
        lrs = [0.001, 0.01, 0.02, 0.1]
        opt_grid = [optimizer_classes.keys(), lrs]    
    else:
        opt_grid = [optimizers, lrs]
        
    for opt_tuple in itertools.product(*opt_grid):
        opt_param = {
            'optimizer_name': opt_tuple[0],
            'lr': opt_tuple[1]
        }
        
        opt_class = optimizer_classes.get(opt_param['optimizer_name'])
        created_optimizers.append(opt_class(opt_param['lr']))
        optimizer_informations.append(opt_param)
        
    return created_optimizers, optimizer_informations

In [165]:
created_optimizers, optimizer_informations = create_optimizer(auto=True)

In [166]:
optimizer_informations

[{'optimizer_name': 'adadelta', 'lr': 0.001},
 {'optimizer_name': 'adadelta', 'lr': 0.01},
 {'optimizer_name': 'adadelta', 'lr': 0.02},
 {'optimizer_name': 'adadelta', 'lr': 0.1},
 {'optimizer_name': 'sgd', 'lr': 0.001},
 {'optimizer_name': 'sgd', 'lr': 0.01},
 {'optimizer_name': 'sgd', 'lr': 0.02},
 {'optimizer_name': 'sgd', 'lr': 0.1},
 {'optimizer_name': 'adam', 'lr': 0.001},
 {'optimizer_name': 'adam', 'lr': 0.01},
 {'optimizer_name': 'adam', 'lr': 0.02},
 {'optimizer_name': 'adam', 'lr': 0.1},
 {'optimizer_name': 'adagrad', 'lr': 0.001},
 {'optimizer_name': 'adagrad', 'lr': 0.01},
 {'optimizer_name': 'adagrad', 'lr': 0.02},
 {'optimizer_name': 'adagrad', 'lr': 0.1},
 {'optimizer_name': 'adamax', 'lr': 0.001},
 {'optimizer_name': 'adamax', 'lr': 0.01},
 {'optimizer_name': 'adamax', 'lr': 0.02},
 {'optimizer_name': 'adamax', 'lr': 0.1},
 {'optimizer_name': 'rmsprop', 'lr': 0.001},
 {'optimizer_name': 'rmsprop', 'lr': 0.01},
 {'optimizer_name': 'rmsprop', 'lr': 0.02},
 {'optimizer_na

In [167]:
created_optimizers, optimizer_informations = create_optimizer(optimizers=['adam', 'adadelta'], lrs=[0.01, 0.02])

In [168]:
created_optimizers, optimizer_informations

([<tensorflow.python.keras.optimizer_v2.adam.Adam at 0x140fa41d0>,
  <tensorflow.python.keras.optimizer_v2.adam.Adam at 0x140fa4210>,
  <tensorflow.python.keras.optimizer_v2.adadelta.Adadelta at 0x140fa4290>,
  <tensorflow.python.keras.optimizer_v2.adadelta.Adadelta at 0x140fa42d0>],
 [{'optimizer_name': 'adam', 'lr': 0.01},
  {'optimizer_name': 'adam', 'lr': 0.02},
  {'optimizer_name': 'adadelta', 'lr': 0.01},
  {'optimizer_name': 'adadelta', 'lr': 0.02}])

In [215]:
def _compile_model(structured_models, model_structures, created_optimizers, optimizer_informations, problem='Regression'):
    
    # compile network
    if problem == "Regression":
        loss = keras.losses.MSE
        metrics = ['MSE', 'MAE']
    elif problem == "Binary":
        loss = keras.losses.binary_crossentropy
        metrics = ['accuracy']
    else:
        loss = keras.losses.categorical_crossentropy
        metrics = ['accuracy']
    
    compiled_models = []
    compiled_model_informations = []
    
    compile_grid = [zip(structured_models, model_structures), zip(created_optimizers ,optimizer_informations)]
    for compile_tuple in itertools.product(*compile_grid):
        compile_param = {'model': compile_tuple[0][0],
                         'optimizer': compile_tuple[1][0]}
        model_info = {'structure_info': compile_tuple[0][1],
                       'optimizer_info': compile_tuple[1][1]}
        
        model = compile_param['model']
        model.compile(optimizer=compile_param['optimizer'],
                      loss=loss,
                      metrics=metrics)
        
        compiled_models.append(model)
        compiled_model_informations.append(model_info)
    
    return compiled_models, compiled_model_informations

In [216]:
compiled_models, compiled_model_informations = _compile_model(structured_models, model_structures, created_optimizers, optimizer_informations, problem='Binary')

In [217]:
compiled_models

[<tensorflow.python.keras.engine.sequential.Sequential at 0x140fa4190>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x140fa4190>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x140fa4190>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x140fa4190>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x13c46c590>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x13c46c590>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x13c46c590>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x13c46c590>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x141031f50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x141031f50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x141031f50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x141031f50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x141047c50>,
 <tensorflow.python.keras.engine.sequential.Sequential at 0x1410

In [218]:
compiled_model_informations

[{'structure_info': {'hidden_layers': 1, 'units': 16},
  'optimizer_info': {'optimizer_name': 'adam', 'lr': 0.01}},
 {'structure_info': {'hidden_layers': 1, 'units': 16},
  'optimizer_info': {'optimizer_name': 'adam', 'lr': 0.02}},
 {'structure_info': {'hidden_layers': 1, 'units': 16},
  'optimizer_info': {'optimizer_name': 'adadelta', 'lr': 0.01}},
 {'structure_info': {'hidden_layers': 1, 'units': 16},
  'optimizer_info': {'optimizer_name': 'adadelta', 'lr': 0.02}},
 {'structure_info': {'hidden_layers': 1, 'units': 32},
  'optimizer_info': {'optimizer_name': 'adam', 'lr': 0.01}},
 {'structure_info': {'hidden_layers': 1, 'units': 32},
  'optimizer_info': {'optimizer_name': 'adam', 'lr': 0.02}},
 {'structure_info': {'hidden_layers': 1, 'units': 32},
  'optimizer_info': {'optimizer_name': 'adadelta', 'lr': 0.01}},
 {'structure_info': {'hidden_layers': 1, 'units': 32},
  'optimizer_info': {'optimizer_name': 'adadelta', 'lr': 0.02}},
 {'structure_info': {'hidden_layers': 2, 'units': 16},
 