In [2]:
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf

from tensorflow import keras
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer, OneHotEncoder, StandardScaler

### Data loading and preprocessing

In [29]:
def DataPreprocessor(source,
                     target=None,
                     ignore=None,
                     random_state=42):
    """Load a CSV file, encode/scale its columns and split into train/test sets.

    Feature columns are every column of the CSV that is neither in ``target``
    nor in ``ignore``. ``object``/``bool`` features are imputed with the
    constant ``'Missing'`` and one-hot encoded; ``int64``/``float64`` features
    are mean-imputed and standardised. Features of any other dtype are dropped
    by the column transformer.

    Parameters
    ----------
    source : str, path-like or file-like
        Passed straight to ``pandas.read_csv``.
    target : str or iterable of str
        Name(s) of the column(s) to predict. Required; the ``None`` default is
        kept only for signature compatibility.
    ignore : str or iterable of str, optional
        Name(s) of columns excluded from the features. ``None`` excludes
        nothing.
    random_state : int, default 42
        Seed forwarded to ``train_test_split``.

    Returns
    -------
    list
        ``[x_train, x_test, y_train, y_test]`` as produced by
        ``train_test_split`` with ``test_size=0.2``.

    Raises
    ------
    TypeError
        If ``target`` is ``None``.
    """
    # Validate before touching the file so a bad call fails fast and clearly
    # instead of with "'NoneType' object is not iterable" deep in set().
    if target is None:
        raise TypeError(
            "target must be a column name or an iterable of column names, got None"
        )
    # A bare string would otherwise be split into single characters by set().
    target = [target] if isinstance(target, str) else list(target)
    if ignore is None:
        ignore = []
    elif isinstance(ignore, str):
        ignore = [ignore]
    else:
        ignore = list(ignore)

    df = pd.read_csv(source)

    # Sorted so the feature column order is deterministic across runs.
    inputs = sorted(set(df.columns) - set(target) - set(ignore))

    # Data type detection
    numerical_ix = df[inputs].select_dtypes(include=['int64', 'float64']).columns
    categorical_ix = df[inputs].select_dtypes(include=['object', 'bool']).columns

    # scikit-learn renamed `sparse` to `sparse_output` (1.2) and later removed
    # the old keyword; try the new spelling first and fall back for old versions.
    try:
        oh_encoder = OneHotEncoder(sparse_output=False)
    except TypeError:
        oh_encoder = OneHotEncoder(sparse=False)

    # Data transforms
    cat_transform = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='constant', fill_value='Missing')),
        ('oh_encoder', oh_encoder)
    ])
    num_transform = Pipeline(steps=[
        ('imputer', SimpleImputer(strategy='mean')),
        ('scaler', StandardScaler())
    ])

    transform_x = ColumnTransformer(transformers=[
        ('cat', cat_transform, categorical_ix),
        ('num', num_transform, numerical_ix)
    ])
    # NOTE(review): Normalizer rescales each *row* to unit norm, so with a
    # single target column every non-zero value becomes +/-1. That is a no-op
    # for 0/1 labels; confirm it is intended before using other targets.
    transform_y = ColumnTransformer(transformers=[
        ('num', Normalizer(), pd.Index(target))
    ])

    # NOTE(review): the transformers are fitted on the full dataset before the
    # split below, so imputation/scaling statistics include the test rows.
    trans_x = transform_x.fit_transform(df[inputs])
    trans_y = transform_y.fit_transform(df)

    return train_test_split(trans_x, trans_y, test_size=0.2, random_state=random_state)

In [30]:
# Relative path of the Titanic training CSV.
source = "../../data/titanic/titanic_train.csv"
# Column handed to DataPreprocessor as the prediction target.
target = ["Survived"]
# Columns excluded from the feature set.
ignore = ["Name", "Cabin", "Ticket"]

# Left as the last expression so the notebook displays the returned splits.
DataPreprocessor(source=source, target=target, ignore=ignore)

[array([[ 0.        ,  0.        ,  1.        , ..., -0.4852419 ,
         -0.51633742,  0.38117247],
        [ 0.        ,  1.        ,  0.        , ..., -0.4852419 ,
         -0.40120865, -0.81792695],
        [ 0.        ,  0.        ,  1.        , ..., -0.4852419 ,
         -0.47066651,  0.03744813],
        ...,
        [ 0.        ,  0.        ,  1.        , ..., -0.4852419 ,
         -0.49778361,  1.21030905],
        [ 0.        ,  0.        ,  1.        , ...,  0.44524944,
         -0.34221705,  1.54878569],
        [ 0.        ,  1.        ,  0.        , ..., -0.4852419 ,
         -0.42975298, -0.71822065]]),
 array([[ 0.00000000e+00,  0.00000000e+00,  1.00000000e+00, ...,
          1.37574077e+00, -2.82116031e-01,  7.14228490e-04],
        [ 0.00000000e+00,  0.00000000e+00,  1.00000000e+00, ...,
         -4.85241901e-01, -2.91075143e-01, -1.14735168e-01],
        [ 0.00000000e+00,  0.00000000e+00,  1.00000000e+00, ...,
         -4.85241901e-01, -4.99130904e-01,  1.63274888e+

### MLP Model

In [None]:
# mlp_params

In [None]:
class MLP(tf.keras.Model):
    
    def __init__(self,
                 target=None,
                 features=None,
                 optimizers='Adadelta',
                 learning_rate=0.01):
        target = target
        features = features
        
        super(MLP, self).__init__()
        self.