In [1]:
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split

### DataSource

In [2]:
class DataSource:
    
    def __init__(self, filename):
        self.df = pd.read_csv(filename)
        
    def data_load_split(self, target=None, ignore=None):
        self.target = target
        self.ignore = ignore
        self.inputs = sorted(set(self.df.columns) - set(self.target) - set(self.ignore))
        
        self.X = self.df[self.inputs]
        self.y = self.df[self.target]
        
        return self.X, self.y
    
    def define_problem(self):
        if self.y.dtypes[0] in ['int64', 'float64'] and self.y.nunique()[0] == 2:
            self.problem = "Binary"
        elif self.y.dtypes[0] in ['object', 'bool']:
            self.problem = "Classification"
        else:
            self.problem = "Regression"
    
        return self.problem
    
    def data_preprocess(self, X, y, problem="Regression"):

        # Data type detection
        numerical_ix = self.X.select_dtypes(include=['int64', 'float64']).columns
        categorical_ix = self.X.select_dtypes(include=['object', 'bool']).columns

        # Data transform
        num_transform = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='mean')),
            ('scaler', StandardScaler())
        ])
        cat_transform = Pipeline(steps=[
            ('imputer', SimpleImputer(strategy='constant', fill_value="Missing")),
            ('oh_encoder', OneHotEncoder(sparse=False))
        ])

        transform_x = ColumnTransformer(transformers=[
            ('num', num_transform, numerical_ix),
            ('cat', cat_transform, categorical_ix)
        ])
        
        if problem == "Regression" or "Binary":
            transform_y = ColumnTransformer(transformers=[
                ('num', Normalizer(), y.columns)
            ])
        else:
            transform_y = ColumnTransformer(transformers=[
                ('cat', cat_transform, y.columns)
            ])
            
        self.trans_X = transform_x.fit_transform(self.X)
        self.trans_y = transform_y.fit_transform(self.y)

        return self.trans_X, self.trans_y
    
    def train_val_split(self, X, y, ratio=0.2, random_state=42):
        return train_test_split(X, y, test_size=ratio, random_state=random_state)

In [3]:
filename="./data/Churn_Modelling.csv"
ds = DataSource(filename)

In [4]:
ds.df.columns

Index(['RowNumber', 'CustomerId', 'Surname', 'CreditScore', 'Geography',
       'Gender', 'Age', 'Tenure', 'Balance', 'NumOfProducts', 'HasCrCard',
       'IsActiveMember', 'EstimatedSalary', 'Exited'],
      dtype='object')

In [5]:
X, y = ds.data_load_split(target=['Exited'], 
                          ignore=["RowNumber", "CustomerId", "Surname"])

problem = ds.define_problem()
trans_X, trans_y = ds.data_preprocess(X, y, problem=problem)
X_train, X_val, y_train, y_val = ds.train_val_split(trans_X, trans_y, ratio=0.2, random_state=42)

### MLP

In [6]:
class MLP:
    
    def __init__(self, problem="Regression"):
        self.problem = problem
        tf.random.set_seed(42)
    def build_structure(self, max_hidden_layers=1, units=[16], use_all=False):
        self.structures = []
        self.structures_info = []
        if use_all:
            self.max_hidden_layers = 3
            self.units = [16, 32, 64, 128, 256]
        else:
            self.max_hidden_layers = max_hidden_layers
            self.units = units
    
        grid = [np.arange(self.max_hidden_layers)+1, self.units]
        for param_tuple in itertools.product(*grid):
            structure_param = {'hidden_layers': param_tuple[0],
                                'units': param_tuple[1]}

            # input layer
            model = keras.Sequential()
            model.add(keras.layers.Dense(16, input_shape=(trans_X.shape[1],)))

            # hidden layer block
            for _ in range(structure_param['hidden_layers']):
                model.add(keras.layers.Dense(structure_param['units'], activation='relu'))

            # output layer
            if problem == 'Regression':
                model.add(keras.layers.Dense(1))
            elif problem == 'Binary':
                model.add(keras.layers.Dense(1, activation='sigmoid'))
            else:
                model.add(keras.layers.Dense(trans_y.shape[1], activation='softmax'))

            self.structures.append(model)
            self.structures_info.append(structure_param)

        return self.structures, self.structures_info
    
    def create_optimizer(self, optimizers=['adam'], lrs=[0.01], use_all=False):
        self.created_optimizers = []
        self.optimizers_info = []
        self.optimizers = optimizers
        self.lrs = lrs

        self.optimizer_classes = {'adadelta': keras.optimizers.Adadelta, 'sgd': keras.optimizers.SGD,
                                  'adam': keras.optimizers.Adam, 'adagrad': keras.optimizers.Adagrad,
                                  'adamax': keras.optimizers.Adamax, 'rmsprop': keras.optimizers.RMSprop}

        if use_all:
            self.lrs = [0.001, 0.01, 0.02, 0.1]
            opt_grid = [self.optimizer_classes.keys(), self.lrs]    
        else:
            opt_grid = [self.optimizers, self.lrs]

        for opt_tuple in itertools.product(*opt_grid):
            opt_param = {
                'optimizer_name': opt_tuple[0],
                'lr': opt_tuple[1]
            }

            opt_class = self.optimizer_classes.get(opt_param['optimizer_name'])
            self.created_optimizers.append(opt_class(opt_param['lr']))
            self.optimizers_info.append(opt_param)

        return self.created_optimizers, self.optimizers_info
    
    def _compile_model(self):
        if self.problem == "Regression":
            self.loss = keras.losses.MSE
            self.metrics = ['MSE', 'MAE']
        elif self.problem == "Binary":
            self.loss = keras.losses.binary_crossentropy
            self.metrics = ['accuracy']
        else:
            self.loss = keras.losses.categorical_crossentropy
            self.metrics = ['accuracy']

        self.compiled_models = []
        self.compiled_models_info = []

        compile_grid = [zip(self.structures, self.structures_info), zip(self.created_optimizers ,self.optimizers_info)]
        for compile_tuple in itertools.product(*compile_grid):
            compile_param = {'model': compile_tuple[0][0],
                             'optimizer': compile_tuple[1][0]}
            model_info = {'structure_info': compile_tuple[0][1],
                           'optimizer_info': compile_tuple[1][1]}

            model_body = compile_param['model']
            model = keras.models.clone_model(model_body)
            model.compile(optimizer=compile_param['optimizer'],
                               loss=self.loss,
                               metrics=self.metrics)

            self.compiled_models.append(model)
            self.compiled_models_info.append(model_info)

        return self.compiled_models, self.compiled_models_info
    
    def train_models(self, models, X_train, y_train, X_val=None, y_val=None,
                     batch_size=None, epochs=1, verbose=0, callbacks=None,
                     shuffle=True, steps_per_epoch=None):
        self.X_train = X_train
        self.y_train = y_train
        self.X_val = X_val
        self.y_val = y_val

        self.trained_models = []
        self.val_losses = []
        for model in models:
            model.fit(x=self.X_train, y=self.y_train,
                      batch_size=batch_size, epochs=epochs,
                      verbose=verbose, callbacks=callbacks,
                      validation_data=(self.X_val, self.y_val), shuffle=shuffle)
            
            val_loss = model.evaluate(self.X_val, self.y_val, verbose=0)
            self.trained_models.append(model)
            self.val_losses.append(val_loss[0])
            print("{} model is trained. best val loss is : {}".format(model.name, val_loss))
            
        return self.trained_models
    
    
def select_best_model(trained_models, val_losses, models_info):
    best_idx = np.argmin(val_losses)
    best_model = trained_models[best_idx]
    best_model_info = compiled_models_info[best_idx]
    return best_model, best_model_info

In [7]:
mlp = MLP(problem=problem)
structures, structures_info = mlp.build_structure(max_hidden_layers=2, units=[16, 32], use_all=True)
created_optimizers, optimizers_info = mlp.create_optimizer(optimizers=['adam', 'adadelta'], lrs=[0.1, 0.2], use_all=True)
compiled_models, compiled_models_info = mlp._compile_model()

callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

trained_models = mlp.train_models(compiled_models,
                                  X_train, y_train, X_val, y_val,
                                  batch_size=64, epochs=10,
                                  callbacks=[callback])

sequential model is trained. best val loss is : [0.5693475580178201, 0.7425]
sequential model is trained. best val loss is : [0.6567623415663838, 0.607]
sequential model is trained. best val loss is : [0.6122007313370704, 0.689]
sequential model is trained. best val loss is : [0.42731234400346874, 0.8165]
sequential model is trained. best val loss is : [0.4787380304224789, 0.797]
sequential model is trained. best val loss is : [0.3955547634640243, 0.8315]
sequential model is trained. best val loss is : [0.3741130788696464, 0.845]
sequential model is trained. best val loss is : [0.3394285263120528, 0.866]
sequential model is trained. best val loss is : [0.34077215121585325, 0.8615]
sequential model is trained. best val loss is : [0.3384260125242399, 0.8575]
sequential model is trained. best val loss is : [0.336495436080595, 0.866]
sequential model is trained. best val loss is : [0.37371558615531963, 0.8225]
sequential model is trained. best val loss is : [0.546890786729753, 0.7995]
sequ

sequential_4 model is trained. best val loss is : [0.33474322826839437, 0.863]
sequential_4 model is trained. best val loss is : [0.3379878114989906, 0.859]
sequential_4 model is trained. best val loss is : [0.4955947651863098, 0.8035]
sequential_4 model is trained. best val loss is : [0.49688056847080586, 0.8035]
sequential_4 model is trained. best val loss is : [0.3612929663255345, 0.849]
sequential_4 model is trained. best val loss is : [0.34114012358198303, 0.866]
sequential_4 model is trained. best val loss is : [0.33815911610440025, 0.8625]
sequential_4 model is trained. best val loss is : [0.34098202369605135, 0.8625]
sequential_4 model is trained. best val loss is : [0.3342560724648799, 0.8655]
sequential_4 model is trained. best val loss is : [0.3346465979677642, 0.8605]
sequential_4 model is trained. best val loss is : [0.3339990663089758, 0.857]
sequential_4 model is trained. best val loss is : [0.33732331546855676, 0.87]
sequential_4 model is trained. best val loss is : [0.

sequential_8 model is trained. best val loss is : [0.3349049675294898, 0.863]
sequential_8 model is trained. best val loss is : [0.33327998079063764, 0.863]
sequential_8 model is trained. best val loss is : [0.34348898149806584, 0.8575]
sequential_8 model is trained. best val loss is : [0.3350608881990445, 0.8675]
sequential_8 model is trained. best val loss is : [0.34419178303200587, 0.86]
sequential_8 model is trained. best val loss is : [0.34927433562754934, 0.854]
sequential_8 model is trained. best val loss is : [0.4955255789756775, 0.8035]
sequential_9 model is trained. best val loss is : [0.6046169226020575, 0.806]
sequential_9 model is trained. best val loss is : [0.4439584925090894, 0.805]
sequential_9 model is trained. best val loss is : [0.4142082126932219, 0.822]
sequential_9 model is trained. best val loss is : [0.34796829730132595, 0.8595]
sequential_9 model is trained. best val loss is : [0.49973608503118155, 0.8035]
sequential_9 model is trained. best val loss is : [0.3

sequential_13 model is trained. best val loss is : [0.6247547112405301, 0.795]
sequential_13 model is trained. best val loss is : [0.47757517144456507, 0.8035]
sequential_13 model is trained. best val loss is : [0.42503948737867175, 0.809]
sequential_13 model is trained. best val loss is : [0.3422838659810368, 0.8595]
sequential_13 model is trained. best val loss is : [0.49327501755580305, 0.8035]
sequential_13 model is trained. best val loss is : [0.3905889719584957, 0.834]
sequential_13 model is trained. best val loss is : [0.37167109713237734, 0.848]
sequential_13 model is trained. best val loss is : [0.33633737243885115, 0.865]
sequential_13 model is trained. best val loss is : [0.33611926267556735, 0.8595]
sequential_13 model is trained. best val loss is : [0.3444029004887343, 0.855]
sequential_13 model is trained. best val loss is : [0.45856970714777706, 0.8035]
sequential_13 model is trained. best val loss is : [0.49573832387477157, 0.8035]
sequential_13 model is trained. best v

In [8]:
for i in range(len(trained_models)):
    print(trained_models[i].optimizer)

<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x1165612d0>
<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x13cacedd0>
<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x13caced50>
<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta object at 0x13cacefd0>
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x13cacee90>
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x13cad3cd0>
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x13cad3d10>
<tensorflow.python.keras.optimizer_v2.gradient_descent.SGD object at 0x13cad3d90>
<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x13cad3ed0>
<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x13cad3d50>
<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x13cad3410>
<tensorflow.python.keras.optimizer_v2.adam.Adam object at 0x13cad3f10>
<tensorflow.python.keras.optimizer_v2.adagrad.Adagrad object at 0x13cad3

In [9]:
best_model, best_model_info = select_best_model(trained_models, mlp.val_losses, compiled_models_info)

In [10]:
best_model.evaluate(X_val, y_val)



[0.3288666603593244, 0.866]

In [11]:
best_model_info

{'structure_info': {'hidden_layers': 1, 'units': 64},
 'optimizer_info': {'optimizer_name': 'adam', 'lr': 0.01}}