In [1]:
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import Normalizer, OneHotEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
# import random

# seed_num = 42
# np.random.seed(seed_num)
# random.seed(seed_num)
# tf.random.set_seed(seed_num)

# from tensorflow.keras import backend as K

# session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
#                                         inter_op_parallelism_threads=1)
# sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
# K.session_conf(sess)

### DataSource

In [2]:
class DataSource:
    
    def __init__(self, filename):
        self.df = pd.read_csv(filename)
    
    
    def data_load_split(self, target=None, ignore=None):
        self.target = target
        self.ignore = ignore
        self.inputs = sorted(set(self.df.columns) - set(self.target) - set(self.ignore))
        
        self.X = self.df[self.inputs]
        self.y = self.df[self.target]


    def define_problem(self):
        if self.y.dtypes[0] in ['int64', 'float64'] and self.y.nunique()[0] == 2:
            self.problem = "Binary"
        elif self.y.dtypes[0] in ['object', 'bool']:
            self.problem = "Classification"
        else:
            self.problem = "Regression"


    def train_val_split(self, ratio=0.2, random_state=42):
        self.X_train, self.X_val, self.y_train, self.y_val = train_test_split(self.X, self.y,
                                                                              test_size=ratio,
                                                                              random_state=random_state)

    
    def data_preprocess(self, X, y, train_set=True):
        if train_set:
            # Data type detection
            numerical_ix = X.select_dtypes(include=['int64', 'float64']).columns
            categorical_ix = X.select_dtypes(include=['object', 'bool']).columns

            # Data transform
            num_transform = Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='mean')),
                ('scaler', StandardScaler())
            ])
            cat_transform = Pipeline(steps=[
                ('imputer', SimpleImputer(strategy='constant', fill_value="Missing")),
                ('oh_encoder', OneHotEncoder(sparse=False, handle_unknown='ignore'))
            ])

            self.transform_x = ColumnTransformer(transformers=[
                ('num', num_transform, numerical_ix),
                ('cat', cat_transform, categorical_ix)
            ])
            self.trans_X_train = self.transform_x.fit_transform(X)
            
            if self.problem == "Classification":
                self.transform_y = ColumnTransformer(transformers=[
                    ('cat', cat_transform, y.columns)
                ])
            else:
                self.trans_y_train = y
        else:
            self.trans_X_val = self.transform_x.transform(X)
            if self.problem == "Classification":
                self.trans_y_val = self.transform_y.transform(y)
            else:
                self.trans_y_val = y
                
                
filename="./data/titanic_train.csv"
ds = DataSource(filename)

ds.data_load_split(target=['Survived'], 
                   ignore=["Name", "Cabin", "Ticket"])
ds.define_problem()
ds.train_val_split(ratio=0.2, random_state=42)
ds.data_preprocess(ds.X_train, ds.y_train, train_set=True)
ds.data_preprocess(ds.X_val, ds.y_val, train_set=False)

### MLP

In [None]:
keras.initializers.glorot_uniform(seed=42)

In [47]:
class MLP:
    
    def __init__(self, X, y, problem="Regression"):
        tf.random.set_seed(42)
        self.problem = problem
        self.X = X
        self.y = y


    def build_structure(self, max_hidden_layers=1, units=[16], use_all=False):
        self.structures = []
        self.structures_info = []
        if use_all:
            self.max_hidden_layers = 3
            self.units = [16, 32, 64, 128, 256]
        else:
            self.max_hidden_layers = max_hidden_layers
            self.units = units
    
        grid = [np.arange(self.max_hidden_layers)+1, self.units]
        for param_tuple in itertools.product(*grid):
            structure_param = {'hidden_layers': param_tuple[0],
                                'units': param_tuple[1]}

            # input layer
            model = keras.Sequential()
            model.add(keras.layers.Dense(16, input_shape=(self.X.shape[1],)))

            # hidden layer block
            for _ in range(structure_param['hidden_layers']):
                tf.random.set_seed(42)
                model.add(keras.layers.Dense(structure_param['units'], activation='relu',
                                            kernel_initializer="normal"))
                tf.random.set_seed(42)

            # output layer
            if self.problem == 'Regression':
                model.add(keras.layers.Dense(1))
            elif self.problem == 'Binary':
                model.add(keras.layers.Dense(1, activation='sigmoid'))
            else:
                model.add(keras.layers.Dense(self.y.shape[1], activation='softmax'))

            self.structures.append(model)
            self.structures_info.append(structure_param)
            tf.keras.backend.clear_session()
            

            
    def build_single_structure(self, hidden_layers=1, units=16):
        self.structures = []
        self.structures_info = []
        self.hidden_layers = hidden_layers
        self.units = units
        
        # input layer
        model = keras.Sequential()
        model.add(keras.layers.Dense(16, input_shape=(self.X.shape[1],)))
        
        # hidden layer
        for _ in range(hidden_layers):
            tf.random.set_seed(42)
            model.add(keras.layers.Dense(self.units, activation='relu',
                                            kernel_initializer="normal"))
            tf.random.set_seed(42)
        
        # output layer
        if self.problem == 'Regression':
            model.add(keras.layers.Dense(1))
        elif self.problem == 'Binary':
            model.add(keras.layers.Dense(1, activation='sigmoid'))
        else:
            model.add(keras.layers.Dense(self.y.shape[1], activation='softmax'))
            
        self.structures.append(model)
        self.structures_info.append({'hidden_layers': self.hidden_layers,
                                     'units': self.units})
        tf.keras.backend.clear_session()

        
    def create_optimizer(self, optimizers=['adam'], lrs=[0.01], use_all=False):
#         tf.random.set_seed(42)
        self.created_optimizers = []
        self.optimizers_info = []
        self.optimizers = optimizers
        self.lrs = lrs

        self.optimizer_classes = {'adadelta': keras.optimizers.Adadelta, 'sgd': keras.optimizers.SGD,
                                  'adam': keras.optimizers.Adam, 'adagrad': keras.optimizers.Adagrad,
                                  'adamax': keras.optimizers.Adamax, 'rmsprop': keras.optimizers.RMSprop}

        if use_all:
            self.lrs = [0.001, 0.01, 0.02, 0.1]
            opt_grid = [self.optimizer_classes.keys(), self.lrs]    
        else:
            opt_grid = [self.optimizers, self.lrs]

        for opt_tuple in itertools.product(*opt_grid):
            opt_param = {
                'optimizer_name': opt_tuple[0],
                'lr': opt_tuple[1]
            }

            opt_class = self.optimizer_classes.get(opt_param['optimizer_name'])
            self.created_optimizers.append(opt_class(opt_param['lr']))
            self.optimizers_info.append(opt_param)

    
    def compile_model(self):
#         tf.random.set_seed(42)
        if self.problem == "Regression":
            self.loss = keras.losses.MSE
            self.metrics = ['MSE', 'MAE']
        elif self.problem == "Binary":
            self.loss = keras.losses.binary_crossentropy
            self.metrics = ['accuracy']
        else:
            self.loss = keras.losses.categorical_crossentropy
            self.metrics = ['accuracy']

        self.compiled_models = []
        self.compiled_models_info = []

        compile_grid = [zip(self.structures, self.structures_info), zip(self.created_optimizers ,self.optimizers_info)]
        for compile_tuple in itertools.product(*compile_grid):
            compile_param = {'model': compile_tuple[0][0],
                             'optimizer': compile_tuple[1][0]}
            model_info = {'structure_info': compile_tuple[0][1],
                           'optimizer_info': compile_tuple[1][1]}

            model_body = compile_param['model']
            model = keras.models.clone_model(model_body)
            model.compile(optimizer=compile_param['optimizer'],
                               loss=self.loss,
                               metrics=self.metrics)

            self.compiled_models.append(model)
            self.compiled_models_info.append(model_info)

    
    def train_models(self, models, X_train, y_train, X_val=None, y_val=None,
                     batch_size=None, epochs=1, verbose=0, callbacks=None,
                     shuffle=True, steps_per_epoch=None):
#         tf.random.set_seed(42)

        if callbacks:
            self.callbacks = callbacks
        else:
            self.callbacks = tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                              patience=5, restore_best_weights=True)

        self.trained_models = []
        self.val_losses = []
        for model in models:
            model.fit(x=X_train, y=y_train,
                      batch_size=batch_size, epochs=epochs,
                      verbose=verbose, callbacks=callbacks,
                      validation_data=(X_val, y_val), shuffle=shuffle)
            
            val_loss = model.evaluate(X_val, y_val, verbose=0)
            self.trained_models.append(model)
            self.val_losses.append(val_loss[0])
            print("{} model is trained. best val loss is : {}".format(model.name, val_loss))
                
    
def select_best_model(trained_models, val_losses, models_info):
    best_idx = np.argmin(val_losses)
    best_model = trained_models[best_idx]
    best_model_info = models_info[best_idx]
    return best_model, best_model_info

## Titaninc

In [48]:
mlp = MLP(ds.trans_X_train, ds.trans_y_train, problem=ds.problem)
mlp.build_structure(max_hidden_layers=3, units=[16], use_all=False)
mlp.create_optimizer(optimizers=['adam'], lrs=[0.01], use_all=False)
mlp.compile_model()
mlp.train_models(mlp.compiled_models,
                 ds.trans_X_train, ds.trans_y_train, ds.trans_X_val, ds.trans_y_val,
                 batch_size=64, epochs=10)

sequential model is trained. best val loss is : [0.744559679712568, 0.8095238095238095]
sequential model is trained. best val loss is : [0.7926248561768304, 0.8142857142857143]
sequential model is trained. best val loss is : [0.8642358473369054, 0.7476190476190476]


In [None]:
# init
# sequential model is trained. best val loss is : [0.7812652843339103, 0.8095238095238095]
# sequential model is trained. best val loss is : [1.0299395141147432, 0.8047619047619048]
# sequential model is trained. best val loss is : [0.9414586668922788, 0.7476190476190476]

# init, hidden앞
# sequential model is trained. best val loss is : [0.7851679313750494, 0.8]
# sequential model is trained. best val loss is : [1.1065275385266258, 0.7952380952380952]
# sequential model is trained. best val loss is : [1.04793796085176, 0.7095238095238096]

# init, hidden앞, hidden뒤
# sequential model is trained. best val loss is : [0.7572807045209975, 0.8]
# sequential model is trained. best val loss is : [0.9114891551789783, 0.780952380952381]
# sequential model is trained. best val loss is : [1.1249423844473703, 0.719047619047619]

# init, hidden앞, hidden뒤, hidden=kernel_initializer=glorot_uniform
# sequential model is trained. best val loss is : [0.7759955338069371, 0.8047619047619048]
# sequential model is trained. best val loss is : [1.0129560129983084, 0.819047619047619]
# sequential model is trained. best val loss is : [0.7714474700746082, 0.819047619047619]

# init, hidden앞, hidden뒤, hidden=kernel_initializer=zeros
# sequential model is trained. best val loss is : [0.6714708396366664, 0.6047619047619047]
# sequential model is trained. best val loss is : [0.6713804721832275, 0.6047619047619047]
# sequential model is trained. best val loss is : [0.6712687969207763, 0.6047619047619047]

# init, hidden앞, hidden뒤, hidden=kernel_initializer=normal
# sequential model is trained. best val loss is : [0.744559679712568, 0.8095238095238095]
# sequential model is trained. best val loss is : [0.7926248561768304, 0.8142857142857143]
# sequential model is trained. best val loss is : [0.8642358473369054, 0.7476190476190476]

In [45]:
mlp1 = MLP(ds.trans_X_train, ds.trans_y_train, problem=ds.problem)
mlp1.build_single_structure(hidden_layers=1, units=16)
mlp1.create_optimizer(optimizers=['adam'], lrs=[0.01], use_all=False)
mlp1.compile_model()
mlp1.train_models(mlp1.compiled_models,
                  ds.trans_X_train, ds.trans_y_train, ds.trans_X_val, ds.trans_y_val,
                  batch_size=64, epochs=10)

sequential model is trained. best val loss is : [0.7665734472728911, 0.8]


In [None]:
# sequential model is trained. best val loss is : [0.7399208488918486, 0.8142857142857143]

# sequential model is trained. best val loss is : [0.7851679313750494, 0.8]

# sequential model is trained. best val loss is : [0.7572807045209975, 0.8]

# sequential model is trained. best val loss is : [0.7759955338069371, 0.8047619047619048]

# sequential model is trained. best val loss is : [0.6714708396366664, 0.6047619047619047]

# sequential model is trained. best val loss is : [0.744559679712568, 0.8095238095238095]

In [46]:
mlp2 = MLP(ds.trans_X_train, ds.trans_y_train, problem=ds.problem)
mlp2.build_single_structure(hidden_layers=2, units=16)
mlp2.create_optimizer(optimizers=['adam'], lrs=[0.01], use_all=False)
mlp2.compile_model()
mlp2.train_models(mlp2.compiled_models,
                  ds.trans_X_train, ds.trans_y_train, ds.trans_X_val, ds.trans_y_val,
                  batch_size=64, epochs=10)

sequential model is trained. best val loss is : [0.9591352769306728, 0.7904761904761904]


In [None]:
# sequential model is trained. best val loss is : [0.9132801021848406, 0.8095238095238095]

# sequential model is trained. best val loss is : [0.6909157412392752, 0.7857142857142857]

# sequential model is trained. best val loss is : [0.7798875859805516, 0.8380952380952381]

# sequential model is trained. best val loss is : [0.8847121284121559, 0.8095238095238095]

# sequential model is trained. best val loss is : [0.6714708396366664, 0.6047619047619047]

# sequential model is trained. best val loss is : [0.6464605382510594, 0.6904761904761905]

In [None]:
trained_models

In [None]:
best_model, best_model_info = select_best_model(mlp.trained_models, mlp.val_losses, mlp.compiled_models_info)
best_model.evaluate(ds.trans_X_val, ds.trans_y_val)

In [None]:
best_model_info

In [None]:
import matplotlib.pyplot as plt
plt.plot(sorted(mlp.val_losses)[:20], '.')

In [None]:
trained_models = mlp.train_models(mlp.compiled_models,
                                  ds.trans_X_train, ds.trans_y_train, ds.trans_X_val, ds.trans_y_val,
                                  batch_size=64, epochs=10)

In [None]:
best_model, best_model_info = select_best_model(mlp.trained_models, mlp.val_losses, mlp.compiled_models_info)
best_model.evaluate(ds.trans_X_val, ds.trans_y_val)

In [None]:
best_model_info