In [1]:
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from src.DataSource import DataSource

In [2]:
# Path to the Titanic training CSV, relative to the notebook's working directory.
filename = "./data/titanic_train.csv"
# DataSource is the project helper imported from src.DataSource.
# NOTE(review): whether the file is read here or in data_load_split is not visible in this notebook.
ds = DataSource(filename)

In [3]:
# Use 'Survived' as the target and leave out the 'Name', 'Cabin' and 'Ticket' columns.
# NOTE(review): the exact behaviour lives in src.DataSource -- confirm there.
ds.data_load_split(target=['Survived'],
                   ignore=['Name', 'Cabin', 'Ticket'])
# Presumably populates ds.problem, which is displayed in the next cell.
ds.define_problem()
# Train/validation split with a fixed random_state for reproducibility.
# ratio=0.2 is presumably the validation fraction -- TODO confirm.
ds.train_val_split(ratio=0.2, random_state=42)

In [4]:
# Problem type stored on the DataSource; later cells pass it to get_single_mlp and compile_model.
ds.problem

'Binary'

In [5]:
# Preprocess the training split first (train_set=True), then the validation split (train_set=False).
# NOTE(review): presumably transformers are fitted on the training split and reused for validation,
# and the results are stored as ds.trans_X_train / ds.trans_y_train / ds.trans_X_val / ds.trans_y_val,
# which later cells read -- confirm in src.DataSource.
ds.data_preprocess(ds.X_train, ds.y_train, train_set=True)
ds.data_preprocess(ds.X_val, ds.y_val, train_set=False)

In [34]:
def get_single_mlp(X, y, problem, hidden_layer=1, unit=16):
    """Build an uncompiled fully connected ``keras.Sequential`` model.

    Every call first sets the TensorFlow global seed to 42 and clears the
    Keras backend session, then assembles the network layer by layer.

    Args:
        X: Feature array; only ``X.shape[1]`` is read, as the input width.
        y: Target array; only read when ``problem`` is neither "Regression"
            nor "Binary", in which case ``y.shape[1]`` is the output width.
        problem: "Regression" gives a single linear output unit, "Binary" a
            single sigmoid unit, anything else a softmax output layer.
        hidden_layer: Number of hidden ``Dense`` layers.
        unit: Number of units in each hidden layer (ReLU activation).

    Returns:
        The assembled, not yet compiled, model.
    """
    tf.random.set_seed(42)
    tf.keras.backend.clear_session()  # reset the Keras global state before building

    n_features = X.shape[1]
    net = keras.Sequential()
    net.add(keras.layers.Input(shape=(n_features,)))

    # Stack of identical hidden layers.
    for _depth in range(hidden_layer):
        net.add(keras.layers.Dense(unit, activation='relu'))

    # The output head depends on the problem type.
    if problem == "Regression":
        head = keras.layers.Dense(1)
    elif problem == "Binary":
        head = keras.layers.Dense(1, activation='sigmoid')
    else:
        head = keras.layers.Dense(y.shape[1], activation='softmax')
    net.add(head)

    return net

def get_mlps(X, y, problem, max_hidden_layers=1, units=[16], use_all=False):
    """Build one uncompiled MLP per (depth, width) combination.

    Depths run from 1 to ``max_hidden_layers`` inclusive and are combined
    with every entry of ``units``; depth varies slowest.

    Args:
        X, y, problem: Forwarded unchanged to ``get_single_mlp``.
        max_hidden_layers: Largest number of hidden layers to try.
        units: Candidate hidden-layer widths.
        use_all: When true, ignore the two arguments above and use depths
            1..3 with widths [16, 32, 64, 128, 256].

    Returns:
        List of models in grid order.
    """
    if use_all:
        depth_cap, width_options = 3, [16, 32, 64, 128, 256]
    else:
        depth_cap, width_options = max_hidden_layers, units

    depth_options = np.arange(depth_cap) + 1  # 1, 2, ..., depth_cap
    return [
        get_single_mlp(X, y, problem, hidden_layer=depth, unit=width)
        for depth, width in itertools.product(depth_options, width_options)
    ]

In [35]:
# Hand-built reference models; the trailing numbers give the position of the
# same architecture in the grid produced by get_mlps below.
_mlp_args = (ds.trans_X_train, ds.trans_y_train, ds.problem)
model1 = get_single_mlp(*_mlp_args, hidden_layer=1, unit=16)  # grid model 1
model2 = get_single_mlp(*_mlp_args, hidden_layer=2, unit=16)  # grid model 3
model3 = get_single_mlp(*_mlp_args, hidden_layer=2, unit=32)  # grid model 4

# Full 2 x 2 grid: (1, 16), (1, 32), (2, 16), (2, 32).
models = get_mlps(*_mlp_args, max_hidden_layers=2, units=[16, 32])

# Reference models first, then the grid models.
model_list = [model1, model2, model3]
models_1 = model_list + models

#### default compile

In [8]:
# Sanity check: if the hand-built models (model1, model2, model3) give the same
# results as the grid models with the same architecture, get_single_mlp and
# get_mlps above are consistent with each other.
# Model order in models_1 (grid numbering): 1, 3, 4, 1, 2, 3, 4
# NOTE(review): later cells compile and fit these same model objects again;
# presumably models_1 was rebuilt between runs -- otherwise training continues
# from the weights left by the previous cell.
for model in models_1:
    # Experiment 1: the optimizer is constructed inline, so each model gets its own instance.
    model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.01),
              loss=keras.losses.binary_crossentropy,
              metrics=['accuracy'])
    
    # A new EarlyStopping callback per model.
    callbacks=tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(x=ds.trans_X_train, y=ds.trans_y_train,
          batch_size=64, epochs=10,
          verbose=0, callbacks=[callbacks],
          validation_data=(ds.trans_X_val, ds.trans_y_val), shuffle=True)
    
    # The return value is discarded; the cell relies on evaluate's own progress output.
    model.evaluate(ds.trans_X_val, ds.trans_y_val)



#### when optimizer is set `just before` compile

In [11]:
# Sanity check: if the hand-built models (model1, model2, model3) give the same
# results as the grid models with the same architecture, get_single_mlp and
# get_mlps above are consistent with each other.
# Model order in models_1 (grid numbering): 1, 3, 4, 1, 2, 3, 4
for model in models_1:
    # Experiment 2: the optimizer is bound to a name first, but still created
    # inside the loop, so every model gets its own Adam instance.
    opt = keras.optimizers.Adam(learning_rate=0.01)
    model.compile(optimizer=opt,
              loss=keras.losses.binary_crossentropy,
              metrics=['accuracy'])
    
    # A new EarlyStopping callback per model.
    callbacks=tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(x=ds.trans_X_train, y=ds.trans_y_train,
          batch_size=64, epochs=10,
          verbose=0, callbacks=[callbacks],
          validation_data=(ds.trans_X_val, ds.trans_y_val), shuffle=True)
    
    # The return value is discarded; the cell relies on evaluate's own progress output.
    model.evaluate(ds.trans_X_val, ds.trans_y_val)



#### when optimizer is `already set` before compile
- The results do not match the two runs above, because a single optimizer instance is shared by every model.
- **Create a new optimizer for each model before compiling it.**

In [13]:
# Sanity check: if the hand-built models (model1, model2, model3) give the same
# results as the grid models with the same architecture, get_single_mlp and
# get_mlps above are consistent with each other.
# Model order in models_1 (grid numbering): 1, 3, 4, 1, 2, 3, 4
# Experiment 3 (deliberate): ONE Adam instance is created outside the loop and
# handed to every model's compile() call.
# NOTE(review): newer Keras releases may refuse an optimizer that is reused
# across models -- confirm against the installed version.
opt = keras.optimizers.Adam(learning_rate=0.01)

for model in models_1:
    model.compile(optimizer=opt,
              loss=keras.losses.binary_crossentropy,
              metrics=['accuracy'])
    
    # A new EarlyStopping callback per model.
    callbacks=tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(x=ds.trans_X_train, y=ds.trans_y_train,
          batch_size=64, epochs=10,
          verbose=0, callbacks=[callbacks],
          validation_data=(ds.trans_X_val, ds.trans_y_val), shuffle=True)
    
    # The return value is discarded; the cell relies on evaluate's own progress output.
    model.evaluate(ds.trans_X_val, ds.trans_y_val)



In [14]:
def compile_model(problem, model, optimizer='adam', lr=0.01):
    """Compile ``model`` in place with a loss, metrics and optimizer chosen from the arguments.

    A new optimizer instance is created on every call, so an optimizer is
    never shared between two models.

    Args:
        problem: "Regression" selects MSE loss with MSE and MAE metrics;
            "Binary" selects binary cross-entropy with accuracy; any other
            value selects categorical cross-entropy with accuracy.
        model: Model whose ``compile`` method is called.
        optimizer: Optimizer name, matched case-insensitively. One of
            'adadelta', 'sgd', 'adam', 'adagrad', 'adamax', 'rmsprop'.
        lr: Learning rate passed to the optimizer constructor.

    Returns:
        None; the model is modified in place.

    Raises:
        KeyError: If ``optimizer`` is not one of the supported names.
    """
    # Pick loss and metrics according to the problem type.
    if problem == "Regression":
        loss = keras.losses.MSE
        metrics = ['MSE', 'MAE']
    elif problem == "Binary":
        loss = keras.losses.binary_crossentropy
        metrics = ['accuracy']
    else:
        loss = keras.losses.categorical_crossentropy
        metrics = ['accuracy']

    # Map the optimizer name to its Keras class.
    optimizer_classes = {'adadelta': keras.optimizers.Adadelta, 'sgd': keras.optimizers.SGD,
                         'adam': keras.optimizers.Adam, 'adagrad': keras.optimizers.Adagrad,
                         'adamax': keras.optimizers.Adamax, 'rmsprop': keras.optimizers.RMSprop}
    # Accept 'Adam', 'SGD', ... as well as the lower-case spellings.
    key = optimizer.lower() if isinstance(optimizer, str) else optimizer
    try:
        optimizer_class = optimizer_classes[key]
    except KeyError:
        # Same exception type as a plain dict lookup, but with an actionable message.
        raise KeyError("Unknown optimizer {!r}; expected one of {}".format(
            optimizer, sorted(optimizer_classes))) from None

    model.compile(optimizer=optimizer_class(learning_rate=lr),
                  loss=loss,
                  metrics=metrics)

#### When using `compile_model` function
- The results are the same as in the first two runs above.

In [16]:
# Sanity check: if the hand-built models (model1, model2, model3) give the same
# results as the grid models with the same architecture, get_single_mlp and
# get_mlps above are consistent with each other.
for model in models_1:
    # compile_model builds a new optimizer on every call, so nothing is shared between models.
    compile_model(ds.problem, model, optimizer='adam', lr=0.01)
    
    # A new EarlyStopping callback per model.
    callbacks=tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(x=ds.trans_X_train, y=ds.trans_y_train,
          batch_size=64, epochs=10,
          verbose=0, callbacks=[callbacks],
          validation_data=(ds.trans_X_val, ds.trans_y_val), shuffle=True)
    
    # The return value is discarded; the cell relies on evaluate's own progress output.
    model.evaluate(ds.trans_X_val, ds.trans_y_val)



In [31]:
def get_single_mlp(X, y, problem, hidden_layer=1, unit=16):
    """Build an uncompiled fully connected ``keras.Sequential`` model.

    Every call first sets the TensorFlow global seed to 42 and clears the
    Keras backend session, then assembles the network layer by layer.

    Args:
        X: Feature array; only ``X.shape[1]`` is read, as the input width.
        y: Target array; only read when ``problem`` is neither "Regression"
            nor "Binary", in which case ``y.shape[1]`` is the output width.
        problem: "Regression" gives a single linear output unit, "Binary" a
            single sigmoid unit, anything else a softmax output layer.
        hidden_layer: Number of hidden ``Dense`` layers.
        unit: Number of units in each hidden layer (ReLU activation).

    Returns:
        The assembled, not yet compiled, model.
    """
    tf.random.set_seed(42)
    tf.keras.backend.clear_session()  # reset the Keras global state before building

    n_features = X.shape[1]
    net = keras.Sequential()
    net.add(keras.layers.Input(shape=(n_features,)))

    # Stack of identical hidden layers.
    for _depth in range(hidden_layer):
        net.add(keras.layers.Dense(unit, activation='relu'))

    # The output head depends on the problem type.
    if problem == "Regression":
        head = keras.layers.Dense(1)
    elif problem == "Binary":
        head = keras.layers.Dense(1, activation='sigmoid')
    else:
        head = keras.layers.Dense(y.shape[1], activation='softmax')
    net.add(head)

    return net

def compile_model(problem, model, optimizer='adam', lr=0.01):
    """Compile ``model`` in place with a loss, metrics and optimizer chosen from the arguments.

    A new optimizer instance is created on every call, so an optimizer is
    never shared between two models.

    Args:
        problem: "Regression" selects MSE loss with MSE and MAE metrics;
            "Binary" selects binary cross-entropy with accuracy; any other
            value selects categorical cross-entropy with accuracy.
        model: Model whose ``compile`` method is called.
        optimizer: Optimizer name, matched case-insensitively. One of
            'adadelta', 'sgd', 'adam', 'adagrad', 'adamax', 'rmsprop'.
        lr: Learning rate passed to the optimizer constructor.

    Returns:
        None; the model is modified in place.

    Raises:
        KeyError: If ``optimizer`` is not one of the supported names.
    """
    # Pick loss and metrics according to the problem type.
    if problem == "Regression":
        loss = keras.losses.MSE
        metrics = ['MSE', 'MAE']
    elif problem == "Binary":
        loss = keras.losses.binary_crossentropy
        metrics = ['accuracy']
    else:
        loss = keras.losses.categorical_crossentropy
        metrics = ['accuracy']

    # Map the optimizer name to its Keras class.
    optimizer_classes = {'adadelta': keras.optimizers.Adadelta, 'sgd': keras.optimizers.SGD,
                         'adam': keras.optimizers.Adam, 'adagrad': keras.optimizers.Adagrad,
                         'adamax': keras.optimizers.Adamax, 'rmsprop': keras.optimizers.RMSprop}
    # Accept 'Adam', 'SGD', ... as well as the lower-case spellings.
    key = optimizer.lower() if isinstance(optimizer, str) else optimizer
    try:
        optimizer_class = optimizer_classes[key]
    except KeyError:
        # Same exception type as a plain dict lookup, but with an actionable message.
        raise KeyError("Unknown optimizer {!r}; expected one of {}".format(
            optimizer, sorted(optimizer_classes))) from None

    model.compile(optimizer=optimizer_class(learning_rate=lr),
                  loss=loss,
                  metrics=metrics)
    
    
def train_model(model, X_train, y_train, X_val, y_val,
                batch_size=None, epochs=1, verbose=0, callbacks=None,
                shuffle=True, steps_per_epoch=None):
    """Fit a compiled model, evaluate it on the validation data and report the result.

    Args:
        model: Compiled model exposing ``fit``, ``evaluate`` and ``name``.
        X_train, y_train: Training data passed to ``fit`` as ``x`` / ``y``.
        X_val, y_val: Validation data, used both as ``validation_data``
            during fitting and for the final ``evaluate`` call.
        batch_size, epochs, verbose, shuffle, steps_per_epoch: Forwarded to
            ``model.fit`` (``verbose`` is also forwarded to ``evaluate``).
        callbacks: A single callback or a list/tuple of callbacks. When
            empty or ``None``, an ``EarlyStopping`` on ``val_loss`` with
            patience 5 and ``restore_best_weights=True`` is used.

    Returns:
        ``(model, val_loss)`` where ``val_loss`` is whatever
        ``model.evaluate`` returns. For a model compiled with metrics this
        is a list such as ``[loss, accuracy]``, as the printed output of the
        grid search in this notebook shows.
    """
    # Always hand Keras a list of callbacks, as the other cells in this notebook do.
    if not callbacks:
        callbacks = [keras.callbacks.EarlyStopping(monitor='val_loss',
                                                   patience=5,
                                                   restore_best_weights=True)]
    elif isinstance(callbacks, (list, tuple)):
        callbacks = list(callbacks)
    else:
        callbacks = [callbacks]

    # steps_per_epoch used to be accepted but silently dropped; forward it.
    # Its default of None is also the Keras default, so existing callers are unaffected.
    model.fit(x=X_train, y=y_train,
              batch_size=batch_size, epochs=epochs,
              verbose=verbose, callbacks=callbacks,
              validation_data=(X_val, y_val), shuffle=shuffle,
              steps_per_epoch=steps_per_epoch)

    val_loss = model.evaluate(X_val, y_val, verbose=verbose)
    print("{} model is trained. best val loss is: {}".format(model.name, val_loss))

    return model, val_loss

#### Need to define `auto_fit` function
1. Set up the `param_grid`: (hidden_layers, units, optimizers, lrs).
2. For every combination in the `param_grid`, call the following functions in turn to produce a trained model:
  - `get_single_mlp`
  - `compile_model`
  - `train_model`

In [37]:
def auto_fit(problem, X_train, y_train, X_val, y_val,
             hidden_layers=[1], units=[16],
             optimizers=['adam'], lrs=[0.001],
             batch_size=None, epochs=1, verbose=0,
             callbacks=None, shuffle=True,
             steps_per_epoch=None,
             use_all=False):
    """Build, compile and train one MLP per hyper-parameter combination.

    The grid is the Cartesian product of ``hidden_layers``, ``units``,
    ``optimizers`` and ``lrs``, in that nesting order (learning rate varies
    fastest).

    Args:
        problem: Problem type forwarded to ``get_single_mlp`` and ``compile_model``.
        X_train, y_train, X_val, y_val: Data forwarded to ``get_single_mlp``
            and ``train_model``.
        hidden_layers, units, optimizers, lrs: Candidate values for each
            hyper-parameter.
        batch_size, epochs, verbose, callbacks, shuffle, steps_per_epoch:
            Forwarded unchanged to ``train_model``.
        use_all: When true, replace the four candidate lists with the
            built-in full grid.

    Returns:
        ``(models, param_info, val_losses)``: three lists aligned by index,
        holding the trained models, the hyper-parameter dict of each model
        and the value returned by ``train_model`` for each model.
    """
    if use_all:
        hidden_layers = [1, 2, 3]
        units = [16, 32, 64, 128, 256]
        optimizers = ['adam', 'adadelta', 'adamax', 'adagrad', 'sgd', 'rmsprop']
        lrs = [0.001, 0.01, 0.02, 0.1]

    # Training options are identical for every grid point.
    fit_options = dict(batch_size=batch_size, epochs=epochs, verbose=verbose,
                       callbacks=callbacks, shuffle=shuffle,
                       steps_per_epoch=steps_per_epoch)

    trained, grid_points, losses = [], [], []
    for depth, width, opt_name, rate in itertools.product(
            hidden_layers, units, optimizers, lrs):
        net = get_single_mlp(X_train, y_train, problem,
                             hidden_layer=depth, unit=width)
        compile_model(problem, net, optimizer=opt_name, lr=rate)
        net, loss = train_model(net, X_train, y_train, X_val, y_val,
                                **fit_options)

        trained.append(net)
        losses.append(loss)
        grid_points.append({'hidden_layer': depth,
                            'unit': width,
                            'optimizer': opt_name,
                            'lr': rate})

    return trained, grid_points, losses

In [25]:
# Short aliases for the preprocessed splits held on the DataSource.
X_train, y_train = ds.trans_X_train, ds.trans_y_train
X_val, y_val = ds.trans_X_val, ds.trans_y_val

In [38]:
# Grid search over 2 depths x 2 widths x 2 optimizers x 2 learning rates = 16 models.
# NOTE: this rebinds `models`, which previously held the list returned by get_mlps.
models, param_info, val_losses = auto_fit(ds.problem, X_train, y_train, X_val, y_val,
                                          hidden_layers=[1, 2], units=[16, 32],
                                          optimizers=['adam', 'adadelta'], lrs=[0.01, 0.02],
                                          batch_size=64, epochs=10, verbose=0)

sequential model is trained. best val loss is: [0.45781596785499934, 0.7952380952380952]
sequential model is trained. best val loss is: [0.4156928491024744, 0.8333333333333334]
sequential model is trained. best val loss is: [0.7017272103400457, 0.46190476190476193]
sequential model is trained. best val loss is: [0.6979270492281232, 0.4857142857142857]
sequential model is trained. best val loss is: [0.4900653515543256, 0.8047619047619048]
sequential model is trained. best val loss is: [0.4315620473452977, 0.819047619047619]
sequential model is trained. best val loss is: [0.6954697807629903, 0.5571428571428572]
sequential model is trained. best val loss is: [0.6903100428127107, 0.5904761904761905]
sequential model is trained. best val loss is: [0.41507263211976914, 0.8380952380952381]
sequential model is trained. best val loss is: [0.43994153227124894, 0.8380952380952381]
sequential model is trained. best val loss is: [0.6860601101602827, 0.6]
sequential model is trained. best val loss i

In [None]:
# model1, model_info = get_single_mlp(ds.trans_X_train, ds.trans_y_train, ds.problem, hidden_layer=1, unit=16) #1
# model2, model_info = get_single_mlp(ds.trans_X_train, ds.trans_y_train, ds.problem, hidden_layer=2, unit=16) #3
# model3, model_info = get_single_mlp(ds.trans_X_train, ds.trans_y_train, ds.problem, hidden_layer=2, unit=32) #4
# models, models_info = get_mlps(ds.trans_X_train, ds.trans_y_train, ds.problem, max_hidden_layers=2, units=[16, 32])
# model_list = [model1, model2, model3]
# models_1 = model_list + models
# hidden=1, unit=16, 'adam', lr=0.01  --  loss: 0.4578 - accuracy: 0.7952 
# hidden=2, unit=16, 'adam', lr=0.01  --  loss: 0.4151 - accuracy: 0.8381
# hidden=2, unit=32, 'adam', lr=0.01  --  loss: 0.4548 - accuracy: 0.8143
# hidden=1, unit=16, 'adam', lr=0.01  --  loss: 0.4578 - accuracy: 0.7952
# hidden=1, unit=32, 'adam', lr=0.01  --  loss: 0.4901 - accuracy: 0.8048
# hidden=2, unit=16, 'adam', lr=0.01  --  loss: 0.4151 - accuracy: 0.8381
# hidden=2, unit=32, 'adam', lr=0.01  --  loss: 0.4548 - accuracy: 0.8143

In [39]:
# Hyper-parameter combination of each trained model, in the order auto_fit trained them.
param_info

[{'hidden_layer': 1, 'unit': 16, 'optimizer': 'adam', 'lr': 0.01},
 {'hidden_layer': 1, 'unit': 16, 'optimizer': 'adam', 'lr': 0.02},
 {'hidden_layer': 1, 'unit': 16, 'optimizer': 'adadelta', 'lr': 0.01},
 {'hidden_layer': 1, 'unit': 16, 'optimizer': 'adadelta', 'lr': 0.02},
 {'hidden_layer': 1, 'unit': 32, 'optimizer': 'adam', 'lr': 0.01},
 {'hidden_layer': 1, 'unit': 32, 'optimizer': 'adam', 'lr': 0.02},
 {'hidden_layer': 1, 'unit': 32, 'optimizer': 'adadelta', 'lr': 0.01},
 {'hidden_layer': 1, 'unit': 32, 'optimizer': 'adadelta', 'lr': 0.02},
 {'hidden_layer': 2, 'unit': 16, 'optimizer': 'adam', 'lr': 0.01},
 {'hidden_layer': 2, 'unit': 16, 'optimizer': 'adam', 'lr': 0.02},
 {'hidden_layer': 2, 'unit': 16, 'optimizer': 'adadelta', 'lr': 0.01},
 {'hidden_layer': 2, 'unit': 16, 'optimizer': 'adadelta', 'lr': 0.02},
 {'hidden_layer': 2, 'unit': 32, 'optimizer': 'adam', 'lr': 0.01},
 {'hidden_layer': 2, 'unit': 32, 'optimizer': 'adam', 'lr': 0.02},
 {'hidden_layer': 2, 'unit': 32, 'opti

In [None]:
# Train only the single configuration: 2 hidden layers, 32 units, adam, lr=0.02.
# NOTE(review): auto_fit returns lists, so `model` is a one-element list here, and
# `param_info` / `val_losses` from the grid search above are overwritten.
model, param_info, val_losses = auto_fit(ds.problem, X_train, y_train, X_val, y_val,
                                          hidden_layers=[2], units=[32],
                                          optimizers=['adam'], lrs=[0.02],
                                          batch_size=64, epochs=10, verbose=0)