In [1]:
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras

from src.DataSource import DataSource

In [2]:
# Path to the training CSV, relative to the notebook's working directory.
filename = "./data/titanic_train.csv"
# Project-local DataSource wrapper; the later cells read all data through `ds`.
ds = DataSource(filename)

In [3]:
# Use 'Survived' as the target and leave out the listed columns
# (presumably dropped from the feature set — confirm in DataSource.data_load_split).
ds.data_load_split(target=['Survived'],
                   ignore=['Name', 'Cabin', 'Ticket'])
# Presumably infers the task type and stores it on ds.problem (shown in the next cell) — confirm.
ds.define_problem()
# NOTE(review): ratio=0.2 looks like the validation fraction; random_state=42 fixes the split — confirm.
ds.train_val_split(ratio=0.2, random_state=42)

In [94]:
# Show the problem type; the model helpers below branch on "Regression" / "Binary" / anything else.
ds.problem

'Binary'

In [4]:
# Preprocess both splits. Later cells read ds.trans_X_train / ds.trans_y_train /
# ds.trans_X_val / ds.trans_y_val, which are presumably produced here — confirm in DataSource.
# NOTE(review): train_set=True/False presumably means "fit transformers on train, reuse on validation" — confirm.
ds.data_preprocess(ds.X_train, ds.y_train, train_set=True)
ds.data_preprocess(ds.X_val, ds.y_val, train_set=False)

In [32]:
def get_single_mlp(X, y, problem, hidden_layers=1, unit=16):
    """Build one uncompiled multilayer perceptron as a Keras ``Sequential``.

    Side effects: sets the TensorFlow global seed to 42 and clears the Keras
    backend session before the model is created.

    Args:
        X: Feature array; only ``X.shape[1]`` is read (input width).
        y: Target array; only ``y.shape[1]`` is read, and only when
            ``problem`` is neither "Regression" nor "Binary".
        problem: "Regression" gives a single linear output unit, "Binary" a
            single sigmoid unit, anything else a softmax layer with
            ``y.shape[1]`` units.
        hidden_layers: Number of hidden ``Dense`` layers.
        unit: Width of every hidden layer (ReLU activation).

    Returns:
        ``(model, structure_info)`` where ``structure_info`` is
        ``{'hidden_layers': hidden_layers, 'units': unit}``.
    """
    tf.random.set_seed(42)

    structure_info = dict(hidden_layers=hidden_layers, units=unit)

    tf.keras.backend.clear_session()  # start from a clean Keras global state
    network = keras.Sequential()

    # Layers are created and added one at a time, in order: input first,
    # then the hidden stack, then the output head.
    n_features = X.shape[1]
    network.add(keras.layers.Input(shape=(n_features,)))

    for _layer_index in range(hidden_layers):
        hidden = keras.layers.Dense(unit, activation='relu')
        network.add(hidden)

    # Pick the output head that matches the problem type.
    if problem == "Binary":
        head = keras.layers.Dense(1, activation='sigmoid')
    elif problem == "Regression":
        head = keras.layers.Dense(1)
    else:
        head = keras.layers.Dense(y.shape[1], activation='softmax')
    network.add(head)

    return network, structure_info


def get_mlps(X, y, problem, max_hidden_layers=1, units=[16], use_all=False):
    """Build one uncompiled MLP per (depth, width) combination.

    Depths run from 1 to ``max_hidden_layers`` inclusive and are combined
    with every entry of ``units``; depth is the outer (slowest-changing) axis.

    Args:
        X, y, problem: Forwarded unchanged to ``get_single_mlp``.
        max_hidden_layers: Largest number of hidden layers to try.
        units: Hidden-layer widths to try (read only, never mutated).
        use_all: When true, ignore ``max_hidden_layers`` / ``units`` and use
            the built-in grid of depths 1..3 and widths 16..256.

    Returns:
        ``(structured_models, structures_info)`` — two parallel lists holding
        the models and their ``{'hidden_layers', 'units'}`` descriptions.
    """
    if use_all:
        max_hidden_layers = 3
        units = [16, 32, 64, 128, 256]

    # range() yields plain Python ints. The previous np.arange(...) + 1 leaked
    # np.int64 values into structure_info, which print as np.int64(1) on
    # recent NumPy and are not JSON-serialisable.
    depths = range(1, max_hidden_layers + 1)

    structured_models = []
    structures_info = []
    for hidden_layers, unit in itertools.product(depths, units):
        model, structure_info = get_single_mlp(X, y, problem,
                                               hidden_layers=hidden_layers,
                                               unit=unit)
        structured_models.append(model)
        structures_info.append(structure_info)

    return structured_models, structures_info


def compile_model(problem, structured_model, optimizer='adam', lr=0.01):
    """Clone ``structured_model`` and compile the clone for ``problem``.

    The input model is never compiled itself; a clone made with
    ``keras.models.clone_model`` is compiled and returned, so the same
    structure can be compiled several times with different settings.

    Args:
        problem: "Regression" (MSE loss, MSE/MAE metrics), "Binary" (binary
            cross-entropy, accuracy) or anything else (categorical
            cross-entropy, accuracy).
        structured_model: Uncompiled Keras model to clone.
        optimizer: One of 'adadelta', 'sgd', 'adam', 'adagrad', 'adamax',
            'rmsprop'.
        lr: Learning rate, passed as the optimizer's first positional argument.

    Returns:
        ``(compiled_model, optimizer_info)`` where ``optimizer_info`` is
        ``{'optimizer': optimizer, 'lr': lr}``.

    Raises:
        KeyError: If ``optimizer`` is not one of the supported names.
    """
    # Loss and metrics follow directly from the problem type.
    if problem == "Regression":
        loss, metrics = keras.losses.MSE, ['MSE', 'MAE']
    elif problem == "Binary":
        loss, metrics = keras.losses.binary_crossentropy, ['accuracy']
    else:
        loss, metrics = keras.losses.categorical_crossentropy, ['accuracy']

    # Resolve the optimizer name before cloning so a bad name fails early.
    optimizers_module = keras.optimizers
    name_to_class = {
        'adadelta': optimizers_module.Adadelta,
        'sgd': optimizers_module.SGD,
        'adam': optimizers_module.Adam,
        'adagrad': optimizers_module.Adagrad,
        'adamax': optimizers_module.Adamax,
        'rmsprop': optimizers_module.RMSprop,
    }
    chosen_class = name_to_class[optimizer]

    optimizer_info = dict(optimizer=optimizer, lr=lr)

    # Compile a clone so the caller's structured model stays untouched.
    clone = keras.models.clone_model(structured_model)
    clone.compile(optimizer=chosen_class(lr), loss=loss, metrics=metrics)

    return clone, optimizer_info


def compile_models(problem, structured_models, structures_info, optimizers=['adam'], lrs=[0.01], use_all=False):
    """Compile every structured model with every (optimizer, learning rate) pair.

    Combinations are produced with the model as the outer axis, then the
    optimizer, then the learning rate. Duplicate entries in ``optimizers`` or
    ``lrs`` produce duplicate compiled models.

    Args:
        problem: Problem type, forwarded to ``compile_model``.
        structured_models: Uncompiled models.
        structures_info: Structure descriptions, paired with
            ``structured_models`` position by position.
        optimizers: Optimizer names to try (read only).
        lrs: Learning rates to try (read only).
        use_all: When true, ignore ``optimizers`` / ``lrs`` and use the
            built-in grid of six optimizers and four learning rates.

    Returns:
        ``(compiled_models, compiled_models_info)`` — parallel lists; each
        info entry is ``{"structure": ..., "optimizer": ...}``.
    """
    if use_all:
        optimizers = ['adadelta', 'sgd', 'adam', 'adagrad', 'adamax', 'rmsprop']
        lrs = [0.001, 0.01, 0.02, 0.1]

    # Materialise each axis once so one-shot iterables can be walked repeatedly.
    architectures = tuple(zip(structured_models, structures_info))
    optimizer_names = tuple(optimizers)
    learning_rates = tuple(lrs)

    compiled_models = []
    compiled_models_info = []
    for structured_model, structure_info in architectures:
        for optimizer_name in optimizer_names:
            for learning_rate in learning_rates:
                compiled, optimizer_info = compile_model(problem, structured_model,
                                                         optimizer=optimizer_name,
                                                         lr=learning_rate)
                compiled_models.append(compiled)
                compiled_models_info.append({"structure": structure_info,
                                             "optimizer": optimizer_info})

    return compiled_models, compiled_models_info


def train_model(compiled_model, X_train, y_train, X_val=None, y_val=None,
                batch_size=None, epochs=1, verbose=0, callbacks=None,
                shuffle=True, steps_per_epoch=None):
    """Fit a compiled model and, when validation data is given, evaluate it.

    Args:
        compiled_model: Compiled Keras model; it is trained in place.
        X_train, y_train: Training inputs and targets.
        X_val, y_val: Optional validation inputs and targets. When ``X_val``
            is None, no validation data is passed to ``fit`` and the final
            evaluation is skipped.
        batch_size, epochs, verbose, shuffle, steps_per_epoch: Forwarded to
            ``compiled_model.fit``.
        callbacks: A callback or a list of callbacks. When empty or None, an
            ``EarlyStopping`` callback (patience 5, restoring the best
            weights) is used; it monitors ``val_loss`` when validation data
            is available and ``loss`` otherwise.

    Returns:
        ``(compiled_model, val_loss)``. ``val_loss`` is whatever
        ``compiled_model.evaluate`` returns on the validation data (loss
        followed by the compiled metrics), or None without validation data.
    """
    has_validation = X_val is not None
    validation_data = (X_val, y_val) if has_validation else None

    if not callbacks:
        # 'val_loss' only exists when validation data is supplied; fall back
        # to the training loss so early stopping still has something to watch.
        monitor = 'val_loss' if has_validation else 'loss'
        callbacks = [keras.callbacks.EarlyStopping(monitor=monitor,
                                                   patience=5,
                                                   restore_best_weights=True)]
    elif not isinstance(callbacks, (list, tuple)):
        # fit() is documented to take a list of callbacks.
        callbacks = [callbacks]

    compiled_model.fit(x=X_train, y=y_train,
                       batch_size=batch_size, epochs=epochs,
                       verbose=verbose, callbacks=callbacks,
                       validation_data=validation_data, shuffle=shuffle,
                       steps_per_epoch=steps_per_epoch)

    if not has_validation:
        print("{} model is trained. no validation data was given.".format(compiled_model.name))
        return compiled_model, None

    val_loss = compiled_model.evaluate(X_val, y_val, verbose=verbose)
    print("{} model is trained. best val loss is: {}".format(compiled_model.name, val_loss))

    return compiled_model, val_loss


def train_models(compiled_models, X_train, y_train, X_val=None, y_val=None,
                 batch_size=None, epochs=1, verbose=0, callbacks=None,
                 shuffle=True, steps_per_epoch=None):
    """Train each compiled model in turn with the same data and settings.

    Every model is handed to ``train_model`` together with the arguments
    given here; models are processed in list order.

    Args:
        compiled_models: Iterable of compiled models.
        X_train, y_train, X_val, y_val: Data forwarded to ``train_model``.
        batch_size, epochs, verbose, callbacks, shuffle, steps_per_epoch:
            Training options forwarded to ``train_model``.

    Returns:
        ``(trained_models, val_losses)`` — two parallel lists holding the
        first and second element of each ``train_model`` result.
    """
    fit_options = dict(X_val=X_val, y_val=y_val,
                       batch_size=batch_size, epochs=epochs,
                       verbose=verbose, callbacks=callbacks,
                       shuffle=shuffle, steps_per_epoch=steps_per_epoch)

    outcomes = []
    for candidate in compiled_models:
        fitted, score = train_model(candidate, X_train, y_train, **fit_options)
        outcomes.append((fitted, score))

    trained_models = [fitted for fitted, _ in outcomes]
    val_losses = [score for _, score in outcomes]
    return trained_models, val_losses

In [116]:
# Build two reference networks directly: one and two hidden layers, 16 units each.
# `model_info` is overwritten by the second call, so only the 2-layer description survives.
model1, model_info = get_single_mlp(ds.trans_X_train, ds.trans_y_train, ds.problem, hidden_layers=1, unit=16)
model2, model_info = get_single_mlp(ds.trans_X_train, ds.trans_y_train, ds.problem, hidden_layers=2, unit=16)

In [117]:
# Same two architectures (depths 1 and 2, 16 units), this time produced by the grid helper.
models, models_info = get_mlps(ds.trans_X_train, ds.trans_y_train, ds.problem,
                               max_hidden_layers=2, units=[16])

In [118]:
# Baseline for the hand-built models: compile, fit and evaluate each one directly
# with Keras, without going through compile_model / train_model.
# model.compile() acts on the objects held in models_1 themselves (no clone is made here).
models_1 = [model1, model2]
for model in models_1:
    model.compile(optimizer=keras.optimizers.Adadelta(0.1),
              loss=keras.losses.binary_crossentropy,
              metrics=['accuracy'])
    
# for model in models_1:
    # A fresh EarlyStopping instance is created for every model in the loop.
    callbacks=tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(x=ds.trans_X_train, y=ds.trans_y_train,
          batch_size=64, epochs=10,
          verbose=0, callbacks=[callbacks],
          validation_data=(ds.trans_X_val, ds.trans_y_val), shuffle=True)
    
# for model in models_1:
    # The return value is not captured; with the default verbosity Keras prints the result.
    model.evaluate(ds.trans_X_val, ds.trans_y_val)



In [119]:
# Same manual compile / fit / evaluate baseline, applied to the models built by get_mlps.
# model.compile() acts on the objects held in `models` themselves (no clone is made here).
for model in models:
    model.compile(optimizer=keras.optimizers.Adadelta(0.1),
              loss=keras.losses.binary_crossentropy,
              metrics=['accuracy'])

# for model in models:
    # A fresh EarlyStopping instance is created for every model in the loop.
    callbacks=tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

    model.fit(x=ds.trans_X_train, y=ds.trans_y_train,
          batch_size=64, epochs=10,
          verbose=0, callbacks=[callbacks],
          validation_data=(ds.trans_X_val, ds.trans_y_val), shuffle=True)

# for model in models:
    # The return value is not captured; with the default verbosity Keras prints the result.
    model.evaluate(ds.trans_X_val, ds.trans_y_val)



### With the helper functions defined above

In [124]:
# Rebuild the two hand-built networks so the helper-based run starts from new, untrained models.
# `model_info` is overwritten by the second call.
model1, model_info = get_single_mlp(ds.trans_X_train, ds.trans_y_train, ds.problem, hidden_layers=1, unit=16)
model2, model_info = get_single_mlp(ds.trans_X_train, ds.trans_y_train, ds.problem, hidden_layers=2, unit=16)

In [125]:
# Rebuild the grid-generated models (depths 1 and 2, 16 units) for the helper-based run.
models, models_info = get_mlps(ds.trans_X_train, ds.trans_y_train, ds.problem,
                               max_hidden_layers=2, units=[16])

In [122]:
# Helper-based run on the hand-built models.
# compile_model returns a compiled clone, so rebinding `model` only changes the loop
# variable; the entries of models_1 themselves stay uncompiled and untrained.
models_1 = [model1, model2]
for model in models_1:
    model, _ = compile_model(ds.problem, model, optimizer='adadelta', lr=0.1)
    
    # No callbacks are passed, so train_model falls back to its EarlyStopping default.
    model, _ = train_model(model, ds.trans_X_train, ds.trans_y_train,
                            ds.trans_X_val, ds.trans_y_val, batch_size=64, epochs=10)
    

sequential model is trained. best val loss is: [0.6487677699043637, 0.680952380952381]
sequential model is trained. best val loss is: [0.6704074865295774, 0.6142857142857143]


In [128]:
# Helper-based run on the grid-generated models.
# compile_model returns a compiled clone, so rebinding `model` only changes the loop
# variable; the entries of `models` themselves stay uncompiled and untrained.
for model in models:
    model, _ = compile_model(ds.problem, model, optimizer='adadelta', lr=0.1)
    
    # No callbacks are passed, so train_model falls back to its EarlyStopping default.
    model, _ = train_model(model, ds.trans_X_train, ds.trans_y_train,
                            ds.trans_X_val, ds.trans_y_val, batch_size=64, epochs=10)
    

sequential model is trained. best val loss is: [0.6629807767413911, 0.6428571428571429]
sequential model is trained. best val loss is: [0.6637158388183231, 0.7523809523809524]


In [109]:
# Compile clones of the two hand-built models under their own names.
# The following cells read `compiled_model1` / `compiled_model2`; assigning the result
# back to `model1` / `model2` left those names undefined on a fresh top-to-bottom run
# and also discarded the uncompiled originals.
compiled_model1, _ = compile_model(ds.problem, model1, optimizer='adadelta', lr=0.1)
compiled_model2, _ = compile_model(ds.problem, model2, optimizer='adadelta', lr=0.1)

(<tensorflow.python.keras.engine.sequential.Sequential at 0x7fe4972dbf98>,
 {'optimizer': 'adadelta', 'lr': 0.1})

In [74]:
# Train the two compiled models one at a time. No callbacks are passed, so train_model
# uses its EarlyStopping default; the second return value is discarded.
trained_model1, _ = train_model(compiled_model1, ds.trans_X_train, ds.trans_y_train,
                            ds.trans_X_val, ds.trans_y_val, batch_size=64, epochs=10)
trained_model2, _ = train_model(compiled_model2, ds.trans_X_train, ds.trans_y_train,
                            ds.trans_X_val, ds.trans_y_val, batch_size=64, epochs=10)

sequential model is trained. best val loss is: [0.6487677699043637, 0.680952380952381]
sequential model is trained. best val loss is: [0.6704074865295774, 0.6142857142857143]


In [77]:
# Check which optimizer instance is attached to the compiled model.
compiled_model1.optimizer

<tensorflow.python.keras.optimizer_v2.adadelta.Adadelta at 0x7fe500e80780>

In [78]:
# Check the learning rate held by that optimizer.
compiled_model1.optimizer.lr

<tf.Variable 'Adadelta/learning_rate:0' shape=() dtype=float32, numpy=0.1>

In [92]:
# Look at the fourth entry of the model's weight list
# (the displayed output names it as a dense-layer bias).
compiled_model1.weights[3]

<tf.Variable 'dense_1_1/bias:0' shape=(1,) dtype=float64, numpy=array([-0.01316631])>

In [52]:
# compile_model compiles a *clone* and returns it; the model passed in stays uncompiled.
# The returned clone must therefore be the one handed to train_model — previously the
# return value was dropped and the uncompiled original was trained instead.
for model in models_1:
    compiled, _ = compile_model(ds.problem, model, optimizer='adadelta', lr=0.1)
    train_model(compiled, ds.trans_X_train, ds.trans_y_train,
                ds.trans_X_val, ds.trans_y_val, batch_size=64, epochs=10)

sequential model is trained. best val loss is: [0.6356775516555423, 0.7428571428571429]
sequential model is trained. best val loss is: [0.6269156666029068, 0.7380952380952381]


In [26]:
# Two depths (1, 2) x two widths (16, 32) -> four uncompiled models.
models, models_info = get_mlps(ds.trans_X_train, ds.trans_y_train, ds.problem,
                               max_hidden_layers=2, units=[16, 32])
# Display both lists to check that models and descriptions line up.
models, models_info

([<tensorflow.python.keras.engine.sequential.Sequential at 0x7fe4bce35240>,
  <tensorflow.python.keras.engine.sequential.Sequential at 0x7fe4bcd4d898>,
  <tensorflow.python.keras.engine.sequential.Sequential at 0x7fe4bcdd32b0>,
  <tensorflow.python.keras.engine.sequential.Sequential at 0x7fe4bcd62ac8>],
 [{'hidden_layers': 1, 'units': 16},
  {'hidden_layers': 1, 'units': 32},
  {'hidden_layers': 2, 'units': 16},
  {'hidden_layers': 2, 'units': 32}])

In [27]:
# Compile every structure with every (optimizer, lr) combination.
# NOTE(review): 'adam' is listed twice, so each (structure, lr) pair is compiled twice
# (4 structures x 2 x 2 = 16 models; see the repeated entries in the output below).
# Confirm the duplication is intended, e.g. as a repeatability check.
compiled_models, compiled_models_info = compile_models(ds.problem, models, models_info,
                                                       optimizers=['adam', 'adam'],
                                                       lrs=[0.1, 0.2])
compiled_models_info

[{'structure': {'hidden_layers': 1, 'units': 16},
  'optimizer': {'optimizer': 'adam', 'lr': 0.1}},
 {'structure': {'hidden_layers': 1, 'units': 16},
  'optimizer': {'optimizer': 'adam', 'lr': 0.2}},
 {'structure': {'hidden_layers': 1, 'units': 16},
  'optimizer': {'optimizer': 'adam', 'lr': 0.1}},
 {'structure': {'hidden_layers': 1, 'units': 16},
  'optimizer': {'optimizer': 'adam', 'lr': 0.2}},
 {'structure': {'hidden_layers': 1, 'units': 32},
  'optimizer': {'optimizer': 'adam', 'lr': 0.1}},
 {'structure': {'hidden_layers': 1, 'units': 32},
  'optimizer': {'optimizer': 'adam', 'lr': 0.2}},
 {'structure': {'hidden_layers': 1, 'units': 32},
  'optimizer': {'optimizer': 'adam', 'lr': 0.1}},
 {'structure': {'hidden_layers': 1, 'units': 32},
  'optimizer': {'optimizer': 'adam', 'lr': 0.2}},
 {'structure': {'hidden_layers': 2, 'units': 16},
  'optimizer': {'optimizer': 'adam', 'lr': 0.1}},
 {'structure': {'hidden_layers': 2, 'units': 16},
  'optimizer': {'optimizer': 'adam', 'lr': 0.2}},


In [28]:
# Disabled debugging aid: prints each compiled model's optimizer and its learning rate.
# for model in compiled_models:
#     print(model.optimizer)
#     print(model.optimizer.lr)

In [29]:
# `compiled_model` was not defined by any earlier cell, so this cell failed on a fresh
# top-to-bottom run. Build it explicitly: a separately compiled clone of the first
# structure (1 hidden layer, 16 units) with the first grid setting (adam, lr=0.1).
# Using a separate clone keeps the entries of `compiled_models` untrained for the
# train_models call further down.
# NOTE(review): the configuration is inferred from the recorded output, which matches
# the first train_models result — confirm this is the model that was meant.
compiled_model, _ = compile_model(ds.problem, models[0], optimizer='adam', lr=0.1)
trained_model, val_loss = train_model(compiled_model, ds.trans_X_train, ds.trans_y_train,
                                      ds.trans_X_val, ds.trans_y_val, batch_size=64, epochs=10)

sequential model is trained. best val loss is: [0.423308185168675, 0.8238095238095238]


In [30]:
# Display the trained model and the evaluation result returned by train_model.
trained_model, val_loss

(<tensorflow.python.keras.engine.sequential.Sequential at 0x7fe4bcdcfda0>,
 [0.423308185168675, 0.8238095238095238])

In [31]:
# Train every compiled configuration in list order; train_model prints one summary line per model.
trained_models, val_losses = train_models(compiled_models, ds.trans_X_train, ds.trans_y_train,
                            ds.trans_X_val, ds.trans_y_val, batch_size=64, epochs=10)

sequential model is trained. best val loss is: [0.423308185168675, 0.8238095238095238]
sequential model is trained. best val loss is: [0.4525207661447071, 0.8333333333333334]
sequential model is trained. best val loss is: [0.4323851846513294, 0.7952380952380952]
sequential model is trained. best val loss is: [0.5033811021418798, 0.8238095238095238]
sequential model is trained. best val loss is: [0.44564959719067526, 0.7952380952380952]
sequential model is trained. best val loss is: [0.5284496937479292, 0.7857142857142857]
sequential model is trained. best val loss is: [0.45328284388496765, 0.8095238095238095]
sequential model is trained. best val loss is: [0.5562368279411679, 0.8142857142857143]
sequential model is trained. best val loss is: [0.5119102006866818, 0.8476190476190476]
sequential model is trained. best val loss is: [0.5166502918515886, 0.8142857142857143]
sequential model is trained. best val loss is: [0.5570542812347412, 0.7952380952380952]
sequential model is trained. be

In [99]:
# Display the per-model evaluation results collected by train_models.
# NOTE(review): this cell's execution count is out of sequence with the training cell
# above, so the stored output may come from a different run — re-run top to bottom to confirm.
val_losses

[[0.6487677699043637, 0.680952380952381],
 [0.6321581142289298, 0.7476190476190476],
 [0.44149042339552014, 0.7904761904761904],
 [0.47842450312205725, 0.7857142857142857],
 [0.6223019951865787, 0.7238095238095238],
 [0.6013998832021441, 0.7666666666666667],
 [0.47113809017908004, 0.7714285714285715],
 [0.5103121842656817, 0.8047619047619048],
 [0.660325829188029, 0.6761904761904762],
 [0.6169747193654378, 0.7571428571428571],
 [0.4495628365448543, 0.8095238095238095],
 [0.5072513847124009, 0.819047619047619],
 [0.6146161232675825, 0.7619047619047619],
 [0.5705262615567162, 0.780952380952381],
 [0.5405400565692356, 0.8142857142857143],
 [0.49578750616028194, 0.7523809523809524]]