$$ \huge\text{Kaggle Titanic: Hyperparameter Search} $$

$$ \small\text{Titanic: Machine Learning from Disaster}$$

$$ \large\text{Andrew Ribeiro - June 2018 -  Andrew@kexp.io }$$
![Titanic](notebook_images/4679-004-8C0793EF.jpg)

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

# General Libraries 
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import gc
    
# Scikit-Learn Libraries
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

# Keras Libraries 
from keras.callbacks import ModelCheckpoint,EarlyStopping
from keras.models import load_model
from keras.layers import Input, Dense, Dropout
from keras.models import Model
import keras.backend as K

# Interactive Widgets
from ipywidgets import interact

# Load Data
# Both CSVs are read from paths relative to the notebook's working directory.
trainDF = pd.read_csv("./data/titanic/train.csv")
testDF = pd.read_csv("./data/titanic/test.csv")


  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
def meanBaseline(df):
    """Return the log loss of predicting one constant for every row of `df`.

    The constant is the sum of the `Survived` column divided by the number of
    rows, and the loss is computed against that same column.
    """
    survived = df["Survived"]
    rowCount = survived.shape[0]
    constantPrediction = np.full(rowCount, survived.sum()/rowCount)
    return log_loss(survived, constantPrediction)

In [3]:
# Our preprocessing function, which is applied to every row of the target dataframe.
def preprocessRow(row):
    """Encode one passenger row as a flat list of 12 features.

    Layout of the returned list:
        [Pclass one-hot (3), Sex one-hot (2), Age, SibSp, Parch, Fare, Embarked one-hot (3)]

    A categorical value that is not one of the known categories is encoded as
    all zeros for that group.
    """
    # Process Categorical Variables - One-Hot-Encoding (lookup tables)
    sexCodes      = {"male": [0,1], "female": [1,0]}
    embarkedCodes = {"S": [0,0,1], "C": [0,1,0], "Q": [1,0,0]}
    pclassCodes   = {1: [0,0,1], 2: [0,1,0], 3: [1,0,0]}

    sexVec      = sexCodes.get(row["Sex"], [0,0])
    embarkedVec = embarkedCodes.get(row["Embarked"], [0,0,0])
    pclassVec   = pclassCodes.get(row["Pclass"], [0,0,0])

    # Numeric columns are passed through unchanged, in this fixed order.
    numericVec = [row[column] for column in ("Age", "SibSp", "Parch", "Fare")]

    return pclassVec + sexVec + numericVec + embarkedVec

# Fill Missing Values
# Every missing value becomes 0. A row whose Embarked was missing therefore
# gets the all-zero embarked encoding from preprocessRow.
testDF = testDF.fillna(0)
# Shuffle the training rows; the fixed random_state makes the order
# reproducible on a fresh kernel.
trainDF = trainDF.fillna(0).sample(frac=1, random_state=42)

# Preprocess Data
# `.values` replaces `.as_matrix()`, which was deprecated and later removed
# from pandas; np.stack turns the per-row feature lists into one 2-D array.
data = np.stack(trainDF.apply(preprocessRow,axis=1).values)

# View what the training vectors look like.
pd.DataFrame(data).head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.0,0.0,1.0,1.0,0.0,15.0,0.0,1.0,211.3375,0.0,0.0,1.0
1,0.0,1.0,0.0,1.0,0.0,33.0,1.0,2.0,27.75,0.0,0.0,1.0
2,1.0,0.0,0.0,0.0,1.0,22.0,0.0,0.0,7.225,0.0,1.0,0.0
3,0.0,0.0,1.0,0.0,1.0,58.0,0.0,2.0,113.275,0.0,1.0,0.0
4,0.0,0.0,1.0,0.0,1.0,47.0,0.0,0.0,25.5875,0.0,0.0,1.0


In [4]:
# Hold out 20% of the rows for validation. `.values` replaces the removed
# `.as_matrix()`, and the fixed random_state keeps the split reproducible.
train_x, validation_x, train_y , validation_y = train_test_split(data, trainDF["Survived"].values, test_size=0.2, random_state=42)

In [9]:
# Training Hyperparameters
# Notebook-level defaults. gridSearch passes `epochs` to every run and supplies
# its own batch size per run; visGridSearch uses `batch_size` and `epochs` as
# part of the key it looks up in the results.
batch_size = 300
epochs = 600

# Model Hyperparameters
# gridSearch passes these two values unchanged to every trainModel call.
lossFn = 'binary_crossentropy'
optimizer = 'adam'

# Model Architecture 
def trainModel(nLayers,layerSize,dropoutPercent,lossFn,optimizer,batch_size,epochs):
    """Build, compile and fit a fully connected network; return its settings and history.

    The network is `nLayers` blocks of Dense(layerSize, sigmoid) followed by
    Dropout(dropoutPercent), then a single sigmoid output unit. Training and
    validation arrays are read from the notebook-level `train_x`, `train_y`,
    `validation_x` and `validation_y`.

    Args:
        nLayers: number of hidden Dense/Dropout blocks; 0 connects the input
            directly to the output unit.
        layerSize: number of units in every hidden Dense layer.
        dropoutPercent: rate passed to every Dropout layer.
        lossFn: loss passed to `model.compile`.
        optimizer: optimizer passed to `model.compile`.
        batch_size: batch size passed to `model.fit`.
        epochs: number of epochs passed to `model.fit`.

    Returns:
        A tuple `(params, hist)` where `params` is the 7-tuple of the arguments
        in call order and `hist` is the object returned by `model.fit`.
    """
    # Reset the Keras backend state left behind by previous runs.
    K.clear_session()

    inputs = Input(shape=(train_x.shape[1],),name="input")
    # Seed the chain with the input tensor up front. This removes the former
    # `x == None` check (an equality comparison against a tensor on every
    # iteration after the first) and keeps the model valid when nLayers is 0.
    x = inputs

    for layer in range(nLayers):
        x = Dense(layerSize, activation='sigmoid',name="fc"+str(layer))(x)
        x = Dropout(dropoutPercent,name="fc_dropout_"+str(layer))(x)

    out = Dense(1,activation='sigmoid', name="output")(x)

    model = Model(inputs=inputs, outputs=out)
    model.compile(optimizer=optimizer,
                  loss=lossFn,
                  metrics=['acc'])

    # verbose=0 keeps the grid search output down to one summary line per run.
    hist = model.fit(train_x, train_y,validation_data=(validation_x,validation_y),
                     batch_size=batch_size,epochs=epochs, verbose=0)

    # Drop the local reference to the model and ask the collector to run.
    del model
    gc.collect()

    return (nLayers,layerSize,dropoutPercent,lossFn,optimizer,batch_size,epochs),hist


def gridSearch(layersGrid,layerSizeGrid,dropoutGrid,batchGrid):
    """Train one model for every combination of the four grids.

    Each combination is handed to `trainModel` together with the notebook-level
    `lossFn`, `optimizer` and `epochs`. After every run one summary line is
    printed: the final training loss and the absolute percentage gap between
    the final training loss and the final validation loss.

    Returns:
        A dict mapping the parameter tuple returned by `trainModel` to the
        history object of that run.
    """
    summaryLine = "{0:3} x {1:3} Layers with {2:2f} dropout and batch size of {3:3} | {4:5f} @ {5:5f}%"
    histories = {}

    for depth in layersGrid:
        for width in layerSizeGrid:
            for dropRate in dropoutGrid:
                for batch in batchGrid:
                    runKey, runHist = trainModel(depth, width, dropRate, lossFn, optimizer, batch, epochs)
                    histories[runKey] = runHist

                    # Only the last recorded epoch is reported.
                    finalLoss = runHist.history['loss'][-1]
                    finalValLoss = runHist.history['val_loss'][-1]
                    gapPercent = np.abs((1 - (finalLoss/finalValLoss))*100)

                    print(summaryLine.format(depth, width, dropRate, batch, finalLoss, gapPercent))

    return histories

In [None]:
# Hyperparameter grids. Sizes are 6 (layers) x 7 (layer sizes) x 10 (dropout
# rates) x 7 (batch sizes), so the search below performs 2940 training runs.
layersGrid = np.arange(1,7)
layerSizeGrid = np.arange(5,200,30)
dropoutGrid = np.arange(0,1,0.10)
batchGrid = np.arange(50,400,50)

# Run the full search; `res` maps each parameter tuple to its training history.
res = gridSearch(layersGrid,layerSizeGrid,dropoutGrid,batchGrid)

  1 x   5 Layers with 0.000000 dropout and batch size of  50 | 0.392815 @ 18.441103%
  1 x   5 Layers with 0.000000 dropout and batch size of 100 | 0.412536 @ 16.291910%
  1 x   5 Layers with 0.000000 dropout and batch size of 150 | 0.411545 @ 14.620942%
  1 x   5 Layers with 0.000000 dropout and batch size of 200 | 0.424532 @ 13.556138%
  1 x   5 Layers with 0.000000 dropout and batch size of 250 | 0.434791 @ 12.706046%
  1 x   5 Layers with 0.000000 dropout and batch size of 300 | 0.439519 @ 11.832256%
  1 x   5 Layers with 0.000000 dropout and batch size of 350 | 0.483606 @ 9.589988%
  1 x   5 Layers with 0.100000 dropout and batch size of  50 | 0.407524 @ 11.900760%
  1 x   5 Layers with 0.100000 dropout and batch size of 100 | 0.421880 @ 12.041523%
  1 x   5 Layers with 0.100000 dropout and batch size of 150 | 0.426762 @ 10.850315%
  1 x   5 Layers with 0.100000 dropout and batch size of 200 | 0.468927 @ 7.494878%
  1 x   5 Layers with 0.100000 dropout and batch size of 250 | 0.47

  1 x  35 Layers with 0.400000 dropout and batch size of  50 | 0.400410 @ 15.403421%
  1 x  35 Layers with 0.400000 dropout and batch size of 100 | 0.413086 @ 12.643614%
  1 x  35 Layers with 0.400000 dropout and batch size of 150 | 0.405049 @ 13.651195%
  1 x  35 Layers with 0.400000 dropout and batch size of 200 | 0.432729 @ 8.298724%
  1 x  35 Layers with 0.400000 dropout and batch size of 250 | 0.441281 @ 6.672898%
  1 x  35 Layers with 0.400000 dropout and batch size of 300 | 0.428403 @ 10.805376%
  1 x  35 Layers with 0.400000 dropout and batch size of 350 | 0.452865 @ 8.036481%
  1 x  35 Layers with 0.500000 dropout and batch size of  50 | 0.409717 @ 13.245347%
  1 x  35 Layers with 0.500000 dropout and batch size of 100 | 0.427299 @ 10.226480%
  1 x  35 Layers with 0.500000 dropout and batch size of 150 | 0.419957 @ 11.915401%
  1 x  35 Layers with 0.500000 dropout and batch size of 200 | 0.431920 @ 8.822403%
  1 x  35 Layers with 0.500000 dropout and batch size of 250 | 0.4361

  1 x  65 Layers with 0.700000 dropout and batch size of 350 | 0.499747 @ 1.438360%
  1 x  65 Layers with 0.800000 dropout and batch size of  50 | 0.430211 @ 8.245997%
  1 x  65 Layers with 0.800000 dropout and batch size of 100 | 0.446561 @ 5.600694%
  1 x  65 Layers with 0.800000 dropout and batch size of 150 | 0.453469 @ 4.726603%
  1 x  65 Layers with 0.800000 dropout and batch size of 200 | 0.459255 @ 3.319126%
  1 x  65 Layers with 0.800000 dropout and batch size of 250 | 0.458033 @ 4.622236%
  1 x  65 Layers with 0.800000 dropout and batch size of 300 | 0.472685 @ 3.010510%
  1 x  65 Layers with 0.800000 dropout and batch size of 350 | 0.499416 @ 0.047587%
  1 x  65 Layers with 0.900000 dropout and batch size of  50 | 0.479832 @ 1.598962%
  1 x  65 Layers with 0.900000 dropout and batch size of 100 | 0.504463 @ 4.433264%
  1 x  65 Layers with 0.900000 dropout and batch size of 150 | 0.506842 @ 3.812713%
  1 x  65 Layers with 0.900000 dropout and batch size of 200 | 0.516781 @ 6.

  1 x 125 Layers with 0.100000 dropout and batch size of 300 | 0.388067 @ 15.780122%
  1 x 125 Layers with 0.100000 dropout and batch size of 350 | 0.420619 @ 12.798477%
  1 x 125 Layers with 0.200000 dropout and batch size of  50 | 0.356542 @ 21.865169%
  1 x 125 Layers with 0.200000 dropout and batch size of 100 | 0.368077 @ 19.031383%
  1 x 125 Layers with 0.200000 dropout and batch size of 150 | 0.389780 @ 13.868122%
  1 x 125 Layers with 0.200000 dropout and batch size of 200 | 0.392351 @ 15.414750%
  1 x 125 Layers with 0.200000 dropout and batch size of 250 | 0.396463 @ 14.628902%
  1 x 125 Layers with 0.200000 dropout and batch size of 300 | 0.397212 @ 13.913770%
  1 x 125 Layers with 0.200000 dropout and batch size of 350 | 0.437295 @ 11.941641%
  1 x 125 Layers with 0.300000 dropout and batch size of  50 | 0.376884 @ 18.229900%
  1 x 125 Layers with 0.300000 dropout and batch size of 100 | 0.404931 @ 13.017036%
  1 x 125 Layers with 0.300000 dropout and batch size of 150 | 0.

In [1]:
def findBestParams_1(res):
    """Pick a run with both a small train/validation gap and a small training loss.

    For every entry the final training loss and the absolute percentage gap
    between final training and validation loss are computed. An entry replaces
    the current pick only when BOTH values are lower than the current pick's,
    so the result depends on the iteration order of `res`.

    Args:
        res: mapping from a parameter key to an object whose `.history` dict
            holds 'loss' and 'val_loss' sequences.

    Returns:
        The key of the selected entry, or None when `res` is empty or no entry
        qualifies.
    """
    # Infinite sentinels instead of the magic number 10000, so entries with
    # very large losses or gaps can still be selected.
    lowestLoss = float("inf")
    lowestDiff = float("inf")
    lowestRes = None

    for result, hist in res.items():
        loss = hist.history['loss'][-1]
        val_loss = hist.history['val_loss'][-1]
        diff = np.abs((1 - (loss/val_loss))*100)

        if diff < lowestDiff and loss < lowestLoss:
            lowestDiff = diff
            lowestLoss = loss
            lowestRes = result

    return lowestRes

def findBestParams_2(res):
    """Return the key of the run with the lowest final validation loss.

    Args:
        res: mapping from a parameter key to an object whose `.history` dict
            holds a 'val_loss' sequence.

    Returns:
        The key whose last 'val_loss' value is smallest (the first such key on
        ties), or None when `res` is empty.
    """
    # Infinite sentinel instead of the magic number 10000, so a run is still
    # returned when every validation loss is large.
    lowestLoss = float("inf")
    lowestRes = None

    for result, hist in res.items():
        val_loss = hist.history['val_loss'][-1]

        if val_loss < lowestLoss:
            lowestLoss = val_loss
            lowestRes = result

    return lowestRes

# Both selectors read `res`, the dictionary returned by gridSearch, so the
# grid-search cell must have been run in this kernel first.
print(findBestParams_1(res))
print(findBestParams_2(res))

NameError: name 'res' is not defined

In [42]:
def learningCurves(hist):
    """Plot the loss and accuracy curves of one training run and print its best values.

    Draws training/validation loss and accuracy per epoch, a horizontal line at
    `meanBaseline(trainDF)` for comparison, and markers at the epoch of maximum
    validation accuracy and of minimum validation loss. `trainDF` and
    `meanBaseline` are read from the notebook scope.

    Args:
        hist: object whose `.history` dict holds equally long 'acc', 'loss',
            'val_acc' and 'val_loss' sequences.
    """
    histAcc_train = hist.history['acc']
    histLoss_train = hist.history['loss']
    histAcc_validation = hist.history['val_acc']
    histLoss_validation = hist.history['val_loss']
    maxValAcc = np.max(histAcc_validation)
    minValLoss = np.min(histLoss_validation)

    # Take the x-axis length from the recorded history rather than the global
    # `epochs`; otherwise plotting fails with a shape mismatch whenever the
    # run length differs from the current value of that global.
    nEpochs = len(histLoss_train)
    epochAxis = range(nEpochs)

    plt.figure(figsize=(12,12))

    plt.plot(epochAxis,np.full(nEpochs,meanBaseline(trainDF)),label="Unbiased Estimator", color="red")

    plt.plot(epochAxis,histLoss_train, label="Training Loss", color="#acc6ef")
    plt.plot(epochAxis,histAcc_train, label="Training Accuracy", color = "#005ff9" )

    plt.plot(epochAxis,histLoss_validation, label="Validation Loss", color="#a7e295")
    plt.plot(epochAxis,histAcc_validation, label="Validation Accuracy",color="#3ddd0d")

    # Mark the best validation epoch for each metric.
    plt.scatter(np.argmax(histAcc_validation),maxValAcc,zorder=10,color="green")
    plt.scatter(np.argmin(histLoss_validation),minValLoss,zorder=10,color="green")

    plt.xlabel('Epochs',fontsize=14)
    plt.title("Learning Curves",fontsize=20)

    plt.legend()
    plt.show()

    print("Max validation accuracy: {0}".format(maxValAcc))
    print("Minimum validation loss: {0}".format(minValLoss))

def visGridSearch(layers,layerSize,dropout):
    """Show the learning curves of the grid-search run with the given settings.

    The lookup key into `res` is completed with the notebook-level `lossFn`,
    `optimizer`, `batch_size` and `epochs`, so only runs trained with that
    batch size can be displayed.
    """
    runKey = (layers, layerSize, dropout, lossFn, optimizer, batch_size, epochs)
    selectedHist = res[runKey]
    learningCurves(selectedHist)

# Build interactive controls from the searched grids; the selected values are
# passed to visGridSearch as `layers`, `layerSize` and `dropout`.
interact(visGridSearch,layers=layersGrid,layerSize=layerSizeGrid,dropout=dropoutGrid)

<function __main__.visGridSearch>