# Titanic in Keras

In [115]:
import pandas as pd
import numpy as np

# Load the Kaggle Titanic CSVs: the labelled training set and the unlabelled test set.
train = pd.read_csv('../input/train.csv')
test = pd.read_csv('../input/test.csv')

In [116]:
# Separate the label column ('Survived') from the predictor columns.
# `drop` returns a new frame, so `train` itself is left untouched.
features = train.drop('Survived', axis=1)
target = train.loc[:, 'Survived']

# Clean up / engineer features
def engineer(data):
    """Return a cleaned, numerically encoded copy of a passenger DataFrame.

    The input frame is not modified. The following columns are rewritten:

    * ``Age``      - missing values are replaced by the mean age of ``data``.
    * ``Fare``     - (if present) missing values are replaced by the median
                     fare of ``data``.
    * ``Embarked`` - missing values default to ``'S'``; ports are then encoded
                     as integers (Q=1, C=2, S=3).
    * ``Sex``      - ``'female'`` -> 1, ``'male'`` -> 0.

    Values that are not keys of a mapping are passed through unchanged.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Age``, ``Embarked`` and ``Sex``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns above cleaned and encoded.
    """
    # Work on a copy so the caller's frame is left intact.
    data = data.copy()

    # Impute column by column. A blanket data.fillna(age_mean) would also
    # write the mean age into every other column that has gaps (Embarked,
    # Fare, Cabin, ...), which silently corrupts them.
    data['Age'] = data['Age'].fillna(data['Age'].mean())
    if 'Fare' in data.columns:
        data['Fare'] = data['Fare'].fillna(data['Fare'].median())

    # Southampton -> Cherbourg -> Queenstown (integer represents "time on board")
    embarked_mapping = {'Q': 1, 'C': 2, 'S': 3}
    data['Embarked'] = (
        data['Embarked']
        .fillna('S')
        .map(lambda port: embarked_mapping.get(port, port))
    )

    # Encode from the frame being processed, not from a global training frame,
    # so the function gives correct results for any input (e.g. the test set).
    sex_mapping = {'female': 1, 'male': 0}
    data['Sex'] = data['Sex'].map(lambda sex: sex_mapping.get(sex, sex))
    return data

In [118]:
# Select and prepare features for Keras (needs to be a numeric NumPy array).
features_list = ['Pclass', 'Age', 'Sex', 'Embarked', 'SibSp', 'Parch', 'Fare']

# Build the matrix from `train` rather than from `features` itself, so this cell
# can be re-run without failing on an already-converted array.
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0; the
# float32 cast guarantees a homogeneous numeric array rather than an object array.
features = (
    engineer(train.drop(['Survived'], axis=1))[features_list]
    .values
    .astype('float32')
)

In [140]:
from keras.models import Sequential
from keras.layers import Dense

# Specify the model: six fully connected ReLU layers of 100 units each,
# followed by a single output unit.
model = Sequential()
# The first layer declares the input width (one input per selected feature).
model.add(Dense(100, activation='relu', input_shape=(features.shape[1],)))
# The remaining five hidden layers are identical.
for _ in range(5):
    model.add(Dense(100, activation='relu'))
# Sigmoid keeps the output in [0, 1], so it can be read as a survival
# probability and thresholded at 0.5; a linear unit would be unbounded.
model.add(Dense(1, activation='sigmoid'))
model.summary()

____________________________________________________________________________________________________
Layer (type)                     Output Shape          Param #     Connected to                     
dense_57 (Dense)                 (None, 100)           800         dense_input_15[0][0]             
____________________________________________________________________________________________________
dense_58 (Dense)                 (None, 100)           10100       dense_57[0][0]                   
____________________________________________________________________________________________________
dense_59 (Dense)                 (None, 100)           10100       dense_58[0][0]                   
____________________________________________________________________________________________________
dense_60 (Dense)                 (None, 100)           10100       dense_59[0][0]                   
___________________________________________________________________________________________

In [141]:
# Configure training: Adam optimizer, mean-squared-error loss, accuracy reported per epoch.
model.compile(
    loss='mean_squared_error',
    optimizer='adam',
    metrics=['accuracy'],
)
# Train, holding out the last 20% of the rows for validation.
model.fit(x=features, y=target, validation_split=0.2)

Train on 712 samples, validate on 179 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x11e525cc0>

In [142]:
# Apply the same feature engineering to the test set and predict.
# `.values` replaces DataFrame.as_matrix(), which was removed in pandas 1.0; the
# float32 cast guarantees a homogeneous numeric array for Keras.
predictions = model.predict(
    engineer(test)[features_list].values.astype('float32')
)

In [143]:
# Binarize the model output in place: scores above 0.5 become 1, all others become 0.
above_threshold = predictions > .5
at_or_below_threshold = predictions <= .5
predictions[above_threshold] = 1
predictions[at_or_below_threshold] = 0

In [144]:
# Assemble the submission: one row per test passenger with the 0/1 prediction.
submission = pd.DataFrame(
    data={
        'PassengerId': test['PassengerId'],
        'Survived': predictions[:, 0].astype(int),
    }
)

# `index=False` keeps the row index out of the CSV. The alternative would be to
# make PassengerId the index before writing:
#submission.set_index('PassengerId', drop=True, inplace=True)
submission.to_csv("submission_keras.csv", index=False)