In [1]:
import numpy as np
import pandas as pd
from pandas.plotting import scatter_matrix
import keras
from keras.models import Sequential
from keras.layers import Dense,MaxPooling2D,Flatten,Dropout
from keras.layers.convolutional import Conv2D
from keras import backend

In [2]:
# Network dimensions: input_neurons is used as input_dim of the first Dense layer,
# output_neurons as the width of the final sigmoid layer.
# NOTE(review): 10 presumably equals the number of columns left in xtrain_df — confirm.
input_neurons, output_neurons = 10, 1

In [3]:
# Seed NumPy's global random generator.
# NOTE(review): this seeds NumPy only; confirm whether the Keras backend needs its own seed
# for the training run to be repeatable.
np.random.seed(seed=7)

In [4]:
# Read the training and test tables from CSV files in the working directory
# (train.csv first, then test.csv).
train_df, test_df = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

In [5]:
def simplify_ages(df):
    """Replace ``Age`` with the integer code of its age bracket.

    Missing ages are filled with the mean age of ``df`` before binning. The
    brackets are the right-closed intervals between consecutive edges below,
    labelled 'Unknown' .. 'Senior' and encoded 0 .. 7; an age that falls
    outside every interval is encoded as -1.

    The frame is modified in place and also returned.
    """
    age_edges = [-1, 0, 5, 12, 18, 25, 35, 60, 120]
    age_labels = ['Unknown', 'Baby', 'Child', 'Teenager',
                  'Student', 'Young Adult', 'Adult', 'Senior']
    filled_age = df['Age'].fillna(df['Age'].mean())
    df['Age'] = pd.cut(filled_age, bins=age_edges, labels=age_labels).cat.codes
    return df

def simplify_cabins(df):
    """Replace ``Cabin`` with an integer code for the cabin's first letter.

    Missing cabins are filled with 'N' before the first character is taken.
    Codes follow a fixed letter order (A, B, C, D, E, F, G, N, T -> 0 .. 8), so a
    given letter receives the same code in every frame, independent of which
    letters that frame happens to contain. A first character outside this list
    is encoded as -1.

    The frame is modified in place and also returned.
    """
    # Fixed category order: letting pandas infer categories per frame would make
    # the codes depend on the data present (e.g. a frame without 'A' would shift
    # every other code down by one).
    deck_letters = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'N', 'T']
    first_letter = df['Cabin'].fillna('N').str[0]
    df['Cabin'] = pd.Categorical(first_letter, categories=deck_letters).codes
    return df

def simplify_fares(df):
    """Replace ``Fare`` with the integer code of its fare bracket.

    Missing fares are filled with the mean fare of ``df`` before binning. The
    brackets are the right-closed intervals between consecutive edges below,
    labelled 'Unknown' .. 'Fourth Quartile' and encoded 0 .. 4; a fare outside
    every interval is encoded as -1.

    The frame is modified in place and also returned.
    """
    fare_edges = [-1, 0, 8, 15, 31, 1000]
    fare_labels = ['Unknown', 'First Quartile', 'Second Quartile',
                   'Third Quartile', 'Fourth Quartile']
    filled_fare = df['Fare'].fillna(df['Fare'].mean())
    df['Fare'] = pd.cut(filled_fare, bins=fare_edges, labels=fare_labels).cat.codes
    return df

def simplify_sex(df):
    """Replace ``Sex`` with an integer code: 'female' -> 0, 'male' -> 1.

    The category order is fixed, so the mapping does not depend on which values
    occur in ``df`` (a frame containing only 'male' still encodes it as 1).
    Missing or unrecognised values are encoded as -1.

    The frame is modified in place and also returned.
    """
    df['Sex'] = pd.Categorical(df['Sex'], categories=['female', 'male']).codes
    return df

def simplify_embarked(df):
    """Replace ``Embarked`` with an integer code: 'C' -> 1, 'Q' -> 2, 'S' -> 3.

    Missing values are first filled with the most frequent value in ``df``. The
    category order is fixed, so the mapping does not depend on which ports occur
    in ``df``. Values that are still missing (the column has no non-missing
    value at all) or that are not one of the three ports are encoded as 0.

    The frame is modified in place and also returned.
    """
    ports = ['C', 'Q', 'S']
    most_common = df['Embarked'].mode()
    # mode() is empty when the column has no non-missing value; indexing [0]
    # would raise in that case, so the fill is skipped instead.
    if not most_common.empty:
        df['Embarked'] = df['Embarked'].fillna(most_common[0])
    df['Embarked'] = pd.Categorical(df['Embarked'], categories=ports).codes + 1
    return df

def normalize_titles(df):
    """Collapse the values of ``Title`` into a small set of canonical titles.

    Exact matches of the keys below are replaced by their canonical title;
    every other value (including 'Mrs', 'Miss', 'Master' and 'Mr') is left
    unchanged.

    The frame is modified in place and also returned.
    """
    canonical_title = {
        'Mme': 'Mrs',
        'Ms': 'Mrs',
        'Mlle': 'Miss',
        'Capt': 'Officer',
        'Major': 'Officer',
        'Dr': 'Officer',
        'Col': 'Officer',
        'Rev': 'Officer',
        'Jonkheer': 'Royalty',
        'Don': 'Royalty',
        'Dona': 'Royalty',
        'Countess': 'Royalty',
        'Lady': 'Royalty',
        'Sir': 'Royalty',
    }
    df['Title'] = df['Title'].replace(canonical_title)
    return df

def simplify_titles(df):
    """Add a ``Title`` column holding an integer code for each passenger's title.

    The title is the run of letters that follows a space and precedes a '.' in
    ``Name``. It is canonicalised by ``normalize_titles`` and then encoded with
    a fixed order: 'Master' -> 1, 'Miss' -> 2, 'Mr' -> 3, 'Mrs' -> 4,
    'Officer' -> 5, 'Royalty' -> 6. Names without a match, and titles outside
    this list, are encoded as 0.

    The frame is modified in place and also returned.
    """
    # Fixed order so a title gets the same code in every frame, independent of
    # which titles that frame contains.
    title_order = ['Master', 'Miss', 'Mr', 'Mrs', 'Officer', 'Royalty']
    # Raw string: '\.' in a normal string literal is an invalid escape sequence.
    df['Title'] = df['Name'].str.extract(r' ([A-Za-z]+)\.', expand=False)
    df = normalize_titles(df)
    df['Title'] = pd.Categorical(df['Title'], categories=title_order).codes + 1
    return df

def simplify_family_size_and_is_alone(df):
    """Add ``FamilySize`` and ``IsAlone`` columns derived from ``SibSp`` and ``Parch``.

    ``FamilySize`` is ``SibSp + Parch + 1``. ``IsAlone`` is 0 where
    ``FamilySize`` is greater than 1 and 1 everywhere else.

    The frame is modified in place and also returned.
    """
    family_size = df['SibSp'] + df['Parch'] + 1
    has_company = family_size > 1
    df['FamilySize'] = family_size
    df['IsAlone'] = (~has_company).astype('int64')
    return df

def simplify_is_child(df):
    """Add an ``IsChild`` column: 1 where ``Age`` is below 18, otherwise 0.

    Rows with a missing ``Age`` get 0, because the comparison is false for them.

    The frame is modified in place and also returned.
    """
    under_18 = df['Age'] < 18
    df['IsChild'] = under_18.astype('int64')
    return df

def transform_features(df):
    """Apply every feature-engineering step to ``df`` in order and return the result.

    Order matters for ``IsChild``: simplify_is_child compares the raw ``Age``
    with 18, so it has to run before simplify_ages overwrites ``Age`` with
    bracket codes.
    """
    steps = (
        simplify_titles,
        simplify_is_child,
        simplify_ages,
        simplify_cabins,
        simplify_family_size_and_is_alone,
        simplify_fares,
        simplify_sex,
        simplify_embarked,
    )
    for step in steps:
        df = step(df)
    return df

In [6]:
# Run the feature pipeline on the training frame, then on the test frame.
# Each frame is transformed independently of the other.
train_df, test_df = map(transform_features, (train_df, test_df))

In [7]:
# Train Data Frame
xtrain_df = train_df.drop(['PassengerId','Ticket','Survived','Name','Parch','SibSp'], axis=1)
ytrain_df = train_df['Survived']
# Test Data Frame 
xtest_df = test_df.drop(['PassengerId','Ticket','Name','Parch','SibSp'], axis=1)

In [8]:
# Fully connected binary classifier: three ReLU hidden layers (32, 16, 5 units),
# each followed by dropout, and a sigmoid output layer.
model = Sequential([
    Dense(32, input_dim=input_neurons, activation='relu'),
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dropout(0.2),
    Dense(5, activation='relu'),
    Dropout(0.1),
    Dense(output_neurons, activation='sigmoid'),
])

In [9]:
# Binary cross-entropy loss with the Adam optimizer; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

In [10]:
# Train for 50 epochs on the training inputs/target, one sample per batch, with no progress output.
model.fit(x=xtrain_df, y=ytrain_df, batch_size=1, epochs=50, verbose=0)


<keras.callbacks.History at 0x7f34d8fda310>

In [11]:
# Evaluate on the same frames that were passed to model.fit, so the printed
# figure is the accuracy on the training data.
scores = model.evaluate(xtrain_df, ytrain_df)
metric_name = model.metrics_names[1]
metric_percent = scores[1] * 100
print("\n%s: %.2f%%" % (metric_name, metric_percent))


accuracy: 82.27%


In [13]:
# Predict on the test inputs; the flattened sigmoid outputs are written as-is
# to titanic_datagen.csv.
predictions = model.predict(xtest_df, verbose=0)
predictions = predictions.flatten()
results = pd.Series(predictions, name="Survived")
# Take the ids from the test frame itself rather than a hard-coded
# range(892, 1310), so the ids always match the rows that were predicted.
# test_df still has PassengerId: it was dropped only from xtest_df.
# The index is reset so the ids line up positionally with `results`.
passenger_ids = test_df["PassengerId"].reset_index(drop=True)
submission = pd.concat([passenger_ids, results], axis=1)
submission.to_csv("titanic_datagen.csv", index=False)
# Reset the Keras backend session (original note: clears a TensorFlow session error).
backend.clear_session()

In [17]:
# Independent copy, so thresholding below leaves `submission` (the raw outputs) untouched.
submission2 = submission.copy(deep=True)

In [20]:
# Convert model outputs to 0/1 labels: strictly greater than 0.5 becomes 1, everything else 0.
submission2['Survived'] = submission2['Survived'].gt(0.5).astype(int)

In [21]:
# Preview the first five rows of the thresholded submission.
submission2.head(n=5)

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0


In [14]:
# Preview the first five rows of the unthresholded submission (raw model outputs).
submission.head(n=5)

Unnamed: 0,PassengerId,Survived
0,892,0.092666
1,893,0.20561
2,894,0.124794
3,895,0.112868
4,896,0.256776


In [23]:
# Write the thresholded submission to titanic.csv without the index column.
submission2.to_csv(path_or_buf="titanic.csv", index=False)