In [122]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import rcParams
import re
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import tensorflow as tf
import tensorflow.contrib.keras as keras
from sklearn.model_selection import train_test_split


# Seed NumPy's and TensorFlow's random number generators with a fixed value.
np.random.seed(123)
tf.set_random_seed(123)


# Load the training and test tables from local CSV files.
df_train = pd.read_csv("C:/Users/henry/Downloads/all/train.csv")
df_test = pd.read_csv("C:/Users/henry/Downloads/all/test.csv")

# The title is the single capitalised word that directly precedes a period
# in the Name column (e.g. "Mr" in "..., Mr. ...").  A raw string keeps the
# regex escape `\.` from being read as an (invalid) string escape.
title_pattern = r' ([A-Z][a-z]+)\.'

# str.extract yields NaN for a name with no match, whereas calling
# re.search(...).group(1) per row raises AttributeError on the first
# unmatched name.
df_train['Title'] = df_train.Name.str.extract(title_pattern, expand=False)
df_test['Title'] = df_test.Name.str.extract(title_pattern, expand=False)

# Maps a raw title extracted from a passenger name to a coarser category.
# Titles that are not listed here become NaN when a Series is mapped
# through this dictionary.
Title_Dictionary = {
    # Military, medical and clerical titles
    "Capt": "Officer",
    "Col": "Officer",
    "Major": "Officer",
    "Dr": "Officer",
    "Rev": "Officer",
    # Nobility
    "Jonkheer": "Royalty",
    "Don": "Royalty",
    "Sir": "Royalty",
    # A title pattern that captures one capitalised word yields "Countess",
    # never "the Countess"; both spellings are accepted so the lookup works
    # whichever form the caller extracted.
    "Countess": "Royalty",
    "the Countess": "Royalty",
    "Dona": "Royalty",
    "Lady": "Royalty",
    # Common titles, with French/alternate forms folded in
    "Mme": "Mrs",
    "Ms": "Mrs",
    "Mrs": "Mrs",
    "Mlle": "Miss",
    "Miss": "Miss",
    "Mr": "Mr",
    "Master": "Master",
}
    
# Replace each raw title with its category from Title_Dictionary; titles
# missing from the dictionary become NaN.
df_train['Title'] = df_train.Title.map(Title_Dictionary)
df_test['Title'] = df_test.Title.map(Title_Dictionary)

# Training rows with a recorded positive age, split by outcome.  Neither
# frame is used again in the visible code; they are kept in case later
# cells rely on them.
age_high_zero_died = df_train[(df_train["Age"] > 0) &
                              (df_train["Survived"] == 0)]
age_high_zero_surv = df_train[(df_train["Age"] > 0) &
                              (df_train["Survived"] == 1)]

# Median training age per (Sex, Pclass, Title) group, plus an overall
# median used when a group has no usable median.  Both are computed before
# any value is filled in.
age_group_keys = ["Sex", "Pclass", "Title"]
age_group = df_train.groupby(age_group_keys)["Age"]
age_group_medians = age_group.median()
overall_age_median = df_train["Age"].median()

# Training set: fill missing ages with the group median, then with the
# overall median for rows whose group offers none (e.g. NaN Title).
df_train.loc[df_train.Age.isnull(), 'Age'] = age_group.transform('median')
df_train["Age"] = df_train["Age"].fillna(overall_age_median)

# Test set: fill missing ages from the *training* medians so both tables
# are imputed the same way.  Previously test ages were left as NaN, which
# later produced rows with no age category at all.
test_group_median = df_test[age_group_keys].join(
    age_group_medians.rename("AgeGroupMedian"), on=age_group_keys
)["AgeGroupMedian"]
df_test["Age"] = (
    df_test["Age"].fillna(test_group_median).fillna(overall_age_median)
)


# Age bin edges and the label of each bin (pd.cut bins are right-closed by
# default, so the first bin is (0, 5], the last (60, 120]).
interval = (0, 5, 12, 18, 25, 35, 60, 120)
cats = ['babies', 'Children', 'Teen', 'Student', 'Young', 'Adult', 'Senior']

# Add the categorical age column to both tables using the same bins.
for age_frame in (df_train, df_test):
    age_frame["Age_cat"] = pd.cut(age_frame.Age, bins=interval, labels=cats)

# Missing fares are replaced by the sentinel -0.5 so that they land in the
# first bin below.
df_train.Fare = df_train.Fare.fillna(-0.5)
df_test.Fare = df_test.Fare.fillna(-0.5)

# One set of bin edges shared by both tables.  The training table used to be
# cut with an upper edge of 600 and the test table with 1000, so the same
# fare could be categorised in one table and NaN in the other.
# NOTE(review): pd.cut bins are right-closed by default, so a fare of exactly
# 0 also falls in the first bin ('NoInf') -- confirm this is intended.
quant = (-1, 0, 8, 15, 31, 1000)

# One label per bin, in bin order.
label_quants = ['NoInf', 'quart_1', 'quart_2', 'quart_3', 'quart_4']

# Store the fare category in a new column of each table.
df_train["Fare_cat"] = pd.cut(df_train.Fare, quant, labels=label_quants)
df_test["Fare_cat"] = pd.cut(df_test.Fare, quant, labels=label_quants)

# Columns removed from both tables at this point.
raw_columns = ["Fare", "Ticket", "Age", "Cabin", "Name"]
for table in (df_train, df_test):
    table.drop(raw_columns, axis=1, inplace=True)

# Missing embarkation values in the training table are set to 'S'.
df_train.loc[df_train["Embarked"].isnull(), "Embarked"] = 'S'

# FSize = Parch + SibSp + 1; once it is stored, the two source columns are
# dropped from each table.
for table in (df_train, df_test):
    table["FSize"] = table["Parch"] + table["SibSp"] + 1
    table.drop(["SibSp", "Parch"], axis=1, inplace=True)

# One-hot encode the categorical columns of both tables with identical
# settings; drop_first=True omits the first level of every column.
categorical_columns = ["Sex", "Embarked", "Age_cat", "Fare_cat", "Title"]
dummy_prefixes = ["Sex", "Emb", "Age", "Fare", "Prefix"]

df_train = pd.get_dummies(df_train, columns=categorical_columns,
                          prefix=dummy_prefixes, drop_first=True)

df_test = pd.get_dummies(df_test, columns=categorical_columns,
                         prefix=dummy_prefixes, drop_first=True)

# Features are every column except the target and the passenger identifier.
X_train = df_train.drop(["Survived", "PassengerId"], axis=1)
y_train = df_train["Survived"]

TestID = df_test["PassengerId"]
# The two tables are encoded independently, so a level present in only one
# of them would give the test matrix different columns.  Reindexing to the
# training columns fixes the set and order: levels unseen in the test table
# become all-zero columns, and columns unknown to training are discarded.
X_test = df_test.drop(["PassengerId"], axis=1).reindex(
    columns=X_train.columns, fill_value=0)

#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y)



# Feed-forward binary classifier: three tanh hidden layers (60, 90 and 40
# units), each followed by 50% dropout, and a single sigmoid output unit.
model = keras.models.Sequential()

# Take the input width from the feature matrix instead of hard-coding 20, so
# the network still builds if the number of encoded columns changes.
n_features = X_train.shape[1]
hidden_units = (60, 90, 40)

for layer_position, layer_width in enumerate(hidden_units):
    dense_options = dict(
        units=layer_width,
        kernel_initializer='glorot_uniform',
        bias_initializer='zeros',
        activation='tanh')
    # Only the first layer needs an explicit input size; later layers take
    # theirs from the preceding layer's output.
    if layer_position == 0:
        dense_options['input_dim'] = n_features
    model.add(keras.layers.Dense(**dense_options))
    model.add(keras.layers.Dropout(0.50))

model.add(keras.layers.Dense(1,
                kernel_initializer='glorot_uniform',
                activation='sigmoid'))
model.summary()


# Plain SGD with momentum, minimising binary cross-entropy and reporting
# accuracy.
sgd_optimizer = keras.optimizers.SGD(lr = 0.001, momentum = .99)

model.compile(loss='binary_crossentropy', optimizer=sgd_optimizer, metrics=['accuracy'])


# Keep the object returned by fit() so the per-epoch metrics remain available
# after training instead of being discarded.
history = model.fit(X_train, y_train,
                    epochs=30, batch_size=10, verbose=2)
#scores = model.evaluate(X_train, y_train, batch_size=64)
#print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))

# Predicted class labels for the test features, flattened to one dimension.
# reshape(-1) adapts to however many rows X_test has, where the previous
# in-place resize(418,) assumed exactly 418 rows.
y_pred = model.predict_classes(X_test, verbose=0)
y_pred = y_pred.reshape(-1)

# Disabled export step: builds a PassengerId/Survived table from TestID and
# y_pred and writes it to a CSV file.
#dfOut = pd.DataFrame({"PassengerId" : TestID, "Survived" : y_pred})
#dfOut.to_csv("C:/Users/henry/Downloads/all/submission14.csv", index=False)
#print(dfOut)

# The triple-quoted string below is never executed as code: it holds a
# disabled snippet that plots training vs. validation accuracy per epoch.
# The snippet reads `history.history['acc']` and `history.history['val_acc']`
# (`history` is presumably the value returned by model.fit -- confirm).
# NOTE(review): the model.fit call above passes no validation data, so
# 'val_acc' may not be recorded -- confirm before re-enabling the plot.
"""

# list all data in history
print(history.history.keys())

plt.plot(history.history['acc'])
plt.plot(history.history['val_acc'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['train', 'test'], loc='upper left')
plt.show()"""

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_418 (Dense)            (None, 60)                1260      
_________________________________________________________________
dropout_267 (Dropout)        (None, 60)                0         
_________________________________________________________________
dense_419 (Dense)            (None, 90)                5490      
_________________________________________________________________
dropout_268 (Dropout)        (None, 90)                0         
_________________________________________________________________
dense_420 (Dense)            (None, 40)                3640      
_________________________________________________________________
dropout_269 (Dropout)        (None, 40)                0         
_________________________________________________________________
dense_421 (Dense)            (None, 1)                 41        
Total para

"\n\n# list all data in history\nprint(history.history.keys())\n\nplt.plot(history.history['acc'])\nplt.plot(history.history['val_acc'])\nplt.title('Model Accuracy')\nplt.ylabel('Accuracy')\nplt.xlabel('Epoch')\nplt.legend(['train', 'test'], loc='upper left')\nplt.show()"