In [None]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold

# Random seed constant (not consumed by any code visible in this section).
seed = 0

# Load the raw splits.
train = pd.read_csv("./dataset/train.csv")  # Age, Cabin, Embarked have missing values
test = pd.read_csv("./dataset/test.csv")  # Age, Fare, Cabin have missing values

# Stack train and test so feature engineering is applied to both at once.
# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and yields the same row order with a fresh 0..n-1 index.
titanic = pd.concat([train, test], ignore_index=True)

# Kept aside for the submission file written at the end of the script.
passengerId = test.PassengerId

# Row offsets used later to split the combined frame back apart.
# Both evaluate to the number of training rows.
train_idx = len(train)
test_idx = len(titanic) - len(test)

# Derive a coarse Title feature from the honorific embedded in each Name.

# Lookup table collapsing the raw honorifics into five categories.
fixed_titles = {
    **dict.fromkeys(["Capt", "Col", "Major", "Dr", "Rev"], "Special"),
    **dict.fromkeys(
        ["Jonkheer", "Don", "Sir", "the Countess", "Dona", "Lady"], "Royal"
    ),
    **dict.fromkeys(["Mme", "Ms", "Mrs"], "Mrs"),
    **dict.fromkeys(["Mlle", "Miss"], "Miss"),
    **dict.fromkeys(["Mr", "Master"], "Mr"),
}


def _raw_title(full_name):
    """Return the segment after the first comma, cut at its first period."""
    after_comma = full_name.split(',')[1]
    before_period = after_comma.split('.')[0]
    return before_period.strip()


# Honorifics missing from the table map to NaN.
titanic['Title'] = titanic.Name.apply(_raw_title).map(fixed_titles)

# Fill missing Age values with the median Age of the passenger's (Sex, Title) group.
age_analysis = titanic.groupby(["Sex", "Title"])
# transform() returns a Series aligned to titanic's own index, so the fill is
# row-for-row regardless of pandas version; rows that already have an Age are
# left untouched even when their group key is missing.
titanic.Age = titanic.Age.fillna(age_analysis.Age.transform("median"))

# Fill missing Fare with the overall median, then log-transform it.
titanic.Fare = titanic.Fare.fillna(titanic.Fare.median())
# Non-positive fares are swapped for 1.0 first so that they come out as
# log(1) == 0 instead of -inf / NaN.
fare = titanic.Fare
titanic.Fare = np.log(fare.where(fare > 0, 1.0))

# One-hot encode the categorical features.
# dtype=float keeps the indicator columns numeric: newer pandas defaults to
# bool, which turns the mixed float/bool `.values` matrix below into an
# object array that Keras cannot convert to a tensor.
sex_dummies = pd.get_dummies(titanic.Sex, prefix="Sex", dtype=float)
pclass_dummies = pd.get_dummies(titanic.Pclass, prefix="Pclass", dtype=float)
title_dummies = pd.get_dummies(titanic.Title, prefix="Title", dtype=float)

# Attach the indicator columns and drop the raw columns that are not fed to
# the model (identifiers, free text, and the categoricals just encoded).
titanic_dummies = pd.concat(
    [titanic, sex_dummies, pclass_dummies, title_dummies], axis=1
).drop(
    columns=[
        'PassengerId', 'SibSp', 'Parch', 'Sex', 'Title',
        'Pclass', 'Cabin', 'Embarked', 'Name', 'Ticket',
    ]
)

# Split the combined frame back into the original train / test rows.
train = titanic_dummies[:train_idx]
test = titanic_dummies[test_idx:]

# Feature matrix and label vector for training.
X = train.drop('Survived', axis=1).values
Y = train.Survived.values

# Feature matrix for the rows to be predicted.
X_test = test.drop('Survived', axis=1).values

def _build_model(input_dim):
    """Return a freshly compiled binary classifier taking `input_dim` features.

    Three ReLU hidden layers (20, 16, 16 units) feed a single sigmoid output;
    the model is compiled with binary cross-entropy, Adam and an accuracy metric.
    """
    net = Sequential()
    net.add(Dense(20, input_dim=input_dim, activation="relu"))
    net.add(Dense(16, activation='relu'))
    net.add(Dense(16, activation='relu'))
    net.add(Dense(1, kernel_initializer="uniform", activation='sigmoid'))
    net.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return net


# Stratified 10-fold cross validation.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=5)
AC = []  # per-fold validation accuracy, formatted to four decimals
# Distinct loop names so the fold index arrays do not overwrite the `train`
# DataFrame defined earlier in the script.
for fold_train, fold_val in skf.split(X, Y):
    # A new model per fold; the input width follows the feature matrix
    # instead of a hard-coded column count.
    model = _build_model(X.shape[1])
    model.fit(X[fold_train], Y[fold_train], epochs=110, batch_size=5)
    # evaluate() returns [loss, accuracy] given the metrics compiled above.
    k_accuracy = '%.4f' % (model.evaluate(X[fold_val], Y[fold_val])[1])
    AC.append(k_accuracy)
# NOTE: after the loop `model` is the network fitted on the last fold only.
model.summary()
print('\nK-fold_cross_validation_AC: {}'.format(AC))


# Predict survival probabilities for the test rows.
prediction = model.predict(X_test)

# Threshold the probabilities at 0.5 to obtain 0/1 labels; ravel() flattens
# the per-row output column into one label per passenger.
last_result = (prediction.ravel() >= 0.5).astype(int).tolist()

# Write the submission file: one row per test passenger.
result = pd.DataFrame({'PassengerId': passengerId, 'Survived': last_result})
result.to_csv('./dataset/Final_result.csv', index=False)
