In [None]:
# Print a greeting to the cell output.
print('Hello Kaggle')


# 1. Intuition

### This part gives you a glimpse of the data, so you'll have a basic idea of what matters for surviving.
### Strictly speaking, this part is called EDA (Exploratory Data Analysis), but I think "Intuition" better describes it.
### **Let's Start !**

In [None]:
from tensorflow import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout

import pandas as pd
import numpy as np
import seaborn as sns
from IPython.display import display

# Apply seaborn's default theme to the plots drawn in this notebook.
sns.set()


In [None]:
# Load the training set: try the local project layout first, then fall back
# to the Kaggle input directory. Only a missing file triggers the fallback,
# so parse errors, typos, or interrupts are no longer silently swallowed.
try:
    Raw = pd.read_csv(r'../Data/train.csv')
except FileNotFoundError:
    Raw = pd.read_csv(r'../input/titanic/train.csv')


Raw.head()


In [None]:
# Print the frame summary (column dtypes and non-null counts).
Raw.info()


## Some Plots

In [None]:
# Distribution of 'Pclass', colored by the 'Survived' label
sns.displot(data=Raw, x='Pclass', hue='Survived')


### People from 'Pclass 1' are more likely to survive.
### People from 'Pclass 3' are more likely to die.
### **'Pclass' matters.**

In [None]:
# Distribution of 'Sex', colored by the 'Survived' label
sns.displot(data=Raw, x='Sex', hue='Survived')


### Females are more likely to survive.
### Males are more likely to die.
### **'Sex' matters.**


In [None]:
# Distribution of 'Age' in 50 bins, colored by the 'Survived' label
sns.displot(data=Raw, x='Age', hue='Survived', bins=50)


### Babies and children are more likely to survive.
### Others are more likely to die.
### **'Age' matters.**

In [None]:
# Distribution of 'SibSp' in 10 bins, colored by the 'Survived' label
sns.displot(data=Raw, x='SibSp', hue='Survived', bins=10)


### People with no siblings or spouse aboard are more likely to die.
### People with 1 sibling or spouse aboard are more likely to survive.
### **'SibSp' matters.**

In [None]:
# Distribution of 'Parch' in 8 bins, colored by the 'Survived' label
sns.displot(
    data=Raw,
    x='Parch',
    hue='Survived',
    bins=8,
)


### People with no parent or child are more likely to die.
### People with one or two parents or children are more likely to survive.
### **'Parch' matters.**

In [None]:
# One point per passenger: fare paid against passenger id, colored by 'Survived'
sns.scatterplot(
    data=Raw,
    x='PassengerId',
    y='Fare',
    hue='Survived',
)


### Those who paid £500+ all survived.
### **'Fare' matters.**
### Bin the 'Fare' attribute into labeled ranges, at the cost of some information.

In [None]:
# Bucket edges for 'Fare'; each label names the upper edge of its bucket and
# the last bucket ('300+') is open-ended.
FareThreshold = [
    -float('inf'),
    5, 10, 15, 20, 25, 40, 60, 100, 150, 300,
    float('inf'),
]
FareLabels = [
    '5', '10', '15', '20', '25', '40', '60', '100', '150', '300',
    '300+',
]

# Bin on a copy so the raw frame keeps the numeric fares
Duplicate = Raw.copy()
FareBucket = pd.cut(Duplicate['Fare'], bins=FareThreshold, labels=FareLabels)
Duplicate['Fare'] = FareBucket
sns.displot(data=Duplicate, x='Fare', hue='Survived')


In [None]:
# Rows with a missing 'Embarked' are left out of this plot only
EmbarkedKnown = Raw.dropna(subset=['Embarked'])
sns.displot(data=EmbarkedKnown, x='Embarked', hue='Survived')


### **'Embarked' matters.**

### Too many 'Cabin' null values.
### **Ignore 'Cabin'**

### Too many 'Ticket' values, and hard to label them.
### **Ignore 'Ticket'**

# 2. Preprocessing

### This part preprocesses the data.

In [None]:
# Encode 'Sex' as 0 (male) / 1 (female).
# Work on a copy: a plain assignment would only alias `Raw`, so the raw frame
# shown and plotted in the cells above would be rewritten by this step.
SexCodes = {'male': 0, 'female': 1}
GenderFactorized = Raw.copy()
# A mapping function (instead of `replace`) gives a numeric column without
# relying on the silent object->int downcasting that pandas deprecates.
# Values outside the mapping pass through unchanged, as `replace` did, which
# also keeps this cell safe to re-run.
GenderFactorized['Sex'] = Raw['Sex'].map(
    lambda Value: SexCodes.get(Value, Value))
GenderFactorized.head()


In [None]:
# Encode 'Embarked' as 0 (S) / 1 (C) / 2 (Q); missing values stay missing.
EmbarkedCodes = {'S': 0, 'C': 1, 'Q': 2}
# NOTE: this deliberately stays the same object as `GenderFactorized`; the
# feature-selection cell below reads 'Embarked' from `GenderFactorized`.
EmbarkedFactorized = GenderFactorized
# A mapping function (instead of `replace`) gives a numeric column without
# relying on the silent object->number downcasting that pandas deprecates.
# Values outside the mapping pass through unchanged, as `replace` did, so
# re-running this cell on already encoded data is harmless.
EmbarkedFactorized['Embarked'] = GenderFactorized['Embarked'].map(
    lambda Port: EmbarkedCodes.get(Port, Port))
EmbarkedFactorized.head()


In [None]:
# Keep the label ('Survived', first column) and the selected input columns.
# `EmbarkedFactorized` is the frame with both 'Sex' and 'Embarked' encoded.
# Take an explicit copy so the fills below write into `Feature` itself.
FeatureColumns = ['Survived', 'Pclass', 'Sex', 'Age',
                  'SibSp', 'Parch', 'Fare', 'Embarked']
Feature = EmbarkedFactorized[FeatureColumns].copy()

# Fill missing 'Fare' / 'Age' with the column median. The result is assigned
# back to the column: an in-place replace on `Feature['Age']` acts on a
# temporary selection and is not guaranteed to reach `Feature`, in which case
# the `dropna()` below would silently discard every row with a missing age.
Feature['Fare'] = Feature['Fare'].replace(np.nan, Feature['Fare'].median())
Feature['Age'] = Feature['Age'].replace(np.nan, Feature['Age'].median())

# Drop the rows that still contain a missing value in any remaining column
Feature = Feature.dropna()

display(Feature.shape)
Feature.head()


# 3. Modeling and Fitting

### This part models and fits data.


#### Do not split the training data

def SplitDataFrame(df, frac, axis=0, reset_index=True) -> list:
    '''Split df into two consecutive parts according to frac.

    Parameters:
        df: the DataFrame to split.
        frac: fraction of the rows (axis=0) or columns (axis=1) that goes
            into the first part; the cut position is truncated to an integer.
        axis: 0 to split by rows, 1 to split by columns.
        reset_index: when True (default) both parts get a fresh 0..n-1 row
            index; when False the original row index is kept.

    Returns:
        A list [part1, part2] holding the two parts of df.

    Raises:
        ValueError: if axis is neither 0 nor 1.
    '''

    if axis == 0:
        # Positional slicing needs an integer cut position
        Threshold = int(df.shape[0]*frac)
        part1 = df.iloc[0: Threshold, :]
        part2 = df.iloc[Threshold:, :]
    elif axis == 1:
        Threshold = int(df.shape[1]*frac)
        part1 = df.iloc[:, 0: Threshold]
        part2 = df.iloc[:, Threshold:]
    else:
        # Fail loudly instead of printing and then hitting an unbound name
        raise ValueError('Key "axis" is "0" or "1"')

    if reset_index:
        part1 = part1.reset_index(drop=True)
        part2 = part2.reset_index(drop=True)
    return [part1, part2]


# First 80% of the rows go to the first part, the rest to the second part
Splited = SplitDataFrame(Feature, 0.8)

# Show the size and the content of each part, first part first
for Part in Splited:
    display(Part.shape)
    display(Part)


# Column 0 is the label; the remaining columns are the model inputs
TrainPart, ValidationPart = Splited
TrainData = np.array(TrainPart.iloc[:, 1:])
TrainLabel = np.array(TrainPart.iloc[:, 0])
ValidationData = np.array(ValidationPart.iloc[:, 1:])
ValidationLabel = np.array(ValidationPart.iloc[:, 0])


display(TrainData.shape, TrainLabel.shape,
        ValidationData.shape, ValidationLabel.shape)


In [None]:
# Use every row of `Feature` for training: column 0 is the label,
# the remaining columns are the model inputs.
TrainLabel = np.array(Feature.iloc[:, 0])
TrainData = np.array(Feature.iloc[:, 1:])
display(TrainData.shape, TrainLabel.shape)


In [None]:
Epochs = 500

# Input dropout followed by two hidden ReLU layers and a sigmoid output unit
Model = Sequential([
    Dropout(0.2),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(1, activation='sigmoid'),
])
Model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Train silently and keep the per-epoch curves for the plots
History = Model.fit(TrainData, TrainLabel, epochs=Epochs, verbose=0)
Acc, Loss = History.history['accuracy'], History.history['loss']

sns.scatterplot(x=range(Epochs), y=Acc)


# NOTE(review): ValidationData / ValidationLabel are only assigned in the
# split snippet under "Do not split the training data"; this call fails with
# a NameError unless that snippet has been run. Confirm whether it should stay.
Model.evaluate(ValidationData, ValidationLabel)


In [None]:
# Training loss per epoch
sns.scatterplot(
    x=range(Epochs),
    y=Loss,
)


# 4. Predict

### This part predicts the answer.


In [None]:
# Load the test set: local project layout first, Kaggle input directory as
# the fallback. Only a missing file triggers the fallback.
try:
    Test = pd.read_csv(r'../Data/test.csv')
except FileNotFoundError:
    Test = pd.read_csv(r'../input/titanic/test.csv')

# Same encodings as the training data. `map` yields numeric columns without
# relying on the silent downcasting of `replace` that pandas deprecates;
# a value outside the mapping becomes NaN instead of staying a string.
Test['Embarked'] = Test['Embarked'].map({'S': 0, 'C': 1, 'Q': 2})
Test['Sex'] = Test['Sex'].map({'male': 0, 'female': 1})

# Fill missing values with the median, the same strategy the training
# preprocessing uses (it was the mean here, which made the two inconsistent).
Test['Fare'] = Test['Fare'].replace(np.nan, Test['Fare'].median())
Test['Age'] = Test['Age'].replace(np.nan, Test['Age'].median())

# Same input columns, in the same order, as the training features
Test = Test[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']]


TestData = np.array(Test)
TestData.shape


In [None]:
# Sigmoid outputs of the model, one per test row
Predict = Model.predict(TestData)
# Outputs strictly above 0.5 are labeled 1, everything else 0
IsAboveThreshold = Predict > 0.5
BinaryResult = IsAboveThreshold.astype(int)
BinaryResult.shape


In [None]:
# Flatten to one dimension; -1 lets NumPy infer the length from the data
# instead of hard-coding the number of test rows.
BinaryResult = BinaryResult.reshape(-1)


In [None]:
# Use the sample submission as a template (local layout first, Kaggle input
# directory as the fallback) and overwrite its 'Survived' column with the
# predictions. Only a missing file triggers the fallback.
try:
    Result = pd.read_csv(r'../Data/gender_submission.csv')
except FileNotFoundError:
    Result = pd.read_csv(r'../input/titanic/gender_submission.csv')
Result['Survived'] = BinaryResult
Result


In [None]:
# Write the submission next to the project folder. A forward slash is used
# because it works on every platform; with a backslash, non-Windows systems
# create a file literally named '..\Submission.csv' in the working directory.
Result.to_csv(r'../Submission.csv', index=False)
# Release the Keras global state held by this session
keras.backend.clear_session()
