In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the input directory tree and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in names):
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#Import library
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras import layers

# EDA : Exploratory Data Analysis

In [None]:
# Load the training CSV and preview its first five rows.
TRAIN_CSV = '/kaggle/input/titanic/train.csv'
df = pd.read_csv(TRAIN_CSV)
df.head(5)

In [None]:
# Load the test CSV and preview its first five rows.
TEST_CSV = '/kaggle/input/titanic/test.csv'
df_test = pd.read_csv(TEST_CSV)
df_test.head(5)

In [None]:
# Discard the columns that are not used as model inputs.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell safe to re-run: a second execution no longer raises KeyError just
# because the columns were already removed by the first one.
UNUSED_COLS = ['Name', 'Ticket', 'Cabin', 'Embarked']
df = df.drop(columns=UNUSED_COLS, errors='ignore')
df_test = df_test.drop(columns=UNUSED_COLS, errors='ignore')

# FEATURE ENGINEERING

In [None]:
# Fill missing ages with the mean age of the TRAINING frame in both frames.
# The fill value is a preprocessing parameter: estimating it separately on the
# test frame (as before) gave train and test rows different imputed values.
train_age_mean = df['Age'].mean()
df['Age'] = df['Age'].fillna(train_age_mean)
df_test['Age'] = df_test['Age'].fillna(train_age_mean)
df.info()

In [None]:
# Print a summary of the training frame (column dtypes and non-null counts).
df.info()

In [None]:
# Adding new feature: Family_size = SibSp + Parch + 1
df['Family_size'] = df['SibSp'] + df['Parch'] + 1
df_test['Family_size'] = df_test['SibSp'] + df_test['Parch'] + 1
numerical_cols = ['Age', 'Pclass', 'Family_size']

# Standardizing numerical data.
# The scaler is fitted on the training frame only; the test frame is then
# transformed with those same fitted statistics. Calling fit_transform on the
# test frame (as before) re-fitted the scaler, so train and test features
# ended up on different scales.
sc_X = StandardScaler()
sc_X_train = pd.DataFrame(
    sc_X.fit_transform(df[numerical_cols]),
    columns=numerical_cols,
    index=df.index,  # keep row labels so a later column-wise concat with df aligns
)
sc_X_test = pd.DataFrame(
    sc_X.transform(df_test[numerical_cols]),
    columns=numerical_cols,
    index=df_test.index,
)


In [None]:
# Encode Sex numerically (male -> -1, female -> 1).
# Only the Sex column is touched instead of running a frame-wide in-place
# replace, and pd.to_numeric makes the resulting dtype explicitly numeric.
# NOTE: the former `df.drop(['SibSp', 'Parch'], axis=1)` calls were removed:
# their return value was discarded, so they never changed either frame, and
# neither column is used in the feature matrices built below.
SEX_CODES = {'male': -1, 'female': 1}
df['Sex'] = pd.to_numeric(df['Sex'].replace(SEX_CODES))
df_test['Sex'] = pd.to_numeric(df_test['Sex'].replace(SEX_CODES))

# Training features (encoded Sex + standardized numeric columns) and target.
X = pd.concat([df['Sex'], sc_X_train], axis=1)
y = df['Survived']

# Test features, built with the same column layout as X.
X_test = pd.concat([df_test['Sex'], sc_X_test], axis=1)
y_pred = []  # placeholder; reassigned when predictions are made


# Hold out 20% of the labelled rows for validation (fixed seed for a repeatable split).
X_train, X_valid, y_train, y_valid = train_test_split(X, y, test_size=0.2, random_state=420)

# Building a deep learning model with 3 Dense layers

In [None]:
# Binary classifier: two hidden ReLU blocks (256 and 128 units), each followed
# by Dropout(0.3) and BatchNormalization to limit overfitting, then a single
# sigmoid output unit.
model = keras.Sequential()
for hidden_units in (256, 128):
    model.add(layers.Dense(hidden_units, activation='relu'))
    model.add(layers.Dropout(0.3))
    model.add(layers.BatchNormalization())
model.add(layers.Dense(1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

# Stop when the monitored quantity (Keras default) has not improved by at
# least 0.01 for 25 epochs, and restore the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    min_delta=0.01,
    patience=25,
    restore_best_weights=True,
)

# Fit on the training split, validating against the hold-out split.
fit_options = dict(
    validation_data=(X_valid, y_valid),
    batch_size=32,
    epochs=100,
    callbacks=[early_stopping],
    verbose=0,  # silence per-epoch logging; there are up to 100 epochs
)
history = model.fit(X_train, y_train, **fit_options)

In [None]:
# Model evaluation: learning curves plus the best validation scores.
history_df = pd.DataFrame(history.history)

# Plot loss and accuracy curves over every recorded epoch.
history_df[['loss', 'val_loss']].plot()
history_df[['binary_accuracy', 'val_binary_accuracy']].plot()

# Lowest validation loss and highest validation accuracy across all epochs.
best_val_loss = history_df['val_loss'].min()
best_val_accuracy = history_df['val_binary_accuracy'].max()
summary_template = "Best Validation Loss: {:0.4f}\nBest Validation Accuracy: {:0.4f}"
print(summary_template.format(best_val_loss, best_val_accuracy))

In [None]:
# Predict on the test set and write the submission file.
# The network ends in a single sigmoid unit, so predict() returns one
# probability per row in an array of shape (n, 1). Taking argmax(axis=1) over
# that single column is always 0, which labelled every passenger as
# "not survived". Threshold the probability at 0.5 instead (the same cut-off
# Keras' binary_accuracy metric uses by default).
survival_prob = model.predict(X_test).ravel()
y_pred = (survival_prob > 0.5).astype(int)
output = pd.DataFrame({'PassengerId': df_test.PassengerId, 'Survived': y_pred})
output.to_csv('submission.csv', index=False)