In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
def normalize_data(df, cols=None):
  """Return a copy of ``df`` with the selected columns standardized (z-score).

  Each selected column is replaced by ``(x - mean) / std`` using that column's
  own mean and sample standard deviation. All other columns are passed through
  unchanged.

  Args:
    df: DataFrame containing every column named in ``cols``.
    cols: Optional iterable of column names to standardize. Defaults to the
      seven feature columns used by this notebook.

  Returns:
    A new DataFrame; the caller's ``df`` is not modified.
  """
  if cols is None:
    cols = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']

  # Work on a copy so the caller's frame (and any view it came from) is untouched.
  out = df.copy()
  for col in cols:
    centered = out[col] - out[col].mean()
    std = out[col].std()
    # std is 0 for a constant column and NaN for a single-row frame; dividing
    # would turn the whole column into NaN/inf, so keep the centered values.
    out[col] = centered / std if std > 0 else centered
  return out

In [None]:
def process_data(df):
  """Clean, encode and standardize a raw passenger table.

  Steps, in order:
    1. Drop the 'PassengerId', 'Name', 'Ticket' and 'Cabin' columns.
    2. Drop every row that still contains a missing value.
    3. Replace each 'Sex' / 'Embarked' value with the number of rows sharing
       that value (frequency encoding computed on the cleaned rows).
    4. Standardize the feature columns via ``normalize_data``.

  Returns the processed DataFrame.
  """
  unused_cols = ['PassengerId', 'Name', 'Ticket', 'Cabin']
  cleaned = df.drop(columns=unused_cols).dropna()

  # Frequency-encode the two categorical columns.
  for col in ('Sex', 'Embarked'):
    cleaned[col] = cleaned[col].map(cleaned[col].value_counts())

  return normalize_data(cleaned)

In [None]:
# Load the raw training table and preview its first rows.
train_csv_path = 'data/train.csv'
df = pd.read_csv(train_csv_path)
df.head()

In [None]:
# Correlation heatmap of the raw numeric columns.
# Correlate numeric columns only: pandas >= 2.0 no longer drops non-numeric
# columns silently in DataFrame.corr() and raises on them instead.
numeric_corr = df.select_dtypes(include='number').corr()
sns.heatmap(numeric_corr, cmap='YlGnBu')
plt.show()

In [None]:
# Replace the raw frame with its cleaned / encoded / standardized version.
# NOTE: this overwrites `df`, so the cell is not idempotent — process_data drops
# columns that no longer exist after the first run. Re-run the load cell first.
df = process_data(df)
df.head()

In [None]:
# Summary statistics for `df`; after processing, the standardized feature
# columns are expected to show mean ~0 and std ~1.
df.describe()

In [None]:
# Shuffle once with a fixed seed so the split is reproducible across kernel
# restarts, then use the first 75% of rows for training and the rest for testing.
# Positional slicing replaces np.split, which is not a supported way to split a
# DataFrame on recent pandas versions.
shuffled = df.sample(frac=1, random_state=42)
split_at = int(.75 * len(shuffled))
train, test = shuffled.iloc[:split_at], shuffled.iloc[split_at:]
print(len(train), len(test))

In [None]:
# Preview the first rows of the training split.
train.head()

In [None]:
# Preview the first rows of the held-out test split.
test.head()

## Model

In [None]:
# Small fully connected binary classifier built layer by layer:
# 7 inputs -> Dense(64, relu) -> BatchNorm -> Dense(64, relu) -> Dense(1, sigmoid).
# The input width of 7 presumably matches the seven columns standardized by
# normalize_data — confirm against the processed frame.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Dense(64, activation='relu', input_shape=(7,)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(1, activation='sigmoid'))
model.summary()

In [None]:
# Binary classification setup: sigmoid output + binary cross-entropy, tracking accuracy.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Fit on the training split only. Fitting on the full `df` would also train on
# the rows held out in `test`, so the later evaluation on `test` would not
# measure generalization. Keras carves the validation set out of the training
# rows via `validation_split`.
history = model.fit(
  train.drop('Survived', axis=1),
  train['Survived'],
  epochs=100,
  validation_split=0.25,
)

In [None]:
# Plot both loss curves with labels that match the series actually drawn
# (the validation curve was previously labelled as the training loss).
plt.plot(history.epoch, history.history["loss"], 'b', label='Training loss')
plt.plot(history.epoch, history.history["val_loss"], 'g', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

In [None]:
# Score the trained model on the held-out split (features vs. 'Survived' labels).
test_features = test.drop(columns='Survived')
test_labels = test['Survived']
model.evaluate(test_features, test_labels)
