In [None]:
# Import dependencies
import numpy as np
import tensorflow as tf
import pandas as pd  # For playing with CSV files
import helper_functions.utils as utils  # Write any function here
from My_model import *  # To import the model class
from matplotlib import pyplot as plt  # To plot graphs
# Used to split the dataset into train/dev/test sets
from sklearn import model_selection as skMS

In [None]:
# Read the raw train and test CSV files into DataFrames
train_df, test_df = map(pd.read_csv, ("dataset/train.csv", "dataset/test.csv"))

In [None]:
# Encode the categorical columns as integer codes and fill missing values.
#
# The encoded column is assigned back to the frame instead of calling
# `df[col].replace(..., inplace=True)`: the in-place form operates on a
# temporary Series, which pandas warns about and which no longer updates
# `df` once Copy-on-Write is enabled.
CATEGORY_CODES = {
    'Embarked': {'S': 0, 'C': 1, 'Q': 2},
    'Sex': {'male': 0, 'female': 1},
}

# Hard-coded fill values used for missing entries.
# NOTE(review): presumably precomputed column statistics (mean Age / mean
# Fare / most common Embarked code) -- confirm against the training data.
FILL_VALUES = {'Age': 26.3, 'Fare': 32.2, 'Embarked': 0}

for df in [train_df, test_df]:

    for column, codes in CATEGORY_CODES.items():
        # Values that are not keys of `codes` (including already-encoded
        # numbers and NaN) are left untouched by replace, so re-running this
        # cell is harmless. to_numeric guarantees a numeric dtype even when
        # replace leaves the column as object; it raises on any leftover
        # unmapped string rather than passing it on to the model.
        df[column] = pd.to_numeric(df[column].replace(codes))

    # Fill NaN only after encoding so 'Embarked' gaps receive a numeric code
    for column, fill_value in FILL_VALUES.items():
        df[column] = df[column].fillna(fill_value)

In [None]:
# Look at survival by age, then bucket Age into three ordinal codes
train_df.get(['Age', 'Survived']).plot.hist(by='Survived', bins=6)
train_df['AgeBand'] = pd.cut(train_df['Age'], 3)
age_band_survival = (
    train_df[['AgeBand', 'Survived']]
    .groupby(['AgeBand'], as_index=False)
    .mean()
    .sort_values(by='AgeBand', ascending=True)
)
print(age_band_survival)

# Right-inclusive bins: 0 -> Age <= 27, 1 -> 27 < Age <= 60, 2 -> Age > 60
AGE_EDGES = [-np.inf, 27, 60, np.inf]
for df in [train_df, test_df]:
    # labels=False yields the bin index; cast to float to keep the column dtype
    df['Age'] = pd.cut(df['Age'], bins=AGE_EDGES, labels=False).astype(float)

print(train_df.head())



In [None]:
# Drop the exploratory AgeBand column.
# errors='ignore' keeps this cell re-runnable: without it, a second execution
# raises KeyError because the column has already been removed.
train_df = train_df.drop(columns=['AgeBand'], errors='ignore')

In [None]:
# Look at survival by fare, then bucket Fare into three ordinal codes
train_df.get(['Fare', 'Survived']).plot.hist(by='Survived', bins=3)
train_df['FareBand'] = pd.cut(train_df['Fare'], 3)
fare_band_survival = (
    train_df[['FareBand', 'Survived']]
    .groupby(['FareBand'], as_index=False)
    .mean()
    .sort_values(by='FareBand', ascending=True)
)
print(fare_band_survival)

# Right-inclusive bins: 0 -> Fare <= 170, 1 -> 170 < Fare <= 340, 2 -> Fare > 340
FARE_EDGES = [-np.inf, 170, 340, np.inf]
for df in [train_df, test_df]:
    # labels=False yields the bin index; cast to float to keep the column dtype
    df['Fare'] = pd.cut(df['Fare'], bins=FARE_EDGES, labels=False).astype(float)

print(train_df.head())

In [None]:
# Drop the exploratory FareBand column.
# errors='ignore' keeps this cell re-runnable: without it, a second execution
# raises KeyError because the column has already been removed.
train_df = train_df.drop(columns=['FareBand'], errors='ignore')

In [None]:
# Remove the columns that are not fed to the model, then separate the
# training frame into a feature matrix X and a label vector y
UNUSED_COLUMNS = ['PassengerId', 'Name', 'Cabin', 'Ticket']
test_df = test_df.drop(columns=UNUSED_COLUMNS)

# Capture the labels first; 'Survived' is then dropped together with the
# unused columns so only input features remain in train_df
y = train_df['Survived'].values
train_df = train_df.drop(columns=UNUSED_COLUMNS + ['Survived'])
X = train_df.values  # features as a NumPy array

train_df.head()

In [None]:
# Split the data into train (80%), dev (10%) and test (10%) sets.
# A fixed random_state makes the partition -- and therefore every metric
# reported below -- reproducible across kernel restarts; without it each run
# draws a different split.
SPLIT_SEED = 42
X_train, X_holdout, y_train, y_holdout = skMS.train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED)
# The 20% hold-out is halved into the dev and test sets
X_dev, X_test, y_dev, y_test = skMS.train_test_split(
    X_holdout, y_holdout, test_size=0.5, random_state=SPLIT_SEED)

In [None]:
# Instantiate the network and configure it for training
model = My_Model()
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output
model.summary()
# NOTE(review): the target is thresholded at 0.5 later in the notebook, so
# this looks like binary classification trained with a regression loss --
# confirm whether "binary_crossentropy" fits My_Model's output layer.
model.compile(optimizer="adam",
              loss="mse",
              metrics=["mae"])


In [None]:
# Train for 200 epochs, evaluating on the dev split after every epoch.
# Only arguments that differ from (or deliberately restate) the defaults are
# passed. In particular `max_queue_size`, `workers` and `use_multiprocessing`
# are omitted: they were set to their default values and Keras 3's Model.fit
# no longer accepts them (passing them raises TypeError).
hist = model.fit(
    x=X_train,
    y=y_train,
    epochs=200,
    validation_data=(X_dev, y_dev),
    shuffle=True,
)

In [None]:
# Plot the per-epoch training and validation curves recorded by fit().
# The y-axis shows loss and MAE values (no accuracy metric is tracked), so it
# is labelled accordingly.
fig, ax = plt.subplots()
curves = [
    ('loss', 'train_loss'),
    ('mae', 'train_mae'),
    ('val_loss', 'val_loss'),
    ('val_mae', 'val_mae'),
]
for history_key, label in curves:
    ax.plot(hist.history[history_key], label=label)
ax.set_ylabel('loss / MAE')
ax.set_xlabel('epoch')
ax.legend(loc='upper left')
plt.show()

# Final metrics on the held-out test split, as a {metric_name: value} dict
print(model.evaluate(X_test, y_test, verbose='auto', return_dict=True))

In [None]:
# Write the trained model to 'Titanic_model', replacing any previous save.
# The keyword arguments are collected in one dict and forwarded unchanged.
save_kwargs = dict(
    overwrite=True,
    include_optimizer=True,
    save_format=None,
    signatures=None,
    options=None,
    save_traces=True,
)
model.save('Titanic_model', **save_kwargs)

In [None]:
# Reload the saved model, predict on the prepared test features and write
# the predictions to submission.csv
model = tf.keras.models.load_model('Titanic_model')
model.summary()
X_test_sub = test_df.values

Preds = model.predict(X_test_sub)
# Threshold the model outputs at 0.5 to obtain 0/1 labels (vectorized
# equivalent of the per-row loop). ravel() flattens an (n, 1) output to n.
Predictions = (np.ravel(Preds) >= 0.5).astype(int).tolist()

# PassengerId was dropped from test_df during feature selection. Re-read it
# from the source file instead of relying on a leftover loop variable that
# happened to still reference the pre-drop frame. No rows are removed during
# preprocessing, so the row order matches the predictions; a length mismatch
# makes the DataFrame constructor below raise.
passenger_ids = pd.read_csv(
    "dataset/test.csv", usecols=['PassengerId'])['PassengerId'].values

data = {'PassengerId': passenger_ids, 'Survived': Predictions}
df_submission = pd.DataFrame(data=data)
df_submission.to_csv(path_or_buf='submission.csv', index=False)