# Score : 0.6268

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

### Load and Explore Data

In [2]:
titanic_full = pd.read_csv('./data/train.csv', index_col='PassengerId')
titanic_test = pd.read_csv('./data/test.csv', index_col='PassengerId')
titanic_full.head()

Unnamed: 0_level_0,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [3]:
titanic_full.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 891 entries, 1 to 891
Data columns (total 11 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Survived  891 non-null    int64  
 1   Pclass    891 non-null    int64  
 2   Name      891 non-null    object 
 3   Sex       891 non-null    object 
 4   Age       714 non-null    float64
 5   SibSp     891 non-null    int64  
 6   Parch     891 non-null    int64  
 7   Ticket    891 non-null    object 
 8   Fare      891 non-null    float64
 9   Cabin     204 non-null    object 
 10  Embarked  889 non-null    object 
dtypes: float64(2), int64(4), object(5)
memory usage: 83.5+ KB


In [4]:
titanic_full.isna().sum()

Survived      0
Pclass        0
Name          0
Sex           0
Age         177
SibSp         0
Parch         0
Ticket        0
Fare          0
Cabin       687
Embarked      2
dtype: int64

### Select Features

In [5]:
cols = ['Age', 'Sex', 'Pclass']

In [6]:
X = titanic_full[cols]
X.head()

Unnamed: 0_level_0,Age,Sex,Pclass
PassengerId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,22.0,male,3
2,38.0,female,1
3,26.0,female,3
4,35.0,female,1
5,35.0,male,3


### Select Target

In [7]:
y = titanic_full['Survived']
y.head()

PassengerId
1    0
2    1
3    1
4    1
5    0
Name: Survived, dtype: int64

### Preprocess Data

In [8]:
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder, MaxAbsScaler
from sklearn.compose import ColumnTransformer

In [9]:
transformer = Pipeline(steps=[
    ('imputer', SimpleImputer(strategy='mean')),
    ('scaler', MaxAbsScaler())
])
preprocessor = ColumnTransformer(transformers=[
    ('imputer', transformer, ['Age']),
    ('encoder', OrdinalEncoder(), ['Sex']),
], remainder='passthrough')

In [10]:
X = preprocessor.fit_transform(X)

### Split Data

In [11]:
from sklearn.model_selection import train_test_split

X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2)

In [12]:
print("X train : ", X_train.shape)
print("X test  : ", X_val.shape)
print("\ny train : ", y_train.shape)
print("y test  : ", y_val.shape)

X train :  (712, 3)
X test  :  (179, 3)

y train :  (712,)
y test  :  (179,)


### Build Model using Neural Network

In [13]:
from tensorflow.keras import *

In [41]:
model = Sequential([
    layers.Dense(1, activation='sigmoid')
])

In [42]:
optimizer = optimizers.Adam(learning_rate=0.03)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['acc'])

In [43]:
early_stop = callbacks.EarlyStopping(monitor='val_acc', min_delta=1e4, patience=5, restore_best_weights=True)

### Grid Search

from sklearn.model_selection import GridSearchCV

params = {'model__': [100, 150, 200, 250], 'model__learning_rate': [0.01, 0.03, 0.05, 0.09]}
best_model = GridSearchCV(model, param_grid=params)
best_model.fit(X, y)

print("Best parmaters :") 
for k, v in best_model.best_params_.items():
    print(f"  {k} = {v}")
print("Best Score :", best_model.best_score_)

### Fit on training Data

In [44]:
model.fit(X_train, y_train,
          epochs=200, 
          validation_data=[X_val, y_val],
          callbacks=[early_stop],
          verbose=2)

Epoch 1/200


2022-03-01 08:22:45.945968: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.
2022-03-01 08:22:46.138323: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


23/23 - 0s - loss: 0.6443 - acc: 0.6180 - val_loss: 0.6046 - val_acc: 0.6257 - 343ms/epoch - 15ms/step
Epoch 2/200
23/23 - 0s - loss: 0.5714 - acc: 0.6742 - val_loss: 0.5673 - val_acc: 0.7095 - 99ms/epoch - 4ms/step
Epoch 3/200
23/23 - 0s - loss: 0.5319 - acc: 0.7781 - val_loss: 0.5438 - val_acc: 0.7765 - 96ms/epoch - 4ms/step
Epoch 4/200
23/23 - 0s - loss: 0.5094 - acc: 0.7865 - val_loss: 0.5299 - val_acc: 0.7598 - 95ms/epoch - 4ms/step
Epoch 5/200
23/23 - 0s - loss: 0.4965 - acc: 0.7935 - val_loss: 0.5260 - val_acc: 0.7598 - 98ms/epoch - 4ms/step
Epoch 6/200
23/23 - 0s - loss: 0.4880 - acc: 0.7935 - val_loss: 0.5201 - val_acc: 0.7598 - 100ms/epoch - 4ms/step


<keras.callbacks.History at 0x179bcc820>

### Predict on Test Data

In [45]:
X_test = preprocessor.transform(titanic_test)

In [46]:
predictions = (model.predict(X_test) > 0.5).ravel().astype(int)
predictions

2022-03-01 08:22:49.346019: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:112] Plugin optimizer for device_type GPU is enabled.


array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

### Create Submissions

In [47]:
submission = pd.DataFrame({'PassengerId': titanic_test.index, 'Survived': predictions})
submission.head()

Unnamed: 0,PassengerId,Survived
0,892,0
1,893,0
2,894,0
3,895,0
4,896,0


In [48]:
submission.to_csv('data/submission.csv', index=False)

---