# Preprocessing Data.

In [0]:
import pandas as pd
import numpy as np
import h5py
from sklearn.preprocessing import MinMaxScaler
from sklearn.impute import SimpleImputer

In [0]:
def cat2num_sex(val):
  """Encode the `Sex` column value as a number.

  Returns 1 when `val` equals the string "male" and 0 for every other
  value (including "female" and missing values such as NaN).
  """
  return 1 if val == "male" else 0

def cat2num_embark(val):
  """Encode the `Embarked` column value as a number.

  "C" maps to 2 and "Q" maps to 1; any other value (e.g. "S" or a missing
  value such as NaN) maps to 0.
  """
  if val == "C":
    return 2
  if val == "Q":
    return 1
  # Fallback bucket: everything that is neither "C" nor "Q".
  return 0
  
# Fixed seed for the row shuffle below, so the train/validation split written
# to disk is the same on every "Restart & Run All".
SEED = 42

df = pd.read_csv("train.csv")

# Shuffle all rows once (reproducibly) because the split further down is
# purely positional: first 80% -> train, remaining 20% -> validation.
df = df.sample(frac=1, random_state=SEED).reset_index(drop=True)


# Missing Data: replace unknown ages with the median age of this file.
df['Age'] = df['Age'].fillna(value=df.Age.median())

# Categorical to Numerical
# (values not recognised by the encoders, including missing ones, become 0)
df['Sex'] = df['Sex'].apply(cat2num_sex)
df['Embarked'] = df['Embarked'].apply(cat2num_embark)

# Normalize: rescale Age and Fare to the [0, 1] range.
# The same scaler object is re-fitted for each column, so after this cell it
# only holds the Fare parameters.
# NOTE(review): the scalers are fitted on all rows, i.e. before the
# train/validation split below.
scaler = MinMaxScaler()
df['Age'] = scaler.fit_transform(np.array(df['Age']).reshape(-1,1))
df['Fare'] = scaler.fit_transform(np.array(df['Fare']).reshape(-1,1))


# Columns
target_cols = ["Survived"]
feature_cols = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]

# NOTE: despite their names these two frames hold *all* rows; they are sliced
# into train / validation parts only when written to the HDF5 file.
X_train = df[feature_cols]
y_train = df[target_cols]


# 80/20 positional split of the shuffled rows, persisted as four datasets.
train_size = int(len(y_train) * 0.80)
with h5py.File("dataset-v4.h5", 'w') as f:
  f.create_dataset("X_train", data=np.array(X_train[:train_size]))
  f.create_dataset('y_train', data=np.array(y_train[:train_size]))
  f.create_dataset("X_val", data=np.array(X_train[train_size:]))
  f.create_dataset("y_val", data=np.array(y_train[train_size:]))


**Build Model**

In [0]:
from keras.models import Sequential
from keras.layers import Dense, Activation, BatchNormalization, Dropout


# Feed-forward binary classifier over the 7 input features:
# two hidden blocks (Dense -> Dropout -> BatchNormalization -> ReLU)
# followed by a single sigmoid output unit.
model = Sequential([
    # I layer
    Dense(50, input_dim=7),
    Dropout(0.3),
    BatchNormalization(momentum=0.99),
    Activation('relu'),
    # II Layer
    Dense(30),
    Dropout(0.2),
    BatchNormalization(momentum=0.99),
    Activation('relu'),
    # Final Layer
    Dense(1),
    Activation('sigmoid'),
])

**Train Model**

In [92]:
from keras.optimizers import Adam
from keras.callbacks import CSVLogger, ModelCheckpoint

# Optimizer and loss configuration for the binary classifier.
adam = Adam(lr = 0.05, epsilon = 1e-8)
model.compile(loss = 'mean_squared_error', optimizer = adam, metrics = ['accuracy'])


# Load the train / validation arrays written by the preprocessing cell.
# `dataset[()]` reads the whole dataset into a NumPy array; the older
# `.value` accessor was deprecated and then removed in h5py 3.0.
with h5py.File('dataset-v4.h5', 'r') as hf:
  X_train = hf['X_train'][()]
  y_train = hf['y_train'][()]
  X_val = hf['X_val'][()]
  y_val = hf['y_val'][()]

model.fit(
    X_train,
    y_train,
    batch_size = 64,
    validation_data = (X_val, y_val),
    epochs = 1000,
    callbacks = [
        # Append per-epoch metrics to logs.csv (kept across re-runs).
        CSVLogger(
            'logs.csv',
            append=True
        ),
        # A checkpoint is written after every epoch (save_best_only is not
        # set); the file name embeds the epoch, val_acc and val_loss.
        # val_acc is a "higher is better" metric, hence mode='max'.
        ModelCheckpoint(
            'model-ffn-{epoch:02d}-{val_acc:.2f}-{val_loss:.5f}.hdf5',
            monitor='val_acc',
            verbose=1,
            mode='max'
        )
    ]
)





Train on 712 samples, validate on 179 samples
Epoch 1/1000

Epoch 00001: saving model to model-ffn-01-0.83-0.12461.hdf5
Epoch 2/1000

Epoch 00002: saving model to model-ffn-02-0.82-0.12666.hdf5
Epoch 3/1000

Epoch 00003: saving model to model-ffn-03-0.83-0.12663.hdf5
Epoch 4/1000

Epoch 00004: saving model to model-ffn-04-0.84-0.12455.hdf5
Epoch 5/1000

Epoch 00005: saving model to model-ffn-05-0.82-0.12563.hdf5
Epoch 6/1000

Epoch 00006: saving model to model-ffn-06-0.80-0.12784.hdf5
Epoch 7/1000

Epoch 00007: saving model to model-ffn-07-0.82-0.12874.hdf5
Epoch 8/1000

Epoch 00008: saving model to model-ffn-08-0.82-0.13060.hdf5
Epoch 9/1000

Epoch 00009: saving model to model-ffn-09-0.80-0.13134.hdf5
Epoch 10/1000

Epoch 00010: saving model to model-ffn-10-0.83-0.13081.hdf5
Epoch 11/1000

Epoch 00011: saving model to model-ffn-11-0.83-0.13442.hdf5
Epoch 12/1000

Epoch 00012: saving model to model-ffn-12-0.84-0.12894.hdf5
Epoch 13/1000

Epoch 00013: saving model to model-ffn-13-0.83-0

<keras.callbacks.History at 0x7f94079d4780>

**Test Model**

In [0]:
df = pd.read_csv("test.csv")

# The model was trained on features imputed/scaled with statistics of
# train.csv, so the test set must be transformed with those same statistics
# (re-fitting on the test set would put the features on a different scale).
train_df = pd.read_csv("train.csv")
age_median = train_df['Age'].median()
fare_median = train_df['Fare'].median()

# Missing Data
df['Age'] = df['Age'].fillna(value=age_median)
df['Fare'] = df['Fare'].fillna(value=fare_median)


# Categorical to Numerical
df['Sex'] = df['Sex'].apply(cat2num_sex)
df['Embarked'] = df['Embarked'].apply(cat2num_embark)

# Normalization: fit on the training columns (Age imputed exactly as in the
# preprocessing cell), then only *transform* the test columns.
age_scaler = MinMaxScaler().fit(
    np.array(train_df['Age'].fillna(value=age_median)).reshape(-1, 1)
)
fare_scaler = MinMaxScaler().fit(np.array(train_df['Fare']).reshape(-1, 1))
df['Age'] = age_scaler.transform(np.array(df['Age']).reshape(-1, 1))
df['Fare'] = fare_scaler.transform(np.array(df['Fare']).reshape(-1, 1))

# Columns
features_cols = ["Pclass", "Sex", "Age", "SibSp", "Parch", "Fare", "Embarked"]
X_test = np.array(df[features_cols])

# Restore the weights of a specific checkpoint produced by the training cell.
model.load_weights('model-ffn-999-0.83-0.13249.hdf5')

# inference
predicted = model.predict(X_test)

# Round the sigmoid outputs to hard 0/1 labels.
vals = np.round(predicted)
# Take the ids from the file itself instead of hard-coding 892..1309
# (and avoid shadowing the built-in `range`).
passenger_ids = df['PassengerId']

# Write the submission file; the `with` block closes it automatically.
with open("output.csv", "w") as f:
    f.write("PassengerId,Survived\n")
    for x, y in zip(passenger_ids, vals):
        f.write("{},{}\n".format(x, int(y[0])))

