In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (c) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/titanic/train.csv
/kaggle/input/titanic/test.csv
/kaggle/input/titanic/gender_submission.csv


In [2]:
import math
import numpy as np
import h5py
import matplotlib.pyplot as plt
from matplotlib.pyplot import imread
import scipy
from PIL import Image
import pandas as pd
import tensorflow as tf
import tensorflow.keras.layers as tfl
from tensorflow.python.framework import ops


%matplotlib inline
np.random.seed(1)

In [3]:
train = pd.read_csv('/kaggle/input/titanic/train.csv')
test = pd.read_csv('/kaggle/input/titanic/test.csv')

print(train.columns)
train['Embarked'].unique()

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')


array(['S', 'C', 'Q', nan], dtype=object)

In [4]:
Y_train = train['Survived']
Y_train

0      0
1      1
2      1
3      1
4      0
      ..
886    0
887    1
888    0
889    1
890    0
Name: Survived, Length: 891, dtype: int64

In [5]:
def train_custom_linear_encoder(data):
    
    max_val = np.nanmax(data)
    min_val = np.nanmin(data)
    encoding_params = {'max_val':max_val,
                       'min_val':min_val}
    linear_encoded=(data-encoding_params['min_val'])/(
        encoding_params['max_val']-encoding_params['min_val'])
    
    linear_encoded = linear_encoded.values.reshape(-1,1)
    
    # Added a mask to deal with Nan Values
    mask=np.invert(np.isnan(linear_encoded))
    linear_encoded=np.nan_to_num(linear_encoded)
    
    # output
    return linear_encoded, mask, encoding_params

def test_custom_linear_encoder(data,encoding_params):
    linear_encoded=(data-encoding_params['min_val'])/(
        encoding_params['max_val']-encoding_params['min_val'])
    
    linear_encoded = linear_encoded.values.reshape(-1,1)
    
    # Added a mask to deal with Nan Values
    mask=np.invert(np.isnan(linear_encoded))
    linear_encoded=np.nan_to_num(linear_encoded)
    
    #output
    return linear_encoded, mask


array1, mask, encoding_params = train_custom_linear_encoder(train['Age'])
array2, mask = test_custom_linear_encoder(train['Age'], encoding_params)

assert np.all(np.equal(array1,array2,))

# integer encode
def train_custom_binary_encoder(data):
    unique_labels = data.unique()
    encoding_params = {}
    k=0
    for labels in unique_labels:
        encoding_params[labels] = k
        k=k+1
    one_hot = np.zeros((data.shape[0],len(encoding_params)))
    mask = np.ones((data.shape[0],len(encoding_params)))
    for i, row in enumerate(data):
        one_hot[i,encoding_params[row]] = 1
    return one_hot, mask, encoding_params

def test_custom_binary_encoder(data, encoding_params):
    one_hot = np.zeros((data.shape[0],len(encoding_params)))
    mask = np.ones((data.shape[0],len(encoding_params)))
    for i, row in enumerate(data):
        one_hot[i,encoding_params[row]] = 1
    return one_hot, mask

array1, mask, encoding_params = train_custom_binary_encoder(train['Embarked'])
array2, mask = test_custom_binary_encoder(train['Embarked'],encoding_params)
assert np.all(np.equal(array1,array2,))

In [7]:

to_encode = {'Pclass':'binary',
             'Embarked':'binary',
             'Sex':'binary',
            'Age':'linear',
            'Fare':'linear'}

def train_pre_processor(pd_csv,to_encode):
    """
    arguments
        inputs -- straight from the input
        
    returns
        outputs -- arguments to train
    """
    encoders = []
    encoded_data = []
    encoder_mask = []
    for encoder_key in to_encode.keys():
        encoder_type = to_encode[encoder_key]
        if encoder_type == 'binary':
            encoded_col, mask, encoding_params = train_custom_binary_encoder(pd_csv[encoder_key])
        elif encoder_type == 'linear':
            encoded_col, mask, encoding_params = train_custom_linear_encoder(pd_csv[encoder_key])
        encoders.append(encoding_params)
        encoded_data.append(encoded_col)
        encoder_mask.append(mask)

    inputs_processed= np.concatenate(encoded_data,axis=1)
    mask_processed = np.concatenate(encoder_mask,axis=1)
    output = np.array([inputs_processed, mask_processed])
    output = tf.transpose(output, perm=[1,0,2])
    
    return output, encoders

X_train, encoders = train_pre_processor(train, to_encode)
print(X_train.shape)


def test_pre_processor(pd_csv,encoders, to_encode):
    """
    arguments
        inputs -- straight from the input
        
    returns
        outputs -- arguments to train
    """
    encoded_data = []
    encoder_mask = []
    k=0
    for i,encoder_key in enumerate(to_encode.keys()):
        encoder_type = to_encode[encoder_key]
        encoding_params = encoders[i]
        if encoder_type == 'binary':
            encoded_col, mask = test_custom_binary_encoder(pd_csv[encoder_key],encoding_params)
        elif encoder_type == 'linear':
            encoded_col, mask = test_custom_linear_encoder(pd_csv[encoder_key],encoding_params)
        encoded_data.append(encoded_col)
        encoder_mask.append(mask)
        

    inputs_processed= np.concatenate(encoded_data,axis=1)
    mask_processed = np.concatenate(encoder_mask,axis=1)
    output = np.array([inputs_processed, mask_processed])
    output = tf.transpose(output, perm=[1,0,2])
    return output



X_test = test_pre_processor(test, encoders,to_encode)
print(X_test.shape)

(891, 2, 11)
(418, 2, 11)


In [11]:
def oneLayerNN():
    """
    Simple one layer neural network
    
    Arguments:
    None

    Returns:
    model -- TF Keras model 
    """
    model = tf.keras.Sequential([
            tfl.Input(shape=(2,11)),
            tfl.Flatten(),
            tfl.Dense(32, activation='relu'),
            tfl.Dense(16, activation='relu'),
            tfl.Dense(1, activation='sigmoid'),
        ])
    
    return model

In [12]:
one_layer_nn = oneLayerNN()
one_layer_nn.compile(optimizer='adam',
                   loss='binary_crossentropy',
                   metrics=['accuracy'])

In [13]:
one_layer_nn.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 22)                0         
                                                                 
 dense_3 (Dense)             (None, 32)                736       
                                                                 
 dense_4 (Dense)             (None, 16)                528       
                                                                 
 dense_5 (Dense)             (None, 1)                 17        
                                                                 
Total params: 1,281
Trainable params: 1,281
Non-trainable params: 0
_________________________________________________________________


In [14]:
one_layer_nn.fit(X_train, Y_train, epochs=10)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7e8fdcea97e0>

In [16]:
X_test = test_pre_processor(test, encoders,to_encode)
X_test = one_layer_nn(X_test)
xresult = X_test>0.5
x_result= xresult.numpy()
x_result.shape

(418, 1)

In [17]:
pass_id = test['PassengerId'].to_numpy()
pass_col=pass_id.reshape(-1,1)
pass_col.shape

(418, 1)

In [18]:
data_result = np.concatenate((pass_col,x_result),axis=1)
data_result

array([[ 892,    0],
       [ 893,    1],
       [ 894,    0],
       [ 895,    0],
       [ 896,    1],
       [ 897,    0],
       [ 898,    1],
       [ 899,    0],
       [ 900,    1],
       [ 901,    0],
       [ 902,    0],
       [ 903,    0],
       [ 904,    1],
       [ 905,    0],
       [ 906,    1],
       [ 907,    1],
       [ 908,    0],
       [ 909,    0],
       [ 910,    1],
       [ 911,    1],
       [ 912,    0],
       [ 913,    0],
       [ 914,    1],
       [ 915,    0],
       [ 916,    1],
       [ 917,    0],
       [ 918,    1],
       [ 919,    0],
       [ 920,    0],
       [ 921,    0],
       [ 922,    0],
       [ 923,    0],
       [ 924,    1],
       [ 925,    0],
       [ 926,    0],
       [ 927,    0],
       [ 928,    0],
       [ 929,    1],
       [ 930,    0],
       [ 931,    0],
       [ 932,    0],
       [ 933,    0],
       [ 934,    0],
       [ 935,    1],
       [ 936,    1],
       [ 937,    0],
       [ 938,    0],
       [ 939,

In [19]:
submission = pd.DataFrame(data_result,columns=["PassengerId","Survived"])
submission.to_csv('/kaggle/working/submission.csv',index=False)