# Titanic Tutorial

重要：
- 因为model.fit(features.as_matrix(), survived.as_matrix(), batch_size = 2, epochs = 20)需要numpy.array输入，而不是pandas.DataFrame，这里需要DataFrame.as_matrix()转换
- 因为使用了kernel_initializer = 'uniform'，导致报错：InternalError: Blas GEMM launch failed

Reference: 
1. https://www.kaggle.com/c/titanic#tutorials
2. https://www.kaggle.com/sinakhorami/titanic-best-working-classifier
3. https://www.kaggle.com/arthurtok/introduction-to-ensembling-stacking-in-python/notebook


## 1. Preprocess

### Import pkgs

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import MinMaxScaler
from IPython.display import display

%matplotlib inline

### Import original data as DataFrame

In [2]:
# Read the Kaggle training and test CSV files into DataFrames.
data_train, data_test = pd.read_csv('./input/train.csv'), pd.read_csv('./input/test.csv')

# Preview the first rows of each frame (200 for train, 20 for test).
for frame, n_rows in ((data_train, 200), (data_test, 20)):
    display(frame.head(n_rows))

# Spot-check a single raw ticket value (row label 2).
data_train.loc[2, 'Ticket']

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.0750,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S
5,897,3,"Svensson, Mr. Johan Cervin",male,14.0,0,0,7538,9.225,,S
6,898,3,"Connolly, Miss. Kate",female,30.0,0,0,330972,7.6292,,Q
7,899,2,"Caldwell, Mr. Albert Francis",male,26.0,1,1,248738,29.0,,S
8,900,3,"Abrahim, Mrs. Joseph (Sophie Halaut Easu)",female,18.0,0,0,2657,7.2292,,C
9,901,3,"Davies, Mr. John Samuel",male,21.0,2,0,A/4 48871,24.15,,S


'STON/O2. 3101282'

### Show columns of dataframe

In [3]:
# Remember the raw column sets before any engineered 'a_*' column is added;
# a later cell drops exactly these columns to isolate the engineered features.
data_train_original_col, data_test_original_col = data_train.columns, data_test.columns
for original_cols in (data_train_original_col, data_test_original_col):
    print(original_cols)

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')
Index(['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch',
       'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')


### Preprocess features

In [4]:
# Both frames are collected in one list so that each preprocessing cell below can add
# the same engineered 'a_*' column to the training and the test set in a single loop.
full_data = [data_train, data_test]

In [5]:
# Pclass: report rows with a missing class, then copy the column unchanged.
for dataset in full_data:
    missing_pclass = dataset[dataset['Pclass'].isnull()]
    if missing_pclass.empty:
        print('Do not have null value!')
    else:
        # A bare expression inside a loop is never rendered by Jupyter (only the
        # cell's last expression is), so the offending rows are displayed explicitly.
        display(missing_pclass.head(2))

for dataset in full_data:
    dataset['a_Pclass'] = dataset['Pclass']

Do not have null value!
Do not have null value!


In [6]:
# Name: use the character count of the full name as a numeric feature.
for frame in full_data:
    name_lengths = frame['Name'].map(len)
    frame['a_Name_Length'] = name_lengths

In [7]:
# Sex: encode as an integer code (female -> 0, male -> 1).
for frame in full_data:
    sex_codes = frame['Sex'].map({'female': 0, 'male': 1})
    frame['a_Sex'] = sex_codes.astype(int)

In [8]:
# Age: missing ages become the sentinel -1, and a companion flag records
# whether the age was present (1) or missing (0).
for frame in full_data:
    age = frame['Age']
    frame['a_Age'] = age.fillna(value=-1)
    frame['a_Have_Age'] = age.notnull().astype(int)

In [9]:
# SibSp and Parch: family size counts siblings/spouses, parents/children and the
# passenger themselves; a_IsAlone is 1 when that size is at most 1.
for frame in full_data:
    family_size = frame['SibSp'] + frame['Parch'] + 1
    frame['a_FamilySize'] = family_size
    frame['a_IsAlone'] = (frame['a_FamilySize'] <= 1).astype(int)

In [10]:
# Ticket: flag whether a ticket value is present (1) or missing (0).
# The original author notes that every passenger has a ticket.
for frame in full_data:
    frame['a_Have_Ticket'] = frame['Ticket'].notnull().astype(int)

In [11]:
# Fare: missing fares become the sentinel -1, and a companion flag records
# whether the fare was present (1) or missing (0).
for frame in full_data:
    fare = frame['Fare']
    frame['a_Fare'] = fare.fillna(value=-1)
    frame['a_Have_Fare'] = fare.notnull().astype(int)

In [12]:
# Cabin: only keep whether a cabin was recorded (1) or not (0).
for frame in full_data:
    frame['a_Have_Cabin'] = frame['Cabin'].notnull().astype(int)

In [13]:
# Embarked: encode the port as an integer (S -> 0, C -> 1, Q -> 2, missing -> 3)
# and keep a flag for whether the port was recorded (1) or missing (0).
for dataset in full_data:
    # Relying on a `None` dictionary key to catch NaN depends on how the installed
    # pandas version matches NA values; if it does not match, the result contains
    # NaN and astype(int) raises. Filling with an explicit 'N' sentinel on a
    # temporary Series avoids that and leaves the raw 'Embarked' column untouched.
    embarked_filled = dataset['Embarked'].fillna('N')
    dataset['a_Embarked'] = embarked_filled.map({'S': 0, 'C': 1, 'Q': 2, 'N': 3}).astype(int)
    dataset['a_Have_Embarked'] = dataset['Embarked'].notnull().astype(int)

Name word segmentation and one-hot encoding

In [14]:
# # Name words segmentation
# import re
# name_words = []

# # In order to align the columns of data_train and data_test, only data_train is used to collect words
# for name in data_train['Name']:
# #     print(name)
#     words = re.findall(r"[\w']+", name)
# #     print(len(words))
# #     print(words)
#     for w in words:
#         if w not in name_words:
#             name_words.append(w)
# # print(len(name_words))
# name_words.sort()
# # print(name_words)

In [15]:
# # Add columns
# for dataset in full_data:
#     for w in name_words:
#         col_name = 'a_Name_' + w
#         dataset[col_name] = 0
#     dataset.head(1)

In [16]:
# # Name words one-hot encoding
# for dataset in full_data:
#     for i, row in dataset.iterrows():
#     #     print(row['Name'])
#         words = re.findall(r"[\w']+", row['Name'])
#         for w in words:
#             if w in name_words:
#                 col_name = 'a_Name_' + w
#                 dataset.loc[i, col_name] = 1
# #     display(dataset[dataset['a_Name_Braund'] == 1])

Cabin segmentation and one-hot encoding

In [17]:
# # Get cabin segmentation words
# import re
# cabin_words = []

# # In order to align the columns of data_train and data_test, only data_train is used to collect cabin letters
# for c in data_train['Cabin']:
# #     print(c)
#     if c is not np.nan:
#         word = re.findall(r"[a-zA-Z]", c)
# #         print(words[0])
#         cabin_words.append(word[0])
# print(len(cabin_words))
# cabin_words.sort()
# print(np.unique(cabin_words))
# cabin_words_unique = list(np.unique(cabin_words))

In [18]:
# def get_cabin_word(cabin):
#     if cabin is not np.nan:
#         word = re.findall(r"[a-zA-Z]", cabin)
#         if word:
#             return cabin_words_unique.index(word[0])
#     return -1

# for dataset in full_data:
#     dataset['a_Cabin_Word'] = dataset['Cabin'].apply(get_cabin_word)
#     # dataset['a_Cabin_Word'].head(100)

In [19]:
# def get_cabin_number(cabin):
#     if cabin is not np.nan:
#         word = re.findall(r"[0-9]+", cabin)
#         if word:
#             return int(word[0])
#     return -1

# for dataset in full_data:
#     dataset['a_Cabin_Number'] = dataset['Cabin'].apply(get_cabin_number)
#     # dataset['a_Cabin_Number'].head(100)

In [20]:
# Clean data
# Reference: 
#    1. https://www.kaggle.com/sinakhorami/titanic-best-working-classifier
#    2. https://www.kaggle.com/arthurtok/introduction-to-ensembling-stacking-in-python/notebook
# full_data = [data_train, data_test]
# for dataset in full_data:
#     dataset['a_Name_length'] = dataset['Name'].apply(len)
#     #dataset['Sex'] = (dataset['Sex']=='male').astype(int)
#     dataset['a_Sex'] = dataset['Sex'].map( {'female': 0, 'male': 1} ).astype(int)
#     dataset['a_Age'] = dataset['Age'].fillna(0)
#     dataset['a_Age_IsNull'] = dataset['Age'].isnull()
#     dataset['a_FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1
#     dataset['a_IsAlone'] = dataset['a_FamilySize'].apply(lambda x: 1 if x<=1 else 0)
#     dataset['a_Fare'] = dataset['Fare'].fillna(dataset['Fare'].median())
#     #dataset['Has_Cabin'] = dataset['Cabin'].apply(lambda x: 1 if type(x) == str else 0) # same as below
#     dataset['a_Has_Cabin'] = dataset['Cabin'].apply(lambda x: 0 if type(x) == float else 1)
#     dataset['a_Has_Embarked'] = dataset['Embarked'].isnull()
#     dataset['Embarked'] = dataset['Embarked'].fillna('N')
#     dataset['a_Embarked'] = dataset['Embarked'].map( {'S': 0, 'C': 1, 'Q': 2, 'N': 3} ).astype(int)
#     dataset['Embarked'] = dataset['Embarked'].fillna('S')
    
# display(data_train.head(2))
# display(data_test.head(2))

In [21]:
# Target vector: the survival label from the training set.
survived = data_train['Survived']

# Drop every raw column so that only the engineered 'a_*' columns remain.
data_train0, data_test0 = (
    frame.drop(original_cols, axis = 1)
    for frame, original_cols in (
        (data_train, data_train_original_col),
        (data_test, data_test_original_col),
    )
)
display(data_train0.head(2), data_test0.head(2))

# The model inputs are the engineered training columns.
features = data_train0
display(features.head(2))

Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_FamilySize,a_IsAlone,a_Have_Ticket,a_Fare,a_Have_Fare,a_Have_Cabin,a_Embarked,a_Have_Embarked
0,3,23,1,22.0,1,2,0,1,7.25,1,0,0,1
1,1,51,0,38.0,1,2,0,1,71.2833,1,1,1,1


Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_FamilySize,a_IsAlone,a_Have_Ticket,a_Fare,a_Have_Fare,a_Have_Cabin,a_Embarked,a_Have_Embarked
0,3,16,1,34.5,1,1,1,1,7.8292,1,0,2,1
1,3,32,0,47.0,1,2,0,1,7.0,1,0,0,1


Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_FamilySize,a_IsAlone,a_Have_Ticket,a_Fare,a_Have_Fare,a_Have_Cabin,a_Embarked,a_Have_Embarked
0,3,23,1,22.0,1,2,0,1,7.25,1,0,0,1
1,1,51,0,38.0,1,2,0,1,71.2833,1,1,1,1


Check and confirm that all columns have been processed

In [22]:
# Print any feature column that lacks the engineered 'a_' prefix (none are expected).
unprocessed_cols = [name for name in features.columns if not name.startswith('a_')]
for name in unprocessed_cols:
    print(name)

In [23]:
# Shuffle the labelled data and hold out 20% of it as a validation subset.
# A fixed random_state keeps the shuffle, and every score computed from it,
# reproducible across kernel restarts.
X_train, X_val, y_train, y_val = train_test_split(features, survived, test_size=0.2, random_state=42)

In [24]:
# Show the shapes of the four subsets produced by the split, then a short preview.
for subset in (X_train, X_val, y_train, y_val):
    print(subset.shape)
display(X_train.head(2), y_train.head(2))

(712, 13)
(179, 13)
(712,)
(179,)


Unnamed: 0,a_Pclass,a_Name_Length,a_Sex,a_Age,a_Have_Age,a_FamilySize,a_IsAlone,a_Have_Ticket,a_Fare,a_Have_Fare,a_Have_Cabin,a_Embarked,a_Have_Embarked
595,3,27,1,36.0,1,3,0,1,24.15,1,0,0,1
297,1,28,0,2.0,1,4,0,1,151.55,1,1,0,1


595    0
297    0
Name: Survived, dtype: int64

## 2. Build model

In [25]:
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

def my_GridSearchCV(model, parameters):
    """Fit an estimator on the notebook-level training split.

    When `parameters` is truthy the estimator is wrapped as
    GridSearchCV(model, parameters) before fitting; otherwise `model` itself
    is fitted. The fitted object is returned.
    """
    estimator = GridSearchCV(model, parameters) if parameters else model
    estimator.fit(X_train, y_train)
    return estimator

def ny_train(models):
    """Fit every entry of `models` and return the fitted estimators by name.

    `models` maps a display name to a dict holding 'model' (the estimator) and
    'param' (the grid passed on to my_GridSearchCV). A 'start <name>' line is
    printed before each fit.
    """
    fitted = {}
    for name, spec in models.items():
        print('start ' + name)
        fitted[name] = my_GridSearchCV(spec['model'], spec['param'])
    return fitted

def my_predict(clfs, X_val=None, y_val=None):
    """Print the validation accuracy of every fitted classifier in `clfs`.

    Parameters
    ----------
    clfs : dict
        Maps a display name to a fitted estimator exposing `predict`.
    X_val, y_val : optional
        Validation features and labels. When omitted, the notebook-level
        `X_val` / `y_val` are looked up at call time.

    Returns
    -------
    None
    """
    # Defaults written as `X_val=X_val` are evaluated once, when the function is
    # defined. Re-running the split cell afterwards would silently keep scoring
    # against the old validation set, so the globals are resolved per call instead.
    if X_val is None:
        X_val = globals()['X_val']
    if y_val is None:
        y_val = globals()['y_val']

    for key, value in clfs.items():
        print(key, end=':  ')
        y_pred = value.predict(X_val)
        print(accuracy_score(y_val, y_pred))

In [26]:
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import RandomForestClassifier

# Candidate classifiers, each paired with the parameter grid handed to GridSearchCV.
# An empty grid means the estimator is fitted directly with its default settings.
models = dict(
    # Disabled candidate:
    # SVC={'model': SVC(), 'param': {'kernel': ('linear', 'rbf'), 'C': [1, 5, 10]}},
    GaussianNB={'model': GaussianNB(), 'param': {}},
    AdaBoostClassifier={'model': AdaBoostClassifier(), 'param': {}},
    RandomForestClassifier={'model': RandomForestClassifier(), 'param': {}},
)

# Fit every candidate on the training split, then print each validation accuracy.
clfs = ny_train(models)
my_predict(clfs)

start AdaBoostClassifier
start GaussianNB
start RandomForestClassifier
AdaBoostClassifier:  0.798882681564
GaussianNB:  0.748603351955
RandomForestClassifier:  0.821229050279


### Neural network

In [27]:
from sklearn.metrics import confusion_matrix

from keras.utils.np_utils import to_categorical # convert to one-hot-encoding
from keras.models import Sequential
from keras.layers import Dense, Dropout, Input, Flatten, Conv2D, MaxPooling2D, BatchNormalization
from keras.optimizers import Adam
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import LearningRateScheduler, TensorBoard

Using TensorFlow backend.


In [28]:
# print(features.loc[689])
# print(survived.loc[689])
# print(features[0:5].index)
# print(dir(features))

In [29]:
# Convert the feature frame to a NumPy array and inspect it.
# DataFrame.as_matrix() was deprecated in pandas 0.23 and removed in 1.0;
# .values returns the same ndarray on both old and new pandas versions.
x = features.values
print(x.shape)
print(type(x))
print(x[0])

(891, 13)
<class 'numpy.ndarray'>
[  3.    23.     1.    22.     1.     2.     0.     1.     7.25   1.     0.
   0.     1.  ]


In [30]:
# Initialising the ANN
%pdb off
model = Sequential()

# layers
model.add(Dense(units = 512, kernel_initializer = 'uniform', activation = 'relu', input_dim = 13))
model.add(Dense(units = 512, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dense(units = 512, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dense(units = 512, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dense(units = 512, kernel_initializer = 'uniform', activation = 'relu'))
model.add(Dense(units = 1, kernel_initializer = 'uniform', activation = 'sigmoid'))

# Compiling the ANN
model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])

# Train the ANN
model.fit(features.as_matrix(), survived.as_matrix(), batch_size = 2, epochs = 20)

Automatic pdb calling has been turned OFF
Epoch 1/20


InternalError: Blas GEMM launch failed : a.shape=(2, 13), b.shape=(13, 512), m=2, n=512, k=13
	 [[Node: dense_1/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](_recv_dense_1_input_0/_15, dense_1/kernel/read)]]
	 [[Node: metrics/acc/Mean_1/_49 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_401_metrics/acc/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'dense_1/MatMul', defined at:
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\ipykernel\kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\zmq\eventloop\ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\tornado\ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\zmq\eventloop\zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\tornado\stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\ipykernel\kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\ipykernel\ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\ipykernel\zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\IPython\core\interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-30-6d5c2ec314e9>", line 6, in <module>
    model.add(Dense(units = 512, kernel_initializer = 'uniform', activation = 'relu', input_dim = 13))
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\keras\models.py", line 442, in add
    layer(x)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\keras\engine\topology.py", line 602, in __call__
    output = self.call(inputs, **kwargs)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\keras\layers\core.py", line 841, in call
    output = K.dot(inputs, self.kernel)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\keras\backend\tensorflow_backend.py", line 998, in dot
    out = tf.matmul(x, y)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\ops\math_ops.py", line 1801, in matmul
    a, b, transpose_a=transpose_a, transpose_b=transpose_b, name=name)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\ops\gen_math_ops.py", line 1263, in _mat_mul
    transpose_b=transpose_b, name=name)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 768, in apply_op
    op_def=op_def)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 2336, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "C:\Users\lenovo\Anaconda2\envs\tensorflow_gpu\lib\site-packages\tensorflow\python\framework\ops.py", line 1228, in __init__
    self._traceback = _extract_stack()

InternalError (see above for traceback): Blas GEMM launch failed : a.shape=(2, 13), b.shape=(13, 512), m=2, n=512, k=13
	 [[Node: dense_1/MatMul = MatMul[T=DT_FLOAT, transpose_a=false, transpose_b=false, _device="/job:localhost/replica:0/task:0/gpu:0"](_recv_dense_1_input_0/_15, dense_1/kernel/read)]]
	 [[Node: metrics/acc/Mean_1/_49 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_401_metrics/acc/Mean_1", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]


In [None]:
# # For a binary classification problem
# model.compile(optimizer='rmsprop',
#               loss='binary_crossentropy',
#               metrics=['accuracy'])

In [None]:
# %%time
# print(X_train.shape)
# print(X_val.shape)
# print(y_train.shape)
# print(y_val.shape)
# hist = model.fit(X_train, y_train,
#                  epochs=10, 
#                  batch_size=32, #Increase this when not on Kaggle kernel
#                  verbose=1)

## 3. Predict and export the titanic_pred.csv file

In [None]:
# Print any engineered test column that does not also exist in the training frame
# (none are expected, since both frames went through the same preprocessing loops).
train_cols = data_train.columns
missing_in_train = [name for name in data_test0.columns if name not in train_cols]
for name in missing_in_train:
    print(name)

In [None]:
display(data_test0.head(2))

# The network was trained on NumPy arrays, so the test features are passed the same way.
# The final layer is a single sigmoid unit, so predict() returns probabilities
# with shape (n_samples, 1).
survival_prob = model.predict(data_test0.values)

# A DataFrame column must be one-dimensional, and the training label 'Survived' is a
# 0/1 integer: flatten the predictions and threshold the probabilities at 0.5.
y_data_pred = (survival_prob.ravel() > 0.5).astype(int)

passenger_id = data_test['PassengerId']
output = pd.DataFrame( { 'PassengerId': passenger_id , 'Survived': y_data_pred })

# Forward slashes work on Windows as well as on Linux/macOS and match the './input/...'
# paths used when loading the data; backslashes are only a path separator on Windows.
output.to_csv( './output/titanic_pred.csv' , index = False )

In [None]:
# Print a completion marker at the end of the notebook.
print('Done!')