In [35]:
"""An Example of a DNNClassifier for the Iris dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

# Titanic Training and Test data set extraction

In [36]:
titanic_train_data =  pd.read_csv('titanic_train.csv')
titanic_train_data.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [37]:
print(titanic_train_data['Name'].size)
print(len(titanic_train_data['Name'].unique()))

print(titanic_train_data['Sex'].size)
print(len(titanic_train_data['Sex'].unique()))

print(titanic_train_data['Ticket'].size)
print(len(titanic_train_data['Ticket'].unique()))

print(titanic_train_data['Cabin'].size)
print(len(titanic_train_data['Cabin'].unique()))

print(titanic_train_data['Embarked'].size)
print(len(titanic_train_data['Embarked'].unique()))

891
891
891
2
891
681
891
148
891
4


In [38]:
#Remove non-important details from the dataset
#titanic_train_data = titanic_train_data.drop(['PassengerId','Name','Ticket'], axis=1)
titanic_train_data = titanic_train_data.drop(['PassengerId','Name','Sex','Ticket','Cabin','Embarked'], axis=1)
titanic_train_data.head(5)

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare
0,0,3,22.0,1,0,7.25
1,1,1,38.0,1,0,71.2833
2,1,3,26.0,0,0,7.925
3,1,1,35.0,1,0,53.1
4,0,3,35.0,0,0,8.05


In [39]:
print('Size before filliing null values', titanic_train_data.shape)
titanic_train_data = titanic_train_data.fillna(titanic_train_data.mode().iloc[0])
print('Size after filling null values', titanic_train_data.shape)

#Get the features and Label for Train data
all_features =  titanic_train_data.drop(['Survived'], axis=1)
all_labels = titanic_train_data[['Survived']]

train_features, validation_features, train_labels, validation_labels = train_test_split(all_features, all_labels, test_size=0.33 )

print(train_features.head(5))
print(train_labels.head(5))

print(validation_features.head(5))
print(validation_labels.head(5))

print(train_features.shape, train_labels.shape, validation_features.shape, validation_labels.shape)


Size before filliing null values (891, 6)
Size after filling null values (891, 6)
     Pclass   Age  SibSp  Parch     Fare
296       3  23.5      0      0   7.2292
9         2  14.0      1      0  30.0708
784       3  25.0      0      0   7.0500
789       1  46.0      0      0  79.2000
723       2  50.0      0      0  13.0000
     Survived
296         0
9           1
784         0
789         0
723         0
     Pclass   Age  SibSp  Parch     Fare
495       3  24.0      0      0  14.4583
91        3  20.0      0      0   7.8542
502       3  24.0      0      0   7.6292
699       3  42.0      0      0   7.6500
656       3  24.0      0      0   7.8958
     Survived
495         0
91          0
502         0
699         0
656         0
(596, 5) (596, 1) (295, 5) (295, 1)


In [40]:
titanic_test_data =  pd.read_csv('titanic_test.csv')
titanic_test_data.head(5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [41]:
print('Size before filling null values', titanic_test_data.shape)
titanic_test_data = titanic_test_data.fillna(titanic_test_data.mode().iloc[0])
print('Size after filling null values', titanic_test_data.shape)

#test_features = titanic_test_data.drop(['PassengerId','Name','Ticket'], axis=1)
test_features = titanic_test_data.drop(['PassengerId','Name','Sex','Ticket','Cabin','Embarked'], axis=1)

test_features.head(5)

Size before filling null values (418, 11)
Size after filling null values (418, 11)


Unnamed: 0,Pclass,Age,SibSp,Parch,Fare
0,3,34.5,0,0,7.8292
1,3,47.0,1,0,7.0
2,2,62.0,0,0,9.6875
3,3,27.0,0,0,8.6625
4,3,22.0,1,1,12.2875


# Create the Neural Networks model and Train

In [45]:
def input_fn_train(features, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((dict(features),labels))
    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
    #dataset.batch(batch_size)
    return dataset

feature_set = []
for feature in train_features.keys():
    if feature in ['Sex','Cabin' ,'Embarked']:
        feature_set.append(tf.feature_column.categorical_column_with_vocabulary_list(key=feature, 
                                                                                    vocabulary_list= titanic_train_data[feature].unique()))
    else:
        feature_set.append(tf.feature_column.numeric_column(key=feature))
    
print(feature_set)

classifier = tf.estimator.DNNClassifier(hidden_units=[10, 10], feature_columns=feature_set, n_classes=2)
classifier.train(input_fn= lambda:input_fn_train(train_features, train_labels, 10), steps = 1000)

[_NumericColumn(key='Pclass', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='Age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='SibSp', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='Parch', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='Fare', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_save_summary_steps': 100, '_master': '', '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fe419b39198>, '_log_step_count_steps': 100, '_is_chief': True, '_keep_checkpoint_max': 5, '_task_id': 0, '_save_checkpoints_steps': None, '_num_worker_replicas': 1, '_tf_random_seed': None, '_task_type': 'worker', '_session_config': None, '_model_dir': '/tmp/tmpko9spbya', '_num_ps_replicas': 0,

INFO:tensorflow:step = 7101, loss = 6.5300674 (0.375 sec)
INFO:tensorflow:global_step/sec: 283.4
INFO:tensorflow:step = 7201, loss = 6.687435 (0.356 sec)
INFO:tensorflow:global_step/sec: 274.507
INFO:tensorflow:step = 7301, loss = 3.7713702 (0.364 sec)
INFO:tensorflow:global_step/sec: 271.318
INFO:tensorflow:step = 7401, loss = 3.1882653 (0.367 sec)
INFO:tensorflow:global_step/sec: 287.68
INFO:tensorflow:step = 7501, loss = 6.2009697 (0.343 sec)
INFO:tensorflow:global_step/sec: 280.847
INFO:tensorflow:step = 7601, loss = 5.601495 (0.362 sec)
INFO:tensorflow:global_step/sec: 272.011
INFO:tensorflow:step = 7701, loss = 5.0145493 (0.366 sec)
INFO:tensorflow:global_step/sec: 291.842
INFO:tensorflow:step = 7801, loss = 6.3181725 (0.338 sec)
INFO:tensorflow:global_step/sec: 285.301
INFO:tensorflow:step = 7901, loss = 4.375243 (0.351 sec)
INFO:tensorflow:global_step/sec: 273.073
INFO:tensorflow:step = 8001, loss = 6.353567 (0.366 sec)
INFO:tensorflow:global_step/sec: 294.347
INFO:tensorflow:s

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7fe440eeee80>

# Evaluation and Prediction

In [46]:
def evaluate_fn_train(features, labels, batch_size):
    if labels is None:
        temp = (dict(features))
    else:
        temp = (dict(features), labels)
    print(temp)
    dataset = tf.data.Dataset.from_tensor_slices(temp)
    dataset = dataset.batch(batch_size)
    return dataset

evaluation_result = classifier.evaluate(input_fn= lambda:evaluate_fn_train(features=validation_features, labels=validation_labels, batch_size=10))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**evaluation_result))

({'SibSp': 495    0
91     0
502    0
699    0
656    0
298    0
276    0
628    0
443    0
708    0
583    0
471    0
101    0
680    0
31     1
342    0
863    8
274    0
786    0
527    0
767    0
11     0
121    0
20     0
683    5
104    2
516    0
647    0
561    0
222    0
      ..
704    1
34     1
188    1
674    0
662    0
430    0
635    0
303    0
876    0
836    0
308    1
339    0
365    0
865    0
403    1
368    0
866    1
598    0
474    0
590    0
286    0
426    1
230    1
725    0
778    0
64     0
374    3
478    0
695    0
617    1
Name: SibSp, Length: 295, dtype: int64, 'Fare': 495     14.4583
91       7.8542
502      7.6292
699      7.6500
656      7.8958
298     30.5000
276      7.7500
628      7.8958
443     13.0000
708    151.5500
583     40.1250
471      8.6625
101      7.8958
680      8.1375
31     146.5208
342     13.0000
863     69.5500
274      7.7500
786      7.4958
527    221.7792
767      7.7500
11      26.5500
121      8.0500
20      26.0000
683     

In [47]:
predictions = classifier.predict(input_fn= lambda:evaluate_fn_train(features=test_features, labels=None, batch_size=10))

for predict in predictions:
    #predict['class_ids'][0]
    print(predict['class_ids'][0])
    print(predict['probabilities'][predict['class_ids'][0]])
    #break

{'SibSp': 0      0
1      1
2      0
3      0
4      1
5      0
6      0
7      1
8      0
9      2
10     0
11     0
12     1
13     1
14     1
15     1
16     0
17     0
18     1
19     0
20     1
21     0
22     0
23     0
24     1
25     1
26     0
27     0
28     0
29     2
      ..
388    0
389    3
390    0
391    0
392    0
393    0
394    3
395    1
396    0
397    1
398    0
399    0
400    0
401    1
402    0
403    0
404    1
405    0
406    1
407    1
408    0
409    1
410    0
411    1
412    0
413    0
414    0
415    0
416    0
417    1
Name: SibSp, Length: 418, dtype: int64, 'Fare': 0        7.8292
1        7.0000
2        9.6875
3        8.6625
4       12.2875
5        9.2250
6        7.6292
7       29.0000
8        7.2292
9       24.1500
10       7.8958
11      26.0000
12      82.2667
13      26.0000
14      61.1750
15      27.7208
16      12.3500
17       7.2250
18       7.9250
19       7.2250
20      59.4000
21       3.1708
22      31.6833
23      61.3792
24     26