In [61]:
"""An Example of a DNNClassifier for the Iris dataset."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import tensorflow as tf
import pandas as pd
from sklearn.model_selection import train_test_split

# Titanic Training and Test data set extraction

In [62]:
titanic_train_data =  pd.read_csv('titanic_train.csv')
titanic_train_data.head(5)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


In [63]:
#Remove all the non-numeric columns

titanic_train_data = titanic_train_data.drop(['PassengerId','Name','Sex','Ticket','Cabin','Embarked'], axis=1)
titanic_train_data.head(5)

Unnamed: 0,Survived,Pclass,Age,SibSp,Parch,Fare
0,0,3,22.0,1,0,7.25
1,1,1,38.0,1,0,71.2833
2,1,3,26.0,0,0,7.925
3,1,1,35.0,1,0,53.1
4,0,3,35.0,0,0,8.05


In [64]:
#titanic_train_data = titanic_train_data.fillna(value=0)
print('Size before removing null values', titanic_train_data.shape)
titanic_train_data = titanic_train_data.dropna()
print('Size after removing null values', titanic_train_data.shape)

#Get the features and Label for Train data
all_features =  titanic_train_data.drop(['Survived'], axis=1)
all_labels = titanic_train_data[['Survived']]

train_features, validation_features, train_labels, validation_labels = train_test_split(all_features, all_labels, test_size=0.33 )

print(train_features.head(5))
print(train_labels.head(5))

print(validation_features.head(5))
print(validation_labels.head(5))

print(train_features.shape, train_labels.shape, validation_features.shape, validation_labels.shape)


Size before removing null values (891, 6)
Size after removing null values (714, 6)
     Pclass   Age  SibSp  Parch     Fare
137       1  37.0      1      0  53.1000
496       1  54.0      1      0  78.2667
192       3  19.0      1      0   7.8542
320       3  22.0      0      0   7.2500
805       3  31.0      0      0   7.7750
     Survived
137         0
496         1
192         1
320         0
805         0
     Pclass   Age  SibSp  Parch     Fare
289       3  22.0      0      0   7.7500
18        3  31.0      1      0  18.0000
80        3  22.0      0      0   9.0000
871       1  47.0      1      1  52.5542
309       1  30.0      0      0  56.9292
     Survived
289         1
18          0
80          0
871         1
309         1
(478, 5) (478, 1) (236, 5) (236, 1)


In [65]:
titanic_test_data =  pd.read_csv('titanic_test.csv')
titanic_test_data.head(5)

Unnamed: 0,PassengerId,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,892,3,"Kelly, Mr. James",male,34.5,0,0,330911,7.8292,,Q
1,893,3,"Wilkes, Mrs. James (Ellen Needs)",female,47.0,1,0,363272,7.0,,S
2,894,2,"Myles, Mr. Thomas Francis",male,62.0,0,0,240276,9.6875,,Q
3,895,3,"Wirz, Mr. Albert",male,27.0,0,0,315154,8.6625,,S
4,896,3,"Hirvonen, Mrs. Alexander (Helga E Lindqvist)",female,22.0,1,1,3101298,12.2875,,S


In [66]:
print('Size before removing null values', titanic_test_data.shape)
titanic_test_data = titanic_test_data.dropna()
print('Size after removing null values', titanic_test_data.shape)

test_features = titanic_test_data.drop(['PassengerId','Name','Sex','Ticket','Cabin','Embarked'], axis=1)
test_features.head(5)

Size before removing null values (418, 11)
Size after removing null values (87, 11)


Unnamed: 0,Pclass,Age,SibSp,Parch,Fare
12,1,23.0,1,0,82.2667
14,1,47.0,1,0,61.175
24,1,48.0,1,3,262.375
26,1,22.0,0,1,61.9792
28,1,41.0,0,0,30.5


# Create the Neural Networks model and Train

In [73]:
def input_fn_train(features, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((dict(features),labels))
    dataset = dataset.shuffle(1000).repeat().batch(batch_size)
    #dataset.batch(batch_size)
    return dataset

feature_set = []
for feature in train_features.keys():
    feature_set.append(tf.feature_column.numeric_column(key=feature))
    
print(feature_set)

classifier = tf.estimator.DNNClassifier(hidden_units=[10, 10], feature_columns=feature_set, n_classes=2)
classifier.train(input_fn= lambda:input_fn_train(train_features, train_labels, 10), steps = 10000)

[_NumericColumn(key='Pclass', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='Age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='SibSp', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='Parch', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None), _NumericColumn(key='Fare', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]
INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_num_ps_replicas': 0, '_num_worker_replicas': 1, '_model_dir': '/tmp/tmp0ghy8d2x', '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f45bc5ac588>, '_save_checkpoints_secs': 600, '_master': '', '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_session_config': None, '_service': None, '_log_step_count_steps': 100, '_save_checkpoints_steps': None, '_is_chief': True, '_save_summary_steps': 100, '_keep_

INFO:tensorflow:step = 7101, loss = 4.5612707 (0.586 sec)
INFO:tensorflow:global_step/sec: 241.802
INFO:tensorflow:step = 7201, loss = 4.9945893 (0.402 sec)
INFO:tensorflow:global_step/sec: 263.056
INFO:tensorflow:step = 7301, loss = 7.3679495 (0.380 sec)
INFO:tensorflow:global_step/sec: 253.404
INFO:tensorflow:step = 7401, loss = 7.4090877 (0.395 sec)
INFO:tensorflow:global_step/sec: 252.187
INFO:tensorflow:step = 7501, loss = 5.304507 (0.395 sec)
INFO:tensorflow:global_step/sec: 255.079
INFO:tensorflow:step = 7601, loss = 5.3182 (0.392 sec)
INFO:tensorflow:global_step/sec: 262.959
INFO:tensorflow:step = 7701, loss = 4.0921545 (0.383 sec)
INFO:tensorflow:global_step/sec: 262.471
INFO:tensorflow:step = 7801, loss = 3.2083406 (0.380 sec)
INFO:tensorflow:global_step/sec: 240.107
INFO:tensorflow:step = 7901, loss = 7.0215373 (0.415 sec)
INFO:tensorflow:global_step/sec: 249.231
INFO:tensorflow:step = 8001, loss = 4.9236298 (0.402 sec)
INFO:tensorflow:global_step/sec: 262.307
INFO:tensorflo

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x7f457109d400>

# Evaluation and Prediction

In [77]:
def evaluate_fn_train(features, labels, batch_size):
    if labels is None:
        temp = (dict(features))
    else:
        temp = (dict(features), labels)
    print(temp)
    dataset = tf.data.Dataset.from_tensor_slices(temp)
    dataset = dataset.batch(batch_size)
    return dataset

evaluation_result = classifier.evaluate(input_fn= lambda:evaluate_fn_train(features=validation_features, labels=validation_labels, batch_size=10))

print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**evaluation_result))

({'Fare': 289      7.7500
18      18.0000
80       9.0000
871     52.5542
309     56.9292
767      7.7500
659    113.2750
729      7.9250
599     56.9292
150     12.5250
393    113.2750
291     91.0792
476     21.0000
535     26.2500
346     13.0000
252     26.5500
440     26.2500
872      5.0000
74      56.4958
734     13.0000
479     12.2875
673     13.0000
558     79.6500
58      27.7500
583     40.1250
41      21.0000
294      7.8958
761      7.1250
118    247.5208
75       7.6500
         ...   
188     15.5000
392      7.9250
44       7.8792
663      7.4958
327     13.0000
515     34.0208
59      46.9000
148     26.0000
325    135.6333
302      0.0000
197      8.4042
570     10.5000
890      7.7500
204      8.0500
725      8.6625
880     26.0000
630     30.0000
311    262.3750
345     13.0000
797      8.6833
803      8.5167
885     29.1250
883     10.5000
696      8.0500
232     13.5000
199     13.0000
509     56.4958
374     21.0750
708    151.5500
266     39.6875
Name: Fare, Le

In [79]:
predictions = classifier.predict(input_fn= lambda:evaluate_fn_train(features=test_features, labels=None, batch_size=10))

for predict in predictions:
    #predict['class_ids'][0]
    print(predict['class_ids'][0])
    print(predict['probabilities'][predict['class_ids'][0]])
    #break

{'Fare': 12      82.2667
14      61.1750
24     262.3750
26      61.9792
28      30.5000
34      57.7500
44      52.5542
46      29.7000
48      76.2917
50      60.0000
53     263.0000
57       7.6500
59     262.3750
64     262.3750
68      28.5375
69     263.0000
73      27.7208
74     211.5000
75     211.5000
77      25.7000
81     221.7792
92      52.0000
96      78.8500
100     55.4417
109     13.0000
112     31.6792
114    221.7792
117     16.7000
118     75.2417
122     57.7500
         ...   
293     81.8583
305     26.5500
306    151.5500
308     93.5000
314    135.6333
316    146.5208
321      7.2292
322     13.0000
326     39.0000
331     29.7000
335     26.0000
343    512.3292
350     63.3583
355     26.0000
356     51.4792
364     55.4417
371    134.5000
372      0.0000
374     81.8583
378     50.0000
390     93.5000
391     39.4000
395     60.0000
397     79.2000
400    164.8667
404     27.7208
405     13.8625
407    211.5000
411     90.0000
414    108.9000
Name: Fare, Len