In [1]:
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn import metrics
import pandas as pd

In [2]:
df = pd.read_csv('titanic.csv')
print(df.head())

   PassengerId  Survived  Pclass  \
0            1         0       3   
1            2         1       1   
2            3         1       3   
3            4         1       1   
4            5         0       3   

                                                Name     Sex   Age  SibSp  \
0                            Braund, Mr. Owen Harris    male  22.0      1   
1  Cumings, Mrs. John Bradley (Florence Briggs Th...  female  38.0      1   
2                             Heikkinen, Miss. Laina  female  26.0      0   
3       Futrelle, Mrs. Jacques Heath (Lily May Peel)  female  35.0      1   
4                           Allen, Mr. William Henry    male  35.0      0   

   Parch            Ticket     Fare Cabin Embarked  
0      0         A/5 21171   7.2500   NaN        S  
1      0          PC 17599  71.2833   C85        C  
2      0  STON/O2. 3101282   7.9250   NaN        S  
3      0            113803  53.1000  C123        S  
4      0            373450   8.0500   NaN        S  


In [3]:
train_df = df.drop(['PassengerId',"Name",'Ticket'],axis=1)

In [4]:
male_mean_age = train_df[train_df['Sex']=='male']['Age'].mean()
female_mean_age = train_df[train_df['Sex']=='female']['Age'].mean()
print(male_mean_age)
train_df.loc[(train_df['Sex']=='male') & (train_df['Age'].isnull()),'Age'] = male_mean_age
train_df.loc[(train_df['Sex']=='female') & (train_df['Age'].isnull()) ,'Age'] = female_mean_age

30.72664459161148


In [5]:
train_df['Cabin'] = train_df['Cabin'].fillna('X')
train_df['Embarked'] = train_df['Embarked'].fillna('S')

In [6]:
y = train_df.Survived
x = train_df.drop(['Survived'],axis=1)
print(y.head())
print(x.head())
print(x.isna().sum())

0    0
1    1
2    1
3    1
4    0
Name: Survived, dtype: int64
   Pclass     Sex   Age  SibSp  Parch     Fare Cabin Embarked
0       3    male  22.0      1      0   7.2500     X        S
1       1  female  38.0      1      0  71.2833   C85        C
2       3  female  26.0      0      0   7.9250     X        S
3       1  female  35.0      1      0  53.1000  C123        S
4       3    male  35.0      0      0   8.0500     X        S
Pclass      0
Sex         0
Age         0
SibSp       0
Parch       0
Fare        0
Cabin       0
Embarked    0
dtype: int64


In [13]:
X_train,X_test,y_train,y_test = train_test_split(x,y,train_size=0.75,random_state=1)
len(x['Cabin'].unique())

148

### Define Input

In [14]:
pclass_features = tf.feature_column.numeric_column('Pclass')
parch_feature = tf.feature_column.numeric_column('Parch')
fare_feature = tf.feature_column.numeric_column('Fare')
age_feature = tf.feature_column.numeric_column('Age')

#defining buckets for children, teens ..
age_bucket = tf.feature_column.bucketized_column(age_feature,[12,21,60])
#define categorical column with predefined values

sex_feature = tf.feature_column.categorical_column_with_vocabulary_list('Sex',['female','male'])

embarked_feature = tf.feature_column.categorical_column_with_hash_bucket('Embarked',3)
cabin_feature = tf.feature_column.categorical_column_with_hash_bucket('Cabin',100)

feature_columns = [pclass_features,age_feature,age_bucket,parch_feature,fare_feature,embarked_feature,cabin_feature]

## In case of DNN

In [15]:
embarked_embedding = tf.feature_column.embedding_column(categorical_column=embarked_feature,dimension=3)
cabin_embedding = tf.feature_column.embedding_column(categorical_column=cabin_feature,dimension=300)

In [16]:
feature_columns = [pclass_features,age_feature,age_bucket,parch_feature,fare_feature,embarked_embedding,cabin_embedding]

## Instantiate DNN Estimator

In [17]:
estimator = tf.estimator.DNNClassifier(feature_columns=feature_columns,hidden_units=[30,20,10])

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmpfo2_ie56', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': ClusterSpec({}), '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [18]:
# train input function

train_input_fn = tf.compat.v1.estimator.inputs.pandas_input_fn(x=X_train,
                                                              y=y_train,
                                                              num_epochs= None,
                                                              shuffle=True,
                                                              target_column='target')




In [19]:
estimator.train(input_fn=train_input_fn,steps=1000)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Running local

<tensorflow_estimator.python.estimator.canned.dnn.DNNClassifierV2 at 0x7f1dea975d30>

In [20]:
result = estimator.evaluate(tf.compat.v1.estimator.inputs.pandas_input_fn(x=X_train,y=y_train,batch_size=10,num_epochs=1,shuffle=True))

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-07-19T20:36:17Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpfo2_ie56/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.92276s
INFO:tensorflow:Finished evaluation at 2020-07-19-20:36:18
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.7020958, accuracy_baseline = 0.63023955, auc = 0.7099445, auc_precision_recall = 0.6020121, average_loss = 0.59037673, global_step = 1000, label/mean = 0.36976048, loss = 0.5900658, precision = 0.6318681, pr

In [21]:
eval_input = tf.compat.v1.estimator.inputs.pandas_input_fn(x=X_test,y=y_test,batch_size=10,num_epochs=1,shuffle=False)
result=estimator.evaluate(eval_input)

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Starting evaluation at 2020-07-19T20:36:25Z
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmpfo2_ie56/model.ckpt-1000
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
INFO:tensorflow:Inference Time : 0.77198s
INFO:tensorflow:Finished evaluation at 2020-07-19-20:36:26
INFO:tensorflow:Saving dict for global step 1000: accuracy = 0.6367713, accuracy_baseline = 0.57399106, auc = 0.6747533, auc_precision_recall = 0.61011946, average_loss = 0.6424455, global_step = 1000, label/mean = 0.42600897, loss = 0.6501835, precision = 0.61290324, p