In [0]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib
import tensorflow as tf
import tensorflow.compat.v2.feature_column as fc 

# Titanic passenger tables: one split for training, one held out for evaluation.
df_train = pd.read_csv(
    'https://storage.googleapis.com/tf-datasets/titanic/train.csv'
)
df_eval = pd.read_csv(
    'https://storage.googleapis.com/tf-datasets/titanic/eval.csv'
)

# pop() returns the 'survived' label column and removes it from each feature frame.
y_train, y_eval = df_train.pop('survived'), df_eval.pop('survived')

In [2]:
'''
get cat and nums using pandas, which is required further for encodings.
'''
import numpy as np

# Partition the training columns by dtype: numeric ones versus object (string) ones.
numeric_cols = list(df_train.select_dtypes(include=np.number).columns)
categorical_cols = list(df_train.select_dtypes(include='object').columns)

print(f' NUMERIC: {numeric_cols}')
print(f' CATEGORICAL: {categorical_cols}')

 NUMERIC: ['age', 'n_siblings_spouses', 'parch', 'fare']
 CATEGORICAL: ['sex', 'class', 'deck', 'embark_town', 'alone']


In [3]:
'''
create the feature vector by adding features manually.
'''

# Feature columns handed to the estimator: categorical ones first, then numeric ones.
feature_cols = []

# Every categorical feature gets a vocabulary made of the distinct values
# found in the training frame.
for col in categorical_cols:
  vocabulary = df_train[col].unique()
  print(f' {col} unique values: {vocabulary}')
  categorical_feature = tf.feature_column.categorical_column_with_vocabulary_list(
      col, vocabulary)
  feature_cols.append(categorical_feature)

# Numeric features are declared as float32 columns.
for col in numeric_cols:
  feature_cols += [tf.feature_column.numeric_column(col, dtype=tf.float32)]

print(feature_cols)

 sex unique values: ['male' 'female']
 class unique values: ['Third' 'First' 'Second']
 deck unique values: ['unknown' 'C' 'G' 'A' 'B' 'D' 'F' 'E']
 embark_town unique values: ['Southampton' 'Cherbourg' 'Queenstown' 'unknown']
 alone unique values: ['n' 'y']
[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='embark_town', vocabulary_list=('Southampton', 'Cherbourg', 'Queenstown', 'unknown'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, default_value=-1, num_oov_buck

In [0]:
'''
input function creator
'''
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32,
                  shuffle_buffer_size=1000, seed=None):
  '''Build an input function that yields batches of (features, labels).

  Args:
    data_df: Feature table. It is converted with `dict(data_df)`, so each key
      (each column, when a pandas DataFrame is passed) becomes a named feature.
    label_df: Labels aligned row-for-row with `data_df`.
    num_epochs: Number of times the batched dataset is repeated.
    shuffle: Whether to shuffle the examples before batching.
    batch_size: Number of examples per batch.
    shuffle_buffer_size: Buffer size passed to `Dataset.shuffle`. Defaults to
      1000, the value that used to be hard-coded.
    seed: Optional seed forwarded to `Dataset.shuffle` so callers can make the
      shuffle order repeatable. `None` keeps the previous unseeded behaviour.

  Returns:
    A zero-argument callable; each call builds and returns a new dataset.
  '''
  def input_function():
    # Pair the dict of feature columns with the labels, one example per row.
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      ds = ds.shuffle(shuffle_buffer_size, seed=seed)
    # Batch first, then repeat: the batched pass is replayed num_epochs times.
    return ds.batch(batch_size).repeat(num_epochs)
  return input_function

# Training input: default settings (shuffled, 10 epochs, batches of 32).
train_input_fn = make_input_fn(data_df=df_train, label_df=y_train)
# Evaluation input: a single unshuffled pass over the held-out split.
eval_input_fn = make_input_fn(
    data_df=df_eval,
    label_df=y_eval,
    num_epochs=1,
    shuffle=False,
)


In [7]:
# Fit a linear classifier on the training input, then score it on the evaluation input.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_cols)
linear_est.train(input_fn=train_input_fn)
result = linear_est.evaluate(input_fn=eval_input_fn)

# Clear whatever the cell has emitted so far, leaving only the metrics below.
clear_output()
print(result['accuracy'])  # headline metric
print(result)              # full metrics dictionary

0.75
{'accuracy': 0.75, 'accuracy_baseline': 0.625, 'auc': 0.834619, 'auc_precision_recall': 0.75909275, 'average_loss': 0.49368316, 'label/mean': 0.375, 'loss': 0.47662264, 'precision': 0.70886075, 'prediction/mean': 0.33172557, 'recall': 0.56565654, 'global_step': 200}


In [15]:
# Collect every prediction dict for the evaluation input into a list.
pred = [prediction for prediction in linear_est.predict(input_fn=eval_input_fn)]
print(pred)
print(len(pred))
# For the first example, print the probability at index 1 of 'probabilities'.
print(pred[0]['probabilities'][1])

INFO:tensorflow:Calling model_fn.


To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

INFO:tensorflow:Done calling model_fn.
INFO:tensorflow:Graph was finalized.
INFO:tensorflow:Restoring parameters from /tmp/tmp0a7siiqz/model.ckpt-200
INFO:tensorflow:Running local_init_op.
INFO:tensorflow:Done running local_init_op.
[{'logits': array([-3.3343773], dtype=float32), 'logistic': array([0.03441048], dtype=float32), 'probabilities': array([0.9655895 , 0.03441049], dtype=float32), 'class_ids': array([0]), 'classes': array([b'0'], dtype=object), 'all_class_ids': array([0, 1], dtype=int32), 'all_classes': array([b'0', b'1'], dtype=object)}, {'logits': array([-1.0205002], dtype=float32), 'logistic': array([0.26492995], dtype=float32), 'probabilities'