In [None]:
import tensorflow as tf

In [None]:
# Fetch the Titanic train/eval CSVs; the returned values are used below as
# local file paths (e.g. they are handed to `pd.read_csv`).
train_file_path = tf.keras.utils.get_file(
    fname="train.csv",
    origin="https://storage.googleapis.com/tf-datasets/titanic/train.csv",
)

test_file_path = tf.keras.utils.get_file(
    fname="eval.csv",
    origin="https://storage.googleapis.com/tf-datasets/titanic/eval.csv",
)

def get_dataset(file_path, **kwargs):
  """Build a batched dataset from a CSV file via `make_csv_dataset`.

  Args:
    file_path: Path forwarded unchanged to
      `tf.data.experimental.make_csv_dataset`.
    **kwargs: Extra keyword arguments forwarded to `make_csv_dataset`
      (this file passes `column_names`, `select_columns` and
      `column_defaults`). Any of the defaults set below may also be
      overridden here, e.g. `batch_size=32`.

  Returns:
    The dataset produced by `make_csv_dataset`. Because `label_name` is set,
    callers in this file iterate it as `(features, label)` pairs.
  """
  options = dict(
      batch_size=5,  # Artificially small to make examples easier to show.
      label_name='survived',
      na_value="?",
      num_epochs=1,
      ignore_errors=True,
  )
  # Caller-supplied values take precedence over the defaults above; passing
  # one of these names previously failed with a duplicate-keyword TypeError.
  options.update(kwargs)

  return tf.data.experimental.make_csv_dataset(file_path, **options)

# Build the train and eval datasets with the default CSV options.
raw_train_data, raw_test_data = map(
    get_dataset, (train_file_path, test_file_path))

# What happens after loading the CSV file?
def show_batch(dataset):
  """Print each feature column of the first batch of `dataset`.

  `dataset` must yield `(features, label)` pairs, where `features` maps a
  column name to a value exposing `.numpy()`. The label is not printed.
  """
  row_template = "{:20s}: {}"
  for features, _ in dataset.take(1):
    for name in features:
      print(row_template.format(name, features[name].numpy()))

# Peek at one batch from a freshly built training dataset.
show_batch(dataset=get_dataset(file_path=train_file_path))
    

sex                 : [b'female' b'male' b'male' b'male' b'female']
age                 : [34. 40.  4. 39. 28.]
n_siblings_spouses  : [0 1 4 0 1]
parch               : [1 4 1 0 0]
fare                : [23.     27.9    29.125  13.     82.1708]
class               : [b'Second' b'Third' b'Third' b'Second' b'First']
deck                : [b'unknown' b'unknown' b'unknown' b'unknown' b'unknown']
embark_town         : [b'Southampton' b'Southampton' b'Queenstown' b'Southampton' b'Cherbourg']
alone               : [b'n' b'n' b'n' b'y' b'n']


In [None]:
# Getting data from named columns
CSV_COLUMNS = [
    'survived', 'sex', 'age', 'n_siblings_spouses', 'parch',
    'fare', 'class', 'deck', 'embark_town', 'alone',
]

# Supply the column names explicitly through `column_names`.
temp_dataset = get_dataset(file_path=train_file_path, column_names=CSV_COLUMNS)

show_batch(dataset=temp_dataset)

sex                 : [b'female' b'male' b'female' b'male' b'male']
age                 : [52. 18. 28. 28. 16.]
n_siblings_spouses  : [1 1 1 0 0]
parch               : [1 1 0 0 0]
fare                : [93.5    20.2125 24.15    7.75   10.5   ]
class               : [b'First' b'Third' b'Third' b'Third' b'Second']
deck                : [b'B' b'unknown' b'unknown' b'unknown' b'unknown']
embark_town         : [b'Southampton' b'Southampton' b'Queenstown' b'Queenstown' b'Southampton']
alone               : [b'n' b'n' b'n' b'y' b'y']


In [None]:
# Load only a subset of the columns; 'survived' stays in the list because
# get_dataset uses it as the label column.
SELECT_COLUMNS = [
    'survived',
    'age', 'n_siblings_spouses', 'class', 'deck', 'alone',
]
temp_dataset = get_dataset(file_path=train_file_path,
                           select_columns=SELECT_COLUMNS)
show_batch(dataset=temp_dataset)

age                 : [28. 23. 38.  2. 28.]
n_siblings_spouses  : [0 1 0 1 1]
class               : [b'Third' b'First' b'Third' b'Second' b'Third']
deck                : [b'unknown' b'D' b'unknown' b'unknown' b'unknown']
alone               : [b'y' b'n' b'y' b'n' b'n']


In [None]:
# Extracting features
SELECT_COLUMNS = [
    'survived',
    'age', 'n_siblings_spouses', 'parch', 'fare',
]

# One default per selected column, in the same order: an int for 'survived'
# and floats for the four numeric features.
DEFAULTS = [0, 0.0, 0.0, 0.0, 0.0]

temp_dataset = get_dataset(
    file_path=train_file_path,
    select_columns=SELECT_COLUMNS,
    column_defaults=DEFAULTS,
)

# Function that will pack together all the columns:
def pack(features, label):
  """Stack all feature columns into a single tensor along the last axis.

  Args:
    features: Mapping of column name to tensor; the values are stacked in
      the mapping's iteration order.
    label: Returned unchanged.

  Returns:
    A `(stacked_features, label)` tuple.
  """
  columns = [column for column in features.values()]
  stacked = tf.stack(columns, axis=-1)
  return stacked, label

# Apply `pack` to every (features, label) element of the dataset.
packed_dataset = temp_dataset.map(map_func=pack)


In [None]:
# Packing numeric features

# Columns that PackNumericFeatures merges into the single 'numeric' feature.
NUMERIC_FEATURES = [
    'age',
    'n_siblings_spouses',
    'parch',
    'fare',
]

class PackNumericFeatures(object):
  """Callable that merges the named columns into one 'numeric' feature.

  Intended for `Dataset.map`: each named column is removed from the feature
  mapping, cast to float32, and stacked along the last axis under the key
  'numeric'.
  """

  def __init__(self, names):
    """Remember which column names to pack."""
    self.names = names

  def __call__(self, features, labels):
    """Return `(features, labels)` with the named columns packed.

    Note that `features` is modified in place: the named columns are popped
    from it and the stacked tensor is stored under 'numeric'.
    """
    as_float = []
    for name in self.names:
      as_float.append(tf.cast(features.pop(name), tf.float32))
    features['numeric'] = tf.stack(as_float, axis=-1)

    return features, labels
  
# Apply the same packing step to both the train and the eval split.
packed_train_data, packed_test_data = (
    raw_split.map(PackNumericFeatures(NUMERIC_FEATURES))
    for raw_split in (raw_train_data, raw_test_data)
)

show_batch(dataset=packed_train_data)

sex                 : [b'male' b'male' b'male' b'male' b'male']
class               : [b'Third' b'Second' b'Third' b'First' b'First']
deck                : [b'unknown' b'unknown' b'unknown' b'unknown' b'unknown']
embark_town         : [b'Southampton' b'Southampton' b'Southampton' b'Southampton'
 b'Southampton']
alone               : [b'n' b'y' b'y' b'n' b'y']
numeric             : [[ 28.     8.     2.    69.55]
 [ 28.     0.     0.     0.  ]
 [ 42.     0.     0.     7.55]
 [ 50.     2.     0.   133.65]
 [ 56.     0.     0.    26.55]]


In [None]:
import pandas as pd
import numpy as np
import functools

In [None]:
# Normalizing features
def normalize_numeric_data(data, mean, std):
  """Standardize `data`: subtract `mean`, then divide by `std`.

  Args:
    data: Values to normalize.
    mean: Value(s) subtracted from `data`.
    std: Value(s) the centered data is divided by.

  Returns:
    `(data - mean) / std`.
  """
  centered = data - mean
  return centered / std

# Summary statistics of the numeric training columns.
desc = pd.read_csv(train_file_path)[NUMERIC_FEATURES].describe()

# The 'mean' and 'std' rows of the summary: one entry per numeric feature.
MEAN = np.array(desc.loc['mean'])
STD = np.array(desc.loc['std'])

# Bind the statistics so the normalizer takes only the data argument.
normalizer = functools.partial(normalize_numeric_data, mean=MEAN, std=STD)

# NOTE: despite the plural name, this is a single column object, not a list.
numeric_columns = tf.feature_column.numeric_column(
    key='numeric',
    shape=[len(NUMERIC_FEATURES)],
    normalizer_fn=normalizer,
)

# Now for the categorical features

In [None]:
# Vocabulary of each categorical column.
CATEGORIES = {
    'sex': ['male', 'female'],
    'class': ['First', 'Second', 'Third'],
    'deck': ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J'],
    # Must match the strings in the data, which spell it 'Southampton'.
    'embark_town': ['Cherbourg', 'Southampton', 'Queenstown'],
    'alone': ['y', 'n']
}

# NOTE(review): only the 'class' vocabulary is turned into a feature column
# in this cell; the other CATEGORIES entries are not referenced here.
cat_feature_col = tf.feature_column.categorical_column_with_vocabulary_list(
    key='class',
    vocabulary_list=CATEGORIES['class'])

# A single indicator column (not a list) wrapping the 'class' column.
categorical_columns = tf.feature_column.indicator_column(cat_feature_col)

# Training the model

In [None]:
# `categorical_columns` and `numeric_columns` each hold a single feature
# column object (not a list), so they are collected into a list explicitly.
# Joining them with `+` does not produce a list of feature columns.
dense_features = tf.keras.layers.DenseFeatures(
    [categorical_columns, numeric_columns])

# Feature preprocessing, two hidden layers, and a sigmoid output for the
# binary 'survived' label.
model = tf.keras.Sequential([
  dense_features,
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(128, activation='relu'),
  tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

model.fit(packed_train_data, epochs=20)

ValueError: ignored