<a href="https://colab.research.google.com/github/SMBH-1/tbd/blob/main/ML_Project_Titanic_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
from IPython.display import Javascript
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})'''))

!pip install -q sklearn

from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output
from six.moves import urllib
import tensorflow as tf

# Locations of the Titanic CSV files used for training and evaluation.
TRAIN_CSV_URL = 'https://storage.googleapis.com/tf-datasets/titanic/train.csv'
EVAL_CSV_URL = 'https://storage.googleapis.com/tf-datasets/titanic/eval.csv'

dftrain = pd.read_csv(TRAIN_CSV_URL)
dfeval = pd.read_csv(EVAL_CSV_URL)

# pop() removes the 'survived' column from each frame in place and returns it,
# leaving dftrain/dfeval with feature columns only and y_* holding the labels.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

# Features intentionally left out of the model. Excluding them by name gives
# the same final list as deleting by position (sex, class, deck, alone, age)
# but keeps working if the column lists above are reordered or extended.
EXCLUDED_FEATURES = {'n_siblings_spouses', 'parch', 'embark_town', 'fare'}

feature_columns = []

for feature_name in CATEGORICAL_COLUMNS:
  if feature_name in EXCLUDED_FEATURES:
    continue
  # The vocabulary is every distinct value seen for this column in the training data.
  vocabulary = dftrain[feature_name].unique()
  feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
  if feature_name in EXCLUDED_FEATURES:
    continue
  feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

<IPython.core.display.Javascript object>

  Building wheel for sklearn (setup.py) ... [?25l[?25hdone


In [5]:
# Print the feature_columns list to check which columns it currently holds.
print(feature_columns)

[VocabularyListCategoricalColumn(key='sex', vocabulary_list=('male', 'female'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='class', vocabulary_list=('Third', 'First', 'Second'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='deck', vocabulary_list=('unknown', 'C', 'G', 'A', 'B', 'D', 'F', 'E'), dtype=tf.string, default_value=-1, num_oov_buckets=0), VocabularyListCategoricalColumn(key='alone', vocabulary_list=('n', 'y'), dtype=tf.string, default_value=-1, num_oov_buckets=0), NumericColumn(key='age', shape=(1,), default_value=None, dtype=tf.float32, normalizer_fn=None)]


In [6]:
def make_input_fn(data_df, label_df, num_epochs=1000, shuffle=True, batch_size=24, shuffle_buffer_size=1000):
  """Build a zero-argument function that creates a batched tf.data.Dataset.

  Args:
    data_df: feature table; converted with dict(data_df), so each key/column
      becomes one named feature in the dataset.
    label_df: labels paired with data_df (both are sliced together by
      Dataset.from_tensor_slices).
    num_epochs: how many times the batched dataset is repeated.
    shuffle: if true, shuffle the examples before batching.
    batch_size: number of examples per batch.
    shuffle_buffer_size: buffer size passed to Dataset.shuffle; only used
      when shuffle is true.

  Returns:
    A function taking no arguments that builds and returns the dataset.
  """
  def input_function():
    # Pair the feature dict with the labels and slice them into examples.
    ds = tf.data.Dataset.from_tensor_slices((dict(data_df), label_df))
    if shuffle:
      ds = ds.shuffle(shuffle_buffer_size)  # randomize example order
    # Group into batches of batch_size, then repeat the batched data num_epochs times.
    ds = ds.batch(batch_size).repeat(num_epochs)
    return ds
  return input_function

# make_input_fn returns a function object; it is handed to the estimator below
# rather than being called here.
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)
# Evaluation: a single pass over the data, in its original order.
eval_input_fn = make_input_fn(data_df=dfeval, label_df=y_eval, num_epochs=1, shuffle=False)

linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)

# Fit on the training input, then compute metrics on the evaluation input.
linear_est.train(input_fn=train_input_fn)
result = linear_est.evaluate(input_fn=eval_input_fn)

# Clear the accumulated cell output before showing the metrics.
clear_output()
# `result` is a dict of metrics; show the full dict, then the accuracy on its own line.
print(result, result['accuracy'], sep='\n')

{'accuracy': 0.7689394, 'accuracy_baseline': 0.625, 'auc': 0.82868075, 'auc_precision_recall': 0.7856128, 'average_loss': 0.4889413, 'label/mean': 0.375, 'loss': 0.48894137, 'precision': 0.6759259, 'prediction/mean': 0.4027055, 'recall': 0.7373737, 'global_step': 27000}
0.7689394


In [7]:
# Materialize the predictions for the evaluation input as a list.
# NOTE: this rebinds `result`, which previously held the evaluation metrics dict.
result = list(linear_est.predict(eval_input_fn))

# Zero-based position of the passenger to inspect.
passenger = 7

# `result` is a plain list and is indexed by position, so use .iloc (positional)
# on the DataFrame and Series as well; .loc is label-based and would only agree
# with the list position while the index is the default 0..n-1 range.
print(dfeval.iloc[passenger])
print(y_eval.iloc[passenger])
# Index 1 of the 'probabilities' entry (presumably the "survived" class -- confirm).
print(result[passenger]['probabilities'][1])


sex                          male
age                          21.0
n_siblings_spouses              0
parch                           0
fare                         8.05
class                       Third
deck                      unknown
embark_town           Southampton
alone                           y
Name: 7, dtype: object
0
0.10999823
