In [None]:
from __future__ import absolute_import, division, print_function, unicode_literals

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.display import clear_output, display

import tensorflow as tf

%matplotlib inline
plt.style.use("ggplot")
plt.rcParams["figure.figsize"] = (12,8)
plt.rcParams["figure.autolayout"] = True

In [None]:
# Read the training and evaluation splits from the local Data/ directory.
dftrain, dfeval = (
    pd.read_csv(csv_path) for csv_path in ('Data/train.csv', 'Data/eval.csv')
)


In [None]:
# Preview the first rows of the training frame as loaded from CSV.
dftrain.head()

In [None]:
# Separate the 'survived' label column from the feature frames.
# `pop` removes the column in place, so each call is guarded: without the
# guard, re-running this cell after the column is gone raises KeyError.
if 'survived' in dftrain.columns:
    y_train = dftrain.pop('survived')
if 'survived' in dfeval.columns:
    y_eval = dfeval.pop('survived')

# A cell renders only its last expression, so show both previews explicitly.
display(y_train.head(), dftrain.head())

In [None]:
# Summary statistics of the training features (pandas describes numeric columns by default).
dftrain.describe()

In [None]:
# (rows, columns) of the training features.
dftrain.shape

In [None]:
# Histogram of the `age` column using 20 bins.
dftrain['age'].hist(bins=20)

In [None]:
# Horizontal bar chart of the number of rows in each `sex` category.
dftrain['sex'].value_counts().plot.barh()

In [None]:
# Re-attach the labels to the features, then plot the mean of `survived` per sex.
# NOTE(review): if labels are 0/1 the mean is a 0-1 fraction, while the axis
# label says '%' — confirm the intended unit.
(
    pd.concat([dftrain, y_train], axis=1)
    .groupby('sex')['survived']
    .mean()
    .plot.barh()
    .set_xlabel('% survive')
)

In [None]:
categorical_column = [
    'sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone',
]
numeric_columns = ['age', 'fare']

# Categorical features: one vocabulary-list column each, where the vocabulary
# is the set of distinct values observed for that feature in the training data.
feature_columns = [
    tf.feature_column.categorical_column_with_vocabulary_list(
        name, dftrain[name].unique()
    )
    for name in categorical_column
]

# Numeric features are appended after the categorical ones as float32 columns.
feature_columns += [
    tf.feature_column.numeric_column(name, dtype=tf.float32)
    for name in numeric_columns
]


In [None]:
# Inspect the list of feature column definitions.
print(feature_columns)

In [None]:
def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Build a zero-argument input function that produces a tf.data.Dataset.

    Args:
        data_df: Feature data; converted with ``dict(data_df)`` so each key
            becomes a named feature.
        label_df: Labels paired with ``data_df``.
        num_epochs: How many times the batched dataset is repeated.
        shuffle: When true, shuffle the examples with a buffer of 1000.
        batch_size: Number of examples per batch.

    Returns:
        A function taking no arguments that builds and returns the dataset
        of ``(features, labels)`` batches each time it is called.
    """
    def input_function():
        features = dict(data_df)
        dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
        if shuffle:
            dataset = dataset.shuffle(1000)
        # Batch first, then repeat the batched sequence for every epoch.
        dataset = dataset.batch(batch_size)
        return dataset.repeat(num_epochs)

    return input_function

# make_input_fn returns a function rather than a dataset; the estimator
# calls that function later to obtain the data it consumes.
train_input_fn = make_input_fn(data_df=dftrain, label_df=y_train)  # defaults: 10 epochs, shuffled
# Evaluation uses a single, unshuffled pass over the data.
eval_input_fn = make_input_fn(data_df=dfeval, label_df=y_eval, num_epochs=1, shuffle=False)




In [None]:
# Build a linear classifier over the feature columns, fit it on the training
# input function, then score it on the evaluation input function.
linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
linear_est.train(input_fn=train_input_fn)
result = linear_est.evaluate(input_fn=eval_input_fn)  # dict of evaluation metrics

clear_output()  # clear the log output produced by training/evaluation
print(result['accuracy'])

In [None]:
# (rows, columns) of the evaluation features.
dfeval.shape

In [None]:
# Run the trained model over the evaluation set and collect every
# prediction into a list so it can be indexed and counted.
pred_dicts = list(linear_est.predict(eval_input_fn))

print(len(pred_dicts))
# Show only the first few entries: printing the whole list floods the cell
# output with one dict per evaluation example.
print(pred_dicts[:3])

In [None]:
# Compare the model output with the ground truth for the first evaluation
# example. pred_dicts is a list in dataset order (the evaluation input
# function does not shuffle), so the frames are indexed by position with
# `iloc` rather than by label with `loc`.
print(pred_dicts[0]["probabilities"][1])  # predicted probability of class 1 (survived)
print(y_eval.iloc[0])  # actual survival label for that example
print(dfeval.iloc[0])  # feature values of that example