# In [None]:
import tensorflow as tf
import pandas as pd

# Load the Titanic training and evaluation splits.
dftrain = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/train.csv')
dfeval = pd.read_csv('https://storage.googleapis.com/tf-datasets/titanic/eval.csv')

# Pop the 'survived' label column off each frame so only features remain.
y_train = dftrain.pop('survived')
y_eval = dfeval.pop('survived')

# The sibling/spouse count column in this dataset is named 'n_siblings_spouses';
# any other spelling raises KeyError when the vocabulary is looked up below.
CATEGORICAL_COLUMNS = ['sex', 'n_siblings_spouses', 'parch', 'class', 'deck', 'embark_town', 'alone']
NUMERIC_COLUMNS = ['age', 'fare']

feature_columns = []
for feature_name in CATEGORICAL_COLUMNS:
    # The vocabulary is every distinct value this column takes in the training data.
    vocabulary = dftrain[feature_name].unique()
    feature_columns.append(tf.feature_column.categorical_column_with_vocabulary_list(feature_name, vocabulary))

for feature_name in NUMERIC_COLUMNS:
    feature_columns.append(tf.feature_column.numeric_column(feature_name, dtype=tf.float32))

print(feature_columns)

def make_input_fn(data_df, label_df, num_epochs=10, shuffle=True, batch_size=32):
    """Return a zero-argument function that builds a ``tf.data.Dataset``.

    Args:
        data_df: Feature table (a pandas DataFrame at the call sites in this
            file); converted with ``dict(...)`` before slicing.
        label_df: Labels paired element-wise with ``data_df`` (e.g. y_train
            or y_eval).
        num_epochs: How many times the batched dataset is repeated.
        shuffle: When truthy, shuffle with a buffer of 1000 before batching.
        batch_size: Number of examples per batch.

    Returns:
        The inner ``input_function``; the dataset is only built when that
        function is called.
    """

    def input_function():
        features = dict(data_df)
        dataset = tf.data.Dataset.from_tensor_slices((features, label_df))
        if shuffle:
            dataset = dataset.shuffle(1000)
        # Batch first, then repeat the batched stream num_epochs times.
        batched = dataset.batch(batch_size)
        return batched.repeat(num_epochs)

    return input_function


# Build the input functions that are passed to the estimator. Each make_input_fn
# call returns a function object; no dataset is created at this point.
train_input_fn = make_input_fn(dftrain, y_train)  # training data, make_input_fn defaults (10 epochs, shuffled)
eval_input_fn = make_input_fn(dfeval, y_eval, num_epochs=1, shuffle=False)  # one unshuffled pass over the eval data


linear_est = tf.estimator.LinearClassifier(feature_columns=feature_columns)
# Create a linear classifier from the feature columns defined earlier.
# NOTE(review): tf.estimator is deprecated in recent TensorFlow releases -
# confirm the TensorFlow version this script is expected to run on.


# Train the model, then evaluate it.
linear_est.train(train_input_fn)  # fit on the training input function
result = linear_est.evaluate(eval_input_fn)  # metrics computed on the evaluation data


# NOTE(review): clear_output is not imported anywhere in the visible source -
# presumably `from IPython.display import clear_output` from a notebook session; confirm.
clear_output()  # clear the console/cell output before printing the metrics
print(result['accuracy'])  # `result` is the metrics mapping returned by evaluate(); print its accuracy entry
# The original author recorded an accuracy of 0.7386364 on a first run and judged it not good.
print(result)

# `result` is rebound here: it now holds the list of per-example predictions
# for the evaluation data, no longer the metrics from evaluate().
result = list(linear_est.predict(eval_input_fn))  # materialize the predictions into a list
print(result)
print(result[0])  # prediction entry for the first evaluation example
print(result[0]['probabilities'][1])  # probability of survival
print(result[0]['probabilities'][0])  # probability of not surviving

print(dfeval.loc[0])  # features of the first person in the evaluation data
print(dfeval.loc[3])  # features of the row labelled 3

print(y_eval[3])  # 'survived' label for the same row labelled 3