# Tensorflow Estimator API 

### Introduction

In this project I show how to use the TensorFlow `Estimator API within TF 2.0`. I will use the US Census Dataset to create a binary classification model. The data has already been preprocessed, normalized and one-hot encoded. It is stored in a Google Cloud Storage bucket, ready to be loaded into a pandas DataFrame or a TensorFlow dataset.

### Import Libraries

In [35]:
from collections import namedtuple
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import datetime
import functools

In [36]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow import feature_column

In [37]:
# Display the version string of the imported TensorFlow package.
tf.__version__

'2.0.0-beta0'

### Load Data from Google Cloud Storage

In [38]:
# Google Cloud Storage locations of the preprocessed CSV splits.

# Training split: features and labels.
x_train_data = 'gs://crazy-hippo-01/dataset/x_train.csv'
y_train_data = 'gs://crazy-hippo-01/dataset/y_train.csv'

# Test split: features and labels.
x_test_data = 'gs://crazy-hippo-01/dataset/x_test.csv'
y_test_data = 'gs://crazy-hippo-01/dataset/y_test.csv'

# Validation split: features and labels.
# NOTE: these two names are rebound from path strings to DataFrames when
# the files are read in the next cell.
x_val = 'gs://crazy-hippo-01/dataset/x_val.csv'
y_val = 'gs://crazy-hippo-01/dataset/y_val.csv'

In [39]:
#Read CSV files into a Pandas Dataframe
X_train = pd.read_csv(x_train_data)
y_train = pd.read_csv(y_train_data)
X_test = pd.read_csv(x_test_data)
y_test = pd.read_csv(y_test_data)
x_val = pd.read_csv(x_val)
y_val = pd.read_csv(y_val)

### Create Tensorflow Dataset

In [40]:
# Column groups derived from the header of the training features frame.
# The first two columns are labelled categorical, the remaining ones numeric.
CATEGORICAL_COLS = X_train.columns[:2]
NUMERIC_COLS = X_train.columns[2:]

# Every column of the training frame is used as a model input.
FEATURES = X_train.columns

In [41]:
def create_dataset(mode):
    """Build the ``tf.data`` input pipeline for one data split.

    This function is passed as ``input_fn`` to both ``TrainSpec`` and
    ``EvalSpec`` in this notebook.

    Only the CSV files belonging to the requested split are read, so an
    evaluation call no longer downloads the training and test data (and
    vice versa).

    Args:
        mode: A ``tf.estimator.ModeKeys`` value. ``TRAIN`` selects the
            training split; any other value selects the validation split.

    Returns:
        A ``tf.data.Dataset`` yielding ``(features, labels)`` batches, where
        ``features`` is a dict keyed by CSV column name. The training
        dataset is shuffled and repeats indefinitely; the validation
        dataset is a single pass.
    """
    # Batch size shared by the training and evaluation pipelines.
    BATCH_SIZE = 32

    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Pick the files for the requested split only.
    if is_training:
        features_path = 'gs://crazy-hippo-01/dataset/x_train.csv'
        labels_path = 'gs://crazy-hippo-01/dataset/y_train.csv'
    else:
        features_path = 'gs://crazy-hippo-01/dataset/x_val.csv'
        labels_path = 'gs://crazy-hippo-01/dataset/y_val.csv'

    # Read CSV files into pandas DataFrames.
    features = pd.read_csv(features_path)
    labels = pd.read_csv(labels_path)

    dataset = tf.data.Dataset.from_tensor_slices(
        (dict(features), labels.values)
    )

    # Apply shuffle, batch and repeat to the training data.
    # For evaluation we only batch.
    if is_training:
        dataset = dataset.shuffle(1000).batch(BATCH_SIZE).repeat()
    else:
        dataset = dataset.batch(BATCH_SIZE)

    # Let tf.data choose the prefetch buffer size. A literal 1 is a fixed
    # buffer of one batch, not AUTOTUNE.
    return dataset.prefetch(tf.data.experimental.AUTOTUNE)

### Build Model

In [42]:
def create_dnn_model(INPUTS, NUMERIC_COLS, CATEGORICAL_COLS):
    """Create a two-class ``DNNClassifier`` over the given input features.

    Every name in ``INPUTS`` is registered as a numeric feature column.

    Args:
        INPUTS: Iterable of feature names to feed into the network.
        NUMERIC_COLS: Accepted for interface compatibility; not used by
            the body.
        CATEGORICAL_COLS: Accepted for interface compatibility; not used
            by the body.

    Returns:
        A ``tf.estimator.DNNClassifier`` with two hidden layers of 32 units
        each, writing its checkpoints and summaries to ``logs/fit/``.
    """
    # One numeric column per input feature.
    numeric_features = [
        feature_column.numeric_column(name) for name in INPUTS
    ]

    return tf.estimator.DNNClassifier(
        feature_columns=numeric_features,
        hidden_units=[32, 32],
        n_classes=2,
        model_dir="logs/fit/",
    )

# Instantiate the estimator from the column groups computed above.
estimator_model = create_dnn_model(
    INPUTS=FEATURES,
    NUMERIC_COLS=NUMERIC_COLS,
    CATEGORICAL_COLS=CATEGORICAL_COLS,
)

print('Your model has been built....')

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_keep_checkpoint_max': 5, '_service': None, '_save_checkpoints_steps': None, '_device_fn': None, '_experimental_distribute': None, '_num_ps_replicas': 0, '_save_summary_steps': 100, '_is_chief': True, '_protocol': None, '_task_type': 'worker', '_task_id': 0, '_master': '', '_eval_distribute': None, '_train_distribute': None, '_experimental_max_worker_delay_secs': None, '_num_worker_replicas': 1, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7efd307a8f28>, '_save_checkpoints_secs': 600, '_tf_random_seed': None, '_model_dir': 'logs/fit/', '_keep_checkpoint_every_n_hours': 10000, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_log_step_count_steps': 100, '_global_id_in_cluster': 0, '_evaluation_master': ''}
Your model has been built....


### Train Model

In [43]:
# Timestamped run directory under logs/fit/, e.g. logs/fit/20190101-120000.
run_stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
log_dir = "logs/fit/" + run_stamp

# Keras TensorBoard callback writing to the run directory.
# NOTE(review): this callback is not passed to the train_and_evaluate call
# below, so in the visible notebook it has no effect on estimator training.
tensorboard_callback = tf.keras.callbacks.TensorBoard(
    log_dir=log_dir,
    histogram_freq=1,
)

In [None]:
# Training runs until global step 10000; create_dataset serves as the input
# function for both the training and the evaluation spec.
training_spec = tf.estimator.TrainSpec(input_fn=create_dataset, max_steps=10000)
evaluation_spec = tf.estimator.EvalSpec(input_fn=create_dataset)

# `history` holds whatever train_and_evaluate returns (displayed in the
# next cell as a tuple of an evaluation-metrics dict and a list).
history = tf.estimator.train_and_evaluate(
    estimator_model,
    train_spec=training_spec,
    eval_spec=evaluation_spec,
)

### Evaluate Model

In [45]:
# Display the value returned by tf.estimator.train_and_evaluate.
history

({'accuracy': 0.788125,
  'accuracy_baseline': 0.7640625,
  'auc': 0.82552505,
  'auc_precision_recall': 0.57387626,
  'average_loss': 0.42559308,
  'global_step': 10000,
  'label/mean': 0.2359375,
  'loss': 0.42559308,
  'precision': 0.7549669,
  'prediction/mean': 0.23787342,
  'recall': 0.15099338},
 [])

### Visualize Training

In [None]:
# WARNING: this deletes the whole ./logs/ directory, which includes the
# estimator's model_dir (logs/fit/) with its checkpoints and event files.
# NOTE(review): presumably meant for clearing old runs *before* training;
# running it right before the TensorBoard cell below leaves nothing to show.
!rm -rf ./logs/ 

In [48]:
# Load the TensorBoard notebook extension and start it on the ./logs
# directory, the parent of the estimator's model_dir (logs/fit/).
%load_ext tensorboard
%tensorboard --logdir logs

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6006 (pid 5027), started 0:00:31 ago. (Use '!kill 5027' to kill it.)