# Performing Classification with the Estimators API (tf.estimator)

In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import os

from numpy import genfromtxt

%matplotlib inline

## 1. Load and prepare the data

In [2]:
def _split_labelled_csv(csv_path):
    """Parse a comma-delimited file and return ``(features, labels)``.

    The first column of the file is returned as the label vector and every
    remaining column as part of the feature matrix.
    """
    table = genfromtxt(csv_path, delimiter=',')
    return table[:, 1:], table[:, 0]


current_dir = os.getcwd()

## Training data: <cwd>/../data/small_higgs.csv
train_dataset_path = os.path.join(current_dir, os.pardir, 'data', 'small_higgs.csv')
X_train, y_train = _split_labelled_csv(train_dataset_path)

## Validation data: <cwd>/../data/validation_higgs.csv
validation_dataset_path = os.path.join(current_dir, os.pardir, 'data', 'validation_higgs.csv')
X_val, y_val = _split_labelled_csv(validation_dataset_path)

## 2. Write one or more dataset importing functions

In [3]:
## Names of the feature columns, in the order in which they are paired with
## the columns of the feature matrix: lepton and missing-energy quantities,
## four quantities for each of the four jets, then the invariant-mass features.
feature_names = (
    ['lepton_pT', 'lepton_eta', 'lepton_phi']
    + ['missing_energy_magnitude', 'missing_energy_phi']
    + ['jet_{}_{}'.format(jet, quantity)
       for jet in range(1, 5)
       for quantity in ('pt', 'eta', 'phi', 'b_tag')]
    + ['m_jj', 'm_jjj', 'm_lv', 'm_jlv', 'm_bb', 'm_wbb', 'm_wwbb']
)

In [15]:
## Mini-batch size and number of passes over the training data
BATCH_SIZE = 128
N_EPOCHS = 400

## Feature dictionaries: every feature name is mapped to its column of the data matrix
train_features_dict = dict(zip(feature_names, X_train.T))
val_features_dict = dict(zip(feature_names, X_val.T))

## Training input function: shuffled mini-batches, repeated for N_EPOCHS epochs
input_fn_train = tf.estimator.inputs.numpy_input_fn(
    x=train_features_dict, y=y_train,
    batch_size=BATCH_SIZE, num_epochs=N_EPOCHS, shuffle=True)

## Validation input function: a single, unshuffled pass over the validation data
## (batch_size is not passed, so the library default applies)
input_fn_val = tf.estimator.inputs.numpy_input_fn(
    x=val_features_dict, y=y_val,
    num_epochs=1, shuffle=False)

## 3. Define the feature columns

Remember:
> When defining a feature column such as `tf.feature_column.numeric_column('feature_1')`, the string `feature_1` must also be a key in the feature dictionaries passed to the input functions (here `train_features_dict` and `val_features_dict`).

In [5]:
## One numeric feature column per entry of feature_names; each name is the key
## under which the corresponding values are looked up in the input dictionaries.
numeric_feature_cols = list(map(tf.feature_column.numeric_column, feature_names))

## 4. Instantiate the DNNClassifier Estimator

In [16]:
## Fully connected classifier: three hidden layers of 200 ELU units each
higgs_model = tf.estimator.DNNClassifier(
    hidden_units=[200] * 3,
    feature_columns=numeric_feature_cols,
    activation_fn=tf.nn.elu,
    # In the last section we used keep_prob=0.8, hence dropout = 1 - keep_prob = 1 - 0.8 = 0.2
    dropout=0.2,
    optimizer='Adagrad',
    # Directory into which the model checkpoints are saved
    model_dir='./high_level_api_classification')
# Loss is calculated by using softmax cross entropy.

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './high_level_api_classification', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x000001DC5FD72630>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


## 5. Train the model

In [17]:
## Train the classifier on the training input function. No `steps`/`max_steps`
## argument is passed, so the length of the run is bounded by the input function
## itself (num_epochs=N_EPOCHS in input_fn_train).
higgs_model.train(input_fn=input_fn_train)

INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into ./high_level_api_classification\model.ckpt.
INFO:tensorflow:loss = 97.729, step = 1
INFO:tensorflow:global_step/sec: 137.565
INFO:tensorflow:loss = 93.2399, step = 101 (0.728 sec)
INFO:tensorflow:global_step/sec: 184.011
INFO:tensorflow:loss = 92.7844, step = 201 (0.543 sec)
INFO:tensorflow:global_step/sec: 187.47
INFO:tensorflow:loss = 93.4877, step = 301 (0.532 sec)
INFO:tensorflow:global_step/sec: 185.725
INFO:tensorflow:loss = 86.9227, step = 401 (0.539 sec)
INFO:tensorflow:global_step/sec: 172.85
INFO:tensorflow:loss = 85.7106, step = 501 (0.579 sec)
INFO:tensorflow:global_step/sec: 185.725
INFO:tensorflow:loss = 91.7221, step = 601 (0.538 sec)
INFO:tensorflow:global_step/sec: 173.15
INFO:tensorflow:loss = 90.2454, step = 701 (0.578 sec)
INFO:tensorflow:global_step/sec: 169.328
INFO:tensorflow:loss = 88.9947, step = 801 (0.591 sec)
INFO:tensorflow:global_step/sec: 183.672
INFO:tensorflow:loss

<tensorflow.python.estimator.canned.dnn.DNNClassifier at 0x1dc5fd72940>

## 6. Visualize/analyze the results of the model

In [20]:
## Evaluate the trained model on the validation input function (one unshuffled pass);
## metrics such as accuracy, AUC and average loss are reported in the log output.
higgs_model.evaluate(input_fn=input_fn_val)

INFO:tensorflow:Starting evaluation at 2018-01-12-14:40:39
INFO:tensorflow:Restoring parameters from ./high_level_api_classification\model.ckpt-1563
INFO:tensorflow:Finished evaluation at 2018-01-12-14:40:40
INFO:tensorflow:Saving dict for global step 1563: accuracy = 0.6374, accuracy_baseline = 0.535, auc = 0.694428, auc_precision_recall = 0.710696, average_loss = 0.62848, global_step = 1563, label/mean = 0.535, loss = 78.56, prediction/mean = 0.562754
