# Setup

- Contains setup functions (inherited from data_exploration.ipynb)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf

# Raise TensorFlow's log threshold to ERROR so that lower-severity log
# messages do not flood the cell outputs below.
tf.logging.set_verbosity(tf.logging.ERROR)

#tf.enable_eager_execution()
import numpy as np
import os
import datetime
import tqdm
import sys
import pprint

In [3]:
from Tf_Exploration.exploration.utils import FeatureProto, dataset_config

In [4]:
# Collect every file under processed_data/ (recursively).
# os.path.join is used rather than plain string concatenation: os.walk yields
# nested directory names without a trailing separator, so concatenating
# dirname + filename would produce a broken path for any file in a subfolder.
# Sorting makes the file order deterministic across runs and platforms.
filename_list = sorted(
    os.path.join(dirname, fname)
    for dirname, _subdirs, filenames in os.walk('processed_data/')
    for fname in filenames
)
print(filename_list)

# Fail early with a clear message instead of an obscure error further down.
if not filename_list:
    raise FileNotFoundError("No data files found under 'processed_data/'")

dataset = tf.data.TFRecordDataset(filename_list)

# feature_proto supplies both the per-record mapper (unpack) and the matching
# feature columns, so the parsing step and the model inputs stay in sync.
feature_proto = FeatureProto()
num_cpus = os.cpu_count()
# dataset_config is a project helper; here its result is unpacked as a
# (features, labels) pair. repeat=True is requested only for this pipeline.
features, labels = dataset_config(
    filename_list,
    batch_size=64,
    mapper=feature_proto.unpack,
    num_cpus=num_cpus,
    repeat=True,
)
columns = feature_proto.get_feature_columns()

['processed_data/tf_record_covtype_test_2018-12-27 21:00:00', 'processed_data/tf_record_covtype_train_2018-12-27 21:00:00']


# Define the network

`tf.feature_column.input_layer` takes two arguments:

- `features` (dictionary): the result of parsing the dataset (`parse_example`).
- `feature_columns` (list): the series of keys to look up in the `features` dict.

In [5]:
dense_tensor = tf.feature_column.input_layer(features=features, feature_columns=columns)
use_custom = False  # Vs estimator

if use_custom:
    # Hand-built graph: two ReLU hidden layers followed by an 8-way logits layer.
    inputs = dense_tensor
    for units in [256, 16]:
        dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu)
    logits = tf.layers.dense(dense_tensor, 8)

    # Verification
    # NOTE(review): argmax over axis 1 of `labels` (and the softmax
    # cross-entropy below) treat `labels` as one-hot vectors, while the
    # estimator branch documents labels as class indices -- confirm what
    # feature_proto.unpack emits before enabling this branch.
    correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # Training
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels))
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
    train_op = optimizer.minimize(loss_op)
else:
    # The estimator class is reached through the already-imported `tf`
    # namespace, so this cell needs no import of its own.
    # NOTE(review): the recorded run finishes at the same accuracy as the
    # BaselineClassifier cell (0.3646), i.e. the network does not beat a
    # prior-only model; the initial Adam learning rate of 0.1 is a likely
    # suspect -- TODO tune.
    estimator = tf.estimator.DNNClassifier(
        feature_columns=columns,
        n_classes=8,
        hidden_units=[256, 16, 8],
        # Passed as a callable so the optimizer (and the decay schedule that
        # reads the global step) is constructed when the estimator builds
        # its graph rather than when this cell runs.
        optimizer=lambda: tf.train.AdamOptimizer(
            learning_rate=tf.train.exponential_decay(
                learning_rate=0.1,
                global_step=tf.train.get_global_step(),
                decay_steps=10000,
                decay_rate=0.96)
        )
    )

    # NOTE(review): both input functions read the same `filename_list`, which
    # per the printed listing holds the train AND the test file, so the
    # evaluation below is not on held-out data -- confirm this is intended.
    def input_fn_train():
        """Return x, y for training (y is the label's class index); batch size 64."""
        return dataset_config(filename_list, batch_size=64, mapper=feature_proto.unpack, num_cpus=num_cpus)

    def input_fn_eval():
        """Return x, y for evaluation (y is the label's class index); batch size 2048."""
        return dataset_config(filename_list, batch_size=2048, mapper=feature_proto.unpack, num_cpus=num_cpus)

    # Metrics of the untrained model, for reference.
    metrics_before = estimator.evaluate(input_fn=input_fn_eval)
    print(metrics_before)

    # Fit model. No `steps`/`max_steps` is passed to train().
    estimator.train(input_fn=input_fn_train)

    metrics_after = estimator.evaluate(input_fn=input_fn_eval)
    print(metrics_after)

    # Reuse the post-training metrics instead of running another full
    # evaluation pass just to read the loss.
    loss = metrics_after["loss"]

{'accuracy': 0.35716817, 'average_loss': 223.98567, 'loss': 458233.66, 'global_step': 0}
{'accuracy': 0.36460522, 'average_loss': 1.321195, 'loss': 2702.923, 'global_step': 9079}


In [7]:
####################################################
# Build BaselineClassifier
####################################################

# BaselineClassifier is constructed without feature columns, which makes it
# a reference point for the DNN results. The class is reached through the
# already-imported `tf` namespace, so this cell needs no import of its own.
classifier = tf.estimator.BaselineClassifier(n_classes=8)

# Input builders (defined here as well so this cell does not depend on which
# branch of the network-definition cell was executed).
def input_fn_train():
    """Return x, y for training (y is the label's class index); batch size 64."""
    return dataset_config(filename_list, batch_size=64, mapper=feature_proto.unpack, num_cpus=num_cpus)

def input_fn_eval():
    """Return x, y for evaluation (y is the label's class index); batch size 2048."""
    return dataset_config(filename_list, batch_size=2048, mapper=feature_proto.unpack, num_cpus=num_cpus)

# Metrics of the untrained baseline, for reference.
baseline_metrics_before = classifier.evaluate(input_fn=input_fn_eval)
print(baseline_metrics_before)

# Fit model.
classifier.train(input_fn=input_fn_train)

# Evaluate the trained baseline. Note that evaluation reads the same
# `filename_list` as training (train and test files together).
baseline_metrics_after = classifier.evaluate(input_fn=input_fn_eval)
print(baseline_metrics_after)

# `loss` is kept bound to the final metrics dict for backward compatibility
# with any code that reads it after this cell.
loss = baseline_metrics_after

{'accuracy': 0.0, 'average_loss': 2.0794415, 'loss': 4254.1567, 'global_step': 0}
{'accuracy': 0.36460522, 'average_loss': 1.3038024, 'loss': 2667.3408, 'global_step': 9079}
