# Setup

- uses functions from [utils](https://github.com/IanQS/blogpostcode/blob/master/src/Tf_Exploration/exploration/utils.py)

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import tensorflow as tf

tf.logging.set_verbosity(tf.logging.ERROR)
print(tf.__version__)

#tf.enable_eager_execution()
import numpy as np
import os
import datetime
import tqdm
import sys
import pprint

1.12.0


In [3]:
from Tf_Exploration.exploration.utils import FeatureProto, dataset_config

In [12]:
filename_list = []
evaluation_list = []
for root, dirs, files in os.walk('processed_data/'):
    for f in files:
        path = os.path.join(root, f)
        if "tfrecord" in f:
            if "train" in f:
                filename_list.append(path)
            elif "test" in f:
                evaluation_list.append(path)
            else:
                print('Unmatched: {}'.format(path))
        
print(filename_list)

dataset = tf.data.TFRecordDataset(filename_list)

feature_proto = FeatureProto(one_hot=True)
num_cpus = os.cpu_count()

def train_func():
    return dataset_config(filenames=filename_list, batch_size=64, mapper=feature_proto.unpack, num_cpus=num_cpus)

def test_func():
    return dataset_config(filenames=evaluation_list, batch_size=4096, mapper=feature_proto.unpack, num_cpus=num_cpus)
columns = feature_proto.get_feature_columns()

['processed_data/covtype_train_1_2019-01-06_02:00:00.tfrecord', 'processed_data/covtype_train_3_2019-01-06_02:00:00.tfrecord', 'processed_data/covtype_train_0_2019-01-06_02:00:00.tfrecord', 'processed_data/covtype_train_2_2019-01-06_02:00:00.tfrecord']


# Custom Network

In [13]:
is_training = tf.placeholder_with_default(True, shape=(), name='Is_Training')
features, labels = tf.cond(is_training, train_func, test_func)

dense_tensor = tf.feature_column.input_layer(features=features, feature_columns=columns)
for units in [256, 16]:
    dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu)
logits = tf.layers.dense(dense_tensor, 8)

# Verification
correct_pred = tf.equal(tf.cast(logits, tf.int32), labels)
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

# Training 
loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=labels))
optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
train_op = optimizer.minimize(loss_op)

In [14]:
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    keep_iterating = True

    i = 0
    print('Accuracy: {}'.format(sess.run(accuracy)))
    while keep_iterating:
        i += 1
        try:
            _, loss_val = sess.run([train_op, loss_op])
            if i % 1000 == 1:
                accuracy_value = sess.run(accuracy, feed_dict={is_training: False})
                print('Iteration: {}: Loss: {} Accuracy: {}'.format(i, loss_val, accuracy_value))
        except tf.errors.OutOfRangeError:
            print('Iteration: {}: Loss: {} Accuracy: {}'.format(i, loss_val, accuracy_value))
            print('Out of range')
            keep_iterating = False
        except Exception as e:
            print(e)
            keep_iterating = False

Accuracy: 0.0
Iteration: 1: Loss: 517.5015869140625 Accuracy: 0.0
Iteration: 1001: Loss: 1.0586555004119873 Accuracy: 0.48236083984375
Iteration: 2001: Loss: 1.135718584060669 Accuracy: 0.357940673828125
Iteration: 3001: Loss: 1.4994240999221802 Accuracy: 0.359619140625
Iteration: 4001: Loss: 1.1727173328399658 Accuracy: 0.360504150390625
Iteration: 5001: Loss: 1.1828272342681885 Accuracy: 0.482177734375
Iteration: 5701: Loss: 1.139478325843811 Accuracy: 0.482177734375
Out of range


# Canned Estimator

In [8]:
# Util functions
feature_proto = FeatureProto(one_hot=False)
num_cpus = os.cpu_count()

def input_fn_train(): # returns x, y (where y represents label's class index).
    return dataset_config(filenames=filename_list, batch_size=64, mapper=feature_proto.unpack, num_cpus=num_cpus)

def input_fn_eval(): # returns x, y (where y represents label's class index).
    return dataset_config(filenames=evaluation_list, batch_size=2048, mapper=feature_proto.unpack, num_cpus=num_cpus)


def evaluate(estimator):
    # Fit model.
    fit = estimator.evaluate(input_fn=input_fn_eval)
    print(fit)
    estimator.train(input_fn=input_fn_train)
    fit = estimator.evaluate(input_fn=input_fn_eval)
    print(fit)

In [9]:
from tensorflow.estimator import DNNClassifier

estimator = DNNClassifier(
    feature_columns=columns,
    n_classes=8,
    hidden_units=[256, 16],
    optimizer=lambda: tf.train.AdamOptimizer(
        learning_rate=tf.train.exponential_decay(
            learning_rate=0.1,
            global_step=tf.train.get_global_step(),
            decay_steps=10000,
            decay_rate=0.96)
    )
)

evaluate(estimator)

{'accuracy': 0.30092165, 'average_loss': 263.777, 'loss': 537748.75, 'global_step': 0}
{'accuracy': 0.48942798, 'average_loss': 1.2048812, 'loss': 2456.33, 'global_step': 5701}


# Baseline Estimator

In [10]:
from tensorflow.estimator import BaselineClassifier

classifier = BaselineClassifier(n_classes=8)

evaluate(classifier)

{'accuracy': 0.0, 'average_loss': 2.079439, 'loss': 4239.246, 'global_step': 0}
{'accuracy': 0.48942798, 'average_loss': 1.2046915, 'loss': 2455.9434, 'global_step': 5701}
