# Setup

- Contains setup functions (inherited from data_exploration.ipynb)

In [None]:
def proto_wrap(feature_proto):
    """Build a record-parsing function bound to ``feature_proto``'s spec.

    ``feature_proto.dataset_parsing()`` is called once, here, and its result
    is reused as the feature spec for every example handed to the returned
    function.

    Returns:
        A function taking one serialized example and returning a
        ``(features, labels)`` pair.
    """
    feature_spec = feature_proto.dataset_parsing()

    def unpack(example_proto):
        """Parse one serialized example into a feature dict and one-hot labels."""
        parsed = tf.parse_single_example(example_proto, feature_spec)

        # Take the target out of the dict so it is not returned as a feature.
        cover_type = parsed.pop('Cover_Type')

        soil_type = parsed['Soil_Type']
        parsed['Soil_Type'] = tf.convert_to_tensor(soil_type)

        # Replace Wilderness_Area by the index of its largest entry along
        # axis 0, stored as float32.
        wilderness_index = tf.argmax(parsed['Wilderness_Area'], axis=0)
        parsed['Wilderness_Area'] = tf.cast(wilderness_index, dtype=tf.float32)

        # Labels become an integer one-hot vector of depth 8.
        label_ids = tf.cast(cover_type, dtype=tf.uint8)
        one_hot_labels = tf.one_hot(label_ids, 8, on_value=1, off_value=0, axis=-1)

        return parsed, one_hot_labels

    return unpack
 


def dataset_config(filenames: list, mapper=None, repeat=False, batch_size=32,
                  initializable=False, sess=None, feed_dict=None, num_cpus=None):
    """Build a batched TFRecord input pipeline and return its next-element op.

    Args:
        filenames: TFRecord file paths passed to ``tf.data.TFRecordDataset``.
        mapper: Optional function applied to every record via ``dataset.map``.
        repeat: When true, ``dataset.repeat()`` is applied (no count given).
        batch_size: Number of records per batch; also used as the prefetch
            buffer size.
        initializable: When true, an initializable iterator is created and
            its initializer is run in ``sess`` with ``feed_dict``; otherwise
            a one-shot iterator is used.
        sess: Session used to run the iterator initializer. Required when
            ``initializable`` is true.
        feed_dict: Feed dict for the iterator initializer. Required when
            ``initializable`` is true.
        num_cpus: Passed to ``dataset.map`` as ``num_parallel_calls``.

    Returns:
        The result of ``iterator.get_next()``.

    Raises:
        ValueError: ``initializable`` is true but ``sess`` is None.
        AssertionError: ``initializable`` is true but ``feed_dict`` is None.
    """
    # Validate before touching the graph so a bad call does not leave
    # half-built dataset/iterator ops behind.
    if initializable:
        if sess is None:
            raise ValueError('Initializable dataset configuration specified but session not supplied')
        if feed_dict is None:
            # Explicit raise instead of `assert` so the check survives
            # `python -O`; AssertionError is kept for existing callers.
            raise AssertionError('Supply feed dict to initializable iterator')

    dataset = tf.data.TFRecordDataset(filenames)

    if mapper is not None:
        dataset = dataset.map(mapper, num_parallel_calls=num_cpus)

    if repeat:
        dataset = dataset.repeat()

    dataset = dataset.batch(batch_size)
    # NOTE(review): prefetch is applied after batching, so this presumably
    # buffers `batch_size` whole batches rather than records - confirm intent.
    dataset = dataset.prefetch(buffer_size=batch_size)

    if initializable:
        # An initializable iterator requires running iterator.initializer
        # explicitly before use. In exchange, the dataset definition can be
        # parameterized with tf.placeholder() tensors that are fed when the
        # iterator is initialized.
        iterator = dataset.make_initializable_iterator()
        sess.run(iterator.initializer, feed_dict=feed_dict)
    else:
        # A one-shot iterator supports a single pass over the dataset with no
        # explicit initialization, but it does not support parameterization.
        iterator = dataset.make_one_shot_iterator()

    next_element = iterator.get_next()
    return next_element

In [None]:
# Collect every file below processed_data/, including nested directories.
filename_list = []
for dirname, dirnames, filenames in os.walk('processed_data/'):
    for f in filenames:
        # os.walk yields sub-directory paths without a trailing separator, so
        # the path must be joined rather than concatenated.
        filename_list.append(os.path.join(dirname, f))
print(filename_list)
# NOTE(review): dataset_config builds its own TFRecordDataset from
# filename_list; this object is kept in case other cells use it.
dataset = tf.data.TFRecordDataset(filename_list)

# Parse records in parallel, one call per available CPU.
num_cpus = os.cpu_count()
contextual_unpacker = proto_wrap(feature_proto)
training_dataset_next = dataset_config(filename_list, mapper=contextual_unpacker, num_cpus=num_cpus)

# Actual Code

In [None]:
# Stack three ReLU dense layers on top of the input layer.
dense_tensor = input_layer(features, columns)
for units in [256, 16, 8]:
    dense_tensor = tf.layers.dense(dense_tensor, units, tf.nn.relu)
# One output unit per one-hot label slot (depth 8 in proto_wrap). A softmax
# over a single unit is always 1.0, so a 1-unit output could never
# discriminate between classes.
prediction = tf.layers.dense(dense_tensor, 8, tf.nn.softmax)