In [1]:
import tensorflow as tf
import csv
import os

# Print the installed TensorFlow version so the run environment is visible in the notebook output.
print(tf.__version__)

  return f(*args, **kwds)


1.4.0


In [2]:
# CSV inputs, one list of paths per dataset split (training, validation, test).
train_data_files = ["data/train-data.csv"]
valid_data_files = ["data/valid-data.csv"]
test_data_files = ["data/test-data.csv"]

In [3]:
# Column names of the input CSV files, in column order.
HEADER = ['key', 'x', 'y', 'alpha', 'beta', 'target']
# One entry per HEADER column, in the same order -- presumably the per-column
# default values for CSV decoding; not used in this section (TODO confirm).
HEADER_DEFAULTS = [[0], [0.0], [0.0], ['NA'], ['NA'], [0.0]]
assert len(HEADER) == len(HEADER_DEFAULTS), "HEADER and HEADER_DEFAULTS must have one entry per column"

NUMERIC_FEATURE_NAMES = ['x', 'y']

# Categorical columns mapped to their vocabulary lists.
CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY = {'alpha': ['ax01', 'ax02'], 'beta': ['bx01', 'bx02']}
CATEGORICAL_FEATURE_NAMES = list(CATEGORICAL_FEATURE_NAMES_WITH_VOCABULARY.keys())

FEATURE_NAMES = NUMERIC_FEATURE_NAMES + CATEGORICAL_FEATURE_NAMES

TARGET_NAME = 'target'

# Every column that is neither a feature nor the target. Derived by walking
# HEADER rather than by set difference, because a set has no defined order and
# the resulting list would otherwise be ordered arbitrarily once more than one
# column is unused.
UNUSED_FEATURE_NAMES = [
    name for name in HEADER
    if name not in FEATURE_NAMES and name != TARGET_NAME
]

print("Header: {}".format(HEADER))
print("Numeric Features: {}".format(NUMERIC_FEATURE_NAMES))
print("Categorical Features: {}".format(CATEGORICAL_FEATURE_NAMES))
print("Target: {}".format(TARGET_NAME))
print("Unused Features: {}".format(UNUSED_FEATURE_NAMES))

Header: ['key', 'x', 'y', 'alpha', 'beta', 'target']
Numeric Features: ['x', 'y']
Categorical Features: ['alpha', 'beta']
Target: target
Unused Features: ['key']


In [4]:
def create_csv_iterator(csv_file_path, skip_header):
    """
    Lazily yield the rows of a CSV file, one list of string fields per row.

    The file is opened with tf.gfile.Open and parsed with csv.reader; it is
    closed when the generator is exhausted or closed.

    Args:
        csv_file_path: path of the CSV file to read.
        skip_header: when truthy, the first row is consumed and not yielded.

    Yields:
        Each remaining row as produced by csv.reader (blank lines come back
        as empty lists).
    """
    with tf.gfile.Open(csv_file_path) as csv_file:
        reader = csv.reader(csv_file)
        if skip_header:
            # Use the default form of next(): on an empty file a bare
            # next(reader) raises StopIteration inside this generator, which
            # Python 3.7+ converts into a RuntimeError (PEP 479).
            next(reader, None)
        for row in reader:
            yield row

In [5]:
def create_example(row):
    """
    Build a tf.train.Example from one parsed CSV row.

    Values are matched to column names by position in HEADER. Columns listed
    in UNUSED_FEATURE_NAMES are dropped. Numeric features and the target are
    stored as single-element float lists; categorical features are stored as
    single-element bytes lists holding the UTF-8 encoded string.

    Args:
        row: sequence of string field values, ordered like HEADER.

    Returns:
        The populated tf.train.Example protocol buffer object.

    Raises:
        IndexError: if row has fewer fields than HEADER.
        ValueError: if a numeric or target field cannot be parsed by float().
    """
    example = tf.train.Example()

    for i, feature_name in enumerate(HEADER):
        feature_value = row[i]

        if feature_name in UNUSED_FEATURE_NAMES:
            continue

        # The feature map entry is only touched inside a matching branch, so
        # a column that matches nothing leaves no entry behind.
        if feature_name in NUMERIC_FEATURE_NAMES:
            example.features.feature[feature_name].float_list.value.append(float(feature_value))

        elif feature_name in CATEGORICAL_FEATURE_NAMES:
            example.features.feature[feature_name].bytes_list.value.append(feature_value.encode('utf-8'))

        # TARGET_NAME is a single string: compare with ==, because `in` would
        # be a substring test and match any column whose name is a fragment
        # of the target name.
        elif feature_name == TARGET_NAME:
            example.features.feature[feature_name].float_list.value.append(float(feature_value))

    return example

In [6]:
def create_tfrecords_file(input_csv_file):
    """
    Convert one CSV file into a TFRecords file written alongside it.

    The output path is the input path with its final extension replaced by
    ".tfrecords" (data/train-data.csv -> data/train-data.tfrecords). Every
    non-empty CSV row -- including the first one, since no header row is
    skipped -- is converted with create_example and written as one
    serialized record.

    Args:
        input_csv_file: path of the CSV file to convert.
    """
    # Swap only the extension. A plain str.replace("csv", "tfrecords") would
    # also rewrite "csv" inside directory or file names, and would leave the
    # path unchanged (so the writer would overwrite the input) when the
    # extension is not literally lowercase "csv".
    output_tfrecord_file = os.path.splitext(input_csv_file)[0] + ".tfrecords"
    writer = tf.python_io.TFRecordWriter(output_tfrecord_file)

    try:
        print("Creating TFRecords file at", output_tfrecord_file, "...")

        for row in create_csv_iterator(input_csv_file, skip_header=False):
            # csv.reader returns an empty list for a blank line.
            if len(row) == 0:
                continue

            example = create_example(row)
            writer.write(example.SerializeToString())
    finally:
        # Close the writer even when a row fails to convert.
        writer.close()

    print("Finish Writing", output_tfrecord_file)

In [7]:
# Convert every CSV of every split, announcing each split with a banner and
# separating consecutive splits with a blank line.
conversion_jobs = [
    ("Converting Training Data Files", train_data_files),
    ("Converting Validation Data Files", valid_data_files),
    ("Converting Test Data Files", test_data_files),
]

for job_index, (banner, csv_files) in enumerate(conversion_jobs):
    if job_index > 0:
        print("")
    print(banner)
    for input_csv_file in csv_files:
        create_tfrecords_file(input_csv_file)

Converting Training Data Files
Creating TFRecords file at data/train-data.tfrecords ...
Finish Writing data/train-data.tfrecords

Converting Validation Data Files
Creating TFRecords file at data/valid-data.tfrecords ...
Finish Writing data/valid-data.tfrecords

Converting Test Data Files
Creating TFRecords file at data/test-data.tfrecords ...
Finish Writing data/test-data.tfrecords
