## Import libraries and enable Eager execution mode

In [2]:
import tensorflow as tf
import numpy as np

# Eager mode is relied on further down: the TFRecord dump walks the dataset with a
# plain `for` loop and reads each tensor's value through `.numpy()`.
tf.enable_eager_execution()

In [None]:
# Show the first five lines of the raw CSV to inspect its layout before parsing.
!head -5 adult.data

In [None]:
def _bytes_feature(value):
    """Wrap a single bytes value in a `tf.train.Feature` holding a one-item BytesList."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

def _int64_feature(value):
    """Wrap a single integer value in a `tf.train.Feature` holding a one-item Int64List."""
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

## Function `read_csv_2_dataset`: takes a CSV file path and returns a dataset whose elements are dictionaries of features.

In [None]:
def read_csv_2_dataset(csv_file):
    """Read a headerless census CSV file into a dataset of feature dictionaries.

    Args:
        csv_file: Path of the CSV file (anything `tf.data.TextLineDataset`
            accepts as its filenames argument).

    Returns:
        A `tf.data.Dataset` whose elements are dicts mapping each column name
        listed below to the scalar tensor decoded from that column. The label
        column is kept inside the dict under the key 'label'.

    Blank lines (for example a trailing empty line at the end of the file) are
    skipped instead of being handed to the CSV decoder, which would reject them
    for not having the expected number of fields.
    """
    # (column name, default value) in file order. The Python type of each
    # default tells `tf.decode_csv` whether the column is an integer or a string,
    # and the default itself is what an empty field decodes to.
    column_defaults = [
        ('age', 0),
        ('workclass', 'A'),
        ('fnlwgt', 0),
        ('education', 'A'),
        ('education-num', 0),
        ('marital-status', 'A'),
        ('occupation', 'A'),
        ('relationship', 'A'),
        ('race', 'A'),
        ('sex', 'A'),
        ('capital-gain', 0),
        ('capital-loss', 0),
        ('hours-per-week', 0),
        ('native-country', 'A'),
        ('label', 'A'),
    ]
    columns = [name for name, _ in column_defaults]
    record_defaults = [default for _, default in column_defaults]

    def _is_not_blank(line):
        # Keep only lines that actually contain text.
        return tf.not_equal(line, '')

    def _parse_line(line):
        # Decode the line into its fields, then pair each field with its column name.
        fields = tf.decode_csv(line, record_defaults)
        return dict(zip(columns, fields))

    lines = tf.data.TextLineDataset(csv_file)
    return lines.filter(_is_not_blank).map(_parse_line)

## Function `dump_dataset_2_tfrecord`: takes a dataset and the name of the TFRecord file to write it to

In [None]:
def dump_dataset_2_tfrecord(dataset, out_file):
    """Serialize every element of `dataset` as a `tf.train.Example` into a TFRecord file.

    Args:
        dataset: Iterable of dicts (e.g. the dataset built by
            `read_csv_2_dataset`) in which every key listed below maps to a
            value exposing `.numpy()`; this requires eager execution.
        out_file: Path of the TFRecord file to create.

    Returns:
        None. The records are written to `out_file` as a side effect.
    """
    # Column name -> helper that encodes it, in the order the features are added.
    column_encoders = (
        ('age', _int64_feature),
        ('workclass', _bytes_feature),
        ('fnlwgt', _int64_feature),
        ('education', _bytes_feature),
        ('education-num', _int64_feature),
        ('marital-status', _bytes_feature),
        ('occupation', _bytes_feature),
        ('relationship', _bytes_feature),
        ('race', _bytes_feature),
        ('sex', _bytes_feature),
        ('capital-gain', _int64_feature),
        ('capital-loss', _int64_feature),
        ('hours-per-week', _int64_feature),
        ('native-country', _bytes_feature),
        ('label', _bytes_feature),
    )

    # The context manager closes the writer even when encoding or writing a
    # record raises, so the file handle is never leaked.
    with tf.python_io.TFRecordWriter(out_file) as writer:
        for x in dataset:
            feature = {name: encode(x[name].numpy())
                       for name, encode in column_encoders}
            example = tf.train.Example(
                features=tf.train.Features(feature=feature))
            writer.write(example.SerializeToString())

## Load data from the CSV file into a dataset

In [None]:
# Build the dataset of per-row feature dictionaries from the CSV in the working directory.
data = read_csv_2_dataset('./adult.data')
# Bare expression so the notebook displays the dataset's repr as the cell output.
data

## Dump the dataset to a TFRecord file

In [None]:
# Serialize every row of `data` into 'census.tfrecord'.
dump_dataset_2_tfrecord(data, 'census.tfrecord')