# Reading Writing Converting Examples

Provides a list of small examples for writing and reading data in TensorFlow from different sources (NumPy arrays, CSV files, ...).

More in the official [documentation](https://www.tensorflow.org/how_tos/reading_data/)

In [1]:
import argparse, os
import tensorflow as tf
import numpy as np
from pprint import pprint 

# Command-line configuration: the only option is the working directory for
# the data files produced by this notebook.
FLAGS = None

parser = argparse.ArgumentParser()
parser.add_argument('--directory', type=str, default='data',
                    help='Directory to download data files and write the converted result')

# parse_known_args returns unrecognised arguments in `unparsed` instead of
# aborting on them.
FLAGS, unparsed = parser.parse_known_args()


## Standard Tensorflow Format



In [35]:
def _int64_feature(value, verbose=False):
    """Wrap a single value in a ``tf.train.Feature`` backed by an ``Int64List``.

    Args:
        value: The value to store; it is placed in a one-element list.
        verbose: When true, print the input value and the resulting feature.

    Returns:
        The ``tf.train.Feature`` holding ``value``.
    """
    int64_list = tf.train.Int64List(value=[value])
    feature = tf.train.Feature(int64_list=int64_list)
    if verbose:
        print(value, feature)
    return feature


def _bytes_feature(value, verbose=False):
    """Wrap a single value in a ``tf.train.Feature`` backed by a ``BytesList``.

    Args:
        value: The value to store; it is placed in a one-element list.
        verbose: When true, print the input value and the resulting feature.

    Returns:
        The ``tf.train.Feature`` holding ``value``.
    """
    bytes_list = tf.train.BytesList(value=[value])
    feature = tf.train.Feature(bytes_list=bytes_list)
    if verbose:
        print(value, feature)
    return feature

def fmat(x):
    """Return a random 1-D integer array of length ``x`` containing zeros and ones.

    An entry is 1 where the corresponding uniform sample drawn by
    ``np.random.rand`` is greater than 0.5, and 0 otherwise.
    """
    # ``np.int`` was only an alias for the builtin ``int`` and has been removed
    # from recent NumPy releases; the builtin yields the same dtype.
    return (np.random.rand(x) > 0.5).astype(int)


# 20 random binary vectors of length 10, and one random binary label per vector.
list_of_data = [fmat(10) for _ in range(20)]
list_of_labels = [0 if np.random.rand() > 0.5 else 1 for _ in range(20)]

pprint(list_of_data)
pprint(list_of_labels)

[array([1, 1, 1, 0, 0, 0, 0, 1, 1, 0]),
 array([0, 0, 0, 1, 1, 1, 0, 1, 0, 1]),
 array([1, 0, 1, 0, 0, 1, 1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 0, 1, 0, 0]),
 array([1, 0, 1, 1, 1, 1, 0, 1, 0, 1]),
 array([1, 1, 0, 0, 1, 0, 1, 0, 1, 1]),
 array([0, 0, 1, 1, 0, 0, 1, 0, 0, 0]),
 array([0, 0, 1, 1, 0, 0, 0, 0, 1, 0]),
 array([0, 1, 0, 0, 1, 0, 1, 0, 0, 1]),
 array([0, 0, 1, 0, 1, 1, 0, 0, 1, 0]),
 array([0, 1, 1, 0, 1, 1, 1, 1, 0, 1]),
 array([1, 0, 1, 1, 0, 0, 0, 1, 1, 0]),
 array([0, 1, 0, 1, 0, 0, 0, 1, 0, 0]),
 array([1, 0, 0, 0, 1, 1, 0, 1, 1, 1]),
 array([1, 1, 1, 1, 0, 0, 1, 0, 0, 1]),
 array([0, 0, 0, 1, 1, 1, 0, 1, 0, 0]),
 array([1, 0, 1, 0, 1, 0, 1, 1, 0, 0]),
 array([0, 0, 0, 0, 1, 1, 0, 0, 1, 0]),
 array([1, 1, 1, 0, 1, 0, 1, 1, 1, 0]),
 array([1, 1, 0, 0, 1, 1, 1, 0, 1, 0])]
[0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0]


Writing a simple list of matrices
-----------------------------
Adapted from this [repo](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/how_tos/reading_data/convert_to_records.py)

In [36]:
verbose = False

num_examples = len(list_of_data)

# Every data vector must have exactly one label.
assert len(list_of_data) == len(list_of_labels)

size = list_of_data[0].shape[0]

# Create the output directory if needed so the writer does not fail when the
# notebook is run from a fresh checkout.
if FLAGS.directory:
    os.makedirs(FLAGS.directory, exist_ok=True)

filename = os.path.join(FLAGS.directory, 'simple_list_of_matrix.tfrecords')
print('Writing', filename)

# The context manager closes the record file even if building or writing an
# example raises part-way through the loop.
with tf.python_io.TFRecordWriter(filename) as writer:
    for index in range(num_examples):
        # ``tobytes`` is the supported name for the deprecated ``tostring``;
        # it produces the same raw bytes of the array.
        data_raw = list_of_data[index].tobytes()
        example = tf.train.Example(features=tf.train.Features(feature={
            'label': _int64_feature(int(list_of_labels[index]), verbose),
            'image_raw': _bytes_feature(data_raw, verbose)}))
        writer.write(example.SerializeToString())

Writing data/simple_list_of_matrix.tfrecords


Reading a simple list of matrices
-----------------------------
Adapted from this [repo](https://github.com/tensorflow/tensorflow/blob/master/tensorflow/examples/how_tos/reading_data/fully_connected_reader.py)

In [37]:
# Collects one (1-D array, label) tuple per record read back from the file.
reconstructed_matrix = []

record_iterator = tf.python_io.tf_record_iterator(path=filename)

for record in record_iterator:
    # Each record is a serialized tf.train.Example with 'label' and 'image_raw'.
    example = tf.train.Example()
    example.ParseFromString(record)
    label = int(example.features.feature['label'].int64_list.value[0])
    mat_string = example.features.feature['image_raw'].bytes_list.value[0]
    # ``np.frombuffer`` replaces the deprecated binary mode of ``np.fromstring``
    # and the builtin ``int`` replaces the removed ``np.int`` alias. The dtype
    # must match the dtype the arrays had when their raw bytes were written.
    # ``.copy()`` keeps the result writable, as ``fromstring`` did.
    mat_1d = np.frombuffer(mat_string, dtype=int).copy()
    # if reshape needed
    #reconstructed_mat = mat_1d.reshape((height, width, -1))
    reconstructed_matrix.append((mat_1d, label))

(10,)
[1 1 1 0 0 0 0 1 1 0]
(10,)
[0 0 0 1 1 1 0 1 0 1]
(10,)
[1 0 1 0 0 1 1 1 1 1]
(10,)
[1 1 1 1 1 1 0 1 0 0]
(10,)
[1 0 1 1 1 1 0 1 0 1]
(10,)
[1 1 0 0 1 0 1 0 1 1]
(10,)
[0 0 1 1 0 0 1 0 0 0]
(10,)
[0 0 1 1 0 0 0 0 1 0]
(10,)
[0 1 0 0 1 0 1 0 0 1]
(10,)
[0 0 1 0 1 1 0 0 1 0]
(10,)
[0 1 1 0 1 1 1 1 0 1]
(10,)
[1 0 1 1 0 0 0 1 1 0]
(10,)
[0 1 0 1 0 0 0 1 0 0]
(10,)
[1 0 0 0 1 1 0 1 1 1]
(10,)
[1 1 1 1 0 0 1 0 0 1]
(10,)
[0 0 0 1 1 1 0 1 0 0]
(10,)
[1 0 1 0 1 0 1 1 0 0]
(10,)
[0 0 0 0 1 1 0 0 1 0]
(10,)
[1 1 1 0 1 0 1 1 1 0]
(10,)
[1 1 0 0 1 1 1 0 1 0]


In [40]:
# Compare each original vector and label with the pair read back from the
# record file; prints one "<data ok> <label ok>" line per example.
for idx, source_vec in enumerate(list_of_data):
    source_label = list_of_labels[idx]
    restored_vec = reconstructed_matrix[idx][0]
    restored_label = reconstructed_matrix[idx][1]
    data_matches = np.allclose(source_vec, restored_vec)
    label_matches = np.allclose(source_label, restored_label)
    print(data_matches, label_matches)

True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
True True
