In [1]:
import tensorflow as tf
import numpy as np
import os


In [2]:
# Number of samples
n_samples = 10

# Sample features
feature0 = np.random.choice([False, True], n_samples)  # Boolean feature
feature1 = np.random.randint(0, 5, n_samples)  # Integer feature
feature2 = np.array([b'cat', b'dog', b'bird', b'fish', b'horse'])[feature1]  # String feature
feature3 = np.random.randn(n_samples)  # Float feature


In [3]:
def _bytes_feature(value):
    """Returns a bytes_list from a string / byte."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def _float_feature(value):
    """Returns a float_list from a float / double."""
    return tf.train.Feature(float_list=tf.train.FloatList(value=[value]))

def _int64_feature(value):
    """Returns an int64_list from a bool / enum / int / uint."""
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

# Function to create a tf.train.Example
def serialize_example(feature0, feature1, feature2, feature3):
    feature = {
        'feature0': _int64_feature(feature0),
        'feature1': _int64_feature(feature1),
        'feature2': _bytes_feature(feature2),
        'feature3': _float_feature(feature3),
    }
    example_proto = tf.train.Example(features=tf.train.Features(feature=feature))
    return example_proto.SerializeToString()


In [4]:
tfrecord_file = "data.tfrecord"

# Write the data to TFRecord file
with tf.io.TFRecordWriter(tfrecord_file) as writer:
    for i in range(n_samples):
        example = serialize_example(feature0[i], feature1[i], feature2[i], feature3[i])
        writer.write(example)


  return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))


In [5]:
# Read TFRecord File
raw_dataset = tf.data.TFRecordDataset(tfrecord_file)

# Function to parse the TFRecord example
def parse_example(example_proto):
    feature_description = {
        'feature0': tf.io.FixedLenFeature([], tf.int64),
        'feature1': tf.io.FixedLenFeature([], tf.int64),
        'feature2': tf.io.FixedLenFeature([], tf.string),
        'feature3': tf.io.FixedLenFeature([], tf.float32),
    }
    return tf.io.parse_single_example(example_proto, feature_description)

# Apply the parsing function
parsed_dataset = raw_dataset.map(parse_example)

# Print some sample data
for parsed_record in parsed_dataset.take(5):
    print(parsed_record)


{'feature0': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'feature1': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'feature2': <tf.Tensor: shape=(), dtype=string, numpy=b'dog'>, 'feature3': <tf.Tensor: shape=(), dtype=float32, numpy=0.58739126>}
{'feature0': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'feature1': <tf.Tensor: shape=(), dtype=int64, numpy=2>, 'feature2': <tf.Tensor: shape=(), dtype=string, numpy=b'bird'>, 'feature3': <tf.Tensor: shape=(), dtype=float32, numpy=0.3632374>}
{'feature0': <tf.Tensor: shape=(), dtype=int64, numpy=0>, 'feature1': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'feature2': <tf.Tensor: shape=(), dtype=string, numpy=b'dog'>, 'feature3': <tf.Tensor: shape=(), dtype=float32, numpy=-1.8765284>}
{'feature0': <tf.Tensor: shape=(), dtype=int64, numpy=1>, 'feature1': <tf.Tensor: shape=(), dtype=int64, numpy=3>, 'feature2': <tf.Tensor: shape=(), dtype=string, numpy=b'fish'>, 'feature3': <tf.Tensor: shape=(), dtype=float32, numpy=-0.87273574>}
{'feature0': <

In [6]:
BATCH_SIZE = 2
dataset = parsed_dataset.shuffle(10).batch(BATCH_SIZE)

# Example: Iterating through batches
for batch in dataset:
    print(batch)


{'feature0': <tf.Tensor: shape=(2,), dtype=int64, numpy=array([1, 1])>, 'feature1': <tf.Tensor: shape=(2,), dtype=int64, numpy=array([4, 3])>, 'feature2': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'horse', b'fish'], dtype=object)>, 'feature3': <tf.Tensor: shape=(2,), dtype=float32, numpy=array([ 0.23574543, -0.87273574], dtype=float32)>}
{'feature0': <tf.Tensor: shape=(2,), dtype=int64, numpy=array([0, 1])>, 'feature1': <tf.Tensor: shape=(2,), dtype=int64, numpy=array([0, 1])>, 'feature2': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'cat', b'dog'], dtype=object)>, 'feature3': <tf.Tensor: shape=(2,), dtype=float32, numpy=array([-0.9145783 ,  0.58739126], dtype=float32)>}
{'feature0': <tf.Tensor: shape=(2,), dtype=int64, numpy=array([0, 0])>, 'feature1': <tf.Tensor: shape=(2,), dtype=int64, numpy=array([1, 2])>, 'feature2': <tf.Tensor: shape=(2,), dtype=string, numpy=array([b'dog', b'bird'], dtype=object)>, 'feature3': <tf.Tensor: shape=(2,), dtype=float32, numpy=array(