In [1]:
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras

print(tf.__version__)
print(sys.version_info)


2.2.0-rc4
sys.version_info(major=3, minor=7, micro=3, releaselevel='final', serial=0)


In [4]:
# tfrecord 文件格式
# -> tf.train.Example
#    -> tf.train.Features -> {"key": tf.train.Feature}
#        -> tf.train.Feature -> tf.train.ByteList/FloatList/Int64List

favorite_books = [name.encode("utf-8") for name in ["machine_learning", "cc150"]]
favorite_books_bytelist = tf.train.BytesList(value = favorite_books)
print(favorite_books_bytelist)

hours_floatlist = tf.train.FloatList(value = [15.1, 12.2, 8., 1.])
print(hours_floatlist)

age_int64list = tf.train.Int64List(value=[1,6])

features = tf.train.Features(
    feature = {
        "favorite_books": tf.train.Feature(
            bytes_list = favorite_books_bytelist
        ),
        "hours":tf.train.Feature(
            float_list = hours_floatlist
        ),
        "age": tf.train.Feature(
            int64_list = age_int64list
        )
    }
)
print(features)


value: "machine_learning"
value: "cc150"

value: 15.100000381469727
value: 12.199999809265137
value: 8.0
value: 1.0

feature {
  key: "age"
  value {
    int64_list {
      value: 1
      value: 6
    }
  }
}
feature {
  key: "favorite_books"
  value {
    bytes_list {
      value: "machine_learning"
      value: "cc150"
    }
  }
}
feature {
  key: "hours"
  value {
    float_list {
      value: 15.100000381469727
      value: 12.199999809265137
      value: 8.0
      value: 1.0
    }
  }
}



In [8]:
example = tf.train.Example(features = features)
print(example)
serialized_example = example.SerializeToString()
print(serialized_example)


features {
  feature {
    key: "age"
    value {
      int64_list {
        value: 1
        value: 6
      }
    }
  }
  feature {
    key: "favorite_books"
    value {
      bytes_list {
        value: "machine_learning"
        value: "cc150"
      }
    }
  }
  feature {
    key: "hours"
    value {
      float_list {
        value: 15.100000381469727
        value: 12.199999809265137
        value: 8.0
        value: 1.0
      }
    }
  }
}

b'\n]\n\r\n\x03age\x12\x06\x1a\x04\n\x02\x01\x06\n-\n\x0efavorite_books\x12\x1b\n\x19\n\x10machine_learning\n\x05cc150\n\x1d\n\x05hours\x12\x14\x12\x12\n\x10\x9a\x99qA33CA\x00\x00\x00A\x00\x00\x80?'


In [9]:
output_dir = "tfrecord_basic"
if not os.path.exists(output_dir):
    os.mkdir(output_dir)
filename = 'test.record'
filename_fullpath = os.path.join(output_dir, filename)

with tf.io.TFRecordWriter(filename_fullpath) as writer:
    for i in range(3):
        writer.write(serialized_example)

In [10]:
dataset = tf.data.TFRecordDataset([filename_fullpath])
for item in dataset:
    print(item)

tf.Tensor(b'\n]\n\r\n\x03age\x12\x06\x1a\x04\n\x02\x01\x06\n-\n\x0efavorite_books\x12\x1b\n\x19\n\x10machine_learning\n\x05cc150\n\x1d\n\x05hours\x12\x14\x12\x12\n\x10\x9a\x99qA33CA\x00\x00\x00A\x00\x00\x80?', shape=(), dtype=string)
tf.Tensor(b'\n]\n\r\n\x03age\x12\x06\x1a\x04\n\x02\x01\x06\n-\n\x0efavorite_books\x12\x1b\n\x19\n\x10machine_learning\n\x05cc150\n\x1d\n\x05hours\x12\x14\x12\x12\n\x10\x9a\x99qA33CA\x00\x00\x00A\x00\x00\x80?', shape=(), dtype=string)
tf.Tensor(b'\n]\n\r\n\x03age\x12\x06\x1a\x04\n\x02\x01\x06\n-\n\x0efavorite_books\x12\x1b\n\x19\n\x10machine_learning\n\x05cc150\n\x1d\n\x05hours\x12\x14\x12\x12\n\x10\x9a\x99qA33CA\x00\x00\x00A\x00\x00\x80?', shape=(), dtype=string)


In [11]:
expected_features = {
    "favorite_books": tf.io.VarlenFeature(dtype=tf.string),
    "hours": tf.io.VarlenFeature(dtype=tf.float32),
    "age": tf.io.FixedLenFeature(dtype=tf.int64),
}

dataset = tf.data.TFRecordDataset([filename_fullpath])
for item in dataset:
    example = tf.io.parse_single_example(item, expected_features)
    print(example)

AttributeError: module 'tensorflow._api.v2.io' has no attribute 'VarlenFeature'