In [None]:
try:
  import colab
  !pip install --upgrade pip
except:
  pass

In [None]:
# Install TFX, upgrading it if already present. No version is pinned here;
# the outputs recorded further down came from TFX 1.8.0.
!pip install -U tfx

**Restart the Colab runtime when you get to this cell** so that the newly installed packages are loaded, then continue with the cells below.

In [1]:
# Import TensorFlow and report the installed version.
import tensorflow as tf
print('TensorFlow version: {}'.format(tf.__version__))
# Import the TFX v1 API under the alias `tfx` and report its version.
from tfx import v1 as tfx
print('TFX version: {}'.format(tfx.__version__))

TensorFlow version: 2.8.2
TFX version: 1.8.0


In [2]:
import tensorflow as tf

In [3]:
# NOTE: this binds the name `list` to a range object, which shadows the
# builtin `list` type in this notebook until a later cell runs `del list`.
list = range(10)
# Display the range object.
list

range(0, 10)

In [4]:
# Build a dataset by slicing range(10): each element is one scalar int32 value.
dataset = tf.data.Dataset.from_tensor_slices(range(10))
# Display the dataset's repr (including its element spec).
dataset

<TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [5]:
# Walk through the dataset and print each element as a NumPy value.
for value in dataset.as_numpy_iterator():
    print(value)

0
1
2
3
4
5
6
7
8
9


In [6]:
# Create a Python iterator over the dataset; `it` is reused by the next cell.
it = iter(dataset)
# Fetch the first element (displayed as a tf.Tensor).
next(it)

<tf.Tensor: shape=(), dtype=int32, numpy=0>

In [7]:
# Advance the same iterator by one element and convert it to a NumPy value.
next(it).numpy()

1

In [8]:
# Fold the dataset into a single value: start from state 0, add every element
# to the running state, then print the final state as a NumPy value.
print(dataset.reduce(0, lambda state, value: state + value).numpy())

45


In [9]:
# Repeat the dataset three times, then group consecutive elements into batches
# of 7; the last batch keeps whatever elements are left over.
# Note: `dataset` is rebound here, so re-running this cell repeats and batches
# the already-batched dataset again.
dataset = dataset.repeat(3)
dataset = dataset.batch(7)
for batch_values in dataset.as_numpy_iterator():
    print(batch_values)

[0 1 2 3 4 5 6]
[7 8 9 0 1 2 3]
[4 5 6 7 8 9 0]
[1 2 3 4 5 6 7]
[8 9]


In [10]:
# Slice the list [1..5] into scalar elements and cycle through it three times.
dataset = tf.data.Dataset.from_tensor_slices([1, 2, 3, 4, 5]).repeat(3)
for value in dataset.as_numpy_iterator():
    print(value)

1
2
3
4
5
1
2
3
4
5
1
2
3
4
5


In [11]:
# Square every element of the dataset.
# Note: `dataset` is rebound to the mapped dataset, so re-running this cell
# squares the already-squared values again.
dataset = dataset.map(lambda x: x**2)
# Print each squared element as a NumPy value.
for item in dataset:
    print(item.numpy())

1
4
9
16
25
1
4
9
16
25
1
4
9
16
25


In [12]:
# Remove the notebook-level `list` binding (the range created earlier) so the
# builtin `list` type is reachable again. Raises NameError if that binding is gone.
del list

In [13]:
# Three rows of three integers each; the generator below yields them in order.
elements = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
# Only the element dtype is declared; no output shapes are given.
dataset = tf.data.Dataset.from_generator(lambda: elements, output_types=tf.int64)

# Print each yielded row as a NumPy array.
for row in dataset.as_numpy_iterator():
    print(row)

[1 2 3]
[4 5 6]
[7 8 9]


In [14]:
# Split every row into its individual values so each element is one number.
# Note: `dataset` is rebound to the unbatched dataset.
dataset = dataset.unbatch()
# Materialise all elements as a Python list of NumPy values. This uses the
# builtin `list`, which must not be shadowed at this point.
list(dataset.as_numpy_iterator())

[1, 2, 3, 4, 5, 6, 7, 8, 9]

In [15]:
# Print the unbatched elements one per line as NumPy values.
for value in dataset.as_numpy_iterator():
    print(value)

1
2
3
4
5
6
7
8
9


In [16]:
# Show the type specification (shape and dtype) of the dataset's elements.
dataset.element_spec

TensorSpec(shape=<unknown>, dtype=tf.int64, name=None)

In [17]:
# Slice a random 4x10 int32 matrix (values drawn with minval=1, maxval=10)
# along its first axis, giving a dataset of four length-10 vectors.
dataset1 = tf.data.Dataset.from_tensor_slices(
    tf.random.uniform([4, 10], minval=1, maxval=10, dtype=tf.int32)
)
dataset1

<TensorSliceDataset element_spec=TensorSpec(shape=(10,), dtype=tf.int32, name=None)>

In [18]:
# Print each length-10 row of dataset1 as a NumPy array.
for row in dataset1.as_numpy_iterator():
    print(row)

[8 5 7 5 3 7 9 7 4 7]
[9 1 9 9 3 8 1 5 9 6]
[3 4 5 5 6 1 5 2 7 9]
[8 4 9 5 6 5 5 2 6 3]


In [19]:
# Slice a pair of random tensors in lockstep: each element is a tuple of one
# float32 scalar and one length-100 int32 vector.
dataset2 = tf.data.Dataset.from_tensor_slices(
    (
        tf.random.uniform([4]),
        tf.random.uniform([4, 100], maxval=100, dtype=tf.int32),
    )
)
# Print the raw tensor tuples (not converted to NumPy).
for pair in dataset2:
    print(pair)

(<tf.Tensor: shape=(), dtype=float32, numpy=0.92638326>, <tf.Tensor: shape=(100,), dtype=int32, numpy=
array([38, 53, 90, 61, 24, 97, 30, 69,  6, 24, 93, 57, 85,  3, 12, 65, 97,
       53, 23, 17, 78,  8, 47, 30, 48, 67,  2, 53, 81, 90, 20, 29, 99, 57,
       92, 12, 63, 78, 52, 18, 16,  2, 61, 51, 94, 48, 14, 21, 14, 44, 68,
       10, 41, 71,  2, 95, 18, 29, 87, 64, 36, 75, 92, 18, 20, 40, 96, 50,
       50, 78, 88, 79, 58, 75, 42, 23, 69, 85, 81,  2, 31, 93, 72, 86, 28,
       99, 22, 34, 53, 95, 99, 60, 44, 25, 64, 69, 13, 86, 33, 51],
      dtype=int32)>)
(<tf.Tensor: shape=(), dtype=float32, numpy=0.9529389>, <tf.Tensor: shape=(100,), dtype=int32, numpy=
array([73, 90, 92, 91, 83, 67, 62,  2, 42, 49, 78, 46, 86, 42, 82, 61,  2,
       46, 37,  5, 92, 89,  0, 71, 86, 99, 21, 55, 37, 73, 81,  0, 95, 73,
       48, 38, 25, 72,  4, 13, 63, 87, 58, 40, 91, 14, 14, 26, 47, 52, 16,
       18, 29, 82, 44, 96, 88, 39, 20, 48, 34, 69, 95, 40, 87,  1, 46, 87,
       75, 79, 78, 74, 20, 64, 

In [20]:
# Pair up dataset1 and dataset2 element by element; each zipped element is
# (dataset1_element, dataset2_element), where the latter is itself a tuple.
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))

# Show the nested element structure of the zipped dataset.
dataset3.element_spec

(TensorSpec(shape=(10,), dtype=tf.int32, name=None),
 (TensorSpec(shape=(), dtype=tf.float32, name=None),
  TensorSpec(shape=(100,), dtype=tf.int32, name=None)))

In [21]:
# Unpack the nested structure: `a` comes from dataset1 and `(b, c)` from
# dataset2, then print the shape of each component.
for a, (b, c) in dataset3:
    print(f"shapes: {a.shape} {b.shape} {c.shape}")

shapes: (10,) () (100,)
shapes: (10,) () (100,)
shapes: (10,) () (100,)
shapes: (10,) () (100,)


### Using `tf.data` with generators

In [22]:
def count(stop):
    """Yield 0, 1, 2, ... for as long as the value stays strictly below `stop`.

    Nothing is yielded when `stop` is not greater than 0.
    """
    current = 0
    while True:
        if not current < stop:
            # Reached (or started beyond) the limit: end the generator.
            return
        yield current
        current += 1

In [23]:
# Iterate the plain Python generator directly to show the values it yields.
for i in count(6):
    print(i)

0
1
2
3
4
5


In [24]:
# Wrap the `count` generator in a dataset of scalar int32 elements; `args=[25]`
# is forwarded to `count` as its `stop` argument.
# `output_signature` is used in place of the deprecated
# `output_types` / `output_shapes` pair and declares the same dtype and shape.
ds_counter = tf.data.Dataset.from_generator(
    count,
    args=[25],
    output_signature=tf.TensorSpec(shape=(), dtype=tf.int32),
)

In [25]:
# Group the counter values into batches of 10 and print each batch as a NumPy
# array; the last batch holds whatever values remain.
for chunk in ds_counter.batch(10).as_numpy_iterator():
    print(chunk)

[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24]


In [26]:
# Download the flower photos archive with Keras' `get_file`, extracting it
# (`untar=True`); `flowers` holds the returned path, which is used below as the
# image directory.
flowers = tf.keras.utils.get_file(
    'flower_photos',
    'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz


In [27]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Image generator that rescales pixel values by 1/255 and applies random
# rotations (rotation_range=20).
img_gen = ImageDataGenerator(rescale=1./255, rotation_range=20)
# Pull a single batch of images and labels from the `flowers` directory.
images, labels = next(img_gen.flow_from_directory(flowers))

Found 3670 images belonging to 5 classes.


In [28]:
# Shape of the image batch fetched above.
images.shape

(32, 256, 256, 3)

In [29]:
# Label vector of the first image in the batch.
labels[0]

array([0., 0., 1., 0., 0.], dtype=float32)

In [30]:
# Wrap the Keras directory iterator in a tf.data dataset of
# (image_batch, label_batch) pairs, both float32.
# The batch dimension is declared as None instead of being copied from the
# sample batch: 3670 images do not divide evenly into batches of 32, so the
# iterator also yields a smaller batch, which a fixed leading dimension would
# reject. The per-example dimensions are taken from the sample batch.
# `output_signature` is used in place of the deprecated
# `output_types` / `output_shapes` arguments.
dataset = tf.data.Dataset.from_generator(
    lambda: img_gen.flow_from_directory(flowers),
    output_signature=(
        tf.TensorSpec(shape=(None,) + tuple(images.shape[1:]), dtype=tf.float32),
        tf.TensorSpec(shape=(None,) + tuple(labels.shape[1:]), dtype=tf.float32),
    ),
)

In [31]:
# Take a single batch from the dataset and print the image and label shapes.
# Note: the loop variables rebind the notebook-level names `images` and
# `labels` (previously NumPy arrays) to the tensors produced by the dataset.
for images, labels in dataset.take(1):
    print(images.shape)
    print(labels.shape)

Found 3670 images belonging to 5 classes.
(32, 256, 256, 3)
(32, 5)


In [32]:
x = [1, 2, 3, 4, 5, 6]

# Cycle through the six values twice (12 values in total), then form batches of
# exactly 5; `drop_remainder=True` discards the final, incomplete batch.
dataset = (
    tf.data.Dataset.from_tensor_slices(x)
    .repeat(2)
    .batch(5, drop_remainder=True)
)

In [33]:
# Print each complete batch as a NumPy array.
for batch_values in dataset.as_numpy_iterator():
    print(batch_values)

[1 2 3 4 5]
[6 1 2 3 4]


### TFRecord Format

In [34]:
# Write two raw byte-string records, in order, to a TFRecord file.
with tf.io.TFRecordWriter('my_data.tfrecord') as writer:
    for record in (b"This is my first record.", b"This is my second record."):
        writer.write(record)

In [35]:
# Read the records back from the TFRecord file written above.
filepaths = ['my_data.tfrecord']
dataset = tf.data.TFRecordDataset(filepaths)

# Each element is one serialized record; print it as a bytes object.
for record in dataset.as_numpy_iterator():
    print(record)

b'This is my first record.'
b'This is my second record.'


In [36]:
from tensorflow.train import Feature, Example, BytesList, Features, FloatList, Int64List

# Build a tf.train.Example describing one contact. Each field is wrapped in the
# list type matching its data: byte strings for text, int64 for the id.
# The e-mail is a reserved-domain placeholder (example.com) so that no real
# personal address is embedded in the notebook or the file written from it.
person_example = Example(
    features=Features(
        feature={
            'name': Feature(bytes_list=BytesList(value=[b'Alice'])),
            'id': Feature(int64_list=Int64List(value=[123])),
            'emails': Feature(bytes_list=BytesList(value=[b'alice@example.com'])),
        }
    )
)

In [37]:
# Serialize the Example protobuf to bytes and write it as a single record.
with tf.io.TFRecordWriter('my_contacts.tfrecord') as f:
    f.write(person_example.SerializeToString())

## ExampleGen

### Converting CSV to tf.Example

In [39]:
import os
from tfx import v1 as tfx
from tfx.components import CsvExampleGen

In [51]:
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext

In [45]:
# Name of the pipeline; also used as the sub-directory name below.
PIPELINE_NAME = 'AAPL'

# Relative path `pipelines/<PIPELINE_NAME>`.
PIPELINE_ROOT = os.path.join('pipelines', PIPELINE_NAME)

In [57]:
# Directory that holds the input CSV. This is a hard-coded absolute path
# (presumably the Colab filesystem -- adjust when running elsewhere).
data_root = '/content/data_root/'

# Full path of the AAPL price CSV inside that directory.
data_path = os.path.join(data_root, 'AAPL.csv')

In [58]:
# Preview the first lines of the CSV with the shell `head` command;
# `{data_path}` is interpolated from the Python variable.
!head {data_path}

Date,Open,High,Low,Close,Adj Close,Volume
2015-10-29,29.674999,30.172501,29.567499,30.132500,27.667913,204909200
2015-10-30,30.247499,30.305000,29.862499,29.875000,27.431473,197461200
2015-11-02,30.200001,30.340000,29.902500,30.295000,27.817125,128813200
2015-11-03,30.197500,30.872499,30.174999,30.642500,28.136196,182076000
2015-11-04,30.782499,30.955000,30.405001,30.500000,28.005352,179544400
2015-11-05,30.462500,30.672501,30.045000,30.230000,27.876257,158210800
2015-11-06,30.277500,30.452499,30.155001,30.264999,27.908531,132169200
2015-11-09,30.240000,30.452499,30.012501,30.142500,27.795572,135485600
2015-11-10,29.225000,29.517500,29.014999,29.192499,26.919538,236511600


In [52]:
# Create the context used to run TFX components interactively in this notebook

context = InteractiveContext()



In [59]:
# Create the ExampleGen component that ingests the CSV data under `data_root`

example_gen = CsvExampleGen(input_base=data_root)
# Execute the component in the interactive context, with caching enabled.
context.run(example_gen, enable_cache=True)

0,1
.execution_id,3
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } CsvExampleGen at 0x7fb0e2247a10.inputs{}.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e2204090.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0.exec_properties['input_base']/content/data_root/['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:108268,xor_checksum:1657884674,sum_checksum:1657884674"
.component.inputs,{}
.component.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e2204090.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0"

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e2204090.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0"
.exec_properties,"['input_base']/content/data_root/['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:108268,xor_checksum:1657884674,sum_checksum:1657884674"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e2204090.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['input_base'],/content/data_root/
['input_config'],"{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }"
['output_data_format'],6
['output_file_format'],5
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:single_split,num_files:1,total_bytes:108268,xor_checksum:1657884674,sum_checksum:1657884674"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e2204090.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2) at 0x7fb0e258f590.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/2
.span,0
.split_names,"[""train"", ""eval""]"
.version,0


### Loading TFRecord files with ExampleGen

In [60]:
# Input directory for ImportExampleGen below. This is a hard-coded absolute
# path (presumably holding the TFRecord files -- verify before running).
DATA_ROOT = '/content/tfdata'

In [61]:
from tfx.components import ImportExampleGen

# Create an ImportExampleGen component that reads its input from DATA_ROOT,
# then execute it in the interactive context.
example_gen2 = ImportExampleGen(input_base=DATA_ROOT)
context.run(example_gen2)

  wrapper = lambda x, *args, **kwargs: [fn(x, *args, **kwargs)]


0,1
.execution_id,4
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } ImportExampleGen at 0x7fb0e747d390.inputs{}.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e24cd390.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0.exec_properties['input_base']/content/tfdata['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:2,total_bytes:172,xor_checksum:0,sum_checksum:3315767550"
.component.inputs,{}
.component.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e24cd390.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0"

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e24cd390.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0"
.exec_properties,"['input_base']/content/tfdata['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:2,total_bytes:172,xor_checksum:0,sum_checksum:3315767550"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e24cd390.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['input_base'],/content/tfdata
['input_config'],"{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }"
['output_data_format'],6
['output_file_format'],5
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:single_split,num_files:2,total_bytes:172,xor_checksum:0,sum_checksum:3315767550"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e24cd390.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4) at 0x7fb0e73f4dd0.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/ImportExampleGen/examples/4
.span,0
.split_names,"[""train"", ""eval""]"
.version,0


### Splitting datasets

In [62]:
from tfx.proto import example_gen_pb2

# Output split configuration for ExampleGen: three named splits whose
# hash_buckets values (6 / 2 / 2) act as relative weights, per the TFX
# ExampleGen guide, i.e. roughly a 60/20/20 train/eval/test partition.
output = example_gen_pb2.Output(
    split_config=example_gen_pb2.SplitConfig(
        splits=[
            example_gen_pb2.SplitConfig.Split(name=split_name, hash_buckets=bucket_count)
            for split_name, bucket_count in (('train', 6), ('eval', 2), ('test', 2))
        ]
    )
)

In [63]:
# Build a CsvExampleGen that reads from `data_root` and applies the split
# configuration held in `output`.
example_gen = CsvExampleGen(
    input_base=data_root,
    output_config=output,
)

In [64]:
# Execute the component through `context` (presumably a TFX InteractiveContext
# -- confirm where it is created); the rich cell output summarises the
# execution and the resulting 'examples' artifact with its split names.
context.run(example_gen)

0,1
.execution_id,5
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } CsvExampleGen at 0x7fb0e25e6850.inputs{}.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e25b1fd0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0.exec_properties['input_base']/content/data_root/['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 6,  ""name"": ""train""  },  {  ""hash_buckets"": 2,  ""name"": ""eval""  },  {  ""hash_buckets"": 2,  ""name"": ""test""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:108268,xor_checksum:1657884674,sum_checksum:1657884674"
.component.inputs,{}
.component.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e25b1fd0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0"

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e25b1fd0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0"
.exec_properties,"['input_base']/content/data_root/['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 6,  ""name"": ""train""  },  {  ""hash_buckets"": 2,  ""name"": ""eval""  },  {  ""hash_buckets"": 2,  ""name"": ""test""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:108268,xor_checksum:1657884674,sum_checksum:1657884674"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e25b1fd0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5
.span,0
.split_names,"[""train"", ""eval"", ""test""]"
.version,0

0,1
['input_base'],/content/data_root/
['input_config'],"{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 6,  ""name"": ""train""  },  {  ""hash_buckets"": 2,  ""name"": ""eval""  },  {  ""hash_buckets"": 2,  ""name"": ""test""  }  ]  } }"
['output_data_format'],6
['output_file_format'],5
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:single_split,num_files:1,total_bytes:108268,xor_checksum:1657884674,sum_checksum:1657884674"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x7fb0e25b1fd0.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: /tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5) at 0x7fb0e25db090.type<class 'tfx.types.standard_artifacts.Examples'>.uri/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5.span0.split_names[""train"", ""eval"", ""test""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5
.span,0
.split_names,"[""train"", ""eval"", ""test""]"
.version,0


In [65]:
# Print the metadata of every artifact on the component's 'examples'
# output channel.
examples_channel = example_gen.outputs['examples']
for artifact in examples_channel.get():
    print(artifact)


Artifact(artifact: id: 4
type_id: 14
uri: "/tmp/tfx-interactive-2022-07-15T11_54_18.066446-wc8xghip/CsvExampleGen/examples/5"
properties {
  key: "split_names"
  value {
    string_value: "[\"train\", \"eval\", \"test\"]"
  }
}
custom_properties {
  key: "file_format"
  value {
    string_value: "tfrecords_gzip"
  }
}
custom_properties {
  key: "input_fingerprint"
  value {
    string_value: "split:single_split,num_files:1,total_bytes:108268,xor_checksum:1657884674,sum_checksum:1657884674"
  }
}
custom_properties {
  key: "payload_format"
  value {
    string_value: "FORMAT_TF_EXAMPLE"
  }
}
custom_properties {
  key: "span"
  value {
    int_value: 0
  }
}
custom_properties {
  key: "state"
  value {
    string_value: "published"
  }
}
custom_properties {
  key: "tfx_version"
  value {
    string_value: "1.8.0"
  }
}
state: LIVE
, artifact_type: id: 14
name: "Examples"
properties {
  key: "span"
  value: INT
}
properties {
  key: "split_names"
  value: STRING
}
properties {
  key: "ve

## References

1. https://colab.research.google.com/drive/1kj65CT3d4ninQhJT_NXj5yI5t6vuZ5fa?usp=sharing#scrollTo=kUSUAEqpcV2A
2. https://www.tensorflow.org/tfx/guide/examplegen