In [2]:
import tensorflow as tf 
import pathlib
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# Print NumPy floats with at most 4 digits after the decimal point.
np.set_printoptions(precision=4)

In [3]:
# Slice a Python list into a dataset with one scalar element per list entry.
dataset = tf.data.Dataset.from_tensor_slices([8, 3, 0, 8, 2, 1])
dataset

<_TensorSliceDataset element_spec=TensorSpec(shape=(), dtype=tf.int32, name=None)>

In [5]:
# A dataset can be consumed with a plain for-loop; each element prints as a tf.Tensor.
for elem in dataset:
    print(elem)

tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)


In [6]:
# Pull a single element through an explicit iterator and show its NumPy value.
it = iter(dataset)
print(next(it).numpy())

8


In [8]:
# Fold the whole dataset into one value: start from 0 and add every element.
print(
    dataset.reduce(
        0,
        lambda running_total, element: running_total + element,
    ).numpy()
)

22


In [10]:
# Slicing a [4, 10] tensor along its first axis gives elements of shape (10,).
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random.uniform([4, 10]))
dataset1.element_spec

TensorSpec(shape=(10,), dtype=tf.float32, name=None)

In [20]:
# Slice a tuple of tensors together: each element pairs one scalar float32
# with one length-100 int32 vector.
dataset2 = tf.data.Dataset.from_tensor_slices(
    (tf.random.uniform([4]),
    tf.random.uniform([4, 100], maxval=100, dtype=tf.int32))
)
dataset2.element_spec

(TensorSpec(shape=(), dtype=tf.float32, name=None),
 TensorSpec(shape=(100,), dtype=tf.int32, name=None))

In [21]:
# Zip the two datasets; the element structure nests dataset2's tuple inside
# the outer pair.
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
dataset3.element_spec

(TensorSpec(shape=(10,), dtype=tf.float32, name=None),
 (TensorSpec(shape=(), dtype=tf.float32, name=None),
  TensorSpec(shape=(100,), dtype=tf.int32, name=None)))

In [28]:
# Wrap one sparse tensor (dense shape 3x4, two non-zero entries) as a
# single-element dataset; from_tensors does not slice its input.
dataset4 = tf.data.Dataset.from_tensors(
    tf.SparseTensor(
        indices=[[0, 0], [1, 2]],
        values=[1, 2],
        dense_shape=[3, 4],
    )
)
dataset4.element_spec

SparseTensorSpec(TensorShape([3, 4]), tf.int32)

In [34]:
# Ask the spec which Python type the elements it describes take.
dataset4.element_spec.value_type

tensorflow.python.framework.sparse_tensor.SparseTensor

In [35]:
# Rebind dataset1 to small int32 values (maxval=10) so the rows printed in the
# next cell are easy to read. This replaces the float32 dataset1 defined earlier.
dataset1 = tf.data.Dataset.from_tensor_slices(tf.random.uniform([4, 10], maxval=10, dtype=tf.int32))
dataset1

<_TensorSliceDataset element_spec=TensorSpec(shape=(10,), dtype=tf.int32, name=None)>

In [36]:
# Print every element (one row of the original [4, 10] tensor) as a NumPy array.
for z in dataset1:
    print(z.numpy())

[1 6 7 8 6 6 9 9 6 3]
[0 5 1 9 9 1 2 2 6 2]
[5 4 9 0 4 7 9 4 7 2]
[8 4 5 2 7 2 2 7 1 0]


In [39]:
# Rebuild dataset2 from a (float32 scalars, int32 vectors) tuple; both tensors
# are sliced along their shared first axis of length 4.
dataset2 = tf.data.Dataset.from_tensor_slices((
    tf.random.uniform([4]),
    tf.random.uniform([4, 100], maxval=100, dtype=tf.int32),
))
dataset2

<_TensorSliceDataset element_spec=(TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None))>

In [40]:
# Re-zip using the rebuilt dataset1 (now int32) and dataset2.
dataset3 = tf.data.Dataset.zip((dataset1, dataset2))
dataset3

<_ZipDataset element_spec=(TensorSpec(shape=(10,), dtype=tf.int32, name=None), (TensorSpec(shape=(), dtype=tf.float32, name=None), TensorSpec(shape=(100,), dtype=tf.int32, name=None)))>

In [41]:
# The loop target mirrors the nested element structure: `a` comes from
# dataset1, the `(b, c)` pair from dataset2.
for a, (b, c) in dataset3:
    print(f'shapes: {a.shape}, {b.shape}, {c.shape}')

shapes: (10,), (), (100,)
shapes: (10,), (), (100,)
shapes: (10,), (), (100,)
shapes: (10,), (), (100,)


In [43]:
# Load the Fashion-MNIST data set shipped with Keras; the result unpacks into
# a train part and a test part.
train, test = tf.keras.datasets.fashion_mnist.load_data()

In [44]:
# Split the training part into images and labels.
images, labels = train
# Divide the pixel values by 255; the division yields float64 values, which is
# the image dtype the resulting dataset reports.
images = images / 255 

# Pair each image with its label: one (image, label) element per example.
dataset = tf.data.Dataset.from_tensor_slices((images, labels))
dataset

<_TensorSliceDataset element_spec=(TensorSpec(shape=(28, 28), dtype=tf.float64, name=None), TensorSpec(shape=(), dtype=tf.uint8, name=None))>

In [46]:
def count(stop):
    """Generate 0, 1, 2, ... up to and including ``stop``.

    Nothing is yielded when ``stop`` is below zero.
    """
    current = 0
    while True:
        if current <= stop:
            yield current
            current += 1
        else:
            # Passed the inclusive upper bound: end the generator.
            return

# Sanity-check the generator: the upper bound is inclusive, so this prints 0 through 5.
for n in count(5):
    print(n)

0
1
2
3
4
5


In [47]:
# Wrap the `count` generator as a dataset of scalar int32 elements; `args` is
# forwarded to the generator, so it counts from 0 up to 25.
# `output_signature` replaces the deprecated `output_types` / `output_shapes`
# pair and describes each yielded element with a single TensorSpec.
ds_counter = tf.data.Dataset.from_generator(
    count,
    args=[25],
    output_signature=tf.TensorSpec(shape=(), dtype=tf.int32),
)

In [48]:
# repeat() restarts the generator each time it is exhausted, so the batches of
# 10 wrap around from 25 back to 0; take(10) bounds the otherwise endless stream.
for count_batch in ds_counter.repeat().batch(10).take(10):
    print(count_batch.numpy())

[0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24 25  0  1  2  3]
[ 4  5  6  7  8  9 10 11 12 13]
[14 15 16 17 18 19 20 21 22 23]
[24 25  0  1  2  3  4  5  6  7]
[ 8  9 10 11 12 13 14 15 16 17]
[18 19 20 21 22 23 24 25  0  1]
[ 2  3  4  5  6  7  8  9 10 11]
[12 13 14 15 16 17 18 19 20 21]


In [49]:
def gen_series():
    """Endlessly yield ``(index, samples)`` pairs.

    ``index`` counts up from 0. ``samples`` is a 1-D array of normally
    distributed values whose length is drawn from ``np.random.randint(1, 10)``
    on every step. The generator never terminates on its own.
    """
    index = 0
    while True:
        # Draw the length first, then the values, so the RNG call order is fixed.
        length = np.random.randint(1, 10)
        samples = np.random.normal(size=(length,))
        yield index, samples
        index += 1
# Preview the stream. It never ends by itself, so stop once the index exceeds 5.
for i, series in gen_series():
    # print() already applies str() to every argument.
    print(i, ":", series)
    if i > 5:
        break

0 : [ 1.2466 -0.2383  0.1588]
1 : [-0.9232  0.7214  0.8169  0.9513  0.6094 -1.7967]
2 : [ 0.5554  0.109  -0.7535 -0.2516 -0.9317  1.1341  0.8874]
3 : [ 0.8993  0.6061  1.0045 -0.7394  0.1023]
4 : [ 0.0986  0.1776 -1.7669  1.5343 -0.4701 -1.1364 -1.7097]
5 : [ 0.8793 -0.8194  2.6058 -0.6944 -0.0903  0.1726 -1.3644  0.6902  0.2527]
6 : [-1.4029 -0.9352 -1.4145 -1.2768]


In [53]:
# Wrap `gen_series` as a dataset of (index, series) pairs.
# `output_signature` replaces the deprecated `output_types` / `output_shapes`
# arguments: the index is a scalar int32 and the series a float32 vector whose
# length is left unknown (None) because it changes from element to element.
ds_series = tf.data.Dataset.from_generator(
    gen_series,
    output_signature=(
        tf.TensorSpec(shape=(), dtype=tf.int32),
        tf.TensorSpec(shape=(None,), dtype=tf.float32),
    ),
)
ds_series

<_FlatMapDataset element_spec=(TensorSpec(shape=(), dtype=tf.int32, name=None), TensorSpec(shape=(None,), dtype=tf.float32, name=None))>

In [63]:
# Shuffle with a 20-element buffer, then batch 10 variable-length series at a
# time; padded_batch pads the shorter series so the batch is rectangular
# (the padding shows up as trailing zeros in the printed batch).
ds_series_batch = ds_series.shuffle(20).padded_batch(10)

# Take the first batch and show the ids followed by the padded sequences.
ids, sequence_batch = next(iter(ds_series_batch))
print(ids.numpy())
print()
print(sequence_batch.numpy())

[ 1  2  3 17 23  9 19 13  8 12]

[[ 0.2237  0.2564 -0.349   0.5167  0.      0.      0.      0.      0.    ]
 [ 0.1878 -0.4723 -0.6585 -1.1078  2.4764  0.      0.      0.      0.    ]
 [ 1.1638 -0.8839 -0.9904 -0.2428 -0.2466 -0.3862  0.3155  0.      0.    ]
 [-2.425   1.4262 -0.0616 -1.7093 -1.3237  1.0323 -0.9064  0.7169  0.    ]
 [-0.4601  0.7047  0.1245 -0.9898  0.1275  0.      0.      0.      0.    ]
 [ 0.6558  0.3125  0.5048  0.4419 -0.1734  0.      0.      0.      0.    ]
 [ 1.1398  0.604  -0.8705 -0.5785 -2.5117 -0.5096  0.2887 -0.0271  0.0254]
 [-0.1732  0.1992  0.0779  1.1117  1.147  -1.6863 -1.6469  0.      0.    ]
 [ 0.329   0.8411  0.      0.      0.      0.      0.      0.      0.    ]
 [ 0.1453 -0.9069  1.4947  0.0366  0.5062 -0.3495 -0.354   0.      0.    ]]


In [64]:
# Download the flower photos archive and unpack it (untar=True); `flowers`
# holds the local path returned by get_file.
flowers = tf.keras.utils.get_file(
    'flower_photos',
    'https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz',
    untar=True)

Downloading data from https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz
[1m228813984/228813984[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 0us/step


In [65]:
# Configure a Keras image generator that rescales pixel values by 1/255 and
# uses rotation_range=20 for augmentation.
# NOTE(review): the TensorFlow docs mark ImageDataGenerator as deprecated in
# favour of tf.keras.utils.image_dataset_from_directory — confirm before reuse.
img_gen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1./255, rotation_range=20)

# Draw one batch of (images, labels) from the downloaded directory.
# This rebinds the notebook-level names `images` and `labels`.
images, labels = next(img_gen.flow_from_directory(flowers))

Found 3670 images belonging to 5 classes.


In [66]:
# Inspect the batch: dtype and shape of the images, then of the labels.
print(images.dtype, images.shape)
print(labels.dtype, labels.shape)

float32 (32, 256, 256, 3)
float32 (32, 5)


In [68]:
# Wrap the Keras directory iterator as a tf.data dataset. The lambda defers
# creating the iterator until the dataset is actually iterated.
# `output_signature` replaces the deprecated `output_types` / `output_shapes`
# arguments; the shapes match the batch inspected earlier (32 images of
# 256x256x3 and 32 label vectors of length 5).
# NOTE(review): the directory holds 3670 images, which is not a multiple of 32,
# so a full pass presumably ends with a smaller batch that would violate this
# fixed leading dimension — consider `None` for the batch axis; confirm.
ds = tf.data.Dataset.from_generator(
    lambda: img_gen.flow_from_directory(flowers),
    output_signature=(
        tf.TensorSpec(shape=(32, 256, 256, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(32, 5), dtype=tf.float32),
    ),
)
ds.element_spec

(TensorSpec(shape=(32, 256, 256, 3), dtype=tf.float32, name=None),
 TensorSpec(shape=(32, 5), dtype=tf.float32, name=None))

In [69]:
# Pull one batch through the tf.data wrapper and confirm its shapes.
# The loop targets rebind the notebook-level names `images` and `labels`.
for images, labels in ds.take(1):
    print('images.shape: ', images.shape)
    print('labels.shape', labels.shape)

Found 3670 images belonging to 5 classes.
images.shape:  (32, 256, 256, 3)
labels.shape (32, 5)


In [70]:
# Download a single FSNS test TFRecord file (cached locally as "fsns.tfrec").
# This only fetches the file; the dataset that reads it is created separately.
fsns_test_file = tf.keras.utils.get_file("fsns.tfrec", "https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001")

Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/fsns-20160927/testdata/fsns-00000-of-00001
[1m7904079/7904079[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


In [71]:
# Read the downloaded file as a dataset of records; each element is a scalar
# string tensor. This rebinds the notebook-level name `dataset`.
dataset = tf.data.TFRecordDataset(filenames=[fsns_test_file])
dataset

<TFRecordDatasetV2 element_spec=TensorSpec(shape=(), dtype=tf.string, name=None)>

In [72]:
# Take the first raw record, decode its bytes as a tf.train.Example message,
# and look up the 'image/text' feature.
raw_example = next(iter(dataset))
parsed = tf.train.Example.FromString(raw_example.numpy())
parsed.features.feature['image/text']

bytes_list {
  value: "Rue Perreyon"
}

In [74]:
# Base URL and names of the three text files to fetch.
# The 'illiad' spelling is part of the hosted URL path.
directory_url = 'https://storage.googleapis.com/download.tensorflow.org/data/illiad/'
file_names = ['cowper.txt', 'derby.txt', 'butler.txt']

# Download each file and collect the local paths returned by get_file.
file_paths = [
    tf.keras.utils.get_file(file_name, directory_url + file_name)
    for file_name in file_names
]


Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/cowper.txt
[1m815980/815980[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/derby.txt
[1m809730/809730[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1us/step
Downloading data from https://storage.googleapis.com/download.tensorflow.org/data/illiad/butler.txt
[1m807992/807992[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2us/step


In [75]:
# Read the text files as a dataset with one element per line and show the
# first five. The first printed line still carries the bytes \xef\xbb\xbf
# (a UTF-8 byte-order mark) from the start of the file.
dataset = tf.data.TextLineDataset(file_paths)
for line in dataset.take(5):
    print(line.numpy())

b"\xef\xbb\xbfAchilles sing, O Goddess! Peleus' son;"
b'His wrath pernicious, who ten thousand woes'
b"Caused to Achaia's host, sent many a soul"
b'Illustrious into Ades premature,'
b'And Heroes gave (so stood the will of Jove)'


In [76]:
# Download the Titanic training CSV; `titanic_file` is the local path returned by get_file.
titanic_file = tf.keras.utils.get_file("train.csv", "https://storage.googleapis.com/tf-datasets/titanic/train.csv")

Downloading data from https://storage.googleapis.com/tf-datasets/titanic/train.csv
[1m30874/30874[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2us/step


In [77]:
# Load the CSV into a DataFrame and preview its first rows.
df = pd.read_csv(titanic_file)
df.head()

Unnamed: 0,survived,sex,age,n_siblings_spouses,parch,fare,class,deck,embark_town,alone
0,0,male,22.0,1,0,7.25,Third,unknown,Southampton,n
1,1,female,38.0,1,0,71.2833,First,C,Cherbourg,n
2,1,female,26.0,0,0,7.925,Third,unknown,Southampton,y
3,1,female,35.0,1,0,53.1,First,C,Southampton,n
4,0,male,28.0,0,0,8.4583,Third,unknown,Queenstown,y


In [80]:
# Slice the DataFrame column-wise: every dataset element is a dict holding one
# row's value for each column.
titanic_slices = tf.data.Dataset.from_tensor_slices(dict(df))

# Show the first row, one "column : value" line per feature; the column name
# is printed as its repr, left-aligned in a 20-character field.
for feature_batch in titanic_slices.take(1):
    for key, value in feature_batch.items():
        print(f'{key!r:20s} : {value}')

'survived'           : 0
'sex'                : b'male'
'age'                : 22.0
'n_siblings_spouses' : 1
'parch'              : 0
'fare'               : 7.25
'class'              : b'Third'
'deck'               : b'unknown'
'embark_town'        : b'Southampton'
'alone'              : b'n'


In [81]:
# Inspect the per-column spec of each row dict (one scalar tensor per column).
titanic_slices.element_spec

{'survived': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'sex': TensorSpec(shape=(), dtype=tf.string, name=None),
 'age': TensorSpec(shape=(), dtype=tf.float64, name=None),
 'n_siblings_spouses': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'parch': TensorSpec(shape=(), dtype=tf.int64, name=None),
 'fare': TensorSpec(shape=(), dtype=tf.float64, name=None),
 'class': TensorSpec(shape=(), dtype=tf.string, name=None),
 'deck': TensorSpec(shape=(), dtype=tf.string, name=None),
 'embark_town': TensorSpec(shape=(), dtype=tf.string, name=None),
 'alone': TensorSpec(shape=(), dtype=tf.string, name=None)}

In [82]:
# Build a dataset directly from the CSV file on disk, in batches of 4 rows,
# with the 'survived' column designated as the label.
titanic_batches = tf.data.experimental.make_csv_dataset(
    titanic_file,
    batch_size=4,
    label_name='survived'
)