In [1]:
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import os
import sys
import time
import tensorflow as tf
from tensorflow import keras
from pprint import pprint

# Record the interpreter and library versions so the outputs below can be
# tied to a specific environment.
print(sys.version_info)
for lib in (mpl, np, pd, sklearn, tf, keras):
    print(f"{lib.__name__} {lib.__version__}")

sys.version_info(major=3, minor=7, micro=5, releaselevel='final', serial=0)
matplotlib 3.1.2
numpy 1.17.4
pandas 0.25.3
sklearn 0.22
tensorflow 2.0.0
tensorflow_core.keras 2.2.4-tf


### 1. tf.data.Dataset.from_tensor_slices()

In [2]:
# Build a dataset from in-memory data: from_tensor_slices() slices the array
# along its first axis, so each of the 10 integers becomes one element.
dataset1 = tf.data.Dataset.from_tensor_slices(np.arange(10))
print(dataset1)

# Iterating over the dataset yields one scalar tensor per element.
for element in dataset1:
    print(element)

<TensorSliceDataset shapes: (), types: tf.int32>
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)


In [3]:
# Initialise a dataset from a tuple of arrays. Both arrays are sliced in
# lockstep along their first axis, so each element is a (features, label) pair.
x = np.array([[1, 2], [3, 4], [5, 6]])
y = np.array(["cat", "dog", "fox"])
dataset2 = tf.data.Dataset.from_tensor_slices((x, y))
print(dataset2)
print("-----------")

# Without unpacking, every element comes back as a tuple of tensors.
for pair in dataset2:
    print(pair)
print("-----------")

# Unpack each pair; .numpy() converts a tensor to its NumPy value
# (string tensors are returned as bytes).
for features, label in dataset2:
    print(features, label)
    print(features.numpy(), label.numpy())

<TensorSliceDataset shapes: ((2,), ()), types: (tf.int32, tf.string)>
-----------
(<tf.Tensor: id=26, shape=(2,), dtype=int32, numpy=array([1, 2])>, <tf.Tensor: id=27, shape=(), dtype=string, numpy=b'cat'>)
(<tf.Tensor: id=28, shape=(2,), dtype=int32, numpy=array([3, 4])>, <tf.Tensor: id=29, shape=(), dtype=string, numpy=b'dog'>)
(<tf.Tensor: id=30, shape=(2,), dtype=int32, numpy=array([5, 6])>, <tf.Tensor: id=31, shape=(), dtype=string, numpy=b'fox'>)
-----------
tf.Tensor([1 2], shape=(2,), dtype=int32) tf.Tensor(b'cat', shape=(), dtype=string)
[1 2] b'cat'
tf.Tensor([3 4], shape=(2,), dtype=int32) tf.Tensor(b'dog', shape=(), dtype=string)
[3 4] b'dog'
tf.Tensor([5 6], shape=(2,), dtype=int32) tf.Tensor(b'fox', shape=(), dtype=string)
[5 6] b'fox'


In [4]:
# Initialise a dataset from a dict of arrays (x and y come from the tuple
# example above). Each element is a dict with one slice per key.
dataset3 = tf.data.Dataset.from_tensor_slices(
    {
        "feature": x,
        "label": y,
    }
)
for record in dataset3:
    print(record)
print("--------------")

# Look the tensors up by key and show their NumPy values.
for record in dataset3:
    print(record["feature"].numpy(), record["label"].numpy())

{'feature': <tf.Tensor: id=51, shape=(2,), dtype=int32, numpy=array([1, 2])>, 'label': <tf.Tensor: id=52, shape=(), dtype=string, numpy=b'cat'>}
{'feature': <tf.Tensor: id=53, shape=(2,), dtype=int32, numpy=array([3, 4])>, 'label': <tf.Tensor: id=54, shape=(), dtype=string, numpy=b'dog'>}
{'feature': <tf.Tensor: id=55, shape=(2,), dtype=int32, numpy=array([5, 6])>, 'label': <tf.Tensor: id=56, shape=(), dtype=string, numpy=b'fox'>}
--------------
[1 2] b'cat'
[3 4] b'dog'
[5 6] b'fox'


### 2. dataset.repeat()和dataset.batch()

In [5]:
# repeat() and batch() each return a dataset object, so the calls can be chained.
# Iterate over the source dataset 3 times and group the elements into batches
# of 7; the last batch is smaller when the total is not a multiple of 7.
dataset4 = dataset1.repeat(count=3).batch(batch_size=7)
for batch in dataset4:
    print(batch)

tf.Tensor([0 1 2 3 4 5 6], shape=(7,), dtype=int32)
tf.Tensor([7 8 9 0 1 2 3], shape=(7,), dtype=int32)
tf.Tensor([4 5 6 7 8 9 0], shape=(7,), dtype=int32)
tf.Tensor([1 2 3 4 5 6 7], shape=(7,), dtype=int32)
tf.Tensor([8 9], shape=(2,), dtype=int32)


### 3. dataset.interleave()

In [6]:
# interleave() mixes the contents of several input elements evenly:
#   map_func     - turns each input element (here, one batch) into a dataset
#   cycle_length - how many input elements are processed concurrently
#   block_length - how many consecutive items are taken from one input
#                  element before moving on to the next
dataset5 = dataset4.interleave(
    map_func=lambda batch: tf.data.Dataset.from_tensor_slices(batch),
    cycle_length=5,
    block_length=3,
)

for element in dataset5:
    print(element)

tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(0, shape=(), dtype=int32)
tf.Tensor(1, shape=(), dtype=int32)
tf.Tensor(2, shape=(), dtype=int32)
tf.Tensor(7, shape=(), dtype=int32)
tf.Tensor(8, shape=(), dtype=int32)
tf.Tensor(9, shape=(), dtype=int32)
tf.Tensor(4, shape=(), dtype=int32)
tf.Tensor(5, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(6, shape=(), dtype=int32)
tf.Tensor(3, shape=(), dtype