In [40]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf 

import numpy as np
import pandas as pd

# 1 Dataset 基本操作

## 1.1 创建

### 1.1.1 tuple输入

In [44]:
# Synthetic demo data: 20 samples with 5 float features each (uniform in
# [0, 1)), plus one integer label in [0, 5) per sample stored as a (20, 1)
# column so features and labels share the same leading dimension.
d_np = np.random.rand(20, 5)
l_np = np.random.randint(0, 5, size=(20, 1))

# Report both shapes, one per line.
print(f"d_np.shape:{d_np.shape}", f"l_np.shape:{l_np.shape}", sep="\n")

d_np.shape:(20, 5)
l_np.shape:(20, 1)


In [99]:
# Input pipeline built from the (features, labels) tuple: slice it into
# per-sample pairs, shuffle, batch, then repeat.
ds = (
    tf.data.Dataset.from_tensor_slices((d_np, l_np))
    # Shuffles the samples without breaking the feature/label pairing; the
    # buffer covers the whole data set.
    .shuffle(buffer_size=d_np.shape[0])
    # Group the samples into batches.
    .batch(batch_size=5)
    # Number of passes over the whole (batched) dataset.
    .repeat(2)
)

In [100]:
# Peek at a single batch: take(1) limits the iteration to one
# (features, labels) pair, whose contents are printed in full.
for feature_batch, label_batch in ds.take(1):
    print(
        f"feature_batch:\n{feature_batch}",
        f"label_batch:\n{label_batch}",
        sep="\n",
    )
    

feature_batch:
[[0.97039877 0.69910395 0.89097558 0.25355272 0.72552711]
 [0.20349443 0.53751182 0.236943   0.43649216 0.79802451]
 [0.81386435 0.31782834 0.39704509 0.89016825 0.82541621]
 [0.90208287 0.03097446 0.32416909 0.28659275 0.95713608]
 [0.75498971 0.14166991 0.7938222  0.72540315 0.54143928]]
label_batch:
[[0]
 [0]
 [4]
 [3]
 [1]]


### 1.1.2 字典创建

In [137]:
# A dataset can also be created from a dict of arrays; every element is then a
# dict with the same keys. "a" is a (6, 1) column holding 1.0 .. 6.0 and "b" a
# (6, 2) array of uniform random values.
b = {
    "a": np.arange(1.0, 7.0).reshape(-1, 1),
    "b": np.random.uniform(size=(6, 2)),
}
dict_ds = tf.data.Dataset.from_tensor_slices(b).batch(batch_size=2)

# NOTE(review): the printed label says "shape", but the batch values of "b"
# (followed by those of "a") are what is actually printed.
for f in dict_ds:
    print(f"feature_labels- >shape:\n{f['b']}", f"{f['a']}", sep="\n")

feature_labels- >shape:
[[0.55095129 0.65079965]
 [0.06920547 0.17278314]]
[[1.]
 [2.]]
feature_labels- >shape:
[[0.82493705 0.81355732]
 [0.34669445 0.61119527]]
[[3.]
 [4.]]
feature_labels- >shape:
[[0.61389431 0.15289676]
 [0.64509653 0.86673105]]
[[5.]
 [6.]]


## 1.2 遍历

- 直接遍历  for f, l in ds:
- take  ds.take(1)
- iter  iter(ds)

In [101]:
# Fresh random data for the traversal examples: 20 samples x 5 features and a
# (20, 1) column of integer labels in [0, 5).
d_np = np.random.rand(20, 5)
l_np = np.random.randint(0, 5, size=(20, 1))

# Shuffle over the full data set, batch by 5, and repeat the batched dataset
# twice.
ds = (
    tf.data.Dataset.from_tensor_slices((d_np, l_np))
    .shuffle(buffer_size=d_np.shape[0])
    .batch(batch_size=5)
    .repeat(2)
)

### 1.2.1 直接访问

In [102]:
# Direct traversal: loop over the dataset itself and print the shape of the
# feature batch and label batch for every element it yields.
for feature_batch, label_batch in ds:
    print(
        f"feature_labels- >shape:\n{feature_batch.shape}",
        f"{label_batch.shape}",
        sep="\n",
    )

feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)


### 1.2.2 take

In [103]:
# take(1) restricts the traversal to a single batch; print its shapes.
for feature_batch, label_batch in ds.take(1):
    print(
        f"feature_labels- >shape:\n{feature_batch.shape}",
        f"{label_batch.shape}",
        sep="\n",
    )

feature_labels- >shape:
(5, 5)
(5, 1)


In [104]:
# Anything beyond the requested count is not output: only 5 batches are
# printed here.
for feature_batch, label_batch in ds.take(5):
    print(
        f"feature_labels- >shape:\n{feature_batch.shape}",
        f"{label_batch.shape}",
        sep="\n",
    )

feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)


### 1.2.3 iter

In [105]:
# Create an explicit Python iterator over ds so that batches can be pulled one
# at a time with next(); the following cells consume it step by step.
iterator = iter(ds)

In [106]:
# Pull exactly one (features, labels) batch from the iterator; this advances
# it, so a later loop over `iterator` continues from the next batch.
# NOTE(review): the labels say ".shape", but the batch values are printed.
feature_batch, label_batch = next(iterator)
print(
    f"feature_batch.shape:\n{feature_batch}",
    f"label_batch.shape:\n{label_batch}",
    sep="\n",
)

feature_batch.shape:
[[5.70793444e-01 6.42531982e-01 7.48771705e-01 8.61960717e-01
  8.59558538e-01]
 [4.73135994e-01 4.08826487e-01 5.75841366e-01 7.87987389e-01
  8.88300778e-01]
 [4.33145734e-01 7.58715898e-01 7.57203912e-01 8.74393246e-01
  8.79851985e-01]
 [5.41077298e-01 7.66517835e-01 5.88442311e-01 6.75217346e-01
  3.91591286e-01]
 [2.59123215e-01 7.11203072e-01 3.35041570e-01 5.59996975e-04
  8.63765474e-01]]
label_batch.shape:
[[3]
 [1]
 [2]
 [4]
 [4]]


In [107]:
# Looping over the iterator resumes where the earlier next() call stopped, so
# only the batches not yet consumed are printed (shapes only).
for feature_batch, label_batch in iterator:
    print(
        f"feature_labels- >shape:\n{feature_batch.shape}",
        f"{label_batch.shape}",
        sep="\n",
    )

feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
feature_labels- >shape:
(5, 5)
(5, 1)
