# TF2.0 Loader製作 - Memory

## 內容
* 基本loader
* cache
* prefetch

In [1]:
import tensorflow as tf
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt

**以tf.data.Dataset.from_generator這種loader類型示範**

先準備好一個data generator，會yield指定數量的數字

In [2]:
def f(rng):
    """Generate the integers 0, 1, ..., rng - 1 in ascending order.

    Args:
        rng: Number of values to produce; anything accepted by ``range``.
            A value of zero or less produces nothing.

    Yields:
        The next integer of the sequence.
    """
    yield from range(rng)

In [4]:
# Sanity check: materialise the generator to confirm it yields 0, 1, 2.
list(f(3))

[0, 1, 2]

---

## 最陽春的loader

In [5]:
# Simplest loader: wrap the generator f in a Dataset. `args` supplies the
# argument f is called with (3 elements here) and `output_types` makes every
# yielded value come out as a float32 scalar tensor.
d = tf.data.Dataset.from_generator(
    f,
    args=[3],
    output_types=tf.float32,
)

In [6]:
%%time
for i,x in enumerate(d):
        pprint(x)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>
<tf.Tensor: shape=(), dtype=float32, numpy=1.0>
<tf.Tensor: shape=(), dtype=float32, numpy=2.0>
CPU times: user 28.4 ms, sys: 24.2 ms, total: 52.6 ms
Wall time: 49.9 ms


---

## Cache

In [11]:
# Same generator, now producing 1000 elements, with a cache attached.
# cache() is called without a filename, so the elements are kept in memory.
d = tf.data.Dataset.from_generator(
    f,
    args=[1000],
    output_types=tf.float32,
).cache()

In [12]:
%%time
for i,x in enumerate(d):
    if i<3:
        pprint(x)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>
<tf.Tensor: shape=(), dtype=float32, numpy=1.0>
<tf.Tensor: shape=(), dtype=float32, numpy=2.0>
CPU times: user 201 ms, sys: 72.1 ms, total: 273 ms
Wall time: 240 ms


In [13]:
%%time
'''因為第一次跑完存進記憶體，第二次會變快(但因為存起來，有隨機型preprocess的話就不隨機了)'''
for i,x in enumerate(d):
    if i<3:
        pprint(x)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>
<tf.Tensor: shape=(), dtype=float32, numpy=1.0>
<tf.Tensor: shape=(), dtype=float32, numpy=2.0>
CPU times: user 89 ms, sys: 328 µs, total: 89.3 ms
Wall time: 87 ms


---

## Prefetch

In [14]:
# Prefetch pipelines the work while the dataset is being consumed: a portion
# of the elements is prepared ahead of time and held in memory.
# Four variants of the same 1000-element pipeline that differ only in the
# prefetch buffer size.
d0 = tf.data.Dataset.from_generator(
    f, args=[1000], output_types=tf.float32
)  # baseline, no prefetch
d1 = tf.data.Dataset.from_generator(
    f, args=[1000], output_types=tf.float32
).prefetch(10)
d2 = tf.data.Dataset.from_generator(
    f, args=[1000], output_types=tf.float32
).prefetch(100)
d3 = tf.data.Dataset.from_generator(
    f, args=[1000], output_types=tf.float32
).prefetch(tf.data.experimental.AUTOTUNE)  # buffer size chosen by tf.data

In [15]:
%%time
for i in d0:
    pass

CPU times: user 188 ms, sys: 75.8 ms, total: 263 ms
Wall time: 229 ms


In [16]:
%%time
'''加完快一點'''
for i in d1:
    pass

CPU times: user 203 ms, sys: 84.7 ms, total: 287 ms
Wall time: 195 ms


In [17]:
%%time
'''加越多越快'''
for i in d2:
    pass

CPU times: user 220 ms, sys: 67.1 ms, total: 287 ms
Wall time: 196 ms


In [18]:
%%time
'''Autotune很容易達到很快'''
for i in d3:
    pass

CPU times: user 199 ms, sys: 88.5 ms, total: 287 ms
Wall time: 198 ms


In [20]:
# Same prefetch comparison, this time on cached pipelines: 10 generated
# elements are cached and then repeated 100 times, so each pass still yields
# 1000 elements. Whether prefetch helps once the data is cached is what the
# timed cells below measure (in the recorded run it made no positive difference).
d0 = tf.data.Dataset.from_generator(
    f, args=[10], output_types=tf.float32
).cache().repeat(100)  # baseline, no prefetch
d1 = tf.data.Dataset.from_generator(
    f, args=[10], output_types=tf.float32
).cache().repeat(100).prefetch(10)
d2 = tf.data.Dataset.from_generator(
    f, args=[10], output_types=tf.float32
).cache().repeat(100).prefetch(100)
d3 = tf.data.Dataset.from_generator(
    f, args=[10], output_types=tf.float32
).cache().repeat(100).prefetch(tf.data.experimental.AUTOTUNE)

In [21]:
# Warm-up: run every dataset to the end once, in order, so that each cache is
# already filled before the timed cells that follow.
for ds in (d0, d1, d2, d3):
    for element in ds:
        pass

In [22]:
%%time
for i in d0:
    pass

CPU times: user 90.9 ms, sys: 0 ns, total: 90.9 ms
Wall time: 88.7 ms


In [23]:
%%time
'''加完快一點'''
for i in d1:
    pass

CPU times: user 103 ms, sys: 0 ns, total: 103 ms
Wall time: 93.9 ms


In [24]:
%%time
'''加越多越快'''
for i in d2:
    pass

CPU times: user 95.9 ms, sys: 5.62 ms, total: 102 ms
Wall time: 93 ms


In [25]:
%%time
'''Autotune很容易達到很快，而且記憶體也不會讓他爆'''
for i in d3:
    pass

CPU times: user 89.4 ms, sys: 13.5 ms, total: 103 ms
Wall time: 93.6 ms


---