# TF2.0 Loader 製作 - Memory

## 內容
* 基本loader
* cache
* prefetch

In [1]:
import tensorflow as tf
from pprint import pprint
import numpy as np
import matplotlib.pyplot as plt

**以tf.data.Dataset.from_generator這種loader類型示範**

先準備好一個data generator，會yield指定數量的數字

In [2]:
def f(rng):
    """Yield the integers 0, 1, ..., rng - 1 in ascending order.

    Args:
        rng: Number of values to produce; it is passed straight to ``range``.

    Yields:
        Each integer of ``range(rng)`` in turn.
    """
    yield from range(rng)

In [3]:
# Sanity check: materialise the generator to see exactly what it yields.
list(f(3))

[0, 1, 2]

---

## 最陽春的loader

In [44]:
# Wrap the generator f (called with the argument 3) as a tf.data.Dataset
# whose elements are declared as float32.
d = tf.data.Dataset.from_generator(
    f,
    args=[3],
    output_types=tf.float32,
)

In [45]:
%%time
for i,x in enumerate(d):
        pprint(x)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>
<tf.Tensor: shape=(), dtype=float32, numpy=1.0>
<tf.Tensor: shape=(), dtype=float32, numpy=2.0>
CPU times: user 33.4 ms, sys: 38.3 ms, total: 71.7 ms
Wall time: 68.7 ms


---

## Cache

In [66]:
# Same generator-backed loader, now producing 1000 elements and with a
# cache() stage appended to the pipeline.
d = tf.data.Dataset.from_generator(
    f,
    args=[1000],
    output_types=tf.float32,
).cache()

In [67]:
%%time
for i,x in enumerate(d):
    if i<3:
        pprint(x)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>
<tf.Tensor: shape=(), dtype=float32, numpy=1.0>
<tf.Tensor: shape=(), dtype=float32, numpy=2.0>
CPU times: user 227 ms, sys: 61 ms, total: 288 ms
Wall time: 255 ms


In [68]:
%%time
'''因為第一次跑完存進記憶體，第二次會變快(但因為存起來，有隨機型preprocess的話就不隨機了)'''
for i,x in enumerate(d):
    if i<3:
        pprint(x)

<tf.Tensor: shape=(), dtype=float32, numpy=0.0>
<tf.Tensor: shape=(), dtype=float32, numpy=1.0>
<tf.Tensor: shape=(), dtype=float32, numpy=2.0>
CPU times: user 110 ms, sys: 0 ns, total: 110 ms
Wall time: 107 ms


---

## Prefetch

In [354]:
'''在跑的過程中pipeline處理，一部分先放進memory內'''
# English: while iterating, the pipeline keeps processing and places part of
# the data in memory ahead of time.
def _generator_source():
    """Return a fresh, un-prefetched 1000-element float32 dataset built from f."""
    return tf.data.Dataset.from_generator(f, args=[1000], output_types=tf.float32)

# Four independent pipelines that differ only in their prefetch setting.
d0 = _generator_source()
d1 = _generator_source().prefetch(10)
d2 = _generator_source().prefetch(100)
d3 = _generator_source().prefetch(tf.data.experimental.AUTOTUNE)

In [355]:
%%time
# Baseline: time one full pass over d0, the pipeline without prefetch.
for i in d0:
    pass

CPU times: user 228 ms, sys: 60.9 ms, total: 288 ms
Wall time: 252 ms


In [356]:
%%time
'''加完快一點'''
# English: "a bit faster after adding it" - d1 is the pipeline with prefetch(10).
# NOTE(review): this rests on a single %%time run and the recorded difference
# from the baseline is small - confirm with repeated runs.
for i in d1:
    pass

CPU times: user 266 ms, sys: 70.6 ms, total: 337 ms
Wall time: 229 ms


In [357]:
%%time
'''加越多越快'''
# English: "the more you add, the faster" - d2 is the pipeline with prefetch(100).
# NOTE(review): single %%time run; the recorded gap to prefetch(10) is only a
# few milliseconds, so the trend should be confirmed with repeated runs.
for i in d2:
    pass

CPU times: user 212 ms, sys: 108 ms, total: 320 ms
Wall time: 223 ms


In [358]:
%%time
'''Autotune很容易達到很快'''
# English: "AUTOTUNE easily gets fast" - d3 is the pipeline with
# prefetch(tf.data.experimental.AUTOTUNE).
# NOTE(review): single %%time run - confirm with repeated runs.
for i in d3:
    pass

CPU times: user 241 ms, sys: 76.6 ms, total: 317 ms
Wall time: 219 ms


In [382]:
'''含有cache的情況也是可以加速'''
# English: prefetch can also speed things up when the pipeline contains cache().
def _cached_repeated_source():
    """Return a fresh 10-element float32 dataset from f, cached and repeated 100 times."""
    return (
        tf.data.Dataset.from_generator(f, args=[10], output_types=tf.float32)
        .cache()
        .repeat(100)
    )

# Four independent pipelines (each with its own cache stage) that differ only
# in their prefetch setting.
d0 = _cached_repeated_source()
d1 = _cached_repeated_source().prefetch(10)
d2 = _cached_repeated_source().prefetch(100)
d3 = _cached_repeated_source().prefetch(tf.data.experimental.AUTOTUNE)

In [383]:
# Untimed warm-up: run every pipeline once in full, in order d0..d3.
# Each of these pipelines contains a cache() stage - presumably this pass is
# here so that the timed cells that follow measure already-cached data.
for dataset in (d0, d1, d2, d3):
    for element in dataset:
        pass

In [384]:
%%time
# Baseline for the cached case: time one full pass over d0
# (cache().repeat(100), no prefetch), which has already been iterated once.
for i in d0:
    pass

CPU times: user 113 ms, sys: 8 ms, total: 121 ms
Wall time: 118 ms


In [385]:
%%time
'''加完快一點'''
# English: "a bit faster after adding it" - d1 adds prefetch(10) after
# cache().repeat(100).
# NOTE(review): single %%time run; the recorded wall time is almost the same
# as the baseline - confirm with repeated runs.
for i in d1:
    pass

CPU times: user 123 ms, sys: 8.31 ms, total: 131 ms
Wall time: 117 ms


In [386]:
%%time
'''加越多越快'''
# English: "the more you add, the faster" - d2 adds prefetch(100) after
# cache().repeat(100).
# NOTE(review): single %%time run - confirm the trend with repeated runs.
for i in d2:
    pass

CPU times: user 95.6 ms, sys: 3.25 ms, total: 98.9 ms
Wall time: 88.6 ms


In [387]:
%%time
'''Autotune很容易達到很快，而且記憶體也不會讓他爆'''
# English: "AUTOTUNE easily gets fast, and it also will not blow up memory" -
# d3 adds prefetch(tf.data.experimental.AUTOTUNE) after cache().repeat(100).
# NOTE(review): single %%time run, and memory use is not measured anywhere in
# this notebook - treat the memory remark as an unverified claim.
for i in d3:
    pass

CPU times: user 101 ms, sys: 3.93 ms, total: 105 ms
Wall time: 93.7 ms


---