<a href="https://colab.research.google.com/github/michelucci/aadl2-code/blob/master/chapter2/Keras_Datasets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Keras Datasets

(C) Umberto Michelucci

umberto.michelucci@toelt.ai

www.toelt.ai

In [0]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt

In [0]:
# Enable eager execution so tensors are evaluated immediately and their values
# can be displayed directly by the cells below.
# NOTE(review): the "Classical network with Keras" section further down states
# that it should not be run with eager execution -- presumably that section
# needs a fresh runtime in which this cell is skipped; confirm.
tf.enable_eager_execution()

In [0]:
# Random 10x10 matrix; slicing along the first axis turns each row into one
# dataset element of shape (10,).
inp = tf.random_uniform(shape=(10, 10))
dataset = tf.data.Dataset.from_tensor_slices(tensors=inp)

In [0]:
# Printing a dataset shows its element shapes and types, not its values.
print(dataset)

<TensorSliceDataset shapes: (10,), types: tf.float32>


In [0]:
# Create a fresh one-shot iterator and display the first element it yields.
first_row_iterator = dataset.make_one_shot_iterator()
first_row_iterator.get_next()

<tf.Tensor: id=75, shape=(10,), dtype=float32, numpy=
array([0.2215631 , 0.32099664, 0.04410303, 0.8502971 , 0.2472974 ,
       0.25522232, 0.94817066, 0.7719344 , 0.60333145, 0.75336015],
      dtype=float32)>

In [0]:
def double(row):
    """Return the given dataset element multiplied by two."""
    return row * 2


# Apply the transformation element by element.
dataset2 = dataset.map(double)

In [0]:
# Display the first element of the mapped dataset.
doubled_iterator = dataset2.make_one_shot_iterator()
doubled_iterator.get_next()

<tf.Tensor: id=80, shape=(10,), dtype=float32, numpy=
array([0.4431262 , 0.6419933 , 0.08820605, 1.7005942 , 0.4945948 ,
       0.51044464, 1.8963413 , 1.5438688 , 1.2066629 , 1.5067203 ],
      dtype=float32)>

In [0]:
# Keep a handle on a single one-shot iterator so that successive get_next()
# calls in the next cell advance through the rows instead of restarting at the
# first one.
iterator = dataset.make_one_shot_iterator()

In [0]:
# Walk through the 10 rows of the dataset, one get_next() call per row.
for i in range(10):
  # Fetch the element first and then print it: print() returns None, so
  # assigning its result would never capture the tensor.
  value = iterator.get_next()
  print(value)

tf.Tensor(
[0.2215631  0.32099664 0.04410303 0.8502971  0.2472974  0.25522232
 0.94817066 0.7719344  0.60333145 0.75336015], shape=(10,), dtype=float32)
tf.Tensor(
[0.28381765 0.3738917  0.8146689  0.20919728 0.5753969  0.9356725
 0.7362906  0.76200795 0.01308048 0.14003313], shape=(10,), dtype=float32)
tf.Tensor(
[0.29885674 0.47367573 0.49806583 0.64716995 0.8336675  0.8460969
 0.33148313 0.6525279  0.697958   0.06005645], shape=(10,), dtype=float32)
tf.Tensor(
[0.14598179 0.67179334 0.35625577 0.28104913 0.2975446  0.87274456
 0.30534363 0.6209985  0.03852844 0.958153  ], shape=(10,), dtype=float32)
tf.Tensor(
[0.40758312 0.29343522 0.09564996 0.36785018 0.63568246 0.3402202
 0.78177416 0.33845615 0.8337041  0.8131908 ], shape=(10,), dtype=float32)
tf.Tensor(
[0.2228247  0.4966954  0.23163402 0.59210646 0.18588722 0.01308358
 0.9436873  0.4783727  0.9283563  0.08247316], shape=(10,), dtype=float32)
tf.Tensor(
[0.8271581  0.24156737 0.5884396  0.96710765 0.0058099  0.96965003
 0.9199

## Batching

In [0]:
# Group consecutive dataset elements into batches of 2.
batched_dataset = dataset.batch(2)

In [0]:
# Rebinds `iterator` (previously over the unbatched dataset) to a one-shot
# iterator over the batches.
iterator = batched_dataset.make_one_shot_iterator()

In [0]:
# Show the first two batches.
for batch_number in range(2):
  print(iterator.get_next())

tf.Tensor(
[[0.2215631  0.32099664 0.04410303 0.8502971  0.2472974  0.25522232
  0.94817066 0.7719344  0.60333145 0.75336015]
 [0.28381765 0.3738917  0.8146689  0.20919728 0.5753969  0.9356725
  0.7362906  0.76200795 0.01308048 0.14003313]], shape=(2, 10), dtype=float32)
tf.Tensor(
[[0.29885674 0.47367573 0.49806583 0.64716995 0.8336675  0.8460969
  0.33148313 0.6525279  0.697958   0.06005645]
 [0.14598179 0.67179334 0.35625577 0.28104913 0.2975446  0.87274456
  0.30534363 0.6209985  0.03852844 0.958153  ]], shape=(2, 10), dtype=float32)


# Batching with the MNIST dataset

In [0]:
# Problem dimensions: 10 digit classes, 28x28 images flattened to vectors.
num_classes = 10
image_vector_size = 28 * 28

# Load the train/test split shipped with Keras.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten every image into a single row of image_vector_size values.
x_train = x_train.reshape(len(x_train), image_vector_size)
x_test = x_test.reshape(len(x_test), image_vector_size)

# One-hot encode the labels.
y_train = keras.utils.to_categorical(y_train, num_classes=num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=num_classes)

In [0]:
# One dataset of (image, label) pairs per split.
mnist_ds_train, mnist_ds_test = (
    tf.data.Dataset.from_tensor_slices(split)
    for split in ((x_train, y_train), (x_test, y_test))
)

In [0]:
# After to_categorical the labels have one row per training example and one
# column per class.
y_train.shape

(60000, 10)

# Classical network with Keras

This section should not be run with eager execution enabled: it relies on placeholders and sessions.

In [0]:
from keras.layers import Dense
from keras.objectives import categorical_crossentropy

Using TensorFlow backend.


In [0]:
# Input placeholder: a batch of flattened 784-pixel images.
img = tf.placeholder(dtype=tf.float32, shape=(None, 784))

# Two fully-connected hidden layers with 128 units and ReLU activation each.
x = img
for layer_index in range(2):
    x = Dense(units=128, activation='relu')(x)

# Output layer: one softmax probability per class.
preds = Dense(units=10, activation='softmax')(x)

We define an easy network, simply stacking layers.

In [0]:
# Placeholder for the one-hot encoded targets.
labels = tf.placeholder(dtype=tf.float32, shape=(None, 10))

# Mean cross-entropy over the batch.
per_example_loss = categorical_crossentropy(labels, preds)
loss = tf.reduce_mean(per_example_loss)

# Fraction of examples whose most probable class matches the target class.
predicted_class = tf.argmax(preds, axis=1)
true_class = tf.argmax(labels, axis=1)
correct_prediction = tf.equal(predicted_class, true_class)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

In [0]:
# Adam optimizer with learning rate 0.001 minimizing the cross-entropy loss.
optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
train_step = optimizer.minimize(loss)

# Op that initializes all variables created so far.
init_op = tf.global_variables_initializer()

Let's batch the training dataset now (and prefetch one batch ahead).

In [0]:
# Batches of 1000 training examples, with one batch prefetched ahead of use.
train_batched = mnist_ds_train.batch(batch_size=1000).prefetch(buffer_size=1)
# test_batched = mnist_ds_test.batch(1000)

Let's define our iterators now (https://cs230-stanford.github.io/tensorflow-input-data.html)

In [0]:
# An initializable iterator is reset by running its initializer op; the
# mini-batch training loop below does this at the start of every epoch.
train_iterator = train_batched.make_initializable_iterator() # So we can restart from the beginning
# Pair of tensors that yield the next (images, labels) batch when evaluated.
next_batch = train_iterator.get_next()
# Op that (re)positions the iterator at the beginning of the dataset.
it_init_op = train_iterator.initializer

# In graph mode this prints the symbolic tensors, not actual data.
print(next_batch)


(<tf.Tensor 'IteratorGetNext_7:0' shape=(?, 784) dtype=uint8>, <tf.Tensor 'IteratorGetNext_7:1' shape=(?, 10) dtype=float32>)


In [0]:

# Full-batch training: every epoch performs a single optimizer step on the
# whole training set, fed directly through the placeholders.
with tf.Session() as sess:
    sess.run(init_op)

    for epoch in range(50):
        # The dataset iterator is never read in this loop, so it is not
        # (re)initialized here.
        sess.run(train_step, feed_dict={img: x_train, labels: y_train})

        # Report the training accuracy every 10 epochs.
        if epoch % 10 == 0:
            print('epoch', epoch)
            print(sess.run(accuracy, feed_dict={img: x_train,
                                                labels: y_train}))

epoch 0
0.17655
epoch 10
0.6413
epoch 20
0.7025333
epoch 30
0.7262833
epoch 40
0.7582


In [0]:
# Mini-batch training: every epoch iterates once over all batches produced by
# the dataset iterator.
with tf.Session() as sess:
    sess.run(init_op)

    for epoch in range(50):
        # Rewind the iterator to the first batch.
        sess.run(it_init_op)

        while True:
            try:
                batch_images, batch_labels = sess.run(next_batch)
                sess.run(train_step,
                         feed_dict={img: batch_images, labels: batch_labels})
            except tf.errors.OutOfRangeError:
                # The iterator is exhausted: the epoch is complete.
                break

        # Report the accuracy on the full training set every 10 epochs.
        if epoch % 10 == 0:
            print('epoch', epoch)
            print(sess.run(accuracy, feed_dict={img: x_train,
                                                labels: y_train}))

epoch 0
0.62185
epoch 10
0.9687
epoch 20
0.98038334
epoch 30
0.98716664
epoch 40
0.98903334


# tf.data.Dataset in eager Execution Mode

Example

In [0]:
import tensorflow as tf
from tensorflow import keras
import tensorflow.contrib.eager as tfe

# NOTE(review): TensorFlow expects eager execution to be enabled at program
# startup, before any graph ops exist -- presumably this section has to be run
# in a fresh runtime rather than after the graph-mode cells above; confirm.
tf.enable_eager_execution()

In [0]:
# In eager mode a dataset can be iterated with a plain Python for loop.
random_pairs = tf.random_uniform(shape=[4, 2])
dataset = tf.data.Dataset.from_tensor_slices(random_pairs).batch(2)
for pair_batch in dataset:
  print(pair_batch)

tf.Tensor(
[[0.07181489 0.46992648]
 [0.00652897 0.9028846 ]], shape=(2, 2), dtype=float32)
tf.Tensor(
[[0.9167508  0.8379569 ]
 [0.33501422 0.3299384 ]], shape=(2, 2), dtype=float32)


In [0]:
# Datasets - advanced topics

How to read multiple CSV files and load them into a single dataset. Example:

In [0]:
# decode_line --> function that returns (features, labels) for one CSV line;
# it has to be defined before this cell is run.
dataset = (
    tf.data.Dataset.list_files("train-csv-*")  # one element per matching file
    .flat_map(tf.data.TextLineDataset)         # one element per line of every file
    .map(decode_line)                          # one (features, labels) per line
)

**flat_map** is a one-to-many transformation: it transforms one file into many records.

**map** is a one-to-one transformation: it transforms one line into a (features, labels) pair.