# MNIST classification: how to feed the data to the model
1- MNIST dataset in memory  
2- Transform the numpy array as images  
3- Create and save the JPEG images on the Cloud Storage  
4- Dataset to read numpy array in memory  
5- Create TFRecords to store numpy array  
6- Dataset to read TFRecord with numpy array  
7- Create TFRecords to store JPEG images  
8- Dataset to read TFRecord with JPEG images  

## Install packages on Google  Cloud Datalab (locally use conda env)

### Select in the Python3 Kernel:
In the menu bar the of 'Kernel', select   
**python3**
### Install needed packages
copy the command below in a Google Cloud Datalab cell  
**!pip install tensorflow==1.12**
### Restart the Kernel 
this is to take into account the new installed packages. Click in the menu bar on:  
**Reset Session**

## Include paths to our functions

In [1]:
import sys
import os
import pathlib

workingdir=os.getcwd()
print(workingdir)
d=[d for d in os.listdir(workingdir)]
n=0
while not set(['notebook']).issubset(set(d)):
   workingdir=str(pathlib.Path(workingdir).parents[0])
   print(workingdir)
   d=[d for d in os.listdir(str(workingdir))]
   n+=1
   if n>5:
       break
sys.path.insert(0, workingdir)
os.chdir(workingdir)

/Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/notebook
/Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP


## Setup librairies import and plots style

### Import librairies

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import time
import gzip
import sys
import _pickle as cPickle
from PIL import Image
import glob as glob

In [3]:
print(tf.__version__)
print(tf.keras.__version__)

1.12.0
2.1.6-tf


### Import our utils functions

In [119]:
import src.utils.mnist_utils as mnist_utils
import src.utils.tensorflow_helper as tensorflow_helper
import src.model_mnist_v1.trainer.model as mnist_v1

In [120]:
import importlib
importlib.reload(mnist_utils)
importlib.reload(mnist_v1)
importlib.reload(tensorflow_helper);# to reload the function and mask the output

## Input Data
### Load the data

In [6]:
# load the data: path is relative to the python path!
(x_train, y_train), (x_test, y_test) = mnist_utils.load_data(path='data/mnist/raw/mnist.pkl.gz')

### Basics checks

In [7]:
# check data shape (training)
x_train.shape

(60000, 28, 28)

In [8]:
# check data shape (train)
x_test.shape

(10000, 28, 28)

In [9]:
x_train.dtype, x_test.dtype

(dtype('uint8'), dtype('uint8'))

In [10]:
np.max(x_train), np.min(x_train), np.max(x_test), np.min(x_test) 

(255, 0, 255, 0)

## Save Numpy array as JPEG as images

In [11]:
path_train_images='data/mnist/images_train/'
path_test_images='data/mnist/images_test/'

In [12]:
if not os.path.exists(path_train_images):
    os.makedirs(path_train_images)

In [13]:
if not os.path.exists(path_test_images):
    os.makedirs(path_test_images)

In [14]:
for i, im_array in enumerate(x_train):
    im = Image.fromarray(im_array)
    im.save(path_train_images+'image_train_'+str(i).zfill(5)+'_label_'+str(y_train[i]).zfill(2)+'.jpeg')

In [15]:
for i, im_array in enumerate(x_test):
    im = Image.fromarray(im_array)
    im.save(path_test_images+'image_test_'+str(i).zfill(5)+'_label_'+str(y_test[i]).zfill(2)+'.jpeg')

## Save JPEG images on GCP

In [19]:
GCS_BUCKET = 'gs://ml-productive-pipeline-53122' 
PROJECT = 'ml-productive-pipeline-53122'
REGION = 'europe-west1'
LOCAL_DATA_TEST = path_test_images
LOCAL_DATA_TRAIN = path_train_images

In [20]:
os.environ['GCS_BUCKET'] = GCS_BUCKET
os.environ['PROJECT'] = PROJECT
os.environ['REGION'] = REGION
os.environ['LOCAL_DATA_TEST'] = LOCAL_DATA_TEST
os.environ['LOCAL_DATA_TRAIN'] = LOCAL_DATA_TRAIN

In [21]:
!gsutil ls $GCS_BUCKET/mnist/raw

gs://ml-productive-pipeline-53122/mnist/raw/test/
gs://ml-productive-pipeline-53122/mnist/raw/train/


## Set parameters

In [22]:
tf.logging.set_verbosity(tf.logging.INFO)

In [23]:
# number of classes
NUM_CLASSES =10

# dimension of the input data
DIM_INPUT = 784

# number of epoch to train our model
EPOCHS = 2

# size of our mini batch
BATCH_SIZE = 128

# shuffle buffer size
SHUFFLE_BUFFER_SIZE = 10 * BATCH_SIZE

# prefetch buffer size
PREFETCH_BUFFER_SIZE = tf.contrib.data.AUTOTUNE

# number of paralell calls
NUM_PARALELL_CALL = 4

# model version
MODEL='v1'

## Defined flags

In [24]:
tensorflow_helper.del_all_flags(tf.flags.FLAGS)

In [25]:
# just for jupyter notebook and avoir : "UnrecognizedFlagError: Unknown command line flag 'f'"
tf.app.flags.DEFINE_string('f', '', 'kernel') 

# path to store the model and input for Tensorboard
tf.app.flags.DEFINE_string('model_dir_keras', './results/Models/Mnist/tf_1_12/keras/'+MODEL+'/ckpt/', 'Dir to save a model and checkpoints with keras')
tf.app.flags.DEFINE_string('tensorboard_dir_keras', './results/Models/Mnist/tf_1_12/keras/'+MODEL+'/logs/', 'Dir to save logs for TensorBoard with keras')

# parameters for the input dataset and train the model
tf.app.flags.DEFINE_integer('epoch', EPOCHS, 'number of epoch')
tf.app.flags.DEFINE_integer('step_per_epoch', len(x_train) // BATCH_SIZE, 'number of step per epoch')
tf.app.flags.DEFINE_integer('batch_size', BATCH_SIZE, 'Batch size')
tf.app.flags.DEFINE_integer('shuffle_buffer_size', SHUFFLE_BUFFER_SIZE , 'Shuffle buffer size')
tf.app.flags.DEFINE_integer('prefetch_buffer_size', PREFETCH_BUFFER_SIZE, 'Prefetch buffer size')
tf.app.flags.DEFINE_integer('num_parallel_calls', NUM_PARALELL_CALL, 'Number of paralell calls')

# parameters for the model
tf.app.flags.DEFINE_integer('num_classes', NUM_CLASSES, 'number of classes in our model')
tf.app.flags.DEFINE_integer('dim_input', DIM_INPUT, 'dimension of the input data for our model')

FLAGS = tf.app.flags.FLAGS

## Dataset to preprocess and feed data in our model 
Use tf.data.dataset it will prepare mini batches of data, reshuffle the data, parallelized the pre-processing the data. 

https://www.tensorflow.org/guide/performance/datasets  
To summarize, one good order for the different transformations is:
- create the dataset
- shuffle (with a big enough buffer size)  
https://stackoverflow.com/questions/46444018/meaning-of-buffer-size-in-dataset-map-dataset-prefetch-and-dataset-shuffle)
- repeat
- map with the actual work (preprocessing, augmentation…) using multiple parallel calls
- batch
- prefetch

ModeKeys:  
https://www.tensorflow.org/api_docs/python/tf/estimator/ModeKeys  
- EVAL
- PREDICT
- TRAIN

### Printing the number relater to the number of events (epoch, batch size, ...)

In [26]:
def print_summary_input(data, step='training'):
    print('Summary for the {} dataset:'.format(step))
    if step=='training':
        print('  - number of epoch            :', FLAGS.epoch)
        print('  - number of events per epoch :', len(data))
        print('  - batch size                 :', FLAGS.batch_size)
        print('  - number of step per epoch   :', FLAGS.step_per_epoch)
        print('  - total number of steps      :', FLAGS.epoch * FLAGS.step_per_epoch)
    else:
        print('  - number of epoch            :', 1)
        print('  - number of events per epoch :', len(data))
        print('  - batch size                 :', None)
        print('  - number of step per epoch   :', 1)
        print('  - total number of steps      :', 1) 

In [27]:
print_summary_input(x_train)

Summary for the training dataset:
  - number of epoch            : 2
  - number of events per epoch : 60000
  - batch size                 : 128
  - number of step per epoch   : 468
  - total number of steps      : 936


In [28]:
print_summary_input(x_test)

Summary for the training dataset:
  - number of epoch            : 2
  - number of events per epoch : 10000
  - batch size                 : 128
  - number of step per epoch   : 468
  - total number of steps      : 936


### Dataset using numpy array to preprocess and feed data in our model 

#### Creating "Graph" for the datasets

In [29]:
training_dataset = mnist_v1.input_mnist_array_dataset_fn(x_train, 
                                                         y_train, 
                                                         FLAGS,
                                                         mode=tf.estimator.ModeKeys.TRAIN, 
                                                         batch_size=FLAGS.batch_size)

INFO:tensorflow:input_dataset_fn: TRAIN, train


In [30]:
testing_dataset = mnist_v1.input_mnist_array_dataset_fn(x_test, 
                                                        y_test,
                                                        FLAGS,
                                                        mode=tf.estimator.ModeKeys.EVAL, 
                                                        batch_size=len(x_test))

INFO:tensorflow:input_dataset_fn: EVAL, eval


In [31]:
training_dataset, testing_dataset

(<PrefetchDataset shapes: ((128, 784), (128, 10)), types: (tf.float32, tf.float32)>,
 <PrefetchDataset shapes: ((10000, 784), (10000, 10)), types: (tf.float32, tf.float32)>)

#### Executing the "Graph for the datasets" for training

In [32]:
# create an iterator
iterator = training_dataset.make_one_shot_iterator()

# next_element
features, labels = iterator.get_next()

In [33]:
n=0

n_iter=12
with tf.Session() as sess:
    while True:
        try:
            start_time = time.clock()
            x,y = sess.run([features, labels])
            print('iteration n:', n, 'execution time:', time.clock() - start_time, 'seconds')
            print(x.shape)
            print(y.shape)
            print('first label of the batch',np.argmax(y[0]),'\n')
            n+=1
            if n>=n_iter:
                print('number of iteration reached')
                break
        except tf.errors.OutOfRangeError:
            print('tf.errors.OutOfRangeError')
            break

iteration n: 0 execution time: 2.2003929999999983 seconds
(128, 784)
(128, 10)
first label of the batch 9 

iteration n: 1 execution time: 0.02623299999999773 seconds
(128, 784)
(128, 10)
first label of the batch 2 

iteration n: 2 execution time: 0.02943599999999691 seconds
(128, 784)
(128, 10)
first label of the batch 6 

iteration n: 3 execution time: 0.029065000000002783 seconds
(128, 784)
(128, 10)
first label of the batch 1 

iteration n: 4 execution time: 0.030915000000000248 seconds
(128, 784)
(128, 10)
first label of the batch 5 

iteration n: 5 execution time: 0.029243999999998493 seconds
(128, 784)
(128, 10)
first label of the batch 1 

iteration n: 6 execution time: 0.029104000000003794 seconds
(128, 784)
(128, 10)
first label of the batch 9 

iteration n: 7 execution time: 0.03149700000000166 seconds
(128, 784)
(128, 10)
first label of the batch 8 

iteration n: 8 execution time: 0.028980000000004225 seconds
(128, 784)
(128, 10)
first label of the batch 2 

iteration n: 9 

#### Executing the "Graph for the datasets" for testing

In [34]:
# create an iterator
iterator = testing_dataset.make_one_shot_iterator()

# next_element
features, labels = iterator.get_next()

In [35]:
n=0

n_iter=10
with tf.Session() as sess:
    while True:
        try:
            start_time = time.clock()
            x,y = sess.run([features, labels])
            print('iteration n:', n, 'execution time:', time.clock() - start_time, 'seconds')
            print(x.shape)
            print(y.shape)
            print('first label of the batch',np.argmax(y[0]),'\n')
            n+=1
            if n>=n_iter:
                print('number of iteration reached')
                break
        except tf.errors.OutOfRangeError:
            print('tf.errors.OutOfRangeError')
            break

iteration n: 0 execution time: 2.9108459999999994 seconds
(10000, 784)
(10000, 10)
first label of the batch 7 

iteration n: 1 execution time: 2.260881999999995 seconds
(10000, 784)
(10000, 10)
first label of the batch 7 

tf.errors.OutOfRangeError


## Stored the input data as TFRecords files
TFRecord file format is a simple record-oriented binary format  
- https://medium.com/coinmonks/storage-efficient-tfrecord-for-images-6dc322b81db4
- https://www.damienpontifex.com/2017/09/18/convert-and-using-the-mnist-dataset-as-tfrecords/
- https://docs.databricks.com/_static/notebooks/horovodrunner/mnist-tensorflow-to-tfrecords.html

Contrary to numpy array or pandas dataframe this is will scale with any amount of data.

### Numpy array to TFRecords files

#### Creating the TFRecords files

In [36]:
path_test_tfrecords = 'data/mnist/tfrecord_numpy_test'
path_train_tfrecords = 'data/mnist/tfrecord_numpy_train'

In [37]:
# creating TFRecords files for the training dataset
mnist_v1.convert_numpy_to_tfrecords(x_train, y_train, 'train', path_train_tfrecords, 1)

Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_numpy_train/train.tfrecords data
Processing sample 59131 of 60000

In [38]:
# creating TFRecords files for the testing dataset
mnist_v1.convert_numpy_to_tfrecords(x_test, y_test, 'test', path_test_tfrecords, 1)

Processing sample 9811 of 10000

#### Save TFRecord file with numpy array on GCP

In [39]:
LOCAL_DATA_TEST = path_test_tfrecords
LOCAL_DATA_TRAIN = path_train_tfrecords

In [40]:
os.environ['LOCAL_DATA_TEST'] = LOCAL_DATA_TEST
os.environ['LOCAL_DATA_TRAIN'] = LOCAL_DATA_TRAIN

In [41]:
!gsutil ls $GCS_BUCKET/mnist/

gs://ml-productive-pipeline-53122/mnist/image/
gs://ml-productive-pipeline-53122/mnist/raw/
gs://ml-productive-pipeline-53122/mnist/tfrecords/


#### Dataset using TFRecords file to preprocess and feed data in our model 

In [121]:
training_dataset = mnist_v1.input_mnist_tfrecord_dataset_fn(glob.glob(path_train_tfrecords+'/train*.tfrecords'), 
                                                            FLAGS,
                                                            mode=tf.estimator.ModeKeys.TRAIN, 
                                                            batch_size=FLAGS.batch_size)

INFO:tensorflow:input_dataset_fn: TRAIN, train


In [122]:
testing_dataset = mnist_v1.input_mnist_tfrecord_dataset_fn(glob.glob(path_test_tfrecords+'/test*.tfrecords'),
                                                           FLAGS,
                                                           mode=tf.estimator.ModeKeys.EVAL, 
                                                           batch_size=len(x_test))

INFO:tensorflow:input_dataset_fn: EVAL, eval


In [123]:
iterator = training_dataset.make_one_shot_iterator()
# next_element
features, labels = iterator.get_next()

In [124]:
n=0
n_iter=10

with tf.Session() as sess:
    while True:
        try:
            start_time = time.clock()
            x,y = sess.run([features, labels])
            print('iteration n:', n, 'execution time:', time.clock() - start_time, 'seconds')
            print(x.shape)
            print(y.shape)
            print('first label of the batch',np.argmax(y[0]),'\n')
            n+=1
            if n>=n_iter:
                print('number of iteration reached')
                break
        except tf.errors.OutOfRangeError:
            print('tf.errors.OutOfRangeError')
            break

iteration n: 0 execution time: 0.8850599999999531 seconds
(128, 784)
(128, 10)
first label of the batch 7 

iteration n: 1 execution time: 0.06717600000001767 seconds
(128, 784)
(128, 10)
first label of the batch 6 

iteration n: 2 execution time: 0.06733999999994467 seconds
(128, 784)
(128, 10)
first label of the batch 0 

iteration n: 3 execution time: 0.05961200000001554 seconds
(128, 784)
(128, 10)
first label of the batch 0 

iteration n: 4 execution time: 0.06687399999998433 seconds
(128, 784)
(128, 10)
first label of the batch 3 

iteration n: 5 execution time: 0.06518599999992603 seconds
(128, 784)
(128, 10)
first label of the batch 1 

iteration n: 6 execution time: 0.058546999999975924 seconds
(128, 784)
(128, 10)
first label of the batch 9 

iteration n: 7 execution time: 0.05697999999995318 seconds
(128, 784)
(128, 10)
first label of the batch 0 

iteration n: 8 execution time: 0.05748299999993378 seconds
(128, 784)
(128, 10)
first label of the batch 9 

iteration n: 9 exec

In [125]:
iterator = testing_dataset.make_one_shot_iterator()
# next_element
features, labels = iterator.get_next()

In [126]:
n=0

n_iter=10
with tf.Session() as sess:
    while True:
        try:
            start_time = time.clock()
            x,y = sess.run([features, labels])
            print('iteration n:', n, 'execution time:', time.clock() - start_time, 'seconds')
            print(x.shape)
            print(y.shape)
            print('first label of the batch',np.argmax(y[0]),'\n')
            n+=1
            if n>=n_iter:
                print('number of iteration reached')
                break
        except tf.errors.OutOfRangeError:
            print('tf.errors.OutOfRangeError')
            break

iteration n: 0 execution time: 3.697167000000036 seconds
(10000, 784)
(10000, 10)
first label of the batch 4 

iteration n: 1 execution time: 3.2143539999999575 seconds
(10000, 784)
(10000, 10)
first label of the batch 4 

tf.errors.OutOfRangeError


### JPEG images to TFRecords files

#### Creating the TFRecords files

In [127]:
path_test_image_tfrecords = 'data/mnist/tfrecord_image_test'
path_train_image_tfrecords = 'data/mnist/tfrecord_image_train'

In [85]:
# creating TFRecords files for the training dataset
mnist_v1.convert_image_to_tfrecords(glob.glob(path_train_images+'/*.jpeg'), 'train', path_train_tfrecords, 10)

Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_train/train-1.tfrecords data
Processing sample 6000 of 6000
Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_train/train-2.tfrecords data
Processing sample 6000 of 6000
Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_train/train-3.tfrecords data
Processing sample 6000 of 6000
Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_train/train-4.tfrecords data
Processing sample 6000 of 6000
Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_train/train-5.tfrecords data
Processing sample 6000 of 6000
Proce

In [86]:
# creating TFRecords files for the testing dataset
mnist_v1.convert_image_to_tfrecords(glob.glob(path_test_images+'/*.jpeg'), 'test', path_test_tfrecords, 10)

Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_test/test-1.tfrecords data
Processing sample 1000 of 1000
Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_test/test-2.tfrecords data
Processing sample 1000 of 1000
Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_test/test-3.tfrecords data
Processing sample 1000 of 1000
Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_test/test-4.tfrecords data
Processing sample 1000 of 1000
Processing /Users/tarrade/Desktop/Work/Data_Science/Tutorials_Codes/Python/proj_DL_models_and_pipelines_with_GCP/data/mnist/tfrecord_image_test/test-5.tfrecords data
Processing sample 1000 of 1000
Processing /Use

#### Save TFRecord file with JPEG image on GCP

In [109]:
LOCAL_DATA_TEST = path_test_image_tfrecords
LOCAL_DATA_TRAIN = path_train_image_tfrecords

In [110]:
os.environ['LOCAL_DATA_TEST'] = LOCAL_DATA_TEST
os.environ['LOCAL_DATA_TRAIN'] = LOCAL_DATA_TRAIN

In [111]:
!gsutil ls $GCS_BUCKET/mnist/

gs://ml-productive-pipeline-53122/mnist/image/
gs://ml-productive-pipeline-53122/mnist/raw/
gs://ml-productive-pipeline-53122/mnist/tfrecords/


In [91]:
!gsutil -m cp -R $LOCAL_DATA_TRAIN/ $GCS_BUCKET/mnist/tfrecords/image_train

Copying file://data/mnist/tfrecord_image_train/train-10.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_train/train-3.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_train/train-4.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_train/train-2.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_train/train-9.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_train/train-8.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_train/train-6.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_train/.DS_Store [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_train/train-1.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_

In [92]:
!gsutil -m cp -R $LOCAL_DATA_TEST/ $GCS_BUCKET/mnist/tfrecords/image_test

Copying file://data/mnist/tfrecord_image_test/test-3.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test/test-7.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test/test-10.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test/test-4.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test/test-2.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test/test-8.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test/test-5.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test/test-1.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test/test-9.tfrecords [Content-Type=application/octet-stream]...
Copying file://data/mnist/tfrecord_image_test

#### Dataset using TFRecords file to preprocess and feed data in our model 

In [128]:
training_dataset = mnist_v1.input_mnist_tfrecord_dataset_fn(glob.glob(path_train_image_tfrecords+'/train*.tfrecords'), 
                                                            FLAGS,
                                                            mode=tf.estimator.ModeKeys.TRAIN, 
                                                            batch_size=FLAGS.batch_size)

INFO:tensorflow:input_dataset_fn: TRAIN, train


In [129]:
testing_dataset = mnist_v1.input_mnist_tfrecord_dataset_fn(glob.glob(path_test_image_tfrecords+'/test*.tfrecords'),
                                                           FLAGS,
                                                           mode=tf.estimator.ModeKeys.EVAL, 
                                                           batch_size=len(x_test))

INFO:tensorflow:input_dataset_fn: EVAL, eval


In [130]:
iterator = training_dataset.make_one_shot_iterator()
# next_element
features, labels = iterator.get_next()

In [131]:
n=0
n_iter=10

with tf.Session() as sess:
    while True:
        try:
            start_time = time.clock()
            x,y = sess.run([features, labels])
            print('iteration n:', n, 'execution time:', time.clock() - start_time, 'seconds')
            print(x.shape)
            print(y.shape)
            print('first label of the batch',np.argmax(y[0]),'\n')
            n+=1
            if n>=n_iter:
                print('number of iteration reached')
                break
        except tf.errors.OutOfRangeError:
            print('tf.errors.OutOfRangeError')
            break

iteration n: 0 execution time: 0.7266619999999193 seconds
(128, 784)
(128, 10)
first label of the batch 7 

iteration n: 1 execution time: 0.059810000000084074 seconds
(128, 784)
(128, 10)
first label of the batch 6 

iteration n: 2 execution time: 0.05490500000007614 seconds
(128, 784)
(128, 10)
first label of the batch 0 

iteration n: 3 execution time: 0.05208000000004631 seconds
(128, 784)
(128, 10)
first label of the batch 0 

iteration n: 4 execution time: 0.04858000000001539 seconds
(128, 784)
(128, 10)
first label of the batch 3 

iteration n: 5 execution time: 0.049823999999944135 seconds
(128, 784)
(128, 10)
first label of the batch 1 

iteration n: 6 execution time: 0.04981699999996181 seconds
(128, 784)
(128, 10)
first label of the batch 9 

iteration n: 7 execution time: 0.05104200000005221 seconds
(128, 784)
(128, 10)
first label of the batch 0 

iteration n: 8 execution time: 0.049216999999998734 seconds
(128, 784)
(128, 10)
first label of the batch 9 

iteration n: 9 ex

In [132]:
iterator = testing_dataset.make_one_shot_iterator()
# next_element
features, labels = iterator.get_next()

In [133]:
n=0

n_iter=10
with tf.Session() as sess:
    while True:
        try:
            start_time = time.clock()
            x,y = sess.run([features, labels])
            print('iteration n:', n, 'execution time:', time.clock() - start_time, 'seconds')
            print(x.shape)
            print(y.shape)
            print('first label of the batch',np.argmax(y[0]),'\n')
            n+=1
            if n>=n_iter:
                print('number of iteration reached')
                break
        except tf.errors.OutOfRangeError:
            print('tf.errors.OutOfRangeError')
            break

iteration n: 0 execution time: 3.788214000000039 seconds
(10000, 784)
(10000, 10)
first label of the batch 4 

iteration n: 1 execution time: 3.382412000000045 seconds
(10000, 784)
(10000, 10)
first label of the batch 4 

tf.errors.OutOfRangeError
