In [21]:
def maybe_download_and_extract(dest_directory, url):
  """Download the CIFAR-10 binary archive if needed and unpack it.

  Both steps are skipped when their result is already on disk, so the
  function can be called repeatedly.

  Args:
    dest_directory: directory holding the archive and the extracted files;
      it is created when it does not exist.
    url: address the archive is fetched from when
      ``cifar-10-binary.tar.gz`` is not yet present in ``dest_directory``.
  """
  if not os.path.exists(dest_directory):
    os.makedirs(dest_directory)
  file_name = 'cifar-10-binary.tar.gz'
  file_path = os.path.join(dest_directory, file_name)
  # Only download when the archive is not already on disk.
  if not os.path.exists(file_path):
    def _progress(count, block_size, total_size):
      # Without a positive total size the percentage is meaningless (and a
      # zero would make the division below fail), so print nothing.
      if total_size <= 0:
        return
      sys.stdout.write('\r%.1f%%' %
            (float(count * block_size) / float(total_size) * 100.0))
      sys.stdout.flush()  # flush the buffer

    print('>> Downloading %s ...' % file_name)
    file_path, _ = urllib.request.urlretrieve(url, file_path, _progress)
    file_size = os.stat(file_path).st_size
    print('\r>> Total %d bytes' % file_size)
  extracted_dir_path = os.path.join(dest_directory, 'cifar-10-batches-bin')
  if not os.path.exists(extracted_dir_path):
    # The context manager closes the archive handle even if extraction fails.
    with tarfile.open(file_path, 'r:gz') as archive:
      archive.extractall(dest_directory)
  print('>> Done')

# download it

In [22]:
import os
import sys
from six.moves import urllib
import tarfile
import tensorflow as tf
import numpy as np
# Where the archive is fetched from and where it is unpacked.
DATA_URL = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
DEST_DIRECTORY = './dataset/cifar10'
DATA_DIRECTORY = DEST_DIRECTORY + '/cifar-10-batches-bin'

# Geometry of a stored image and side length of the crop fed to the network.
IMAGE_HEIGHT = 32
IMAGE_WIDTH = 32
IMAGE_DEPTH = 3
IMAGE_SIZE_CROPPED = 24

# Batch / dataset sizes.
BATCH_SIZE = 128
NUM_CLASSES = 10
NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000

# On-disk record layout: a one-byte label followed by the raw pixels.
LABEL_BYTES = 1
IMAGE_BYTES = IMAGE_HEIGHT * IMAGE_WIDTH * IMAGE_DEPTH
_RECORD_BYTES = LABEL_BYTES + IMAGE_BYTES

# Fetch and unpack the data, then list the binary files of each split.
maybe_download_and_extract(DEST_DIRECTORY, DATA_URL)
training_files = [
    os.path.join(DATA_DIRECTORY, 'data_batch_%d.bin' % batch_no)
    for batch_no in range(1, 6)
]
testing_files = [os.path.join(DATA_DIRECTORY, 'test_batch.bin')]



>> Done


In [23]:
# (5) + (6)
def read_cifar10(filename_queue):
    """Read one fixed-length CIFAR-10 record and split it into label and image.

    Args:
        filename_queue:
            queue of filename strings handed to the record reader.

    Returns:
        A plain attribute container with the fields
        height / width / depth:
            IMAGE_HEIGHT, IMAGE_WIDTH and IMAGE_DEPTH.
        key:
            first value returned by the reader for this record.
        label:
            int32 tensor cast from the first ``label_bytes`` byte(s).
        image:
            uint8 tensor laid out as [height, width, depth].
    """

    class CIFAR10Record(object):
        pass

    # Record layout: label byte(s) first, then the pixel bytes.
    label_bytes = 1
    rows, cols, channels = IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH
    image_bytes = rows * cols * channels
    record_bytes = label_bytes + image_bytes

    example = CIFAR10Record()
    example.height = rows
    example.width = cols
    example.depth = channels

    # (5) fixed-length reader: one record per read.
    reader = tf.FixedLengthRecordReader(record_bytes=record_bytes)
    example.key, raw_record = reader.read(filename_queue)

    # (6) decode the record string into a flat uint8 vector.
    flat_bytes = tf.decode_raw(raw_record, tf.uint8)

    # Leading byte(s) -> label, cast to int32.
    label_slice = tf.strided_slice(flat_bytes, [0], [label_bytes])
    example.label = tf.cast(label_slice, tf.int32)

    # Remaining bytes -> [depth, height, width], then moved to
    # [height, width, depth].
    pixel_slice = tf.strided_slice(
        flat_bytes, [label_bytes], [label_bytes + image_bytes])
    channel_first = tf.reshape(pixel_slice, [channels, rows, cols])
    example.image = tf.transpose(channel_first, [1, 2, 0])
    return example

In [56]:
class CNN_Model(object):
    """CNN classifier (two conv blocks, three dense layers) plus its training ops.

    Constructing an instance builds the whole graph: input placeholders
    (``images``, ``labels``), ``logits``, ``top_k_op``, ``total_loss`` and
    ``train_op``.
    """

    def __init__(self, model_hps):
        """Copy the hyper-parameters from ``model_hps`` and build the graph.

        Args:
            model_hps: object exposing image_size, batch_size, num_classes,
                num_training_example, num_epoch_per_decay, init_lr,
                moving_average_decay and ckpt_dir as attributes.
        """
        self.image_size = model_hps.image_size
        self.batch_size = model_hps.batch_size
        self.num_classes = model_hps.num_classes
        self.num_training_example = model_hps.num_training_example
        self.num_epoch_per_decay = model_hps.num_epoch_per_decay
        self.init_lr = model_hps.init_lr  # initial learn rate
        self.moving_average_decay = model_hps.moving_average_decay
        self.ckpt_dir = model_hps.ckpt_dir

        self.build_model()

    def build_model(self):
        """Create placeholders, logits, accuracy op, loss and training op."""
        # op for training
        self.global_step = tf.contrib.framework.get_or_create_global_step()

        with tf.variable_scope('model'):
            self.images = tf.placeholder(tf.float32,[self.batch_size, self.image_size, self.image_size, 3])
            self.labels = tf.placeholder(tf.int32)

            self.logits = self.inference(self.images)
            self.top_k_op = tf.nn.in_top_k(self.logits, self.labels, 1)
            self.total_loss = self.loss(self.logits, self.labels)
            self.train_op = self.train(self.total_loss, self.global_step)

    def _variable_on_cpu(self, name, shape, initializer):
        """Create (or fetch) a float32 variable placed on '/cpu:0'."""
        with tf.device('/cpu:0'):
            var = tf.get_variable(name, shape, initializer=initializer, dtype=tf.float32)

        return var

    def _variable_with_weight_decay(self, name, shape, stddev, wd=0.0):
        """ Helper to create an initialized Variable with weight decay.
            The Variable is initialized with a truncated normal distribution
            and, unless ``wd`` is None, an L2 penalty scaled by ``wd`` is
            added to the 'losses' collection.
            -----
            Args:
                name:
                    name of the variable
                shape:
                    a list of ints
                stddev:
                    standard deviation of a truncated Gaussian
                wd:
                    add L2Loss weight decay multiplied by this float. If None, weight
                    decay is not added for this Variable.
            Returns:
                Variable Tensor
        """
        initializer = tf.truncated_normal_initializer(
            stddev=stddev, dtype=tf.float32)
        var = self._variable_on_cpu(name, shape, initializer)
        # Honour the documented contract: None means "no decay term at all".
        # Any float (including the default 0.0) still registers a term, as before.
        if wd is not None:
            weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
            tf.add_to_collection('losses', weight_decay)
        return var

    def _conv_block(self, inp, scope, kernel_width, kernel_height, inp_channel, out_channel, strides=None, padding='SAME'):
        """Convolution + bias + ReLU inside its own variable scope.

        Args:
            inp: input tensor passed to tf.nn.conv2d.
            scope: variable scope name for the block.
            kernel_width, kernel_height: spatial size of the kernel.
            inp_channel, out_channel: channel counts of the kernel.
            strides: strides passed to tf.nn.conv2d; defaults to [1, 1, 1, 1].
            padding: padding mode passed to tf.nn.conv2d.
        Returns:
            The ReLU-activated output tensor.
        """
        if strides is None:
            # Fresh list per call instead of a shared mutable default.
            strides = [1, 1, 1, 1]
        with tf.variable_scope(scope) as scope:
            # Filter layout is [height, width, in_channels, out_channels];
            # the original passed kernel_width twice and ignored kernel_height.
            kernel = self._variable_with_weight_decay(
                'weights', [kernel_height, kernel_width, inp_channel, out_channel], 5e-2)
            biases = self._variable_on_cpu('bias', [out_channel], tf.constant_initializer(0.0))

            conv = tf.nn.conv2d(inp, kernel, strides=strides, padding=padding)
            pre_activation = tf.nn.bias_add(conv, biases)
            return tf.nn.relu(pre_activation, name=scope.name)

    def _fully_connected_layer(self, inp, scope, in_dim, out_dim, relu = True):
        """Dense layer ``inp @ weights + biases``; ReLU is applied when ``relu`` is True."""
        with tf.variable_scope(scope) as scope:
            weights = self._variable_with_weight_decay('weights', [in_dim, out_dim], 0.04, 0.004)
            biases = self._variable_on_cpu('biases', [out_dim], tf.constant_initializer(0.1))
            if relu:
                return tf.nn.relu(tf.matmul(inp, weights) + biases, name=scope.name)
            else:
                return tf.matmul(inp, weights) + biases

    def inference(self, images):
        """ build the model
            -----
            Args:
                images with shape [batch_size,24,24,3]
            Return:
                logits with shape [batch_size,10]
        """
        conv_1 = self._conv_block(images, 'conv_1', 5, 5, 3, 64)
        # pool_1
        pool_1 = tf.nn.max_pool(conv_1,ksize=[1, 3, 3, 1],strides=[1, 2, 2, 1],padding='SAME',name='pool_1')
        # norm_1 (local_response_normalization)
        norm_1 = tf.nn.lrn(pool_1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm_1')

        # conv2
        conv_2 = self._conv_block(norm_1, 'conv_2', 5, 5, 64, 64)
        # norm2
        norm_2 = tf.nn.lrn(conv_2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm_2')
        # pool2
        pool_2 = tf.nn.max_pool(norm_2,ksize=[1, 3, 3, 1],strides=[1, 2, 2, 1],padding='SAME',name='pool_2')

        # Flatten feature maps before fully connected layers
        flat_features = tf.reshape(pool_2, [self.batch_size, -1])
        dim = flat_features.get_shape()[1].value
        # FC_1 (fully-connected layer)
        fc_1 = self._fully_connected_layer(flat_features, 'fc1', dim, 384)

        # FC_2
        fc_2 = self._fully_connected_layer(fc_1, 'fc2', 384, 192)

        # Final linear layer: no ReLU, the outputs are the class logits.
        logits = self._fully_connected_layer(fc_2, 'softmax_linear', 192, self.num_classes, relu = False)
        return logits

    def loss(self, logits, labels):
        '''Return mean cross entropy plus every term in the 'losses' collection.'''
        labels = tf.cast(labels, tf.int64)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)
        # The total loss is defined as the cross entropy loss plus all of the weight
        # decay terms (L2 loss).
        return tf.add_n(tf.get_collection('losses'), name='total_loss')

    def train(self, total_loss, global_step):
        '''Return the op that performs one SGD step and updates the moving averages.'''
        num_batches_per_epoch = self.num_training_example / self.batch_size
        decay_steps = int(num_batches_per_epoch * self.num_epoch_per_decay)
        # Decay the learning rate exponentially based on the number of steps.
        lr = tf.train.exponential_decay(
            self.init_lr, global_step, decay_steps, decay_rate=0.1, staircase=True)
        opt = tf.train.GradientDescentOptimizer(lr)
        grads = opt.compute_gradients(total_loss)
        apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
        # Track the moving averages of all trainable variables.
        # This step just records the moving average weights but not uses them
        ema = tf.train.ExponentialMovingAverage(self.moving_average_decay,
                                                global_step)
        # Kept on the instance so evaluation code can ask it for variables_to_restore().
        self.ema = ema
        variables_averages_op = ema.apply(tf.trainable_variables())
        # The returned no-op only completes after both updates have run.
        with tf.control_dependencies([apply_gradient_op, variables_averages_op]):
            train_op = tf.no_op(name='train')
        return train_op

In [26]:
def run_training(model, data_train , num_epoch, iterator=None):
    """Train ``model`` for ``num_epoch`` epochs, checkpointing after each one.

    Args:
        model: object exposing ckpt_dir, batch_size, global_step, images,
            labels, total_loss and train_op (e.g. a CNN_Model).
        data_train: fetchable that yields an ``(images, labels)`` batch when
            passed to ``sess.run``.
        num_epoch: number of epochs to run.
        iterator: iterator whose ``initializer`` is run before training.
            When omitted, the notebook-level ``iterator_train`` is used,
            which is what this function always did implicitly.

    Side effects:
        Sets ``model.loss_each_epoch`` to the list of summed batch losses and
        writes a checkpoint to ``model.ckpt_dir`` after every epoch.
    """
    if iterator is None:
        # Backward-compatible fallback to the global this function used to
        # reference directly; pass ``iterator`` to avoid the hidden dependency.
        iterator = iterator_train
    saver = tf.train.Saver()

    with tf.Session() as sess:
        sess.run(iterator.initializer)

        ckpt = tf.train.get_checkpoint_state(model.ckpt_dir)
        if (ckpt and ckpt.model_checkpoint_path):
            saver.restore(sess, ckpt.model_checkpoint_path)
            # assume the name of checkpoint is like '.../model.ckpt-1000'
            gs = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
            sess.run(tf.assign(model.global_step, gs))
        else:
            # no checkpoint found
            print('no ckpt, init global variable...')
            sess.run(tf.global_variables_initializer())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        model.loss_each_epoch = []

        num_batch_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN//model.batch_size
        # Joining with the path helper also works when ckpt_dir lacks a
        # trailing separator.
        ckpt_prefix = os.path.join(model.ckpt_dir, 'model.ckpt')
        try:
            #start training
            for i in range(num_epoch):
                _loss = []
                print('epoch:',i)
                for _ in range(num_batch_per_epoch):
                    print('.',end='')
                    images, labels = sess.run(data_train)
                    l, _ = sess.run([model.total_loss, model.train_op], feed_dict = {model.images:images, model.labels:labels})
                    _loss.append(l)
                loss_this_epoch = np.sum(_loss)
                gs = model.global_step.eval()
                print('loss of epoch %d: %f' % (gs / num_batch_per_epoch, loss_this_epoch))
                model.loss_each_epoch.append(loss_this_epoch)
                saver.save(sess, ckpt_prefix, global_step=gs)
        finally:
            # Stop the queue-runner threads even when training is interrupted
            # or a step raises.
            coord.request_stop()
            coord.join(threads)
    print('Done training %d epochs' %num_epoch)

In [27]:
def run_testing(model, data_test):
    """Evaluate the latest checkpoint in ``model.ckpt_dir`` and print the accuracy.

    Args:
        model: object exposing ema, ckpt_dir, batch_size, images, labels and
            top_k_op (e.g. a CNN_Model).
        data_test: fetchable that yields an ``(images, labels)`` batch when
            passed to ``sess.run``.

    Variables are restored through the name mapping returned by
    ``model.ema.variables_to_restore()``. Only whole batches are evaluated:
    ``NUM_EXAMPLES_PER_EPOCH_FOR_EVAL // model.batch_size`` of them.
    Prints 'train first' when no checkpoint is found.
    """
    variables_to_restore = model.ema.variables_to_restore()
    saver = tf.train.Saver(variables_to_restore)
    with tf.Session() as sess:
        # Restore variables from disk.
        ckpt = tf.train.get_checkpoint_state(model.ckpt_dir)

        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            num_iter = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL // model.batch_size
            total_sample_count = num_iter * model.batch_size
            true_count = 0
            try:
                for _ in range(num_iter):
                    images, labels = sess.run(data_test)

                    predictions = sess.run(model.top_k_op, feed_dict = {model.images:images, model.labels:labels})
                    true_count += np.sum(predictions)
                print('Accurarcy: %d/%d = %f' % (true_count, total_sample_count,
                                                 true_count / total_sample_count))
            finally:
                # Stop the queue-runner threads even if evaluation raises.
                coord.request_stop()
                coord.join(threads)
        else:
            print('train first')

In [54]:
# Hyper-parameters read by CNN_Model.__init__ (one attribute per keyword).
model_hps_cifar = tf.contrib.training.HParams(
  image_size = IMAGE_SIZE_CROPPED,  # height and width of the model's image placeholder
  batch_size = BATCH_SIZE,  # fixed leading dimension of the image placeholder
  num_classes = NUM_CLASSES,  # output size of the final linear layer
  num_training_example = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN,  # with batch_size, gives batches per epoch for the decay schedule
  num_epoch_per_decay = 350.0,  # epochs between learning-rate decay steps
  init_lr = 0.1,  # starting learning rate of the exponential decay
  moving_average_decay = 0.9999,  # decay passed to ExponentialMovingAverage
  ckpt_dir = './model/'  # checkpoint directory; save code in this notebook appends 'model.ckpt' to it directly
)

In [29]:
def cifar10_record_distort_parser(image,label):
    ''' Distort a single image and return shuffled training batches.
    -----
    Args:
        image:
            image tensor of one example; it is cast to float32 and cropped
            to [IMAGE_SIZE_CROPPED, IMAGE_SIZE_CROPPED, 3].
        label:
            label tensor of the same example; its static shape is set to [1].
    Returns:
        image_batch:
            batch of BATCH_SIZE distorted images produced by
            tf.train.shuffle_batch.
        label_batch:
            the matching labels reshaped to [BATCH_SIZE].
    NOTE(review): this function builds a shuffle queue itself, so it returns
    batches rather than a single example; confirm that is intended
    wherever it is passed to Dataset.map.
    '''
    
    # TODO1
    # Random crop, flip, brightness and contrast changes on the float image.
    height = IMAGE_SIZE_CROPPED
    width = IMAGE_SIZE_CROPPED
    float_image = tf.cast(image, tf.float32)
    distorted_image = tf.random_crop(float_image, [height, width, 3])
    distorted_image = tf.image.random_flip_left_right(distorted_image)
    distorted_image = tf.image.random_brightness(distorted_image, max_delta=63)
    distorted_image = tf.image.random_contrast(distorted_image, lower=0.2, upper=1.8)
    # Standardise each image independently (see tf.image.per_image_standardization).
    distorted_image = tf.image.per_image_standardization(distorted_image)
    # Set the static shapes of the tensors before they are enqueued.
    distorted_image.set_shape([height, width, 3])
    label.set_shape([1])
    
    # Keep at least 40% of one training epoch (20000 examples) in the queue
    # so that dequeued batches are well mixed.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(
      NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN * min_fraction_of_examples_in_queue)
    # (8) example queue
    # The queue capacity leaves room for three extra batches on top of the minimum.
    image_batch, label_batch = tf.train.shuffle_batch(
        [distorted_image, label],
        batch_size=BATCH_SIZE,
        num_threads=16,
        capacity=min_queue_examples + 3*BATCH_SIZE,
        min_after_dequeue=min_queue_examples)
    return image_batch, tf.reshape(label_batch, [BATCH_SIZE])



def cifar10_record_crop_parser(image,label):
    ''' Crop a single image and return evaluation batches.
    -----
    Args:
        image:
            image tensor of one example; it is cast to float32 and
            cropped/padded to [IMAGE_SIZE_CROPPED, IMAGE_SIZE_CROPPED, 3].
        label:
            label tensor of the same example; its static shape is set to [1].
    Returns:
        image_batch:
            batch of BATCH_SIZE standardised images produced by
            tf.train.batch.
        label_batch:
            the matching labels reshaped to [BATCH_SIZE].
    NOTE(review): like the distort parser, this builds its own batching
    queue and therefore returns batches, not a single example.
    '''
    
    # Image preprocessing for evaluation: crop/pad to the target size, no
    # random distortion.
    height = IMAGE_SIZE_CROPPED
    width = IMAGE_SIZE_CROPPED
    float_image = tf.cast(image, tf.float32)
    resized_image = tf.image.resize_image_with_crop_or_pad(
      float_image, height, width)
    image_eval = tf.image.per_image_standardization(resized_image)
    image_eval.set_shape([height, width, 3])
    label.set_shape([1])
    # Queue capacity: 40% of the evaluation set plus three batches.
    # tf.train.batch is used here, not shuffle_batch.
    min_fraction_of_examples_in_queue = 0.4
    min_queue_examples = int(
      NUM_EXAMPLES_PER_EPOCH_FOR_EVAL * min_fraction_of_examples_in_queue)
    image_batch, label_batch = tf.train.batch(
        [image_eval, label],
        batch_size=BATCH_SIZE,
        num_threads=16,
        capacity=min_queue_examples + 3*BATCH_SIZE)
    return image_batch, tf.reshape(label_batch, [BATCH_SIZE])



def cifar10_iterator(filenames, batch_size, cifar10_record_parser):
    ''' Create a dataset and return an initializable iterator over it.
    -----
    Args:
        filenames: 
            a tensor of filenames, fed to tf.train.string_input_producer.
        batch_size: 
            batch size. Not used by this body; batching is left to the parser.
        cifar10_record_parser:
            callable mapped over the dataset; it receives the image and
            label tensors read by read_cifar10.
    Returns:
        iterator: 
            an initializable Iterator over the created dataset.
        output_types: 
            the output types of the created dataset.
        output_shapes: 
            the output shapes of the created dataset.
    NOTE(review): the recorded run that consumed this iterator ended with
    "OutOfRangeError: End of sequence" on the IteratorGetNext op, presumably
    because Dataset.from_tensors wraps a single element. Confirm before
    relying on this version.
    '''
    
    # TODO3
    # tips: use dataset.map with cifar10_record_parser(record)
    #       output_types = dataset.output_types
    #       output_shapes = dataset.output_shapes
    
    # Queue-based reader: filename queue -> one decoded record.
    file_queue = tf.train.string_input_producer(filenames)
    cifar10_record = read_cifar10(file_queue)
    # Wrap the (image, label) pair in a dataset and apply the parser to it.
    dataset = tf.data.Dataset.from_tensors((cifar10_record.image,cifar10_record.label))
    dataset = dataset.map(cifar10_record_parser)
    
    # Explicit dataset.batch(batch_size) was tried and left disabled.
    
    # An initializable iterator must have its initializer run in the session
    # before the first get_next().
    iterator = dataset.make_initializable_iterator()
    
    output_types = dataset.output_types
    output_shapes = dataset.output_shapes
    return iterator,output_types,output_shapes
# NOTE(review): `filenames_train` has no assignment above this line in the
# visible notebook (it is first assigned in a later cell), so this call
# presumably fails on a fresh kernel — confirm.
iterator_train, types, shapes = cifar10_iterator(filenames_train, BATCH_SIZE, cifar10_record_distort_parser)
# iterator_test, _, _ = cifar10_iterator(filenames_test, BATCH_SIZE, cifar10_record_crop_parser)
print('done')

Instructions for updating:
To construct input pipelines, use the `tf.data` module.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.
done


In [30]:
# Start from an empty default graph before building the input pipeline.
tf.reset_default_graph()

# Five training files and one test file, wrapped as constant string tensors.
training_files = [
    os.path.join(DATA_DIRECTORY, 'data_batch_{}.bin'.format(shard))
    for shard in range(1, 6)
]
testing_files = [os.path.join(DATA_DIRECTORY, 'test_batch.bin')]
filenames_train = tf.constant(training_files)
filenames_test = tf.constant(testing_files)

# Training pipeline: the iterator and the op that yields its next element.
iterator_train, types, shapes = cifar10_iterator(
    filenames_train, BATCH_SIZE, cifar10_record_distort_parser)
train_pairs = iterator_train.get_next()

# Evaluation pipeline; its output types and shapes are not kept.
iterator_test, _, _ = cifar10_iterator(
    filenames_test, BATCH_SIZE, cifar10_record_crop_parser)

# Disabled: a single feedable iterator shared by training and testing.
# handle = tf.placeholder(tf.string, shape=[])
# iterator = tf.data.Iterator.from_string_handle(handle, types, shapes)
# labels_images_pairs = iterator.get_next()
print('done')

done


In [33]:
# Rebuild everything in a fresh graph: the model first, then the input ops.
tf.reset_default_graph()
# CNN model
model = CNN_Model(model_hps_cifar)
# Here we use CPU to handle the input because we want GPU to only focus on training.
# NOTE(review): `distort_input` is not defined anywhere in the visible
# notebook; presumably it came from a removed cell — confirm.
with tf.device('/cpu:0'):
    data_train = distort_input(training_files, BATCH_SIZE)
print(data_train)
# NOTE(review): `labels_images_pairs` is only assigned in a later cell of the
# visible notebook, so this print depends on out-of-order execution.
print(labels_images_pairs)
# Train for a single epoch.
run_training(model, data_train, 1)

(<tf.Tensor 'shuffle_batch:0' shape=(128, 24, 24, 3) dtype=float32>, <tf.Tensor 'Reshape_1:0' shape=(128,) dtype=int32>)
(<tf.Tensor 'IteratorGetNext:0' shape=(128, 24, 24, 3) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(128,) dtype=int32>)
INFO:tensorflow:Restoring parameters from ./model/model.ckpt-3900
epoch: 0
.INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.CancelledError'>, Enqueue operation was cancelled
	 [[{{node shuffle_batch/random_shuffle_queue_enqueue}} = QueueEnqueueV2[Tcomponents=[DT_FLOAT, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](shuffle_batch/random_shuffle_queue, per_image_standardization, Cast)]]


KeyboardInterrupt: 

In [None]:
# The evaluation helper defined in this notebook is `run_testing`; there is no `run_test`.
# NOTE(review): `data_test` is not assigned anywhere in the visible notebook
# and must be created before this cell runs.
run_testing(model, data_test)

In [31]:
%%time
# TODO4:
# 1. train the CNN model 10 epochs
# 2. show the loss per epoch
# 3. get the accuracy of this 10-epoch model
# 4. measure the time using '%%time' instruction
# tips:
# use placeholder handle to determine if training or testing. 
# tf.reset_default_graph()
# CNN model

# Here we use CPU to handle the input because we want GPU to only focus on training.
# Build the model; the default graph is deliberately not reset in this cell.
model = CNN_Model(model_hps_cifar)

saver = tf.train.Saver()
num_epoch = 1

with tf.Session() as sess:

    # Resume from the latest checkpoint when there is one, otherwise
    # initialise all variables.
    ckpt = tf.train.get_checkpoint_state(model.ckpt_dir)
    if (ckpt and ckpt.model_checkpoint_path):
        saver.restore(sess, ckpt.model_checkpoint_path)
        # assume the name of checkpoint is like '.../model.ckpt-1000'
        gs = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        sess.run(tf.assign(model.global_step, gs))
    else:
        # no checkpoint found
        print('no ckpt, init global variable...')
        sess.run(tf.global_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    model.loss_each_epoch = []

    num_batch_per_epoch = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN//model.batch_size

    try:
        # The iterator is initialised only after the queue runners have
        # started (an earlier attempt in a bare session was annotated
        # "this stuck" by the author).
        sess.run(iterator_train.initializer)

        #start training
        for i in range(num_epoch):
            _loss = []
            print('epoch:',i)
            for _ in range(num_batch_per_epoch):
                print('.',end='')
                # Keep the fetched batch: the original discarded it and fed
                # `images` / `labels` names that this cell never assigned.
                images, labels = sess.run(train_pairs)

                l, _ = sess.run([model.total_loss, model.train_op], feed_dict = {model.images:images, model.labels:labels})
                _loss.append(l)
            loss_this_epoch = np.sum(_loss)
            gs = model.global_step.eval()
            print('loss of epoch %d: %f' % (gs / num_batch_per_epoch, loss_this_epoch))
            model.loss_each_epoch.append(loss_this_epoch)
            saver.save(sess, model.ckpt_dir + 'model.ckpt', global_step=gs)
    finally:
        # Stop the queue-runner threads even when a step raises.
        coord.request_stop()
        coord.join(threads)
print('Done training %d epochs' %num_epoch)

no ckpt, init global variable...
epoch: 0
.INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.CancelledError'>, Enqueue operation was cancelled
	 [[{{node input_producer_1/input_producer_1_EnqueueMany}} = QueueEnqueueManyV2[Tcomponents=[DT_STRING], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](input_producer_1, Const_1, ^input_producer_1/Assert/Assert)]]


OutOfRangeError: End of sequence
	 [[{{node IteratorGetNext}} = IteratorGetNext[output_shapes=[[128,24,24,3], [128]], output_types=[DT_FLOAT, DT_INT32], _device="/job:localhost/replica:0/task:0/device:CPU:0"](IteratorV2)]]

Caused by op 'IteratorGetNext', defined at:
  File "C:\Users\a1989\Anaconda3\lib\runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "C:\Users\a1989\Anaconda3\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "C:\Users\a1989\Anaconda3\lib\site-packages\traitlets\config\application.py", line 658, in launch_instance
    app.start()
  File "C:\Users\a1989\Anaconda3\lib\site-packages\ipykernel\kernelapp.py", line 486, in start
    self.io_loop.start()
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 127, in start
    self.asyncio_loop.run_forever()
  File "C:\Users\a1989\Anaconda3\lib\asyncio\base_events.py", line 422, in run_forever
    self._run_once()
  File "C:\Users\a1989\Anaconda3\lib\asyncio\base_events.py", line 1432, in _run_once
    handle._run()
  File "C:\Users\a1989\Anaconda3\lib\asyncio\events.py", line 145, in _run
    self._callback(*self._args)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tornado\platform\asyncio.py", line 117, in _handle_events
    handler_func(fileobj, events)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "C:\Users\a1989\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\zmq\eventloop\zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tornado\stack_context.py", line 276, in null_wrapper
    return fn(*args, **kwargs)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\ipykernel\kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\ipykernel\ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\ipykernel\zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2903, in run_ast_nodes
    if self.run_code(code, result):
  File "C:\Users\a1989\Anaconda3\lib\site-packages\IPython\core\interactiveshell.py", line 2963, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-30-d6bcb457897e>", line 10, in <module>
    train_pairs= iterator_train.get_next()
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tensorflow\python\data\ops\iterator_ops.py", line 420, in get_next
    name=name)), self._output_types,
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tensorflow\python\ops\gen_dataset_ops.py", line 2068, in iterator_get_next
    output_shapes=output_shapes, name=name)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tensorflow\python\framework\op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py", line 488, in new_func
    return func(*args, **kwargs)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 3272, in create_op
    op_def=op_def)
  File "C:\Users\a1989\Anaconda3\lib\site-packages\tensorflow\python\framework\ops.py", line 1768, in __init__
    self._traceback = tf_stack.extract_stack()

OutOfRangeError (see above for traceback): End of sequence
	 [[{{node IteratorGetNext}} = IteratorGetNext[output_shapes=[[128,24,24,3], [128]], output_types=[DT_FLOAT, DT_INT32], _device="/job:localhost/replica:0/task:0/device:CPU:0"](IteratorV2)]]


# =================================================

In [74]:
def cifar10_record_distort_parser(record):
    """Decode one raw record and return ``(label, distorted_image)``.

    The first byte of the record becomes the int32 label. The following
    bytes are reshaped to [IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH], moved to
    height-width-depth order, cast to float32, then randomly cropped,
    flipped, brightness-shifted and standardised.
    """
    total_bytes = LABEL_BYTES + IMAGE_BYTES
    raw_bytes = tf.decode_raw(record, tf.uint8)

    # Byte 0 holds the class label.
    label = tf.cast(raw_bytes[0], tf.int32)

    # The rest of the record is the depth-major pixel data.
    pixel_bytes = raw_bytes[1:total_bytes]
    depth_major = tf.reshape(pixel_bytes, [IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH])
    height_major = tf.transpose(depth_major, [1, 2, 0])
    augmented = tf.cast(height_major, tf.float32)

    # Random augmentation followed by per-image standardisation.
    crop_shape = [IMAGE_SIZE_CROPPED, IMAGE_SIZE_CROPPED, 3]
    augmented = tf.random_crop(augmented, crop_shape)
    augmented = tf.image.random_flip_left_right(augmented)
    augmented = tf.image.random_brightness(augmented, max_delta=63)
    augmented = tf.image.per_image_standardization(augmented)

    return label, augmented
    


def cifar10_record_crop_parser(record):
    """Decode one raw record and return ``(label, cropped_image)`` for evaluation.

    The first byte of the record becomes the int32 label. The following
    bytes are reshaped to [IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH], moved to
    height-width-depth order and cast to float32. The image is then cropped
    to IMAGE_SIZE_CROPPED x IMAGE_SIZE_CROPPED and standardised.
    """
    record_bytes = LABEL_BYTES + IMAGE_BYTES
    record = tf.decode_raw(record, tf.uint8)
    label  = tf.cast(record[0], tf.int32)

    image = tf.reshape(record[1:record_bytes]
                       , [IMAGE_DEPTH, IMAGE_HEIGHT, IMAGE_WIDTH])

    reshaped_image = tf.cast(tf.transpose(image, [1, 2, 0]), tf.float32)
    # Evaluation must be repeatable: use the deterministic crop/pad of the
    # earlier queue-based crop parser instead of tf.random_crop, which
    # picked a different window on every pass.
    cropped_image = tf.image.resize_image_with_crop_or_pad(
        reshaped_image, IMAGE_SIZE_CROPPED, IMAGE_SIZE_CROPPED)
    cropped_image = tf.image.per_image_standardization(cropped_image)

    return label, cropped_image


def cifar10_iterator(filenames, batch_size, cifar10_record_parser,
                     num_epochs=10, shuffle_buffer_size=None):
    """Build a fixed-length-record dataset and return an initializable iterator.

    Args:
        filenames: filenames passed to tf.data.FixedLengthRecordDataset.
        batch_size: number of parsed records per batch.
        cifar10_record_parser: callable mapped over every raw record.
        num_epochs: how many times the batched dataset is repeated
            (default 10, the value that used to be hard-coded).
        shuffle_buffer_size: when set to a positive int, raw records are
            shuffled with a buffer of this size before parsing. The default
            (None) keeps file order, as before.

    Returns:
        (iterator, output_types, output_shapes) of the created dataset.
    """
    record_bytes = LABEL_BYTES + IMAGE_BYTES
    dataset = tf.data.FixedLengthRecordDataset(filenames, record_bytes)
    if shuffle_buffer_size:
        # Shuffling ahead of batch/repeat mixes records within each epoch.
        dataset = dataset.shuffle(shuffle_buffer_size)
    dataset = dataset.map(cifar10_record_parser)
    # Batch before repeat so that batches never straddle an epoch boundary.
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat(num_epochs)

    iterator = dataset.make_initializable_iterator()

    return iterator, dataset.output_types, dataset.output_shapes


# Start from an empty default graph before building the input pipeline.
tf.reset_default_graph()

# Five training files and one test file, wrapped as constant string tensors.
training_files = [
    os.path.join(DATA_DIRECTORY, 'data_batch_{}.bin'.format(shard))
    for shard in range(1, 6)
]
testing_files = [os.path.join(DATA_DIRECTORY, 'test_batch.bin')]

filenames_train = tf.constant(training_files)
filenames_test = tf.constant(testing_files)

# One iterator per split; only the training types/shapes are kept.
iterator_train, types, shapes = cifar10_iterator(
    filenames_train, BATCH_SIZE, cifar10_record_distort_parser)
iterator_test, _, _ = cifar10_iterator(
    filenames_test, BATCH_SIZE, cifar10_record_crop_parser)

# Op yielding the next training batch.
next_batch = iterator_train.get_next()

# Feedable iterator: the string handle fed at run time selects the source.
handle = tf.placeholder(tf.string, shape=[])
iterator = tf.data.Iterator.from_string_handle(handle, types, shapes)
labels_images_pairs = iterator.get_next()


# Build the network; the constructor also creates loss, train and top-k ops.
model = CNN_Model(model_hps_cifar)

# Fixed-shape views of the feedable iterator's (labels, images) output.
with tf.device('/cpu:0'):
    labels = tf.reshape(labels_images_pairs[0], [BATCH_SIZE])
    images = tf.reshape(
        labels_images_pairs[1],
        [BATCH_SIZE, IMAGE_SIZE_CROPPED, IMAGE_SIZE_CROPPED, IMAGE_DEPTH])

# Disabled: building logits / loss / train op / top-k directly on the
# tensors above instead of on the model's placeholders.
# with tf.variable_scope('model'):
#     logits = model.inference(images)
# total_loss = model.loss(logits, labels)
# train_op = model.train(total_loss, global_step)
# top_k_op = tf.nn.in_top_k(logits, labels, 1)

# Step counter used by the training loop.
global_step = tf.contrib.framework.get_or_create_global_step()



from datetime import datetime
from tqdm import tqdm

NUM_EPOCH = 1
NUM_BATCH_PER_EPOCH = NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN // BATCH_SIZE
ckpt_dir = './model/'

# config = tf.ConfigProto(allow_soft_placement=True, gpu_options=tf.GPUOptions(allow_growth=True))

# train
saver = tf.train.Saver()
# with tf.Session(config=config) as sess:

with tf.Session() as sess:

    # Resume from the latest checkpoint when there is one, otherwise
    # initialise global and local variables.
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)

    if (ckpt and ckpt.model_checkpoint_path):
        saver.restore(sess, ckpt.model_checkpoint_path)
        # assume the name of checkpoint is like '.../model.ckpt-1000'
        gs = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        sess.run(tf.assign(global_step, gs))
    else:
        # no checkpoint found
        init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())

        sess.run(init_op)
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    loss = []
    training_handle = sess.run(iterator_train.string_handle())

    print("{}: Start training.".format(datetime.now()))

    try:
        for i in range(NUM_EPOCH):
            _loss = []
            # Restart the training iterator at the beginning of every epoch.
            sess.run(iterator_train.initializer)
            # Report the epoch index once per epoch; the original wrote it
            # once per batch, flooding the output.
            tqdm.write(str(i))

            for _ in tqdm(range(NUM_BATCH_PER_EPOCH)):
                # The parser yields (label, image), hence the order here.
                lbl, img = sess.run(next_batch)
                l, _ = sess.run([model.total_loss, model.train_op], feed_dict={model.images: img, model.labels: lbl})

                _loss.append(l)
            loss_this_epoch = np.sum(_loss)
            gs = global_step.eval()
            print('{}: Loss of epoch {}: {}'.format(datetime.now(), gs / NUM_BATCH_PER_EPOCH, loss_this_epoch))
            loss.append(loss_this_epoch)
            saver.save(sess, ckpt_dir + 'model.ckpt', global_step=gs)
    finally:
        # Stop the coordinator threads even when training is interrupted
        # (the recorded run of this cell ended in KeyboardInterrupt).
        coord.request_stop()
        coord.join(threads)

print("{}: Done training.".format(datetime.now()))



2018-11-02 23:16:47.751789: Start training.



  0%|                                             | 0/390 [00:00<?, ?it/s]
  0%|                                     | 1/390 [00:01<07:26,  1.15s/it]
  1%|▏                                    | 2/390 [00:02<07:01,  1.09s/it]
  1%|▎                                    | 3/390 [00:03<06:45,  1.05s/it]
  1%|▍                                    | 4/390 [00:03<06:32,  1.02s/it]
  1%|▍                                    | 5/390 [00:04<06:22,  1.01it/s]
  2%|▌                                    | 6/390 [00:05<06:21,  1.01it/s]
  2%|▋                                    | 7/390 [00:06<06:21,  1.00it/s]


KeyboardInterrupt: 

In [None]:
%%time

# Op yielding the next evaluation batch as (label, image).
next_test = iterator_test.get_next()
# Restore through the name mapping provided by the model's moving-average object.
variables_to_restore = model.ema.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)
with tf.Session() as sess:
    # Restore variables from disk.
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        # Only whole batches are evaluated.
        num_iter = NUM_EXAMPLES_PER_EPOCH_FOR_EVAL // BATCH_SIZE
        total_sample_count = num_iter * BATCH_SIZE
        true_count = 0
        try:
            sess.run(iterator_test.initializer)
            for _ in tqdm(range(num_iter)):
                lbl, img = sess.run(next_test)
                # Use the model's own accuracy op and placeholders: the
                # module-level `top_k_op` is commented out, and feeding the
                # `images` / `labels` tensors would not reach the model's inputs.
                predictions = sess.run(model.top_k_op, feed_dict={model.images: img, model.labels: lbl})
                true_count += np.sum(predictions)
            print('{}: Accurarcy: {}/{} = {}'.format(datetime.now(), true_count, total_sample_count,
                                         true_count / total_sample_count))
        finally:
            # Stop the coordinator threads even if evaluation raises.
            coord.request_stop()
            coord.join(threads)
    else:
        print("{}: No model existed.".format(datetime.now()))