In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
%tensorflow_version 1.x
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from absl import logging

import argparse
import collections
from datetime import datetime
import hashlib
import os
import random
import re
import sys

import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow.contrib import quantize as contrib_quantize

FLAGS = None

MAX_NUM_IMAGES_PER_CLASS = 2 ** 27 - 1  # ~134M

# A module is understood as instrumented for quantization with TF-Lite
# if it contains any of these ops.
FAKE_QUANT_OPS = ('FakeQuantWithMinMaxVars',
                  'FakeQuantWithMinMaxVarsPerChannel')

In [0]:
def create_image_lists(image_dir, testing_percentage, validation_percentage):
  with tf.device('/device:GPU:0'):
    if not tf.gfile.Exists(image_dir):
      logging.error("Image directory '" + image_dir + "' not found.")
      return None
    result = collections.OrderedDict()
    sub_dirs = sorted(x[0] for x in tf.gfile.Walk(image_dir))
    # The root directory comes first, so skip it.
    is_root_dir = True
    for sub_dir in sub_dirs:
      if is_root_dir:
        is_root_dir = False
        continue
      extensions = sorted(set(os.path.normcase(ext)  # Smash case on Windows.
                              for ext in ['JPEG', 'JPG', 'jpeg', 'jpg', 'png']))
      file_list = []
      dir_name = os.path.basename(
          # tf.gfile.Walk() returns sub-directory with trailing '/' when it is in
          # Google Cloud Storage, which confuses os.path.basename().
          sub_dir[:-1] if sub_dir.endswith('/') else sub_dir)

      if dir_name == image_dir:
        continue
      logging.info("Looking for images in '%s'",  dir_name)
      for extension in extensions:
        file_glob = os.path.join(image_dir, dir_name, '*.' + extension)
        file_list.extend(tf.gfile.Glob(file_glob))
      if not file_list:
        logging.warning('No files found')
        continue
      if len(file_list) < 20:
        logging.warning(
            'WARNING: Folder has less than 20 images, which may cause issues.')
      elif len(file_list) > MAX_NUM_IMAGES_PER_CLASS:
        logging.warning(
            'WARNING: Folder %s has more than %s images. Some images will '
            'never be selected.', dir_name, MAX_NUM_IMAGES_PER_CLASS)
      label_name = re.sub(r'[^a-z0-9]+', ' ', dir_name.lower())
      training_images = []
      testing_images = []
      validation_images = []
      for file_name in file_list:
        base_name = os.path.basename(file_name)
        hash_name = re.sub(r'_nohash_.*$', '', file_name)
        hash_name_hashed = hashlib.sha1(tf.compat.as_bytes(hash_name)).hexdigest()
        percentage_hash = ((int(hash_name_hashed, 16) %
                            (MAX_NUM_IMAGES_PER_CLASS + 1)) *
                          (100.0 / MAX_NUM_IMAGES_PER_CLASS))
        if len(validation_images) == 0:
          validation_images.append(base_name)
        elif percentage_hash < validation_percentage:
          testing_images.append(base_name)
        else:
          training_images.append(base_name)
      result[label_name] = {
          'dir': dir_name,
          'training': training_images,
          'testing': testing_images,
          'validation': validation_images,
      }
  return result

def get_image_path(image_lists, label_name, index, image_dir, category):
  if label_name not in image_lists:
    logging.fatal('Label does not exist %s.', label_name)
  label_lists = image_lists[label_name]
  if category not in label_lists:
    logging.fatal('Category does not exist %s.', category)
  category_list = label_lists[category]
  if not category_list:
    logging.fatal('Label %s has no images in the category %s.',
                  label_name, category)
  mod_index = index % len(category_list)
  base_name = category_list[mod_index]
  sub_dir = label_lists['dir']
  full_path = os.path.join(image_dir, sub_dir, base_name)
  return full_path

def get_bottleneck_path(image_lists, label_name, index, bottleneck_dir,
                        category, module_name):
  module_name = (module_name.replace('://', '~')  # URL scheme.
                 .replace('/', '~')  # URL and Unix paths.
                 .replace(':', '~').replace('\\', '~'))  # Windows paths.
  return get_image_path(image_lists, label_name, index, bottleneck_dir,
                        category) + '_' + module_name + '.txt'

def create_module_graph(module_spec):
  with tf.device('/device:GPU:0'):
    height, width = hub.get_expected_image_size(module_spec)
    with tf.Graph().as_default() as graph:
      resized_input_tensor = tf.placeholder(tf.float32, [None, height, width, 3])
      m = hub.Module(module_spec)
      bottleneck_tensor = m(resized_input_tensor)
      wants_quantization = any(node.op in FAKE_QUANT_OPS
                              for node in graph.as_graph_def().node)
    return graph, bottleneck_tensor, resized_input_tensor, wants_quantization

def run_bottleneck_on_image(sess, image_data, image_data_tensor,
                            decoded_image_tensor, resized_input_tensor,
                            bottleneck_tensor):
  # First decode the JPEG image, resize it, and rescale the pixel values.
  resized_input_values = sess.run(decoded_image_tensor,
                                  {image_data_tensor: image_data})
  # Then run it through the recognition network.
  bottleneck_values = sess.run(bottleneck_tensor,
                               {resized_input_tensor: resized_input_values})
  bottleneck_values = np.squeeze(bottleneck_values)
  return bottleneck_values

def ensure_dir_exists(dir_name):
  if not os.path.exists(dir_name):
    os.makedirs(dir_name)


def create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                           image_dir, category, sess, jpeg_data_tensor,
                           decoded_image_tensor, resized_input_tensor,
                           bottleneck_tensor):
  with tf.device('/device:GPU:0'):
    """Create a single bottleneck file."""
    logging.debug('Creating bottleneck at %s', bottleneck_path)
    image_path = get_image_path(image_lists, label_name, index,
                                image_dir, category)
    if not tf.gfile.Exists(image_path):
      logging.fatal('File does not exist %s', image_path)
    image_data = tf.gfile.GFile(image_path, 'rb').read()
    try:
      bottleneck_values = run_bottleneck_on_image(
          sess, image_data, jpeg_data_tensor, decoded_image_tensor,
          resized_input_tensor, bottleneck_tensor)
    except Exception as e:
      raise RuntimeError('Error during processing file %s (%s)' % (image_path,
                                                                  str(e)))
    bottleneck_string = ','.join(str(x) for x in bottleneck_values)
    with tf.gfile.GFile(bottleneck_path, 'w') as bottleneck_file:
      bottleneck_file.write(bottleneck_string)

def get_or_create_bottleneck(sess, image_lists, label_name, index, image_dir,
                             category, bottleneck_dir, jpeg_data_tensor,
                             decoded_image_tensor, resized_input_tensor,
                             bottleneck_tensor, module_name):
  with tf.device('/device:GPU:0'):
    label_lists = image_lists[label_name]
    sub_dir = label_lists['dir']
    sub_dir_path = os.path.join(bottleneck_dir, sub_dir)
    ensure_dir_exists(sub_dir_path)
    bottleneck_path = get_bottleneck_path(image_lists, label_name, index,
                                          bottleneck_dir, category, module_name)
    if not os.path.exists(bottleneck_path):
      create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                            image_dir, category, sess, jpeg_data_tensor,
                            decoded_image_tensor, resized_input_tensor,
                            bottleneck_tensor)
    with tf.gfile.GFile(bottleneck_path, 'r') as bottleneck_file:
      bottleneck_string = bottleneck_file.read()
    did_hit_error = False
    try:
      bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
    except ValueError:
      logging.warning('Invalid float found, recreating bottleneck')
      did_hit_error = True
    if did_hit_error:
      create_bottleneck_file(bottleneck_path, image_lists, label_name, index,
                            image_dir, category, sess, jpeg_data_tensor,
                            decoded_image_tensor, resized_input_tensor,
                            bottleneck_tensor)
      with tf.gfile.GFile(bottleneck_path, 'r') as bottleneck_file:
        bottleneck_string = bottleneck_file.read()
      # Allow exceptions to propagate here, since they shouldn't happen after a
      # fresh creation
      bottleneck_values = [float(x) for x in bottleneck_string.split(',')]
    return bottleneck_values

def cache_bottlenecks(sess, image_lists, image_dir, bottleneck_dir,
                      jpeg_data_tensor, decoded_image_tensor,
                      resized_input_tensor, bottleneck_tensor, module_name):
  how_many_bottlenecks = 0
  ensure_dir_exists(bottleneck_dir)
  for label_name, label_lists in image_lists.items():
    for category in ['training', 'testing', 'validation']:
      category_list = label_lists[category]
      for index, unused_base_name in enumerate(category_list):
        get_or_create_bottleneck(
            sess, image_lists, label_name, index, image_dir, category,
            bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
            resized_input_tensor, bottleneck_tensor, module_name)
        how_many_bottlenecks += 1
        if how_many_bottlenecks % 100 == 0:
          logging.info('%s bottleneck files created.', how_many_bottlenecks)


def get_random_cached_bottlenecks(sess, image_lists, how_many, category,
                                  bottleneck_dir, image_dir, jpeg_data_tensor,
                                  decoded_image_tensor, resized_input_tensor,
                                  bottleneck_tensor, module_name):
  class_count = len(image_lists.keys())
  bottlenecks = []
  ground_truths = []
  filenames = []
  if how_many >= 0:
    # Retrieve a random sample of bottlenecks.
    for unused_i in range(how_many):
      label_index = random.randrange(class_count)
      label_name = list(image_lists.keys())[label_index]
      image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
      image_name = get_image_path(image_lists, label_name, image_index,
                                  image_dir, category)
      bottleneck = get_or_create_bottleneck(
          sess, image_lists, label_name, image_index, image_dir, category,
          bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
          resized_input_tensor, bottleneck_tensor, module_name)
      bottlenecks.append(bottleneck)
      ground_truths.append(label_index)
      filenames.append(image_name)
  else:
    # Retrieve all bottlenecks.
    for label_index, label_name in enumerate(image_lists.keys()):
      for image_index, image_name in enumerate(
          image_lists[label_name][category]):
        image_name = get_image_path(image_lists, label_name, image_index,
                                    image_dir, category)
        bottleneck = get_or_create_bottleneck(
            sess, image_lists, label_name, image_index, image_dir, category,
            bottleneck_dir, jpeg_data_tensor, decoded_image_tensor,
            resized_input_tensor, bottleneck_tensor, module_name)
        bottlenecks.append(bottleneck)
        ground_truths.append(label_index)
        filenames.append(image_name)
  return bottlenecks, ground_truths, filenames

def get_random_distorted_bottlenecks(
    sess, image_lists, how_many, category, image_dir, input_jpeg_tensor,
    distorted_image, resized_input_tensor, bottleneck_tensor):
  with tf.device('/device:GPU:0'):
    class_count = len(image_lists.keys())
    bottlenecks = []
    ground_truths = []
    for unused_i in range(how_many):
      label_index = random.randrange(class_count)
      label_name = list(image_lists.keys())[label_index]
      image_index = random.randrange(MAX_NUM_IMAGES_PER_CLASS + 1)
      image_path = get_image_path(image_lists, label_name, image_index, image_dir,
                                  category)
      if not tf.gfile.Exists(image_path):
        logging.fatal('File does not exist %s', image_path)
      jpeg_data = tf.gfile.GFile(image_path, 'rb').read()
      # Note that we materialize the distorted_image_data as a numpy array before
      # sending running inference on the image. This involves 2 memory copies and
      # might be optimized in other implementations.
      distorted_image_data = sess.run(distorted_image,
                                      {input_jpeg_tensor: jpeg_data})
      bottleneck_values = sess.run(bottleneck_tensor,
                                  {resized_input_tensor: distorted_image_data})
      bottleneck_values = np.squeeze(bottleneck_values)
      bottlenecks.append(bottleneck_values)
      ground_truths.append(label_index)
    return bottlenecks, ground_truths

def should_distort_images(flip_left_right, random_crop, random_scale,
                          random_brightness):
  return (flip_left_right or (random_crop != 0) or (random_scale != 0) or
          (random_brightness != 0))


def add_input_distortions(flip_left_right, random_crop, random_scale,
                          random_brightness, module_spec):
  with tf.device('/device:GPU:0'):
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)
    jpeg_data = tf.placeholder(tf.string, name='DistortJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    # Convert from full range of uint8 to range [0,1] of float32.
    decoded_image_as_float = tf.image.convert_image_dtype(decoded_image,
                                                          tf.float32)
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    margin_scale = 1.0 + (random_crop / 100.0)
    resize_scale = 1.0 + (random_scale / 100.0)
    margin_scale_value = tf.constant(margin_scale)
    resize_scale_value = tf.random_uniform(shape=[],
                                          minval=1.0,
                                          maxval=resize_scale)
    scale_value = tf.multiply(margin_scale_value, resize_scale_value)
    precrop_width = tf.multiply(scale_value, input_width)
    precrop_height = tf.multiply(scale_value, input_height)
    precrop_shape = tf.stack([precrop_height, precrop_width])
    precrop_shape_as_int = tf.cast(precrop_shape, dtype=tf.int32)
    precropped_image = tf.image.resize_bilinear(decoded_image_4d,
                                                precrop_shape_as_int)
    precropped_image_3d = tf.squeeze(precropped_image, axis=[0])
    cropped_image = tf.random_crop(precropped_image_3d,
                                  [input_height, input_width, input_depth])
    if flip_left_right:
      flipped_image = tf.image.random_flip_left_right(cropped_image)
    else:
      flipped_image = cropped_image
    brightness_min = 1.0 - (random_brightness / 100.0)
    brightness_max = 1.0 + (random_brightness / 100.0)
    brightness_value = tf.random_uniform(shape=[],
                                        minval=brightness_min,
                                        maxval=brightness_max)
    brightened_image = tf.multiply(flipped_image, brightness_value)
    distort_result = tf.expand_dims(brightened_image, 0, name='DistortResult')
    return jpeg_data, distort_result  

def variable_summaries(var):
  with tf.device('/device:GPU:0'):
    """Attach a lot of summaries to a Tensor (for TensorBoard visualization)."""
    with tf.name_scope('summaries'):
      mean = tf.reduce_mean(var)
      tf.summary.scalar('mean', mean)
      with tf.name_scope('stddev'):
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
      tf.summary.scalar('stddev', stddev)
      tf.summary.scalar('max', tf.reduce_max(var))
      tf.summary.scalar('min', tf.reduce_min(var))
      tf.summary.histogram('histogram', var)


def add_final_retrain_ops(class_count, final_tensor_name, bottleneck_tensor,
                          quantize_layer, is_training):
  with tf.device('/device:GPU:0'):
    batch_size, bottleneck_tensor_size = bottleneck_tensor.get_shape().as_list()
    assert batch_size is None, 'We want to work with arbitrary batch size.'
    with tf.name_scope('input'):
      bottleneck_input = tf.placeholder_with_default(
          bottleneck_tensor,
          shape=[batch_size, bottleneck_tensor_size],
          name='BottleneckInputPlaceholder')

      ground_truth_input = tf.placeholder(
          tf.int64, [batch_size], name='GroundTruthInput')

    # Organizing the following ops so they are easier to see in TensorBoard.
    layer_name = 'final_retrain_ops'
    with tf.name_scope(layer_name):
      with tf.name_scope('weights'):
        initial_value = tf.truncated_normal(
            [bottleneck_tensor_size, class_count], stddev=0.001)
        layer_weights = tf.Variable(initial_value, name='final_weights')
        variable_summaries(layer_weights)

      with tf.name_scope('biases'):
        layer_biases = tf.Variable(tf.zeros([class_count]), name='final_biases')
        variable_summaries(layer_biases)

      with tf.name_scope('Wx_plus_b'):
        logits = tf.matmul(bottleneck_input, layer_weights) + layer_biases
        tf.summary.histogram('pre_activations', logits)

    final_tensor = tf.nn.softmax(logits, name=final_tensor_name)

    # The tf.contrib.quantize functions rewrite the graph in place for
    # quantization. The imported model graph has already been rewritten, so upon
    # calling these rewrites, only the newly added final layer will be
    # transformed.
    if quantize_layer:
      if is_training:
        contrib_quantize.create_training_graph()
      else:
        contrib_quantize.create_eval_graph()
        
    tf.summary.histogram('activations', final_tensor)

    # If this is an eval graph, we don't need to add loss ops or an optimizer.
    if not is_training:
      return None, None, bottleneck_input, ground_truth_input, final_tensor

    with tf.name_scope('cross_entropy'):
      cross_entropy_mean = tf.losses.sparse_softmax_cross_entropy(
          labels=ground_truth_input, logits=logits)

    tf.summary.scalar('cross_entropy', cross_entropy_mean)

    with tf.name_scope('train'):
      optimizer = tf.train.GradientDescentOptimizer(FLAGS.learning_rate)
      train_step = optimizer.minimize(cross_entropy_mean)

    return (train_step, cross_entropy_mean, bottleneck_input, ground_truth_input,
            final_tensor)

def add_evaluation_step(result_tensor, ground_truth_tensor):
  with tf.device('/device:GPU:0'):
    with tf.name_scope('accuracy'):
      with tf.name_scope('correct_prediction'):
        prediction = tf.argmax(result_tensor, 1)
        correct_prediction = tf.equal(prediction, ground_truth_tensor)
      with tf.name_scope('accuracy'):
        evaluation_step = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', evaluation_step)
    return evaluation_step, prediction

def run_final_eval(train_session, module_spec, class_count, image_lists,
                   jpeg_data_tensor, decoded_image_tensor,
                   resized_image_tensor, bottleneck_tensor):
  test_bottlenecks, test_ground_truth, test_filenames = (
      get_random_cached_bottlenecks(train_session, image_lists,
                                    FLAGS.test_batch_size,
                                    'testing', FLAGS.bottleneck_dir,
                                    FLAGS.image_dir, jpeg_data_tensor,
                                    decoded_image_tensor, resized_image_tensor,
                                    bottleneck_tensor, FLAGS.tfhub_module))

  (eval_session, _, bottleneck_input, ground_truth_input, evaluation_step,
   prediction) = build_eval_session(module_spec, class_count)
  try:
    test_accuracy, predictions = eval_session.run(
        [evaluation_step, prediction],
        feed_dict={
            bottleneck_input: test_bottlenecks,
            ground_truth_input: test_ground_truth
        })
  except:
    print('Final Test Failed, Moving on...')
    return
  logging.info('Final test accuracy = %.1f%% (N=%d)',
               test_accuracy * 100, len(test_bottlenecks))

  if FLAGS.print_misclassified_test_images:
    logging.info('=== MISCLASSIFIED TEST IMAGES ===')
    for i, test_filename in enumerate(test_filenames):
      if predictions[i] != test_ground_truth[i]:
        logging.info('%70s  %s', test_filename,
                     list(image_lists.keys())[predictions[i]])

def build_eval_session(module_spec, class_count):
  with tf.device('/device:GPU:0'):
    # If quantized, we need to create the correct eval graph for exporting.
    eval_graph, bottleneck_tensor, resized_input_tensor, wants_quantization = (
        create_module_graph(module_spec))

    eval_sess = tf.Session(graph=eval_graph, config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True))
    with eval_graph.as_default():
      # Add the new layer for exporting.
      (_, _, bottleneck_input,
      ground_truth_input, final_tensor) = add_final_retrain_ops(
          class_count, FLAGS.final_tensor_name, bottleneck_tensor,
          wants_quantization, is_training=False)

      # Now we need to restore the values from the training graph to the eval
      # graph.
      tf.train.Saver().restore(eval_sess, FLAGS.checkpoint_path)

      evaluation_step, prediction = add_evaluation_step(final_tensor,
                                                        ground_truth_input)

    return (eval_sess, resized_input_tensor, bottleneck_input, ground_truth_input,
            evaluation_step, prediction)

def save_graph_to_file(graph_file_name, module_spec, class_count):
  with tf.device('/device:GPU:0'):
    """Saves an graph to file, creating a valid quantized one if necessary."""
    sess, _, _, _, _, _ = build_eval_session(module_spec, class_count)
    graph = sess.graph

    output_graph_def = tf.graph_util.convert_variables_to_constants(
        sess, graph.as_graph_def(), [FLAGS.final_tensor_name])

    with tf.gfile.GFile(graph_file_name, 'wb') as f:
      f.write(output_graph_def.SerializeToString())


def prepare_file_system():
  with tf.device('/device:GPU:0'):
    # Set up the directory we'll write summaries to for TensorBoard
    if tf.gfile.Exists(FLAGS.summaries_dir):
      tf.gfile.DeleteRecursively(FLAGS.summaries_dir)
    tf.gfile.MakeDirs(FLAGS.summaries_dir)
    if FLAGS.intermediate_store_frequency > 0:
      ensure_dir_exists(FLAGS.intermediate_output_graphs_dir)
    return


def add_jpeg_decoding(module_spec):
  with tf.device('/device:GPU:0'):
    input_height, input_width = hub.get_expected_image_size(module_spec)
    input_depth = hub.get_num_image_channels(module_spec)
    jpeg_data = tf.placeholder(tf.string, name='DecodeJPGInput')
    decoded_image = tf.image.decode_jpeg(jpeg_data, channels=input_depth)
    # Convert from full range of uint8 to range [0,1] of float32.
    decoded_image_as_float = tf.image.convert_image_dtype(decoded_image,
                                                          tf.float32)
    decoded_image_4d = tf.expand_dims(decoded_image_as_float, 0)
    resize_shape = tf.stack([input_height, input_width])
    resize_shape_as_int = tf.cast(resize_shape, dtype=tf.int32)
    resized_image = tf.image.resize_bilinear(decoded_image_4d,
                                            resize_shape_as_int)
    return jpeg_data, resized_image


def export_model(module_spec, class_count, saved_model_dir):
  # The SavedModel should hold the eval graph.
  sess, in_image, _, _, _, _ = build_eval_session(module_spec, class_count)
  with sess.graph.as_default() as graph:
    tf.saved_model.simple_save(
        sess,
        saved_model_dir,
        inputs={'image': in_image},
        outputs={'prediction': graph.get_tensor_by_name('final_result:0')},
        legacy_init_op=tf.group(tf.tables_initializer(), name='legacy_init_op')
    )


def logging_level_verbosity(logging_verbosity):
  name_to_level = {
      'FATAL': logging.FATAL,
      'ERROR': logging.ERROR,
      'WARN': logging.WARN,
      'INFO': logging.INFO,
      'DEBUG': logging.DEBUG
  }

  try:
    return name_to_level[logging_verbosity]
  except Exception as e:
    raise RuntimeError('Not supported logs verbosity (%s). Use one of %s.' %
                       (str(e), list(name_to_level)))

In [0]:
def main(_):
  with tf.device('/device:GPU:0'):
    # Needed to make sure the logging output is visible.
    # See https://github.com/tensorflow/tensorflow/issues/3047
    logging_verbosity = logging_level_verbosity(FLAGS.logging_verbosity)
    logging.set_verbosity(logging_verbosity)

    if not FLAGS.image_dir:
      logging.error('Must set flag --image_dir.')
      return -1

    # Prepare necessary directories that can be used during training
    prepare_file_system()

    # Look at the folder structure, and create lists of all the images.
    image_lists = create_image_lists(FLAGS.image_dir, FLAGS.testing_percentage,
                                    FLAGS.validation_percentage)
    class_count = len(image_lists.keys())
    if class_count == 0:
      logging.error('No valid folders of images found at %s', FLAGS.image_dir)
      return -1
    if class_count == 1:
      logging.error('Only one valid folder of images found at %s '
                    ' - multiple classes are needed for classification.',
                    FLAGS.image_dir)
      return -1

    # See if the command-line flags mean we're applying any distortions.
    do_distort_images = should_distort_images(
        FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
        FLAGS.random_brightness)

    # Set up the pre-trained graph.
    module_spec = hub.load_module_spec(FLAGS.tfhub_module)
    graph, bottleneck_tensor, resized_image_tensor, wants_quantization = (
        create_module_graph(module_spec))
    
      # Add the new layer that we'll be training.
    with graph.as_default():
      (train_step, cross_entropy, bottleneck_input,
      ground_truth_input, final_tensor) = add_final_retrain_ops(
          class_count, FLAGS.final_tensor_name, bottleneck_tensor,
          wants_quantization, is_training=True)

    with tf.Session(graph=graph, config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)) as sess:
      # Initialize all weights: for the module to their pretrained values,
      # and for the newly added retraining layer to random initial values.
      init = tf.global_variables_initializer()
      sess.run(init)

      # Set up the image decoding sub-graph.
      jpeg_data_tensor, decoded_image_tensor = add_jpeg_decoding(module_spec)

      if do_distort_images:
        # We will be applying distortions, so set up the operations we'll need.
        (distorted_jpeg_data_tensor,
        distorted_image_tensor) = add_input_distortions(
            FLAGS.flip_left_right, FLAGS.random_crop, FLAGS.random_scale,
            FLAGS.random_brightness, module_spec)
      else:
        # We'll make sure we've calculated the 'bottleneck' image summaries and
        # cached them on disk.
        cache_bottlenecks(sess, image_lists, FLAGS.image_dir,
                          FLAGS.bottleneck_dir, jpeg_data_tensor,
                          decoded_image_tensor, resized_image_tensor,
                          bottleneck_tensor, FLAGS.tfhub_module)

      # Create the operations we need to evaluate the accuracy of our new layer.
      evaluation_step, _ = add_evaluation_step(final_tensor, ground_truth_input)

      # Merge all the summaries and write them out to the summaries_dir
      merged = tf.summary.merge_all()
      train_writer = tf.summary.FileWriter(FLAGS.summaries_dir + '/train',
                                          sess.graph)
      
      validation_writer = tf.summary.FileWriter(
          FLAGS.summaries_dir + '/validation')

      # Create a train saver that is used to restore values into an eval graph
      # when exporting models.
      train_saver = tf.train.Saver()

      # Run the training for as many cycles as requested on the command line.
      for i in range(FLAGS.how_many_training_steps):
        # Get a batch of input bottleneck values, either calculated fresh every
        # time with distortions applied, or from the cache stored on disk.
        if do_distort_images:
          (train_bottlenecks,
          train_ground_truth) = get_random_distorted_bottlenecks(
              sess, image_lists, FLAGS.train_batch_size, 'training',
              FLAGS.image_dir, distorted_jpeg_data_tensor,
              distorted_image_tensor, resized_image_tensor, bottleneck_tensor)
        else:
          (train_bottlenecks,
          train_ground_truth, _) = get_random_cached_bottlenecks(
              sess, image_lists, FLAGS.train_batch_size, 'training',
              FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
              decoded_image_tensor, resized_image_tensor, bottleneck_tensor,
              FLAGS.tfhub_module)
        # Feed the bottlenecks and ground truth into the graph, and run a training
        # step. Capture training summaries for TensorBoard with the `merged` op.
        train_summary, _ = sess.run(
            [merged, train_step],
            feed_dict={bottleneck_input: train_bottlenecks,
                      ground_truth_input: train_ground_truth})
        train_writer.add_summary(train_summary, i)

        # Every so often, print out how well the graph is training.
        is_last_step = (i + 1 == FLAGS.how_many_training_steps)
        if (i % FLAGS.eval_step_interval) == 0 or is_last_step:
          train_accuracy, cross_entropy_value = sess.run(
              [evaluation_step, cross_entropy],
              feed_dict={bottleneck_input: train_bottlenecks,
                                                ground_truth_input: train_ground_truth})
          logging.info('%s: Step %d: Train accuracy = %.1f%%',
                      datetime.now(), i, train_accuracy * 100)
          logging.info('%s: Step %d: Cross entropy = %f',
                      datetime.now(), i, cross_entropy_value)
          # TODO: Make this use an eval graph, to avoid quantization
          # moving averages being updated by the validation set, though in
          # practice this makes a negligable difference.
          validation_bottlenecks, validation_ground_truth, _ = (
              get_random_cached_bottlenecks(
                  sess, image_lists, FLAGS.validation_batch_size, 'validation',
                  FLAGS.bottleneck_dir, FLAGS.image_dir, jpeg_data_tensor,
                  decoded_image_tensor, resized_image_tensor, bottleneck_tensor,
                  FLAGS.tfhub_module))
          # Run a validation step and capture training summaries for TensorBoard
          # with the `merged` op.
          validation_summary, validation_accuracy = sess.run(
              [merged, evaluation_step],
              feed_dict={bottleneck_input: validation_bottlenecks,
                        ground_truth_input: validation_ground_truth})
          validation_writer.add_summary(validation_summary, i)
          logging.info('%s: Step %d: Validation accuracy = %.1f%% (N=%d)',
                      datetime.now(), i, validation_accuracy * 100,
                      len(validation_bottlenecks))

        # Store intermediate results
        intermediate_frequency = FLAGS.intermediate_store_frequency

        if (intermediate_frequency > 0 and (i % intermediate_frequency == 0)
            and i > 0):
          # If we want to do an intermediate save, save a checkpoint of the train
          # graph, to restore into the eval graph.
          train_saver.save(sess, FLAGS.checkpoint_path)
          intermediate_file_name = (FLAGS.intermediate_output_graphs_dir +
                                    'intermediate_' + str(i) + '.pb')
          logging.info('Save intermediate result to : %s', intermediate_file_name)
          save_graph_to_file(intermediate_file_name, module_spec,
                            class_count)
          
          
      # After training is complete, force one last save of the train checkpoint.
      train_saver.save(sess, FLAGS.checkpoint_path)

      # We've completed all our training, so run a final test evaluation on
      # some new images we haven't used before.
      run_final_eval(sess, module_spec, class_count, image_lists,
                    jpeg_data_tensor, decoded_image_tensor, resized_image_tensor,
                    bottleneck_tensor)

      # Write out the trained graph and labels with the weights stored as
      # constants.
      logging.info('Save final result to : %s', FLAGS.output_graph)
      if wants_quantization:
        logging.info('The model is instrumented for quantization with TF-Lite')
      save_graph_to_file(FLAGS.output_graph, module_spec, class_count)
      with tf.gfile.GFile(FLAGS.output_labels, 'w') as f:
        f.write('\n'.join(image_lists.keys()) + '\n')

      if FLAGS.saved_model_dir:
        export_model(module_spec, class_count, FLAGS.saved_model_dir)


In [15]:
if __name__ == '__main__':
  if not os.path.isdir("/content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp"):
    os.mkdir("/content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp")
  parser = argparse.ArgumentParser()
  parser.add_argument(
      '--image_dir',
      type=str,
      default='/content/drive/My Drive/TensorFlow_model/training_data/',
      help='Path to folders of labeled images.'
  )
  parser.add_argument(
      '--output_graph',
      type=str,
      default='/content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/output_graph.pb',
      help='Where to save the trained graph.'
  )
  parser.add_argument(
      '--intermediate_output_graphs_dir',
      type=str,
      default='/content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/intermediate_graph/',
      help='Where to save the intermediate graphs.'
  )
  parser.add_argument(
      '--intermediate_store_frequency',
      type=int,
      default=0,
      help="""\
         How many steps to store intermediate graph. If "0" then will not
         store.\
      """
  )
  parser.add_argument(
      '--output_labels',
      type=str,
      default='/content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/output_labels.txt',
      help='Where to save the trained graph\'s labels.'
  )
  parser.add_argument(
      '--summaries_dir',
      type=str,
      default='/content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/retrain_logs',
      help='Where to save summary logs for TensorBoard.'
  )
  parser.add_argument(
      '--how_many_training_steps',
      type=int,
      default=4000,
      help='How many training steps to run before ending.'
  )
  parser.add_argument(
      '--learning_rate',
      type=float,
      default=0.01,
      help='How large a learning rate to use when training.'
  )
  parser.add_argument(
      '--testing_percentage',
      type=int,
      default=10,
      help='What percentage of images to use as a test set.'
  )
  parser.add_argument(
      '--validation_percentage',
      type=int,
      default=10,
      help='What percentage of images to use as a validation set.'
  )
  parser.add_argument(
      '--eval_step_interval',
      type=int,
      default=10,
      help='How often to evaluate the training results.'
  )
  parser.add_argument(
      '--train_batch_size',
      type=int,
      default=100,
      help='How many images to train on at a time.'
  )
  parser.add_argument(
      '--test_batch_size',
      type=int,
      default=-1,
      help="""\
      How many images to test on. This test set is only used once, to evaluate
      the final accuracy of the model after training completes.
      A value of -1 causes the entire test set to be used, which leads to more
      stable results across runs.\
      """
  )
  parser.add_argument(
      '--validation_batch_size',
      type=int,
      default=100,
      help="""\
      How many images to use in an evaluation batch. This validation set is
      used much more often than the test set, and is an early indicator of how
      accurate the model is during training.
      A value of -1 causes the entire validation set to be used, which leads to
      more stable results across training iterations, but may be slower on large
      training sets.\
      """
  )
  parser.add_argument(
      '--print_misclassified_test_images',
      default=False,
      help="""\
      Whether to print out a list of all misclassified test images.\
      """,
      action='store_true'
  )
  parser.add_argument(
      '--bottleneck_dir',
      type=str,
      default='/content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/bottleneck',
      help='Path to cache bottleneck layer values as files.'
  )
  parser.add_argument(
      '--final_tensor_name',
      type=str,
      default='final_result',
      help="""\
      The name of the output classification layer in the retrained graph.\
      """
  )
  parser.add_argument(
      '--flip_left_right',
      default=False,
      help="""\
      Whether to randomly flip half of the training images horizontally.\
      """,
      action='store_true'
  )
  parser.add_argument(
      '--random_crop',
      type=int,
      default=0,
      help="""\
      A percentage determining how much of a margin to randomly crop off the
      training images.\
      """
  )
  parser.add_argument(
      '--random_scale',
      type=int,
      default=0,
      help="""\
      A percentage determining how much to randomly scale up the size of the
      training images by.\
      """
  )
  parser.add_argument(
      '--random_brightness',
      type=int,
      default=0,
      help="""\
      A percentage determining how much to randomly multiply the training image
      input pixels up or down by.\
      """
  )
  parser.add_argument(
      '--tfhub_module',
      type=str,
      default=(
          'https://tfhub.dev/google/imagenet/inception_v3/feature_vector/3'),
      help="""\
      Which TensorFlow Hub module to use. For more options,
      search https://tfhub.dev for image feature vector modules.\
      """)
  parser.add_argument(
      '--saved_model_dir',
      type=str,
      default='',
      help='Where to save the exported graph.')
  parser.add_argument(
      '--logging_verbosity',
      type=str,
      default='INFO',
      choices=['DEBUG', 'INFO', 'WARN', 'ERROR', 'FATAL'],
      help='How much logging output should be produced.')
  parser.add_argument(
      '--checkpoint_path',
      type=str,
      default='/content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/_retrain_checkpoint',
      help='Where to save checkpoint files.'
  )
  FLAGS, unparsed = parser.parse_known_args()
  with tf.device('/device:GPU:0'):
    tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)

I1227 00:32:45.384862 140210095409024 <ipython-input-13-b93e9db069e1>:24] Looking for images in 'Candy Canes'
I1227 00:32:48.451291 140210095409024 <ipython-input-13-b93e9db069e1>:24] Looking for images in 'Christmas Trees'
I1227 00:32:50.706753 140210095409024 <ipython-input-13-b93e9db069e1>:24] Looking for images in 'Ornaments'
I1227 00:32:52.993061 140210095409024 <ipython-input-13-b93e9db069e1>:24] Looking for images in 'Presents'
I1227 00:32:55.281810 140210095409024 <ipython-input-13-b93e9db069e1>:24] Looking for images in 'Santa Hats'
I1227 00:32:57.566283 140210095409024 <ipython-input-13-b93e9db069e1>:24] Looking for images in 'Stockings'


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I1227 00:33:03.790576 140210095409024 saver.py:1503] Saver not created because there are no variables in the graph to restore


Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0



I1227 00:34:29.046128 140210095409024 <ipython-input-13-b93e9db069e1>:189] 100 bottleneck files created.
I1227 00:35:20.337855 140210095409024 <ipython-input-13-b93e9db069e1>:189] 200 bottleneck files created.
I1227 00:36:09.382315 140210095409024 <ipython-input-13-b93e9db069e1>:189] 300 bottleneck files created.
I1227 00:36:55.923660 140210095409024 <ipython-input-13-b93e9db069e1>:189] 400 bottleneck files created.
I1227 00:37:45.665906 140210095409024 <ipython-input-13-b93e9db069e1>:189] 500 bottleneck files created.
I1227 00:38:32.095029 140210095409024 <ipython-input-13-b93e9db069e1>:189] 600 bottleneck files created.
I1227 00:39:19.366408 140210095409024 <ipython-input-13-b93e9db069e1>:189] 700 bottleneck files created.
I1227 00:40:04.035723 140210095409024 <ipython-input-13-b93e9db069e1>:189] 800 bottleneck files created.
I1227 00:40:50.161872 140210095409024 <ipython-input-13-b93e9db069e1>:189] 900 bottleneck files created.
I1227 00:41:42.804122 140210095409024 <ipython-input-13

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I1227 02:37:15.723229 140210095409024 saver.py:1503] Saver not created because there are no variables in the graph to restore


Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0

INFO:tensorflow:Restoring parameters from /content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/_retrain_checkpoint


I1227 02:37:16.618149 140210095409024 saver.py:1284] Restoring parameters from /content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/_retrain_checkpoint
I1227 02:37:20.884800 140210095409024 <ipython-input-13-b93e9db069e1>:416] Final test accuracy = 100.0% (N=1280)
I1227 02:37:20.970943 140210095409024 <ipython-input-14-861d96b77d96>:165] Save final result to : /content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/output_graph.pb


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I1227 02:37:24.889674 140210095409024 saver.py:1503] Saver not created because there are no variables in the graph to restore


Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device
/job:localhost/replica:0/task:0/device:XLA_GPU:0 -> device: XLA_GPU device
/job:localhost/replica:0/task:0/device:GPU:0 -> device: 0, name: Tesla P100-PCIE-16GB, pci bus id: 0000:00:04.0, compute capability: 6.0

INFO:tensorflow:Restoring parameters from /content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/_retrain_checkpoint


I1227 02:37:25.846151 140210095409024 saver.py:1284] Restoring parameters from /content/drive/My Drive/TensorFlow_model/tmp/retrain_tmp/_retrain_checkpoint


Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`


W1227 02:37:27.976955 140210095409024 deprecation.py:323] From <ipython-input-13-b93e9db069e1>:456: convert_variables_to_constants (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`


Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


W1227 02:37:27.981713 140210095409024 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/framework/graph_util_impl.py:277: extract_sub_graph (from tensorflow.python.framework.graph_util_impl) is deprecated and will be removed in a future version.
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`


INFO:tensorflow:Froze 378 variables.


I1227 02:37:29.823518 140210095409024 graph_util_impl.py:334] Froze 378 variables.


INFO:tensorflow:Converted 378 variables to const ops.


I1227 02:37:29.983168 140210095409024 graph_util_impl.py:394] Converted 378 variables to const ops.


SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
