In [1]:
# Running %env without any arguments
# lists all environment variables

# The line below sets the environment
# variable CUDA_VISIBLE_DEVICES
%env CUDA_VISIBLE_DEVICES = 0

import numpy as np
import pandas as pd
import io
import time
import bson                       # this is installed with the pymongo package
import matplotlib.pyplot as plt
from scipy.misc import imread, imsave
import tensorflow as tf
from tensorflow.python.platform import tf_logging
import os.path
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import inception
from tensorflow.contrib.framework.python.ops.variables import get_or_create_global_step
import inception_preprocessing

# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
# Notebook-wide plotting defaults: figure size, no interpolation smoothing,
# and a grayscale colour map for images.
plt.rcParams.update({
    'figure.figsize': (10.0, 8.0),
    'image.interpolation': 'nearest',
    'image.cmap': 'gray',
})

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

env: CUDA_VISIBLE_DEVICES=0,1


In [2]:
# --- Data locations ---------------------------------------------------------
# Root directory of the Kaggle data. The original machine-specific location is
# kept as the default; set the KAGGLE_DATASET_PATH environment variable to run
# the notebook on another machine without editing this cell.
# (`os` is bound by the `import os.path` statement in the import cell.)
DATASET_PATH = os.environ.get('KAGGLE_DATASET_PATH', '/media/rs/0E06CD1706CD0127/Kapok/kaggle/')
# os.path.join keeps the derived paths valid whether or not the root ends in '/'.
PRETRAINED_MODEL_PATH = os.path.join(DATASET_PATH, 'models/inception_v3.ckpt')
LOG_PATH = os.path.join(DATASET_PATH, 'logs222/')
TRAIN_PATH = os.path.join(DATASET_PATH, 'Split/Train/')
VAL_PATH = os.path.join(DATASET_PATH, 'Split/Validation/')
TEST_PATH = os.path.join(DATASET_PATH, 'Test/')
CATEGORY_NAME_PATH = os.path.join(DATASET_PATH, 'category_names.csv')

# --- Input / model sizes ----------------------------------------------------
BATCH_SIZE = 64
IMAGE_WIDTH = 180
IMAGE_HEIGHT = 180
NUM_CLASS = 5270
# validation examples num: 2319618
# train examples num: 10051678
# total step: 157057
TOTAL_EXAMPLES = 10051678
NUM_EPOCHES = 4
INPUT_THREADS = 6

# --- Learning-rate schedule (values to experiment with) ----------------------
initial_learning_rate = 0.000009  # previously 0.00001
learning_rate_decay_factor = 0.96
num_epochs_before_decay = 1
# Batches per epoch. This is a true division, so the value is fractional
# (157057.46875); consumers truncate with int() after scaling it.
num_steps_per_epoch = TOTAL_EXAMPLES / BATCH_SIZE
# Number of steps between learning-rate decays: one sixth of
# `num_epochs_before_decay` epochs.
decay_steps = int(num_epochs_before_decay * num_steps_per_epoch / 6)

In [3]:
# deprecated
def read_my_file_format(filename_queue):
    """Turn one parsed record into an (image, product_id, category_id) triple.

    Despite its name, `filename_queue` is indexed like a mapping: the entries
    'img_raw', 'product_id' and 'category_id' are read from it. The raw image
    is JPEG-decoded and passed through `preprocess_for_inception`; the two ids
    are returned untouched.
    """
    decoded_image = tf.image.decode_jpeg(filename_queue['img_raw'])
    # image preprocess, i.e. crop etc.
    example_image = preprocess_for_inception(decoded_image)
    return example_image, filename_queue['product_id'], filename_queue['category_id']

In [4]:
# deprecated
def create_examples(filename_regex, num_epochs):
    """Read and parse one record from the ZLIB-compressed TFRecord files.

    Args:
        filename_regex: file pattern handed to `tf.train.match_filenames_once`.
        num_epochs: epoch limit forwarded to the (shuffling) filename queue.

    Returns:
        The result of `tf.parse_single_example` for the features 'img_raw'
        (string), 'product_id' (int64) and 'category_id' (int64).
    """
    feature_spec = {
        'img_raw': tf.FixedLenFeature([], tf.string),
        'product_id': tf.FixedLenFeature([], tf.int64),
        'category_id': tf.FixedLenFeature([], tf.int64)
    }
    matched_files = tf.train.match_filenames_once(filename_regex)
    file_queue = tf.train.string_input_producer(matched_files, num_epochs=num_epochs, shuffle=True)
    zlib_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
    # The record key returned by read() is not needed.
    _, serialized_record = tf.TFRecordReader(options=zlib_options).read(file_queue)
    return tf.parse_single_example(serialized_record, features=feature_spec)

In [5]:
# deprecated
def input_pipeline(filenames, batch_size, read_threads, num_epochs=None):
    """Build shuffled (image, product_id, category_id) batches.

    Args:
        filenames: file pattern forwarded to `create_examples`.
        batch_size: number of examples per batch.
        read_threads: how many example tuples are handed to
            `tf.train.shuffle_batch_join`.
        num_epochs: epoch limit forwarded to `create_examples`.

    Returns:
        Tuple (example_batch, product_batch, category_batch).
    """
    parsed_features = create_examples(filenames, num_epochs)
    per_thread_examples = []
    for _ in range(read_threads):
        per_thread_examples.append(read_my_file_format(parsed_features))

    # Shuffle-queue sizing: keep at least 2000 elements after a dequeue and
    # leave room for three more batches on top of that.
    shuffle_floor = 2000
    batches = tf.train.shuffle_batch_join(
        per_thread_examples,
        batch_size=batch_size,
        capacity=shuffle_floor + 3 * batch_size,
        min_after_dequeue=shuffle_floor)
    example_batch, product_batch, category_batch = batches
    return example_batch, product_batch, category_batch

In [6]:
class MiniDataSet(object):
    """slim-based reader for ZLIB-compressed TFRecord files.

    Stores the file pattern, category CSV path and queue settings, and builds
    a `slim.dataset.Dataset` plus `DatasetDataProvider` on demand in
    `create_dataset`.
    """

    def __init__(self, file_path_pattern, category_level_csv, num_examples, num_classes, min_after_dequeue=1000, batch_size = BATCH_SIZE, num_epochs = NUM_EPOCHES, num_reader = INPUT_THREADS):
        super(MiniDataSet, self).__init__()
        # Where the data lives.
        self._file_path_pattern = file_path_pattern
        self._category_level_csv = category_level_csv
        # Size of the data set.
        self._num_examples = num_examples
        self._num_classes = num_classes
        # Reader / queue settings.
        self._min_after_dequeue = min_after_dequeue
        self._batch_size = batch_size
        self._num_epochs = num_epochs
        self._num_reader = num_reader

    def get_category_description_from_csv(self, level = 0):
        """Map each category id (first CSV column) to one of its level columns.

        `level` indexes into the columns that follow the id column, so 0
        selects the second column of the file.
        """
        table = pd.read_csv(self._category_level_csv).values
        return {row[0]: row[1:][level] for row in table}

    def create_dataset(self):
        """Build the dataset and provider; return the provider's ['image', 'label'] tensors."""
        zlib_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)

        feature_spec = {
            'img_raw': tf.FixedLenFeature([], tf.string, default_value=''),
            'product_id': tf.FixedLenFeature([], tf.int64, default_value=tf.zeros([], dtype=tf.int64)),
            # 'format' is not stored in our TFRecords, so the default is always used
            'format': tf.FixedLenFeature([], tf.string, default_value='jpg'),
            'category_id': tf.FixedLenFeature([], tf.int64, default_value=tf.zeros([], dtype=tf.int64))
        }

        handlers = {
            # the image handler decodes 'img_raw' using the 'format' feature
            'image': slim.tfexample_decoder.Image(image_key='img_raw', format_key='format'),
            'label': slim.tfexample_decoder.Tensor('category_id'),
        }

        example_decoder = slim.tfexample_decoder.TFExampleDecoder(feature_spec, handlers)

        # Loaded but currently not passed on: the Dataset below is built
        # without num_classes / labels_to_name.
        labels_to_name_dict = self.get_category_description_from_csv()

        self._dataset = slim.dataset.Dataset(
            data_sources = self._file_path_pattern,
            decoder = example_decoder,
            # zero-argument factory returning a fresh ZLIB-aware reader
            reader = lambda : tf.TFRecordReader(options=zlib_options),
            num_samples = self._num_examples,
            items_to_descriptions = None)

        # The provider shuffles examples itself (shuffle = True), so no extra
        # shuffling is done here.
        queue_floor = self._min_after_dequeue
        self._data_provider = slim.dataset_data_provider.DatasetDataProvider(
            self._dataset,
            num_readers = self._num_reader,
            shuffle = True, # default is True
            num_epochs = self._num_epochs,
            common_queue_capacity = queue_floor + 3 * self._batch_size,
            common_queue_min = queue_floor)

        return self._data_provider.get(['image', 'label'])
        

In [7]:
def preprocess_for_inception(input_image, is_training = True):
    """Run `inception_preprocessing.preprocess_image` at a 299x299 target size.

    `is_training` is forwarded unchanged to the preprocessing function.
    """
    # inception_v3.default_image_size = 299
    target_side = 299
    return inception_preprocessing.preprocess_image(input_image, target_side, target_side, is_training)

In [8]:
def cvt_csv2tfrecord():
    """Map every category id in CATEGORY_NAME_PATH to its zero-based row number.

    The id is taken from the first CSV column; the remaining columns are
    ignored. If an id occurs more than once, the number of its last row wins.
    """
    rows = pd.read_csv(CATEGORY_NAME_PATH).values
    return {row[0]: row_number for row_number, row in enumerate(rows)}

In [9]:
def one_hot_process(org_label, map_table, num_classes):
    """One-hot encode a label after translating it through `map_table`.

    The label is converted to a string, looked up in `map_table`, and the
    looked-up value is expanded to a one-hot vector of depth `num_classes`
    along the last axis.
    """
    label_text = tf.as_string(org_label)
    class_index = map_table.lookup(label_text)
    return tf.one_hot(class_index, num_classes, axis=-1)

In [10]:
with tf.Graph().as_default() as graph:
    def train_step(input_examples, one_hot_labels):
        """Build the Inception-v3 training graph for one batch.

        Args:
            input_examples: batch of preprocessed images fed to the network.
            one_hot_labels: one-hot labels matching the NUM_CLASS logits.

        Returns:
            Tuple (train_op, global_step, metrics_op, variables_to_restore,
            predictions, my_summary_op, lr, accuracy, total_loss).
        """
        with slim.arg_scope(inception.inception_v3_arg_scope()):
            # logits are the pre-softmax activations
            logits, end_points = inception.inception_v3(
                input_examples,
                num_classes = NUM_CLASS,
                is_training = True)
        # The classifier heads are retrained for a different number of classes,
        # so they are excluded from the restore list. No other variables may be
        # defined before this call, or they would be listed for restoring too.
        variables_to_restore = slim.get_variables_to_restore(exclude = ['InceptionV3/Logits', 'InceptionV3/AuxLogits'])
        # Softmax cross entropy on one-hot labels (a wrapper around
        # tf.nn.softmax_cross_entropy_with_logits with extras such as label
        # smoothing). The returned tensor is not needed by name: the loss is
        # picked up by get_total_loss() below.
        tf.losses.softmax_cross_entropy(onehot_labels = one_hot_labels, logits = logits)
        total_loss = tf.losses.get_total_loss()    # obtain the regularization losses as well

        # Create the global step up front so it can be fed into
        # exponential_decay; the Supervisor will find and reuse it.
        global_step = get_or_create_global_step(graph = graph)

        # Exponentially decaying learning rate (staircase: drops every decay_steps).
        lr = tf.train.exponential_decay(
            learning_rate = initial_learning_rate,
            global_step = global_step,
            decay_steps = decay_steps,
            decay_rate = learning_rate_decay_factor,
            staircase = True)

        # Optimizer driven by the decaying learning rate.
        optimizer = tf.train.AdamOptimizer(learning_rate = lr)

        # Create the train_op.
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        # Metrics: predictions are class indices (not one-hot encoded).
        predictions = tf.argmax(end_points['Predictions'], 1)
        probabilities = end_points['Predictions']
        accuracy, accuracy_update = tf.contrib.metrics.streaming_accuracy(predictions, tf.argmax(one_hot_labels, 1))
        metrics_op = tf.group(accuracy_update)

        # Summaries to monitor, merged into one summary op.
        tf.summary.scalar('losses/Total_Loss', total_loss)
        tf.summary.scalar('accuracy', accuracy)
        tf.summary.scalar('learning_rate', lr)
        my_summary_op = tf.summary.merge_all()

        return train_op, global_step, metrics_op, variables_to_restore, predictions, my_summary_op, lr, accuracy, total_loss

    # String lookup table: category id (as text) -> class index. The index is
    # the position of the id in the dict's key order.
    # NOTE(review): dict key order equals CSV row order only where dicts keep
    # insertion order (CPython 3.6+, guaranteed from Python 3.7); unknown ids
    # silently map to class 0 because of default_value=0.
    mapping_strings = tf.constant( [ str(key) for key in cvt_csv2tfrecord().keys() ] )
    mapping_table = tf.contrib.lookup.index_table_from_tensor(mapping=mapping_strings, default_value=0)

    # Inputs come straight from the queue-based pipeline; no placeholders or
    # feed_dict are needed.
    dataset = MiniDataSet(TRAIN_PATH + "output_file*.tfrecords", CATEGORY_NAME_PATH, TOTAL_EXAMPLES, NUM_CLASS)
    org_image, org_label = dataset.create_dataset()
    image = preprocess_for_inception(org_image) # final image to train

    label = one_hot_process(org_label, mapping_table, NUM_CLASS) # final label for training
    # no need for shuffle, DatasetDataProvider does this for us
    batch_images, batch_labels = tf.train.batch([image, label], BATCH_SIZE,
                                        num_threads = INPUT_THREADS,
                                        capacity = 1000 + 3 * BATCH_SIZE,
                                        allow_smaller_final_batch = True)

    # Device placement is decided here, while the ops are being created.
    with tf.device('/gpu:0'):
        train_op, global_step, metrics_op, variables_to_restore, pred_op, summary_op, lr, accuracy, total_loss = train_step(batch_images, batch_labels)

    # Create a saver that restores only the pre-trained variables.
    pre_train_saver = tf.train.Saver(variables_to_restore)

    def load_pretrain(sess):
        """Init function for the Supervisor: load the pre-trained checkpoint.

        `sess` is the managed session passed in by the Supervisor.
        """
        pre_train_saver.restore(sess, PRETRAINED_MODEL_PATH)

    # Only global variables are initialised here; local variables and lookup
    # tables are left to the Supervisor's default local_init_op.
    init_op = tf.group(tf.global_variables_initializer())
    # Supervisor configuration:
    # - init_fn is called after init_op has run; per the Supervisor docs both
    #   are only used when the model cannot be recovered from a checkpoint
    #   in logdir
    # - the default tf.train.Saver handles ordinary save and restore
    # - a checkpoint is written every 5400 s (1.5 hours)
    # - summary_op = None: summaries are computed in this process by the loop below
    # - global_step is not passed explicitly; the Supervisor looks it up
    sv = tf.train.Supervisor(logdir=LOG_PATH, init_fn = load_pretrain, init_op = init_op, summary_op = None, save_model_secs=5400, checkpoint_basename='inception_v3_model.ckpt')

    # Most recently logged values (refreshed every 10th step in the loop).
    final_loss = 0.
    final_accuracy = 0.
    training_state = True
    session_config = tf.ConfigProto(log_device_placement=True, allow_soft_placement=True)
    with sv.managed_session(config=session_config) as sess:
        # Here sess was either initialized from the pre-trained checkpoint or
        # recovered from a checkpoint saved in a previous run of this code.
        for step in range(int(num_steps_per_epoch * NUM_EPOCHES)):
            if sv.should_stop():
                tf_logging.info('Supervisor emit finished!')
                # Log the last fetched values. The previous code referenced an
                # undefined name `loss` (NameError) and the `accuracy` tensor
                # object rather than its value.
                tf_logging.info('Current Loss: %s', final_loss)
                tf_logging.info('Current Accuracy: %s', final_accuracy)
                tf_logging.info('Saving current model to disk(maybe invalid).')
                training_state = False
                break

            start_time = time.time()
            # No tf.device() wrapper around sess.run: placement was fixed when
            # the graph was built, and sess.run creates no new ops.
            if step % 1000 == 0:
                # Every 1000th step also evaluates the merged summaries and
                # hands them to the Supervisor.
                _, _, _, summ = sess.run([train_op, global_step, metrics_op, summary_op])
                sv.summary_computed(sess, summ)
            else:
                _, total_step, _, cur_loss, cur_acc, cur_lr = sess.run([train_op, global_step, metrics_op, total_loss, accuracy, lr])
                time_elapsed = time.time() - start_time
                if step % 10 == 0:
                    final_loss = cur_loss
                    final_accuracy = cur_acc
                    tf_logging.info('Current Speed: {:f}sec/batch'.format(time_elapsed))
                    tf_logging.info('Current Streaming Accuracy: {}'.format(cur_acc))
                    tf_logging.info('Current Loss: {}'.format(cur_loss))
                    tf_logging.info('Epoch %s/%s, Global Step: %s', int(step / num_steps_per_epoch + 1), NUM_EPOCHES, total_step)
                    tf_logging.info('Current Learning Rate: {}'.format(cur_lr))
        if training_state:
            # We log the final training loss and accuracy
            tf_logging.info('Final Loss: %s', final_loss)
            tf_logging.info('Final Accuracy: %s', final_accuracy)
            # Once all the training has been done, save the log files and checkpoint model
            tf_logging.info('Finished training! Model saved.')
        # Saved in both cases: normal completion and early stop.
        sv.saver.save(sess, sv.save_path, global_step = sv.global_step)
    

INFO:tensorflow:Restoring parameters from /media/rs/0E06CD1706CD0127/Kapok/kaggle/logs222/inception_v3_model.ckpt-0
INFO:tensorflow:Starting standard services.
INFO:tensorflow:Saving checkpoint to path /media/rs/0E06CD1706CD0127/Kapok/kaggle/logs222/inception_v3_model.ckpt
INFO:tensorflow:Starting queue runners.
INFO:tensorflow:global_step/sec: 0
INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.ResourceExhaustedError'>, OOM when allocating tensor with shape[128,17,17,192]
	 [[Node: InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/convolution = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](InceptionV3/InceptionV3/Mixed_6d/concat, InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/weights/read)]]
	 [[Node: total_loss/_7079 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replic

ResourceExhaustedError: OOM when allocating tensor with shape[128,17,17,192]
	 [[Node: InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/convolution = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](InceptionV3/InceptionV3/Mixed_6d/concat, InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/weights/read)]]
	 [[Node: total_loss/_7079 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_29249_total_loss", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]

Caused by op 'InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/convolution', defined at:
  File "/home/rs/.pyenv/versions/3.5.2/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/home/rs/.pyenv/versions/3.5.2/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/ipykernel/kernelapp.py", line 477, in start
    ioloop.IOLoop.instance().start()
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/zmq/eventloop/ioloop.py", line 177, in start
    super(ZMQIOLoop, self).start()
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tornado/ioloop.py", line 888, in start
    handler_func(fd_obj, events)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 440, in _handle_events
    self._handle_recv()
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 472, in _handle_recv
    self._run_callback(callback, msg)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/zmq/eventloop/zmqstream.py", line 414, in _run_callback
    callback(*args, **kwargs)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tornado/stack_context.py", line 277, in null_wrapper
    return fn(*args, **kwargs)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 235, in dispatch_shell
    handler(stream, idents, msg)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/ipykernel/ipkernel.py", line 196, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/ipykernel/zmqshell.py", line 533, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2698, in run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2802, in run_ast_nodes
    if self.run_code(code, result):
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/IPython/core/interactiveshell.py", line 2862, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-bcacf7fc7ba8>", line 72, in <module>
    train_op, global_step, metrics_op, variables_to_restore, pred_op, summary_op, lr, accuracy, total_loss = train_step(batch_images, batch_labels)
  File "<ipython-input-10-bcacf7fc7ba8>", line 9, in train_step
    is_training = True)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/contrib/slim/python/slim/nets/inception_v3.py", line 576, in inception_v3
    depth_multiplier=depth_multiplier)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/contrib/slim/python/slim/nets/inception_v3.py", line 372, in inception_v3_base
    net, depth(192), [1, 1], scope='Conv2d_0a_1x1')
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/contrib/framework/python/ops/arg_scope.py", line 181, in func_with_args
    return func(*args, **current_args)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/contrib/layers/python/layers/layers.py", line 1027, in convolution
    outputs = layer.apply(inputs)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 503, in apply
    return self.__call__(inputs, *args, **kwargs)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/layers/base.py", line 450, in __call__
    outputs = self.call(inputs, *args, **kwargs)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/layers/convolutional.py", line 158, in call
    data_format=utils.convert_data_format(self.data_format, self.rank + 2))
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/ops/nn_ops.py", line 672, in convolution
    op=op)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/ops/nn_ops.py", line 338, in with_space_to_batch
    return op(input, num_spatial_dims, padding)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/ops/nn_ops.py", line 664, in op
    name=name)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/ops/nn_ops.py", line 131, in _non_atrous_convolution
    name=name)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/ops/gen_nn_ops.py", line 397, in conv2d
    data_format=data_format, name=name)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/framework/op_def_library.py", line 767, in apply_op
    op_def=op_def)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 2630, in create_op
    original_op=self._default_original_op, op_def=op_def)
  File "/home/kapok/pyenv35/lib/python3.5/site-packages/tensorflow/python/framework/ops.py", line 1204, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

ResourceExhaustedError (see above for traceback): OOM when allocating tensor with shape[128,17,17,192]
	 [[Node: InceptionV3/InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/convolution = Conv2D[T=DT_FLOAT, data_format="NHWC", padding="SAME", strides=[1, 1, 1, 1], use_cudnn_on_gpu=true, _device="/job:localhost/replica:0/task:0/gpu:0"](InceptionV3/InceptionV3/Mixed_6d/concat, InceptionV3/Mixed_6e/Branch_0/Conv2d_0a_1x1/weights/read)]]
	 [[Node: total_loss/_7079 = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/cpu:0", send_device="/job:localhost/replica:0/task:0/gpu:0", send_device_incarnation=1, tensor_name="edge_29249_total_loss", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/cpu:0"]()]]


In [None]:
#mapping_strings = tf.constant( [ str(key) for key in cvt_csv2tfrecord().keys() ] )
#mapping_table = tf.contrib.lookup.index_table_from_tensor(mapping=mapping_strings, default_value=0)

# with tf.device('/cpu:0'):
#     example_train, product_train, category_train = input_pipeline(TRAIN_PATH + "output_file*.tfrecords", BATCH_SIZE, 4)
#     example_test, product_test, category_test = input_pipeline(TEST_PATH + "output_file*.tfrecords", BATCH_SIZE, 4)

# with tf.device('/gpu:0'):
#     train_op, global_step, metrics_op, probabilities = buildClassificationNetwork(X, ids_Y)

In [None]:
# # Create the graph, etc.

# # Create a session for running operations in the Graph.
# sess = tf.Session()
# # Initialize the variables (like the epoch counter).
# sess.run(init_op)
# # initialize local variables, like local counter epochs
   
    
# # Start input enqueue threads.
# coord = tf.train.Coordinator()
# threads = tf.train.start_queue_runners(sess=sess, coord=coord)
# try:
#     while not coord.should_stop():
#         #Check the time for each sess run
#         start_time = time.time()
#         total_loss, global_step_count, cur_acc, _, _ = sess.run([train_op, global_step, metrics_op, probabilities], feed_dict={
#             X: example_train,
#             Y: category_train
#         })
#         time_elapsed = time.time() - start_time

#         #Run the logging to print some results
#         logging.info('global step %s: loss: %.4f (%.2f sec/step)', global_step_count, total_loss, time_elapsed)
        
# except tf.errors.OutOfRangeError:
#     print('Done training -- epoch limit reached')
# finally:
#     # When done, ask the threads to stop.
#     coord.request_stop()

# # Wait for threads to finish.
# coord.join(threads)
# sess.close()

In [None]:
# # create a partition vector
# partitions = [0] * len(all_filepaths)
# partitions[:test_set_size] = [1] * test_set_size
# random.shuffle(partitions)

# # partition our data into a test and train set according to our partition vector
# train_images, test_images = tf.dynamic_partition(all_images, partitions, 2)
# train_labels, test_labels = tf.dynamic_partition(all_labels, partitions, 2)

# # create input queues
# train_input_queue = tf.train.slice_input_producer(
#                                     [train_images, train_labels],
#                                     shuffle=False)
# test_input_queue = tf.train.slice_input_producer(
#                                     [test_images, test_labels],
#                                     shuffle=False)