In [1]:
"""Train the model"""

import argparse
import logging
import os
import random

import tensorflow as tf

from read_tfrecords import read_tfrecords
from model.input_fn import input_fn
from model.utils import Params
from model.utils import set_logger
from model.utils import save_dict_to_json
from model.model_fn import model_fn
from model.training import train_and_evaluate

In [2]:
model_dir = 'experiments/base_model/'
data_dir_1 = 'data/SIGNS_TFRecord/'
data_dir_2 = 'data/SIGNS_TFRecord_v2/'

In [3]:
# Set the random seed for the whole graph for being able to reproduce the experiments
tf.set_random_seed(230)

# Load the parameters from json file

json_path = os.path.join(model_dir, 'params.json')
assert os.path.isfile(json_path), "No json configuration file found at {}".format(json_path)
params = Params(json_path)

# Check that we are not overwriting some previous experiment
# Comment these lines if you are developing your model and don't care about overwritting
# model_dir_has_best_weights = os.path.isdir(os.path.join(args.model_dir, "best_weights"))
# overwritting = model_dir_has_best_weights and args.restore_from is None
# assert not overwritting, "Weights found in model_dir, aborting to avoid overwrite"

# Set the logger
set_logger(os.path.join(model_dir, 'train.log'))

In [13]:
# Read the number of examples of each dataset (This could be optimized by storing the number in an external file
# when creating the TFRecords)
params.train_size =  len([x for x in tf.python_io.tf_record_iterator(os.path.join(data_dir, 'train.tfrecords'))])
logging.info('Train size: {}'.format(params.train_size))

Train size: 720


In [4]:
# Create the input data pipeline
logging.info("Reading the TFRecords training file...")
train_inputs = read_tfrecords(data_path=os.path.join(data_dir, 'train.tfrecords'),
                              batch_size=params.batch_size,
                              num_threads=params.num_parallel_calls,
                              img_size=64, num_channels=3)

Reading the TFRecords training file...


Creating a queue to hold filenames in a FIFO basis...
Defining the graph for extracting the data


In [9]:
train_inputs

{'images': <tf.Tensor 'shuffle_batch:0' shape=(10, 64, 64, 3) dtype=float32>,
 'labels': <tf.Tensor 'shuffle_batch:1' shape=(10,) dtype=int32>,
 'init_op_global': <tf.Operation 'init' type=NoOp>,
 'init_op_local': <tf.Operation 'init_1' type=NoOp>}

In [14]:
720*64*64*3

8847360

In [8]:
with tf.Session() as sess:
    
    # Initialize all global and local variables
    sess.run(train_inputs['init_op_global'])
    sess.run(train_inputs['init_op_local'])
    
    # Create a coordinator and run all QueueRunner objects
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    
    img, lbl = sess.run([train_inputs['images'], train_inputs['labels']])
    
    # Stop the threads
    coord.request_stop()
    
    # Wait for threads to stop
    coord.join(threads)

INFO:tensorflow:Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.InvalidArgumentError'>, Input to reshape is a tensor with 150528 values, but the requested shape has 12288
	 [[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](DecodeRaw, Reshape/shape)]]


Error reported to Coordinator: <class 'tensorflow.python.framework.errors_impl.InvalidArgumentError'>, Input to reshape is a tensor with 150528 values, but the requested shape has 12288
	 [[Node: Reshape = Reshape[T=DT_FLOAT, Tshape=DT_INT32, _device="/job:localhost/replica:0/task:0/device:CPU:0"](DecodeRaw, Reshape/shape)]]


OutOfRangeError: RandomShuffleQueue '_1_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 10, current size 0)
	 [[Node: shuffle_batch = QueueDequeueManyV2[component_types=[DT_FLOAT, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]

Caused by op 'shuffle_batch', defined at:
  File "/Users/cesc/anaconda3/lib/python3.6/runpy.py", line 193, in _run_module_as_main
    "__main__", mod_spec)
  File "/Users/cesc/anaconda3/lib/python3.6/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/ipykernel/kernelapp.py", line 497, in start
    self.io_loop.start()
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tornado/platform/asyncio.py", line 132, in start
    self.asyncio_loop.run_forever()
  File "/Users/cesc/anaconda3/lib/python3.6/asyncio/base_events.py", line 422, in run_forever
    self._run_once()
  File "/Users/cesc/anaconda3/lib/python3.6/asyncio/base_events.py", line 1434, in _run_once
    handle._run()
  File "/Users/cesc/anaconda3/lib/python3.6/asyncio/events.py", line 145, in _run
    self._callback(*self._args)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tornado/ioloop.py", line 758, in _run_callback
    ret = callback()
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 536, in <lambda>
    self.io_loop.add_callback(lambda : self._handle_events(self.socket, 0))
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 450, in _handle_events
    self._handle_recv()
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 480, in _handle_recv
    self._run_callback(callback, msg)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/zmq/eventloop/zmqstream.py", line 432, in _run_callback
    callback(*args, **kwargs)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tornado/stack_context.py", line 300, in null_wrapper
    return fn(*args, **kwargs)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 283, in dispatcher
    return self.dispatch_shell(stream, msg)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 233, in dispatch_shell
    handler(stream, idents, msg)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/ipykernel/kernelbase.py", line 399, in execute_request
    user_expressions, allow_stdin)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/ipykernel/ipkernel.py", line 208, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/ipykernel/zmqshell.py", line 537, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2662, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2785, in _run_cell
    interactivity=interactivity, compiler=compiler, result=result)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2901, in run_ast_nodes
    if self.run_code(code, result):
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-4-22f75d5fc1f2>", line 6, in <module>
    img_size=64, num_channels=3)
  File "/Users/cesc/Documents/UPC MIRI-FIB/TFM/transfer-learning/SIGNS_tutorial/SIGNS_v2_TFRecord/read_tfrecords.py", line 57, in read_tfrecords
    capacity=30, num_threads=num_threads, min_after_dequeue=10)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/input.py", line 1300, in shuffle_batch
    name=name)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/input.py", line 846, in _shuffle_batch
    dequeued = queue.dequeue_many(batch_size, name=name)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/data_flow_ops.py", line 483, in dequeue_many
    self._queue_ref, n=n, component_types=self._dtypes, name=name)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_data_flow_ops.py", line 3480, in queue_dequeue_many_v2
    component_types=component_types, timeout_ms=timeout_ms, name=name)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 787, in _apply_op_helper
    op_def=op_def)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3414, in create_op
    op_def=op_def)
  File "/Users/cesc/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1740, in __init__
    self._traceback = self._graph._extract_stack()  # pylint: disable=protected-access

OutOfRangeError (see above for traceback): RandomShuffleQueue '_1_shuffle_batch/random_shuffle_queue' is closed and has insufficient elements (requested 10, current size 0)
	 [[Node: shuffle_batch = QueueDequeueManyV2[component_types=[DT_FLOAT, DT_INT32], timeout_ms=-1, _device="/job:localhost/replica:0/task:0/device:CPU:0"](shuffle_batch/random_shuffle_queue, shuffle_batch/n)]]


In [None]:
with tf.Session() as sess:
    print(sess.run(train_inputs))