We have trained a model; now we want to evaluate the results.

See eval.py

In [1]:
# Copyright 2016 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS-IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Binary for evaluating Tensorflow models on the YouTube-8M dataset."""

import time

# import modules from local files
# modules in `vggish` folder
import sys
sys.path.append('./vggish')

# import some adapted custom serval modules
sys.path.append('./custom_serval_code')

# we skip this one from vggish
#import eval_util
# and use this one
import eval_util_serval as eval_util
import export_model
import losses
import frame_level_models
import video_level_models
import readers

# from here we write the evaluation result needed in step 3
import utils_serval as utils


The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
# load tensorflow stuff
import tensorflow as tf
from tensorflow import app
from tensorflow import flags
from tensorflow import gfile
from tensorflow import logging

In [3]:
import os
# Notebook configuration: working directory, data locations and output names.
# The values below are fed into the flag definitions and the evaluation code
# further down in this notebook.

# Inside the Docker container, switch to the project directory so that the
# relative "./..." paths below resolve against it.
os.chdir("/tf/serval/serval")

# parameters
# evaluation data directory (glob pattern matching the .tfrecord files)
edp = "./dataset/eval_hugo_new/*.tfrecord"
 
# temp model directory (checkpoints are loaded from here); the evaluation
# log is written to its "eval_log" subdirectory
tmd = "./tmp/serval03hugo"
tmdl = tmd + "/eval_log"

# number of outcome classes (see labels csv)
# (translated from Dutch) for some reason this has to be > 20
# NOTE(review): the value is exactly 20, which is not strictly greater than
# 20 -- confirm whether ">= 20" was meant.
num_classes = 20

# class_map: CSV file with the class label indices
class_map = "./dataset/classlabels/csv_files/class_labels_indices_hugo.csv"

# needed in custom utils.py: file name passed as `csv_file` when the epoch
# summary is written
evaluation_results_csv = "eval-serval-hugo.csv"

In [4]:
# tensorflow FLAGS
# most stuff we need to change is in here
#
# Every flag default is taken from the configuration cell above, so the
# notebook can be tuned there without touching this cell.
# NOTE(review): flag definitions are registered globally; re-running this cell
# in the same kernel presumably fails with a duplicate-flag error -- confirm,
# and restart the kernel before re-running if so.

FLAGS = flags.FLAGS

if __name__ == "__main__":
  # Dataset flags.
  flags.DEFINE_string("train_dir", tmd,
                      "The directory to load the model files from. "
                      "The tensorboard metrics files are also saved to this "
                      "directory.")
  # A "log_dir" flag apparently already exists at this point (presumably
  # registered by a library imported above -- TODO confirm which one), so it
  # is removed first and then redefined with our own default.
  delattr(flags.FLAGS, "log_dir")
  flags.DEFINE_string("log_dir", tmdl,"The directory to write eval log to. ")
  
  # eval
  # eval_new_labels
  # NOTE(review): the help text below mentions an 'rgb' feature, while the
  # default of --feature_names is "audio_embedding"; the text looks inherited.
  flags.DEFINE_string("eval_data_pattern", edp,
                      "File glob defining the evaluation dataset in tensorflow.SequenceExample "
                      "format. The SequenceExamples are expected to have an 'rgb' byte array "
                      "sequence feature as well as a 'labels' int64 context feature.")
  flags.DEFINE_string("feature_names", "audio_embedding", "Name of the feature "
                      "to use for training.")
  flags.DEFINE_string("feature_sizes", "128", "Length of the feature vectors.")
  flags.DEFINE_string("class_map", class_map , "location of class_label_indices.csv.")
  flags.DEFINE_integer("num_classes", num_classes, "Number of classes in dataset.") # 527 / 31: presumably class counts used earlier -- TODO confirm

  # Model flags.
  flags.DEFINE_bool("frame_features", True,
                      "If set, then --eval_data_pattern must be frame-level features. "
                      "Otherwise, --eval_data_pattern must be aggregated video-level "
                      "features. The model must also be set appropriately (i.e. to read 3D "
                      "batches VS 4D batches.")
  flags.DEFINE_string("model", "FrameLevelLogisticModel",
                      "Which architecture to use for the model. Options include 'Logistic', "
                      "'SingleMixtureMoe', and 'TwoLayerSigmoid'. See aggregated_models.py and "
                      "frame_level_models.py for the model definitions.")
  flags.DEFINE_integer("batch_size", 512,
                       "How many examples to process per batch.")
  flags.DEFINE_string("label_loss", "CrossEntropyLoss",
                      "Loss computed on validation data")

  # Other flags.
  flags.DEFINE_integer("num_readers", 8,
                       "How many threads to use for reading input files.")
  flags.DEFINE_boolean("run_once", True, "Whether to run eval only once.")
  flags.DEFINE_integer("top_k", 10, "How many predictions to output per video.")

  # jupyter fix: a dummy "f" flag, presumably so that the "-f <file>" argument
  # the Jupyter kernel is started with does not break flag parsing -- confirm.
  flags.DEFINE_string('f', '', 'kernel')

In [5]:
# HK: load the ground-truth labels from the class map file named by the
# --class_map flag.
ground_thruth = eval_util.get_labels(FLAGS.class_map)

# One entry per key, in the iteration order of `ground_thruth`
# (assumes get_labels returns a mapping-like object -- TODO confirm).
gt_labels = list(map(ground_thruth.__getitem__, ground_thruth))


In [6]:
# functions we use

def find_class_by_name(name, modules):
  """Returns the first truthy attribute called `name` among `modules`.

  Args:
    name: Attribute name to look up.
    modules: Iterable of objects (normally modules) searched in order.

  Returns:
    The first truthy value of `getattr(module, name, None)`.

  Raises:
    StopIteration: If no module provides a truthy attribute `name`.
  """
  # Resolve the attribute on every module up front, then pick the first hit.
  candidates = list(map(lambda module: getattr(module, name, None), modules))
  return next(filter(None, candidates))


def get_input_evaluation_tensors(reader,
                                 data_pattern,
                                 batch_size=1024,
                                 num_readers=1):
  """Builds the input part of the evaluation graph.

  All files matching `data_pattern` are read exactly once, in order (no
  shuffling), by `num_readers` reader ops and joined into batches. The final
  batch may be smaller than `batch_size`.

  Args:
    reader: Object whose `prepare_reader(filename_queue)` yields the parsed
      example tensors.
    data_pattern: A 'glob' style path to the data files.
    batch_size: How many examples to process at a time.
    num_readers: How many reader ops to create on the filename queue.

  Returns:
    The result of `tf.train.batch_join` over the reader outputs; the exact
    tensors and their dimensions depend on the reader being used.

  Raises:
    IOError: If no files matching the given pattern were found.
  """
  logging.info("Using batch size of " + str(batch_size) + " for evaluation.")
  with tf.name_scope("eval_input"):
    matched_files = gfile.Glob(data_pattern)
    if not matched_files:
      raise IOError("Unable to find the evaluation files.")
    logging.info("number of evaluation files: " + str(len(matched_files)))

    # A single pass over the files, in their listed order.
    filename_queue = tf.train.string_input_producer(
        matched_files, shuffle=False, num_epochs=1)

    per_reader_outputs = []
    for _ in range(num_readers):
      per_reader_outputs.append(reader.prepare_reader(filename_queue))

    batching_options = dict(
        batch_size=batch_size,
        capacity=3 * batch_size,
        allow_smaller_final_batch=True,
        enqueue_many=True)
    return tf.train.batch_join(per_reader_outputs, **batching_options)


def build_graph(reader,
                model,
                eval_data_pattern,
                label_loss_fn,
                batch_size=1024,
                num_readers=1):
  """Assembles the evaluation graph and publishes its tensors as collections.

  Nothing is returned. The tensors are stored in the graph collections
  "global_step", "loss", "predictions", "input_batch", "video_id_batch",
  "num_frames", "labels" (cast to float32) and "summary_op".

  Args:
    reader: The data file reader. It should inherit from BaseReader.
    model: The core model (e.g. logistic or neural net). It should inherit
           from BaseModel.
    eval_data_pattern: glob path to the evaluation data files.
    label_loss_fn: What kind of loss to apply to the model. It should inherit
                from BaseLoss. Only used when the model result carries no
                "loss" entry of its own.
    batch_size: How many examples to process at a time.
    num_readers: How many threads to use for I/O operations.
  """
  global_step = tf.Variable(0, trainable=False, name="global_step")

  input_tensors = get_input_evaluation_tensors(
      reader,
      eval_data_pattern,
      batch_size=batch_size,
      num_readers=num_readers)
  video_id_batch, model_input_raw, labels_batch, num_frames = input_tensors
  tf.summary.histogram("model_input_raw", model_input_raw)

  # L2-normalise the input features along their last axis.
  last_axis = len(model_input_raw.get_shape()) - 1
  model_input = tf.nn.l2_normalize(model_input_raw, last_axis)

  with tf.variable_scope("tower"):
    result = model.create_model(model_input,
                                num_frames=num_frames,
                                vocab_size=reader.num_classes,
                                labels=labels_batch,
                                is_training=False)
    predictions = result["predictions"]
    tf.summary.histogram("model_activations", predictions)
    # Prefer a loss supplied by the model; otherwise compute it here.
    label_loss = (result["loss"] if "loss" in result.keys() else
                  label_loss_fn.calculate_loss(predictions, labels_batch))

  labels_as_float = tf.cast(labels_batch, tf.float32)
  merged_summaries = tf.summary.merge_all()
  published = (
      ("global_step", global_step),
      ("loss", label_loss),
      ("predictions", predictions),
      ("input_batch", model_input),
      ("video_id_batch", video_id_batch),
      ("num_frames", num_frames),
      ("labels", labels_as_float),
      ("summary_op", merged_summaries),
  )
  for collection_name, tensor in published:
    tf.add_to_collection(collection_name, tensor)


def evaluation_loop(video_id_batch, prediction_batch, label_batch, loss,
                    summary_op, saver, summary_writer, evl_metrics,
                    last_global_step_val):
  """Run the evaluation loop once.

  Restores the latest checkpoint from FLAGS.train_dir, runs the fetches batch
  by batch until the input queue is exhausted, accumulates the metrics in
  `evl_metrics` and finally writes the epoch summary and stores the
  predictions.

  Besides its arguments this function reads the module-level names
  `FLAGS.train_dir`, `gt_labels` and `evaluation_results_csv`.

  Args:
    video_id_batch: a tensor of video ids mini-batch.
    prediction_batch: a tensor of predictions mini-batch.
    label_batch: a tensor of label_batch mini-batch.
    loss: a tensor of loss for the examples in the mini-batch.
    summary_op: a tensor which runs the tensorboard summary operations.
    saver: a tensorflow saver to restore the model.
    summary_writer: a tensorflow summary_writer
    evl_metrics: an EvaluationMetrics object.
    last_global_step_val: the global step used in the previous evaluation.

  Returns:
    The global_step used in the latest model. When a checkpoint was found
    this is the text after the last "-" of the checkpoint file name (a
    string); when no checkpoint exists it is the integer -1.
  """

  global_step_val = -1
  with tf.Session() as sess:
    latest_checkpoint = tf.train.latest_checkpoint(FLAGS.train_dir)
    if latest_checkpoint:
      logging.info("Loading checkpoint for eval: " + latest_checkpoint)
      # Restores from checkpoint
      saver.restore(sess, latest_checkpoint)
      # Assuming model_checkpoint_path looks something like:
      # /my-favorite-path/yt8m_train/model.ckpt-0, extract global_step from it.
      global_step_val = latest_checkpoint.split("/")[-1].split("-")[-1]
    else:
      logging.info("No checkpoint file found.")
      return global_step_val

    if global_step_val == last_global_step_val:
      logging.info("skip this checkpoint global_step_val=%s "
                   "(same as the previous one).", global_step_val)
      return global_step_val

    sess.run([tf.local_variables_initializer()])

    fetches = [video_id_batch, prediction_batch, label_batch, loss, summary_op]
    coord = tf.train.Coordinator()
    # Both are initialised before the `try` so that the exception handler and
    # the `finally` clause can always refer to them.
    threads = []
    summary_val = None
    try:
      # Start the queue runners.
      for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
        threads.extend(qr.create_threads(
            sess, coord=coord, daemon=True,
            start=True))
      logging.info("enter eval_once loop global_step_val = %s. ",
                   global_step_val)

      # (translated from Dutch) Is this right? If the metrics were cleared
      # here, only the last batch would be kept afterwards -- so the call
      # stays disabled:
      # evl_metrics.clear()

      examples_processed = 0
      while not coord.should_stop():
        batch_start_time = time.time()
        _, predictions_val, labels_val, loss_val, summary_val = sess.run(
            fetches)
        seconds_per_batch = time.time() - batch_start_time
        example_per_second = labels_val.shape[0] / seconds_per_batch
        examples_processed += labels_val.shape[0]

        iteration_info_dict = evl_metrics.accumulate(predictions_val,
                                                     labels_val, loss_val, gt_labels)
        iteration_info_dict["examples_per_second"] = example_per_second

        iterinfo = utils.AddGlobalStepSummary(
            summary_writer,
            global_step_val,
            iteration_info_dict,
            summary_scope="Eval")
        logging.info("examples_processed: %d | %s", examples_processed,
                     iterinfo)

    except tf.errors.OutOfRangeError:
      # The input queue is exhausted: this is the normal way out of the loop.
      logging.info(
          "Done with batched inference. Now calculating global performance "
          "metrics.")
      # calculate the metrics for the entire epoch
      epoch_info_dict = evl_metrics.get()
      epoch_info_dict["epoch_id"] = global_step_val

      # `summary_val` is only set once a batch has been fetched; without the
      # guard an input that is exhausted on the very first run would end in a
      # NameError instead of a summary.
      if summary_val is not None:
        summary_writer.add_summary(summary_val, global_step_val)
      epochinfo = utils.AddEpochSummary(
          summary_writer,
          global_step_val,
          epoch_info_dict,
          gt_labels,
          summary_scope="Eval",
          csv_file=evaluation_results_csv) # this stores the aps per class
      logging.info(epochinfo)
      # hk
      # here we store predictions.csv and labels.csv
      evl_metrics.store()

      evl_metrics.clear()
    except Exception as e:  # pylint: disable=broad-except
      logging.info("Unexpected exception: " + str(e))
      # Hand the exception to the coordinator; it is re-raised by join() below.
      coord.request_stop(e)
    finally:
      # Always stop and join the queue-runner threads, also when writing the
      # epoch summary above fails.
      coord.request_stop()
      coord.join(threads, stop_grace_period_secs=10)

    return global_step_val


def evaluate():
  """Builds the evaluation graph from FLAGS and runs the evaluation loop.

  A fresh graph is created, the reader, model and loss are selected from the
  flag values, and `evaluation_loop` is called repeatedly until
  FLAGS.run_once is set (in which case it runs exactly once). The summary
  writer is closed when evaluation ends, whether normally or with an error.

  Raises:
    IOError: If FLAGS.eval_data_pattern is empty.
  """
  with tf.Graph().as_default():
    # The graph-level seed applies to the current default graph, so it has to
    # be set inside this context to affect the graph that is built here.
    tf.set_random_seed(0)  # for reproducibility

    # convert feature_names and feature_sizes to lists of values
    feature_names, feature_sizes = utils.GetListOfFeatureNamesAndSizes(
        FLAGS.feature_names, FLAGS.feature_sizes)
    num_classes = FLAGS.num_classes

    if FLAGS.frame_features:
      reader = readers.YT8MFrameFeatureReader(
          num_classes=num_classes,
          feature_names=feature_names,feature_sizes=feature_sizes)
    else:
      reader = readers.YT8MAggregatedFeatureReader(
          num_classes=num_classes,
          feature_names=feature_names, feature_sizes=feature_sizes)

    model = find_class_by_name(FLAGS.model,
        [frame_level_models, video_level_models])()
    label_loss_fn = find_class_by_name(FLAGS.label_loss, [losses])()

    # Compare by value, not identity: `is ""` depends on string interning.
    if not FLAGS.eval_data_pattern:
      raise IOError("'eval_data_pattern' was not specified. " +
                     "Nothing to evaluate.")

    build_graph(
        reader=reader,
        model=model,
        eval_data_pattern=FLAGS.eval_data_pattern,
        label_loss_fn=label_loss_fn,
        num_readers=FLAGS.num_readers,
        batch_size=FLAGS.batch_size)
    logging.info("built evaluation graph")
    # build_graph publishes its tensors through graph collections.
    video_id_batch = tf.get_collection("video_id_batch")[0]
    prediction_batch = tf.get_collection("predictions")[0]
    label_batch = tf.get_collection("labels")[0]
    loss = tf.get_collection("loss")[0]
    summary_op = tf.get_collection("summary_op")[0]

    saver = tf.train.Saver(tf.global_variables())
    summary_writer = tf.summary.FileWriter(
        FLAGS.log_dir, graph=tf.get_default_graph())

    try:
      evl_metrics = eval_util.EvaluationMetrics(reader.num_classes, FLAGS.top_k)

      last_global_step_val = -1
      while True:
        last_global_step_val = evaluation_loop(video_id_batch, prediction_batch,
                                               label_batch, loss, summary_op,
                                               saver, summary_writer, evl_metrics,
                                               last_global_step_val)
        if FLAGS.run_once:
          break
    finally:
      # Flush pending events and release the event file.
      summary_writer.close()


In [7]:
def main(unused_argv):
  """Sets INFO log verbosity, prints the TensorFlow version and evaluates.

  Args:
    unused_argv: Ignored.
  """
  logging.set_verbosity(tf.logging.INFO)
  version_banner = "tensorflow version: %s" % tf.__version__
  print(version_banner)
  evaluate()


# Run the evaluation when this cell is executed.
# NOTE(review): the recorded output of this cell ends with "SystemExit" after
# the epoch metrics are logged, so app.run() apparently exits the interpreter
# once it is done; in a notebook this shows up as a warning -- confirm that
# this is acceptable.
if __name__ == "__main__":
  app.run()


tensorflow version: 1.15.2
INFO:tensorflow:Using batch size of 512 for evaluation.


I0320 13:54:57.910221 140062221981504 <ipython-input-6-821193002e8b>:29] Using batch size of 512 for evaluation.


INFO:tensorflow:number of evaluation files: 1925


I0320 13:54:57.918305 140062221981504 <ipython-input-6-821193002e8b>:34] number of evaluation files: 1925


Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.


W0320 13:54:57.919097 140062221981504 deprecation.py:323] From <ipython-input-6-821193002e8b>:36: string_input_producer (from tensorflow.python.training.input) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(string_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.


Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.


W0320 13:54:57.925977 140062221981504 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/input.py:277: input_producer (from tensorflow.python.training.input) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensor_slices(input_tensor).shuffle(tf.shape(input_tensor, out_type=tf.int64)[0]).repeat(num_epochs)`. If `shuffle=False`, omit the `.shuffle(...)`.


Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.


W0320 13:54:57.926868 140062221981504 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/input.py:189: limit_epochs (from tensorflow.python.training.input) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.from_tensors(tensor).repeat(num_epochs)`.


Instructions for updating:
Prefer Dataset.range instead.


W0320 13:54:57.929265 140062221981504 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/input.py:112: RefVariable.count_up_to (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Prefer Dataset.range instead.


Instructions for updating:
Prefer Dataset.range instead.


W0320 13:54:57.929934 140062221981504 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/ops/variables.py:2522: count_up_to (from tensorflow.python.ops.state_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Prefer Dataset.range instead.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


W0320 13:54:57.932404 140062221981504 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/input.py:198: QueueRunner.__init__ (from tensorflow.python.training.queue_runner_impl) is deprecated and will be removed in a future version.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.


Instructions for updating:
To construct input pipelines, use the `tf.data` module.


W0320 13:54:57.933849 140062221981504 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/input.py:198: add_queue_runner (from tensorflow.python.training.queue_runner_impl) is deprecated and will be removed in a future version.
Instructions for updating:
To construct input pipelines, use the `tf.data` module.


Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TFRecordDataset`.


W0320 13:54:57.938345 140062221981504 deprecation.py:323] From ./vggish/readers.py:207: TFRecordReader.__init__ (from tensorflow.python.ops.io_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.TFRecordDataset`.





W0320 13:54:57.940247 140062221981504 module_wrapper.py:139] From ./vggish/readers.py:216: The name tf.parse_single_sequence_example is deprecated. Please use tf.io.parse_single_sequence_example instead.






W0320 13:54:57.941010 140062221981504 module_wrapper.py:139] From ./vggish/readers.py:218: The name tf.FixedLenFeature is deprecated. Please use tf.io.FixedLenFeature instead.






W0320 13:54:57.942551 140062221981504 module_wrapper.py:139] From ./vggish/readers.py:220: The name tf.VarLenFeature is deprecated. Please use tf.io.VarLenFeature instead.






W0320 13:54:57.943298 140062221981504 module_wrapper.py:139] From ./vggish/readers.py:223: The name tf.FixedLenSequenceFeature is deprecated. Please use tf.io.FixedLenSequenceFeature instead.



Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.


W0320 13:54:57.948358 140062221981504 deprecation.py:323] From ./vggish/readers.py:229: sparse_to_dense (from tensorflow.python.ops.sparse_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Create a `tf.sparse.SparseTensor` and use `tf.sparse.to_dense` instead.


Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.interleave(...).batch(batch_size)` (or `padded_batch(...)` if `dynamic_pad=True`).


W0320 13:54:58.077551 140062221981504 deprecation.py:323] From <ipython-input-6-821193002e8b>:45: batch_join (from tensorflow.python.training.input) is deprecated and will be removed in a future version.
Instructions for updating:
Queue-based input pipelines have been replaced by `tf.data`. Use `tf.data.Dataset.interleave(...).batch(batch_size)` (or `padded_batch(...)` if `dynamic_pad=True`).


Instructions for updating:
Please use `layer.__call__` method instead.


W0320 13:54:58.094066 140062221981504 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow_core/contrib/layers/python/layers/layers.py:1866: Layer.apply (from tensorflow.python.keras.engine.base_layer) is deprecated and will be removed in a future version.
Instructions for updating:
Please use `layer.__call__` method instead.





W0320 13:54:58.105929 140062221981504 module_wrapper.py:139] From ./vggish/losses.py:48: The name tf.log is deprecated. Please use tf.math.log instead.



INFO:tensorflow:built evaluation graph


I0320 13:54:58.112764 140062221981504 <ipython-input-6-821193002e8b>:252] built evaluation graph


INFO:tensorflow:Loading checkpoint for eval: ./tmp/serval03hugo/model.ckpt-620


I0320 13:54:58.268244 140062221981504 <ipython-input-6-821193002e8b>:127] Loading checkpoint for eval: ./tmp/serval03hugo/model.ckpt-620


INFO:tensorflow:Restoring parameters from ./tmp/serval03hugo/model.ckpt-620


I0320 13:54:58.270780 140062221981504 saver.py:1284] Restoring parameters from ./tmp/serval03hugo/model.ckpt-620


INFO:tensorflow:enter eval_once loop global_step_val = 620. 


I0320 13:54:58.630665 140062221981504 <ipython-input-6-821193002e8b>:154] enter eval_once loop global_step_val = 620. 


INFO:tensorflow:examples_processed: 512 | global_step 620 | Batch Hit@1: 0.742 | Batch PERR: 0.741 | Batch Loss: 1.815 | Examples_per_sec: 845.198


I0320 13:54:59.259624 140062221981504 <ipython-input-6-821193002e8b>:186] examples_processed: 512 | global_step 620 | Batch Hit@1: 0.742 | Batch PERR: 0.741 | Batch Loss: 1.815 | Examples_per_sec: 845.198


INFO:tensorflow:examples_processed: 910 | global_step 620 | Batch Hit@1: 0.658 | Batch PERR: 0.653 | Batch Loss: 2.033 | Examples_per_sec: 2415.203


I0320 13:54:59.443073 140062221981504 <ipython-input-6-821193002e8b>:186] examples_processed: 910 | global_step 620 | Batch Hit@1: 0.658 | Batch PERR: 0.653 | Batch Loss: 2.033 | Examples_per_sec: 2415.203


INFO:tensorflow:Done with batched inference. Now calculating global performance metrics.


I0320 13:54:59.453566 140062221981504 <ipython-input-6-821193002e8b>:190] Done with batched inference. Now calculating global performance metrics.


INFO:tensorflow:epoch/eval number 620 | Avg_Hit@1: 0.705 | Avg_PERR: 0.703 | MAP: 0.380 | GAP: 0.729 | Avg_Loss: 1.910131


I0320 13:54:59.611238 140062221981504 <ipython-input-6-821193002e8b>:204] epoch/eval number 620 | Avg_Hit@1: 0.705 | Avg_PERR: 0.703 | MAP: 0.380 | GAP: 0.729 | Avg_Loss: 1.910131


SystemExit: 

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
# Scratch cell for inspecting state after a run (never executed: "In [None]").
#%tb
#who

#prediction_batch = tf.get_collection("predictions")[0]
#label_batch = tf.get_collection("labels")[0]
# NOTE(review): in the code visible in this notebook `evl_metrics` is a local
# variable of evaluate(), so this line presumably raises NameError unless the
# name is defined at module level somewhere else -- confirm or remove.
evl_metrics.predictions[0]