In [2]:
import tensorflow as tf
from tensorflow.contrib import learn
from tensorflow.contrib import slim
# Set TensorFlow's logging verbosity to DEBUG.
tf.logging.set_verbosity('DEBUG')
# Print the TensorFlow version in use (the recorded output below shows 1.4.0).
print(tf.__version__)

  return f(*args, **kwds)


1.4.0


# Accuracy resetting in training/evaluation loop

In [2]:
def input_fn():
    """Build a toy queue-based input pipeline.

    Two independent FIFO queues (capacity 100, float32) are created, each
    refilled by its own queue runner:

    * labels   -- the pattern 1.0, 0.0 repeated (200 values per enqueue op)
    * features -- the values 0.0 .. 99.0 under the key "x"

    Returns:
        A ``(features, labels)`` tuple where ``features`` is ``{"x": tensor}``
        and both tensors are produced by a single-element ``dequeue()``.
    """

    def _queued_values(values):
        """Return a dequeue op for a queue that a queue runner refills with `values`."""
        fifo = tf.FIFOQueue(100, tf.float32)
        refill_op = fifo.enqueue_many([values])
        tf.train.add_queue_runner(tf.train.QueueRunner(fifo, [refill_op]))
        return fifo.dequeue()

    # Labels first, then features: same graph-construction order as before.
    labels = _queued_values([1., 0.] * 100)
    features = {"x": _queued_values([i * 1. for i in range(100)])}

    return features, labels

In [3]:
def model_fn(features, labels, mode, params, config):
    """Toy model_fn used to observe how streaming metrics behave.

    The "model" always predicts the constant ``[1.]``; a single dummy weight
    gives the optimizer something to train. Accuracy and recall are built
    with ``tf.metrics`` and are either returned as ``eval_metric_ops``
    (EVAL mode) or written as scalar summaries (any other mode).

    Args:
        features: dict containing the key ``"x"`` (only printed, never used
            for the prediction).
        labels: label tensor compared against the constant prediction.
        mode: a ``tf.estimator.ModeKeys`` value.
        params: unused.
        config: unused.

    Returns:
        A ``tf.estimator.EstimatorSpec`` with ``predictions=None``.
    """
    # Constant prediction: always class 1.
    predict_classes = tf.constant([1.])

    # Dummy trainable weight and a loss that depends on it.
    dummy_weight = tf.Variable([0.])
    loss = predict_classes * dummy_weight
    train_op = slim.learning.create_train_op(loss, tf.train.AdamOptimizer())

    # Each tf.metrics call yields a tuple; element [1] is what gets summarised
    # during training (the update op, per the tf.metrics API).
    accuracy = tf.metrics.accuracy(labels, predict_classes)
    recall = tf.metrics.recall(labels, predict_classes)

    # Route the loss through tf.Print so features, labels and predictions are
    # logged (prefixed with "****") whenever the loss is evaluated.
    loss = tf.Print(loss, [features["x"], labels, predict_classes], "****")

    evaluating = mode == tf.estimator.ModeKeys.EVAL
    if not evaluating:
        tf.summary.scalar("accuracy", accuracy[1])
        tf.summary.scalar("recall", recall[1])

    metric_ops = {"accuracy": accuracy, "recall": recall} if evaluating else None

    return tf.estimator.EstimatorSpec(
        mode,
        predictions=None,
        loss=loss,
        train_op=train_op,
        eval_metric_ops=metric_ops)

In [6]:
# Write summaries every step and a checkpoint every 5 steps.
config = tf.estimator.RunConfig(
    save_summary_steps=1,
    save_checkpoints_steps=5,
)

# NOTE(review): model_dir is a fixed path, so checkpoints left there by an
# earlier run will presumably be picked up again -- confirm before re-running.
estimator = tf.estimator.Estimator(
    model_fn=model_fn,
    model_dir="/tmp/testtf",
    config=config,
)

# The same input_fn feeds both training and evaluation.
experiment = learn.Experiment(
    estimator=estimator,
    train_input_fn=input_fn,
    eval_input_fn=input_fn,
    train_steps=15,
    eval_steps=4,
)

experiment.train_and_evaluate()

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': '/tmp/testtf', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fa860300a90>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


## Remark: 
#### Accuracy in evaluation mode: the streaming (running-average) accuracy over the `eval_steps` batches of one evaluation; it is reset at the start of each evaluation.
#### Accuracy in training mode: the streaming (running-average) accuracy over all mini-batches seen since the start of training; it is not reset after an evaluation.

# Precision and recall with non-binary classification

In [24]:
# x holds the labels and y the predictions; the labels deliberately contain
# values other than 0/1 to see how tf.metrics.precision treats them.
x = tf.placeholder(tf.int32, shape=[5])
y = tf.placeholder(tf.int32, shape=[5])
pre, pre_op = tf.metrics.precision(labels=x, predictions=y)

label_values = [1, 2, 3, 0, 5]
predicted_values = [1, 1, 0, 1, 1]

with tf.Session() as sess:
    # The metric keeps its counters in local variables, so both kinds of
    # variables are initialised.
    sess.run(tf.global_variables_initializer())
    sess.run(tf.local_variables_initializer())

    # Fetch the value tensor and the update op in the same run.
    v = sess.run([pre, pre_op],
                 feed_dict={x: label_values, y: predicted_values})
    print(v)

[0.0, 0.75]


In [11]:
# x holds the labels and y the predictions.
x = tf.placeholder(tf.int32, shape=[5])
y = tf.placeholder(tf.int32, shape=[5])
rec, rec_op = tf.metrics.recall(labels=x, predictions=y)

# Second case is the first with the 0/1 classes swapped.
recall_cases = (
    ([1, 1, 1, 0, 0], [1, 0, 0, 0, 1]),
    ([0, 0, 0, 1, 1], [0, 1, 1, 1, 0]),
)

for label_values, predicted_values in recall_cases:
    # A new session per case, with the metric's variables re-initialised.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        v = sess.run([rec, rec_op],
                     feed_dict={x: label_values, y: predicted_values})
        print(v)

[0.0, 0.33333334]
[0.0, 0.5]


## Remark: 
#### The precision and recall functions work correctly only on binary classification problems: the predictions and labels are cast to boolean before the metrics are calculated.

# Mean per class accuracy vs recall

In [36]:
# x holds the labels and y the predictions for a two-class problem.
x = tf.placeholder(tf.int32, shape=[5])
y = tf.placeholder(tf.int32, shape=[5])
acc, acc_op = tf.metrics.mean_per_class_accuracy(
    labels=x, predictions=y, num_classes=2)

label_values = [1, 1, 1, 0, 0]
predicted_values = [1, 0, 0, 0, 1]

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])

    # Value tensor and update op fetched together in one run.
    v = sess.run([acc, acc_op],
                 feed_dict={x: label_values, y: predicted_values})
    print(v)

    # Read the metric value again, now that the update op has run.
    print(sess.run(acc))

[0.0, array([[ 1.,  1.],
       [ 2.,  1.]])]
0.416667


In [33]:
# x holds the labels and y the predictions.
x = tf.placeholder(tf.int32, shape=[5])
y = tf.placeholder(tf.int32, shape=[5])
rec, rec_op = tf.metrics.recall(labels=x, predictions=y)

# Same data twice; the second case swaps the 0/1 classes, so each run treats
# a different class as the positive one.
recall_cases = (
    ([1, 1, 1, 0, 0], [1, 0, 0, 0, 1]),
    ([0, 0, 0, 1, 1], [0, 1, 1, 1, 0]),
)

results = []
for label_values, predicted_values in recall_cases:
    # A new session per case, with the metric's variables re-initialised.
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())

        results.append(sess.run([rec, rec_op],
                                feed_dict={x: label_values,
                                           y: predicted_values}))
        print(results[-1])

v1, v2 = results

# Average of the two update-op results.
print((v1[1] + v2[1]) * 0.5)

[0.0, 0.33333334]
[0.0, 0.5]
0.416666686535


## Remark: 
#### Mean per-class accuracy is the mean of the per-class recalls (here (0.3333 + 0.5) / 2 ≈ 0.4167, matching the value printed above).