In [1]:
# Running %env without any arguments
# lists all environment variables

# The line below sets the environment
# variable CUDA_VISIBLE_DEVICES to an empty value,
# which hides every GPU from CUDA so TensorFlow runs on the CPU
%env CUDA_VISIBLE_DEVICES = 

import numpy as np
import pandas as pd
import os
import shutil
import uuid
import io
import time
import bson                       # this is installed with the pymongo package
import matplotlib.pyplot as plt
from scipy.misc import imread, imsave
import tensorflow as tf
from tensorflow.python.platform import tf_logging
from tensorflow.contrib import layers
from tensorflow.contrib.training import add_gradients_summaries
from tensorflow.python.ops import math_ops
from tensorflow.python.framework import ops
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.training import optimizer as tf_optimizer
from tensorflow.python.ops import variables as tf_variables

from tensorflow.python.ops import init_ops
from tensorflow.contrib import layers
from tensorflow.contrib.framework.python.ops import arg_scope
from tensorflow.contrib.layers.python.layers import layers as layers_lib
from tensorflow.python.ops import variable_scope

import os.path
import tensorflow.contrib.slim as slim
from tensorflow.contrib.slim.python.slim.nets import inception
import inception_preprocessing
import logging

# This is a bit of magic to make matplotlib figures appear inline in the notebook
# rather than in a new window.
%matplotlib inline
plt.rcParams['figure.figsize'] = (10.0, 8.0) # set default size of plots
plt.rcParams['image.interpolation'] = 'nearest'
plt.rcParams['image.cmap'] = 'gray'

# Some more magic so that the notebook will reload external python modules;
# see http://stackoverflow.com/questions/1907993/autoreload-of-modules-in-ipython
%load_ext autoreload
%autoreload 2

env: CUDA_VISIBLE_DEVICES=


In [2]:
# Base name of the custom-op shared library; load_op_module() looks for 'lib<name>.so'.
LIB_NAME = 'focal_loss'

def load_op_module(lib_name):
    """
    Load a TensorFlow operator library through a private, uniquely named copy.

    Args:
        lib_name: Base name of the library. The file ``lib<lib_name>.so`` is
            looked up in the parent directory of the current working directory.

    Returns:
        The module object returned by ``tf.load_op_library`` for the copy.

    Raises:
        Whatever ``shutil.copyfile`` raises when the source library cannot be
        read (e.g. it has not been built yet).
    """
    # Resolve the working directory to a real absolute path so the lookup does
    # not depend on how the process was started; the library is expected one
    # directory level above it.
    lib_path = os.path.join(os.path.dirname(os.path.realpath('.')), 'lib{0}.so'.format(lib_name))
    # duplicate library with a random new name so that
    # a running program will not be interrupted when the original library is updated.
    # The copy is named after the requested `lib_name` (not a module-level
    # constant), so the function works for any library it is asked to load.
    lib_copy_path = '/tmp/lib{0}_{1}.so'.format(str(uuid.uuid4())[:8], lib_name)
    shutil.copyfile(lib_path, lib_copy_path)
    oplib = tf.load_op_library(lib_copy_path)
    return oplib

# Load the shared library once and expose its `focal_loss` op at notebook level.
op_module = load_op_module(LIB_NAME)
focal_loss = op_module.focal_loss

# Example usage (scalar loss):
# tf.reduce_mean(focal_loss(logits, one_hot_labels, batch_weight, focal_loss_gamma))

In [3]:
@ops.RegisterGradient("FocalLoss")
def _focal_loss_grad(op, grad):
    """The gradients for `focal_loss`.

    With p the softmax probability of the labelled class, L the op output,
    w = batch_weight and g = focal_loss_gamma, the expressions below are

        dL/dlogit[target]      = -(g * p * L + w * (1 - p)^(g + 1))
        dL/dlogit[j != target] = (g * p * L / (1 - p) + w * (1 - p)^g) * softmax[j]

    which are the derivatives of L = -w * (1 - p)^g * log(p).
    NOTE(review): this assumes the compiled op computes that per-sample loss
    (the formula used by `focal_loss_tf2`) -- confirm against the kernel.

    Args:
    op: The `focal_loss` `Operation` that we are differentiating, which we can use
      to find the inputs and outputs of the original op.
    grad: Gradient with respect to the output of the `focal_loss` op.

    Returns:
    Gradients with respect to the input of `focal_loss`: a tensor shaped like
    the logits, followed by `None` for the labels, weights and gamma inputs.
    """
    logits = op.inputs[0]
    one_hot_labels = op.inputs[1]
    batch_weight = op.inputs[2]
    focal_loss_gamma = op.inputs[3]
    # Named so it does not shadow the notebook-level `focal_loss` op.
    per_sample_loss = op.outputs[0]
    logits_shape = array_ops.shape(logits)

    probs = tf.nn.softmax(logits)
    # in fact, tf.shape(probs)[0] is also a tensor
    # tf.get_shape().as_list() for known shape
    # (row, labelled-class) index pairs, one per sample.
    indices = tf.stack((tf.range(tf.cast(tf.shape(probs)[0], tf.int64), dtype=tf.int64), tf.argmax(one_hot_labels, axis=1)), axis=1)
    scatter_indices = tf.cast(indices, tf.int32)
    prob_foreach = tf.gather_nd(probs, indices)
    prob_foreach_subbyone = 1 - prob_foreach

    loss_term = prob_foreach * per_sample_loss * focal_loss_gamma
    grad_true = 0. - tf.add(loss_term, batch_weight * tf.pow(prob_foreach_subbyone, focal_loss_gamma + 1))
    # 0 at each sample's labelled class, 1 everywhere else.
    scatter_mask = 1. - tf.scatter_nd(scatter_indices, tf.ones_like(prob_foreach), logits_shape)

    # When the labelled class saturates to p == 1.0 the quotient below is 0/0
    # and would turn every off-target gradient of that sample into NaN.  Its
    # limit for p -> 1 is 0, so substitute that value; the denominator is
    # replaced by 1 in those rows so no NaN/inf is produced at all.
    has_off_target_mass = tf.greater(prob_foreach_subbyone, 0.)
    safe_denominator = tf.where(has_off_target_mass, prob_foreach_subbyone, tf.ones_like(prob_foreach_subbyone))
    loss_ratio = tf.where(has_off_target_mass, tf.div(loss_term, safe_denominator), tf.zeros_like(loss_term))

    grad_false = tf.expand_dims(loss_ratio + batch_weight * tf.pow(prob_foreach_subbyone, focal_loss_gamma), axis=1) * probs
    scatter_grad_true = tf.scatter_nd(scatter_indices, grad_true, logits_shape)
    # Chain rule: scale each sample's row by the incoming per-sample gradient.
    logits_grad = tf.expand_dims(grad, 1) * (grad_false * scatter_mask + scatter_grad_true)
    # One entry per op input; None marks inputs without a well-defined gradient.
    return [logits_grad, None, None, None]

In [4]:
def focal_loss_tf(logits, one_hot, weight, gamma):
    """Reference focal loss built from stock TensorFlow ops, averaged over the batch.

    Per sample this is ``weight * sum_c one_hot[c] * -(1 - p[c])**gamma * log(p[c])``
    with ``p = softmax(logits)``; the per-sample values are then averaged.

    Args:
        logits: Unnormalised class scores; axis 1 is summed over as the class axis.
        one_hot: Label mask multiplied element-wise with the per-class loss.
        weight: Per-sample factor applied after the class sum.
        gamma: Focusing exponent applied to ``1 - p``.

    Returns:
        Scalar tensor: the mean of the weighted per-sample losses.
    """
    prob = tf.nn.softmax(logits)
    # Use log_softmax rather than log(softmax): when a probability underflows
    # to 0, tf.log gives -inf and the one-hot masking then yields 0 * inf = NaN
    # even for samples whose labelled class is perfectly predicted.
    log_prob = tf.nn.log_softmax(logits)
    percls_loss = tf.subtract(tf.zeros_like(prob), tf.multiply(tf.pow(tf.subtract(tf.ones_like(prob), prob), gamma), log_prob))
    losses = tf.reduce_mean(tf.multiply(tf.reduce_sum(tf.multiply(one_hot, percls_loss), 1), weight))
    return losses
def focal_loss_tf2(logits, one_hot, weight, gamma):
    """Per-sample reference focal loss (no batch reduction).

    For every sample: ``weight * sum_c one_hot[c] * -(1 - p[c])**gamma * log_softmax(logits)[c]``
    with ``p = softmax(logits)``.  Wrap the result in ``tf.reduce_mean`` to get
    a scalar loss.
    """
    class_probs = tf.nn.softmax(logits)
    # Down-weights classes that are already predicted with high probability.
    modulating_factor = tf.pow(1 - class_probs, gamma)
    # log_softmax keeps the logarithm finite even when a probability underflows.
    class_log_probs = tf.nn.log_softmax(logits)
    per_class_loss = 0. - modulating_factor * class_log_probs
    # Keep only the labelled class of each row, then apply the sample weight.
    per_sample_loss = tf.reduce_sum(one_hot * per_class_loss, 1)
    return per_sample_loss * weight

In [5]:
def test_focal_loss_grad(logits, one_hot_labels, batch_weight, focal_loss_gamma, focal_loss):
    ligits_shape = array_ops.shape(logits)
    
    probs = tf.nn.softmax(logits)
    # in fact, tf.shape(probs)[0] is also a tensor
    # tf.get_shape().as_list() for known shape
    indices = tf.stack((tf.range(tf.cast(tf.shape(probs)[0], tf.int64), dtype=tf.int64), tf.argmax(one_hot_labels, axis=1)), axis=1)
    #indices = tf.stack((tf.range(probs.get_shape()[0], dtype=tf.int64), tf.argmax(one_hot_labels, axis=1)), axis=1)
    prob_foreach = tf.gather_nd( probs, indices )
    prob_foreach_subbyone = 1 - prob_foreach
    
    grad_true = 0. - tf.add(prob_foreach*focal_loss*focal_loss_gamma, batch_weight * tf.pow(prob_foreach_subbyone, focal_loss_gamma + 1))
    scatter_mask = 1. - tf.scatter_nd(tf.cast(indices, tf.int32), tf.ones_like(prob_foreach), ligits_shape)
    grad_false = tf.expand_dims(tf.div(prob_foreach * focal_loss * focal_loss_gamma, prob_foreach_subbyone) + batch_weight*tf.pow(prob_foreach_subbyone, focal_loss_gamma), axis=1) * probs
    scatter_grad_true = tf.scatter_nd(tf.cast(indices, tf.int32), grad_true, ligits_shape)
    #grad_false * scatter_mask + scatter_grad_true
    return [grad_false, None, None, None]  # List of one Tensor, use None for no well-defined gradient of some input,

In [6]:
# Fixed 4-sample x 5-class problem used to compare the custom op with the
# pure-TensorFlow reference implementation.
logits = tf.constant(
    [[1.0, 2., 3., 30, 0],
     [1., 2., 3., 0, 0],
     [1., 2., 3., 0, 0],
     [1., 2., 3., 0, 0]],
    dtype=tf.float32)
rand_logits = tf.placeholder(tf.float32, shape=(4, 5))
one_hot = tf.constant(
    [[1., 0, 0, 0, 0],
     [0, 1., 0., 0, 0],
     [0, 0, 1., 0, 0],
     [0, 0, 0, 1., 0]],
    dtype=tf.float32)
weight = tf.constant([2, 1., 1, 1], dtype=tf.float32)


def _gamma_const():
    """Return a fresh float32 constant holding the focusing exponent 5."""
    return tf.constant(5., dtype=tf.float32)


def _x_init():
    """Return a fresh copy of the logits values used as the gradient-check start point."""
    return np.array([[1.0, 2., 3., 30, 0],
                     [1., 2., 3., 0, 0],
                     [1., 2., 3., 0, 0],
                     [1., 2., 3., 0, 0]])


# Custom op and reference loss, each on the constant logits and on the placeholder.
loss = focal_loss(logits, one_hot, weight, _gamma_const())
rand_loss = focal_loss(rand_logits, one_hot, weight, _gamma_const())
loss_tf = focal_loss_tf2(logits, one_hot, weight, _gamma_const())
rand_loss_tf = focal_loss_tf2(rand_logits, one_hot, weight, _gamma_const())

# The same hand-written logits are fed on every iteration of the loop below.
_fixed_feed = {rand_logits: [[1, 1., 1., 1, 1],
                             [1., 2., 3., 0, 0],
                             [1., 2., 3., 0, 0],
                             [1., 2., 3., 0, 0]]}

with tf.Session() as sess:
    # Forward values: custom op first, reference second.
    print(sess.run(loss))
    print(sess.run(loss_tf))
    # Maximum analytic-vs-numeric gradient error for each implementation.
    for _checked in (loss, loss_tf):
        print(tf.test.compute_gradient_error(logits, [4, 5], _checked, [4], delta=0.0001, x_init_value=_x_init()))
    # The underlying (analytic, numeric) Jacobians for each implementation.
    for _checked in (loss, loss_tf):
        print(tf.test.compute_gradient(logits, [4, 5], _checked, [4], x_init_value=_x_init()))
    # Run the custom op repeatedly on the fixed feed and print every result.
    for _ in range(1000):
        # Drawn on every iteration but not fed: the fixed feed above is used instead.
        logits_temp = 0.000000000000000000001 * np.random.randn(4, 5)

        l = sess.run(rand_loss, feed_dict=_fixed_feed)
        print(l)
    print('finished.')

[  5.80000000e+01   3.99600983e-01   3.54937464e-03   2.96500897e+00]
[  5.80000000e+01   3.99601102e-01   3.54937790e-03   2.96500921e+00]
0.016357421875
0.016357421875
(array([[ -2.00000000e+00,  -0.00000000e+00,  -0.00000000e+00,
         -0.00000000e+00],
       [  1.38288003e-12,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00],
       [  3.75905774e-12,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00],
       [  2.00000000e+00,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00],
       [  1.87152458e-13,   0.00000000e+00,   0.00000000e+00,
          0.00000000e+00],
       [  0.00000000e+00,   7.31836110e-02,   0.00000000e+00,
          0.00000000e+00],
       [ -0.00000000e+00,  -6.67786956e-01,  -0.00000000e+00,
         -0.00000000e+00],
       [  0.00000000e+00,   5.40757775e-01,   0.00000000e+00,
          0.00000000e+00],
       [  0.00000000e+00,   2.69227438e-02,   0.00000000e+00,
          0.00000000e+00],
       [  0.00000000e+00,   

[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]


[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]


[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
[ 1.05476129  0.39960098  0.00354937  2.96500897]
