In [1]:
import tensorflow as tf
import tensorflow.examples.tutorials.mnist.input_data as input_data

In [2]:
# Load MNIST through the tutorial helper, using 'MNIST_data' as the data
# directory and requesting one-hot encoded labels.
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [3]:
def mnist_binary(images, labels, binary):
    """Keep only the samples whose label is one of the selected classes.

    Despite the name, any number of classes may be selected.

    Args:
      images: array of samples, one row per sample.
      labels: 2-D one-hot label array, one row per sample.
      binary: list of class (column) indices to keep.

    Returns:
      A tuple (x, y): the selected rows of `images`, and the matching rows of
      `labels` restricted to the `binary` columns (in that order), cast to int.
    """
    # Slice the wanted label columns once for the whole array instead of
    # looping over rows in Python.
    picked = labels[:, binary]
    # A row belongs to the selection when at least one chosen column is set.
    keep = picked.sum(axis=1) >= 1
    return images[keep], picked[keep].astype(int)

In [4]:
# Digits to classify; samples of every other digit are filtered out of both splits.
numbers = [1, 2, 3, 4, 5]
train_x, train_y = mnist_binary(
    mnist.train.images, mnist.train.labels, binary=numbers)
test_x, test_y = mnist_binary(
    mnist.test.images, mnist.test.labels, binary=numbers)

In [5]:
# Number of selected digit classes; sizes the label placeholder and the output layer.
len_numbers = len(numbers)

In [6]:
def variable_summaries(var, name):
  """Attach mean, stddev, max, min and histogram summaries to a Tensor.

  Args:
    var: tensor to summarize.
    name: suffix appended to each summary tag (e.g. 'mean/<name>'); also used
      as the histogram tag.
  """
  with tf.name_scope('summaries'):
    mean = tf.reduce_mean(var)
    tf.scalar_summary('mean/' + name, mean)
    with tf.name_scope('stddev'):
      # Root of the MEAN squared deviation. Using reduce_sum here would yield
      # a value that scales with the number of elements instead of a true
      # standard deviation.
      stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
    tf.scalar_summary('stddev/' + name, stddev)
    tf.scalar_summary('max/' + name, tf.reduce_max(var))
    tf.scalar_summary('min/' + name, tf.reduce_min(var))
    tf.histogram_summary(name, var)

In [7]:
def weight_variable(shape):
  """Return a Variable of `shape` initialised from a truncated normal (stddev 0.1)."""
  return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

In [8]:
def bias_variable(shape):
  """Return a Variable of `shape` with every element initialised to 0.1."""
  return tf.Variable(tf.constant(0.1, shape=shape))

In [9]:
  def nn_layer(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
    """Build one fully connected layer: act(matmul(input_tensor, W) + b).

    The ops are grouped under a name scope so the graph is easy to read, and
    summaries are attached to the weights, biases, pre-activations and
    activations.

    Args:
      input_tensor: 2-D tensor fed to the layer; its second dimension must be
        `input_dim`.
      input_dim: number of input features (rows of the weight matrix).
      output_dim: number of output units (columns of the weight matrix, and
        length of the bias vector).
      layer_name: name scope for the layer and prefix of its summary tags.
      act: activation applied to the pre-activation; called as
        act(tensor, name=...). Defaults to tf.nn.relu.

    Returns:
      The activation tensor produced by `act`.
    """
    # Adding a name scope ensures logical grouping of the layers in the graph.
    with tf.name_scope(layer_name):
      # This Variable will hold the state of the weights for the layer.
      with tf.name_scope('weights'):
        weights = weight_variable([input_dim, output_dim])
        variable_summaries(weights, layer_name + '/weights')
      with tf.name_scope('biases'):
        biases = bias_variable([output_dim])
        variable_summaries(biases, layer_name + '/biases')
      with tf.name_scope('Wx_plus_b'):
        preactivate = tf.matmul(input_tensor, weights) + biases
        tf.histogram_summary(layer_name + '/pre_activations', preactivate)
      # Pass the op name by keyword: not every activation takes `name` as its
      # second positional parameter (softmax gained a `dim` argument there).
      activations = act(preactivate, name='activation')
      tf.histogram_summary(layer_name + '/activations', activations)
      return activations

In [10]:
# Graph inputs: flattened images and one-hot labels over the selected digits.
with tf.name_scope('input'):
  # 784 = 28 * 28 pixels per image; the batch dimension is left open.
  x = tf.placeholder(tf.float32, shape=[None, 784], name='x-input')
  y_ = tf.placeholder(tf.float32, shape=[None, len_numbers], name='y-input')

In [11]:
# Rebuild 28x28 single-channel images from the flat input for the image summary.
with tf.name_scope('input_reshape'):
  image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
  # Log at most 10 sample images (IMAGES tab of TensorBoard).
  tf.image_summary('input', image_shaped_input, max_images=10)

In [12]:
# Model: a single softmax layer mapping the 784 pixels to one score per
# selected digit. It appears as 'layer1' in the GRAPHS tab of TensorBoard.
y = nn_layer(x, 784, len_numbers, layer_name='layer1', act=tf.nn.softmax)

In [13]:
# Set loss function
with tf.name_scope('cross_entropy'):
  # The 1e-10 offset keeps log() finite when a predicted probability is 0.
  diff = y_ * tf.log(y + 1e-10)
  with tf.name_scope('total'):
    # Sum over the class axis to get each sample's cross entropy, then average
    # over the batch. Averaging over every element instead would divide the
    # value by the number of classes.
    cross_entropy = -tf.reduce_mean(tf.reduce_sum(diff, reduction_indices=[1]))
  tf.scalar_summary('cross entropy', cross_entropy)

In [14]:
# # Set hinge loss function for binary class
# with tf.name_scope('cross_entropy'):
#   with tf.name_scope('total'):
#     hinge_loss = tf.reduce_mean(tf.maximum(0., 1. - (y_[:,1] - y_[:,0])*y[:,1]))
#   tf.scalar_summary('hinge_loss', hinge_loss)

In [15]:
# Set hinge loss function for multiclass (squared hinge on the layer outputs)
with tf.name_scope('hinge_loss'):
  # Boolean mask marking the true class of every sample.
  true_index = tf.cast(y_, dtype=bool)
  # Score predicted for the true class, one value per sample.
  margin = tf.boolean_mask(y, true_index)
  # Scores of all other classes, regrouped into one row per sample. The
  # reshape relies on every label row having exactly one true entry.
  false_prediction = tf.reshape(tf.boolean_mask(y, ~true_index),
                                [tf.shape(y_)[0], len_numbers - 1])
  # Strongest competing class per sample.
  false_prediction_max = tf.reduce_max(false_prediction, reduction_indices=[1])
  # Hinge: penalise whenever the true class does not beat the best wrong
  # class by a margin of 1; clamp negative values to 0.
  loss = 1 - (margin - false_prediction_max)
  loss = tf.maximum(loss, tf.zeros_like(loss))
  with tf.name_scope('total'):
    # Squared hinge; use tf.reduce_mean(loss) for the plain (linear) hinge.
    hinge_loss = tf.reduce_mean(tf.square(loss))
  tf.scalar_summary('hinge_loss', hinge_loss)

In [16]:
# Set optimizer method: plain gradient descent with learning rate 0.01.
with tf.variable_scope("trainer") as scope:
  optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.01)
  # The hinge loss is what gets minimised; pass `cross_entropy` here instead
  # to train on cross entropy.
  trainer = optimizer.minimize(hinge_loss)

  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


In [17]:
# Accuracy: fraction of samples whose highest-scoring class matches the label.
with tf.name_scope('accuracy'):
  with tf.name_scope('correct_prediction'):
    predicted_class = tf.argmax(y, 1)
    actual_class = tf.argmax(y_, 1)
    correct_prediction = tf.equal(predicted_class, actual_class)
  with tf.name_scope('accuracy'):
    # Cast the booleans to 0.0 / 1.0 so their mean is the hit rate.
    hits = tf.cast(correct_prediction, tf.float32)
    accuracy = tf.reduce_mean(hits)
  tf.scalar_summary('accuracy', accuracy)

In [18]:
# Single op that evaluates all summaries registered on the graph so far.
merged = tf.merge_all_summaries()

In [19]:
# Session used to run the graph in all following cells.
sess = tf.Session()

In [20]:
# One event-file directory per split, both under 'mnist_logs'.
train_writer = tf.train.SummaryWriter('mnist_logs/train', sess.graph)
test_writer = tf.train.SummaryWriter('mnist_logs/test', sess.graph)

In [21]:
# Initialise every Variable in the graph before the first training step.
sess.run(tf.initialize_all_variables())

In [22]:
NUM_STEPS = 1000   # number of full-batch gradient-descent steps
EVAL_EVERY = 50    # evaluate on the test split every this many steps

# The feeds never change between steps, so build them once.
train_feed = {x: train_x, y_: train_y}
test_feed = {x: test_x, y_: test_y}

for i in range(NUM_STEPS):
    # Apply the update and collect the training summaries in ONE graph
    # evaluation; running them separately would push the whole training set
    # through the graph twice per step.
    result, _ = sess.run([merged, trainer], feed_dict=train_feed)
    train_writer.add_summary(result, i)
    if i % EVAL_EVERY == 0:
        # Test metrics are taken after this step's update has been applied.
        result, acc = sess.run([merged, accuracy], feed_dict=test_feed)
        test_writer.add_summary(result, i)
        print("accuracy is %0.8s at times %4d" % (acc, i))

accuracy is 0.158186 at times    0
accuracy is 0.543853 at times   50
accuracy is 0.677292 at times  100
accuracy is 0.729163 at times  150
accuracy is 0.765393 at times  200
accuracy is 0.809741 at times  250
accuracy is 0.844783 at times  300
accuracy is 0.865967 at times  350
accuracy is 0.880816 at times  400
accuracy is 0.89309 at times  450
accuracy is 0.901208 at times  500
accuracy is 0.907741 at times  550
accuracy is 0.911503 at times  600
accuracy is 0.914274 at times  650
accuracy is 0.916452 at times  700
accuracy is 0.918234 at times  750
accuracy is 0.920214 at times  800
accuracy is 0.921996 at times  850
accuracy is 0.923777 at times  900
accuracy is 0.924569 at times  950


# Test

In [23]:
# Show the one-hot training labels next to the boolean mask derived from them.
labels_value, mask_value = sess.run([y_, true_index],
                                    feed_dict={x: train_x, y_: train_y})
print(labels_value)
print(mask_value)

[[ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  1.  0.]
 [ 1.  0.  0.  0.  0.]
 ..., 
 [ 1.  0.  0.  0.  0.]
 [ 0.  0.  1.  0.  0.]
 [ 0.  0.  0.  0.  1.]]
[[False False  True False False]
 [False False False  True False]
 [ True False False False False]
 ..., 
 [ True False False False False]
 [False False  True False False]
 [False False False False  True]]


In [25]:
# Labels as seen by the graph: evaluating the placeholder returns the fed array.
sess.run(y_, feed_dict={x: train_x, y_: train_y})

array([[ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  1.,  0.],
       [ 1.,  0.,  0.,  0.,  0.],
       ..., 
       [ 1.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  1.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  1.]], dtype=float32)

In [26]:
# Softmax outputs of the trained layer for every training sample (one row each).
sess.run(y, feed_dict={x: train_x, y_: train_y})

array([[ 0.01232991,  0.02073836,  0.87376052,  0.01221038,  0.08096086],
       [ 0.00248122,  0.00413203,  0.05257058,  0.89045465,  0.05036161],
       [ 0.93920898,  0.02350495,  0.02156768,  0.00410834,  0.01161002],
       ..., 
       [ 0.71395719,  0.09155858,  0.10753477,  0.04913901,  0.03781044],
       [ 0.01073636,  0.02185084,  0.93757486,  0.00664217,  0.02319565],
       [ 0.01166299,  0.00323938,  0.08034842,  0.04200077,  0.86274838]], dtype=float32)

In [27]:
# Score predicted for the true class of each sample. Reuses the `margin`
# tensor from the hinge-loss cell rather than adding a duplicate op to the
# graph on every execution.
sess.run(margin, feed_dict={x: train_x, y_: train_y})

array([ 0.87376052,  0.89045465,  0.93920898, ...,  0.71395719,
        0.93757486,  0.86274838], dtype=float32)

In [28]:
# Scores of the wrong classes, one row per sample. Reuses the
# `false_prediction` tensor from the hinge-loss cell rather than rebuilding
# the mask/reshape ops on every execution.
sess.run(false_prediction, feed_dict={x: train_x, y_: train_y})

array([[ 0.01232991,  0.02073836,  0.01221038,  0.08096086],
       [ 0.00248122,  0.00413203,  0.05257058,  0.05036161],
       [ 0.02350495,  0.02156768,  0.00410834,  0.01161002],
       ..., 
       [ 0.09155858,  0.10753477,  0.04913901,  0.03781044],
       [ 0.01073636,  0.02185084,  0.00664217,  0.02319565],
       [ 0.01166299,  0.00323938,  0.08034842,  0.04200077]], dtype=float32)

In [29]:
# Highest wrong-class score per sample. Reuses the `false_prediction_max`
# tensor from the hinge-loss cell rather than rebuilding its ops.
sess.run(false_prediction_max, feed_dict={x: train_x, y_: train_y})

array([ 0.08096086,  0.05257058,  0.02350495, ...,  0.10753477,
        0.02319565,  0.08034842], dtype=float32)

In [30]:
# Gap between the true-class score and the best wrong-class score, built from
# the tensors already defined in the hinge-loss cell.
sess.run(margin - false_prediction_max,
         feed_dict={x: train_x, y_: train_y})

array([ 0.79279965,  0.83788407,  0.91570401, ...,  0.60642242,
        0.91437924,  0.78239995], dtype=float32)

In [31]:
# Raw hinge term (before clamping at 0 and squaring), built from the tensors
# already defined in the hinge-loss cell.
sess.run(1 - (margin - false_prediction_max),
         feed_dict={x: train_x, y_: train_y})

array([ 0.20720035,  0.16211593,  0.08429599, ...,  0.39357758,
        0.08562076,  0.21760005], dtype=float32)

In [None]:
# Close both summary writers now that logging is finished.
for writer in (train_writer, test_writer):
    writer.close()

In [None]:
# Print the notebook's working directory; the relative 'mnist_logs' path used
# for the summary writers is resolved against it.
!pwd

# Shell: start TensorBoard on the log directory (run from the directory printed above)
```
tensorboard --logdir=mnist_logs/
```
# Browser: open http://127.0.0.1:6006 to view the TensorBoard dashboards