# Neuronal Attention model 

## Visualisation using Tensor-board
Enter this command 
tensorboard --logdir logs/100x100-4glimpse-12x12-4scales-128batch-100epochs
or this one
tensorboard --logdir logs/

In [2]:
import sys
print('Using Python {}!'.format(sys.version_info[0]))

import tensorflow as tf
print(tf.test.gpu_device_name() )

from tensorflow.python.client import device_lib

local_device_protos = device_lib.list_local_devices()

print(local_device_protos)

Using Python 3!
/device:GPU:0
[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 2400627669966820159
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 4945621811
locality {
  bus_id: 1
  links {
  }
}
incarnation: 5626239723515773655
physical_device_desc: "device: 0, name: GeForce GTX 1060 with Max-Q Design, pci bus id: 0000:01:00.0, compute capability: 6.1"
]


In [3]:
"""Recurrent Models of Visual Attention V. Mnih et al."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import logging
import tensorflow as tf
import numpy as np
from keras.utils.np_utils import to_categorical

from tensorflow.examples.tutorials.mnist import input_data

try:
  xrange
except NameError:
  xrange=range

Using TensorFlow backend.


In [4]:
class Config(object):

  win_size = 12 ## kim: glimse sensor  ~~
  num_glimpses = 4 ## of gimpse per image
  num_scales = 4 ##

  batch_size = 256 # 128 ##
  eval_batch_size = batch_size #128 ##


  step = 100 ## this is the nb of epoch
    
  #AdamOptimizer is configured somewhere else

  lr_start = 1e-3
  lr_min = 1e-4
  decay = 0.97  
  
  #less important config  
  loc_std = 0.22
  original_size = 100 ##
  num_channels = 1 # do not change, not tested
  bandwidth = win_size**2
  sensor_size = win_size**2 * num_channels * num_scales
  minRadius = 8
  hg_size = hl_size = 128
  g_size = 256
  cell_output_size = 256
  loc_dim = 2
  cell_size = 256
  cell_out_size = cell_size
  num_classes = 10
  max_grad_norm = 5.

  # Monte Carlo sampling
  M = 10

  # Run name
  run_name = "{}x{}-{}glimpse-{}x{}-{}scales-{}batch-{}epochs".format(original_size,
                                                     original_size,
                                                     num_glimpses,
                                                     win_size,
                                                     win_size,
                                                     num_scales,
                                                     batch_size,
                                                     step
                                                    )
config = Config()

In [5]:
#File Created
basicConfigFileName = 'run-{}.log'.format(config.run_name)
tfLogFile = "./logs/"+config.run_name
savedModel_path =  "model-{}.ckpt".format(config.run_name)
print("File created:")
print(basicConfigFileName)
print(tfLogFile)
print(savedModel_path)

File created:
run-100x100-4glimpse-12x12-4scales-256batch-100epochs.log
./logs/100x100-4glimpse-12x12-4scales-256batch-100epochs
model-100x100-4glimpse-12x12-4scales-256batch-100epochs.ckpt


In [6]:
logging.basicConfig(filename= basicConfigFileName,level=logging.DEBUG)
logging.getLogger().setLevel(logging.DEBUG)

rnn_cell = tf.nn.rnn_cell
seq2seq = tf.contrib.legacy_seq2seq

#mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
data = np.load('../data/mnist_digit_sample_8dsistortions9x9.npz')

# the data, shuffled and split between train and test sets
x_train = np.reshape(data['X_train'], (-1, 10000))
y_train = np.reshape(data['y_train'], (-1))
x_va = np.reshape(data['X_valid'], (-1, 10000))
y_va = np.reshape(data['y_valid'], (-1))
x_test = np.reshape(data['X_test'], (-1, 10000))
y_test = np.reshape(data['y_test'], (-1))

print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_va.shape[0], 'validation samples')
print(x_test.shape[0], 'test samples')

x_train shape: (54000, 10000)
54000 train samples
6000 validation samples
10000 test samples


In [7]:
input_shape = (config.original_size, config.original_size, 1)

num_epochs = config.step

loc_mean_arr = []
sampled_loc_arr = []

def get_next_input(output, i):
  loc, loc_mean = loc_net(output)
  gl_next = gl(loc)
  loc_mean_arr.append(loc_mean)
  sampled_loc_arr.append(loc)
  return gl_next

In [8]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

distributions = tf.contrib.distributions


def weight_variable(shape):
  initial = tf.truncated_normal(shape, stddev=0.01)
  return tf.Variable(initial)


def bias_variable(shape):
  initial = tf.constant(0.0, shape=shape)
  return tf.Variable(initial)


def loglikelihood(mean_arr, sampled_arr, sigma):
  mu = tf.stack(mean_arr)  # mu = [timesteps, batch_sz, loc_dim]
  sampled = tf.stack(sampled_arr)  # same shape as mu
  gaussian = distributions.Normal(mu, sigma)
  logll = gaussian.log_prob(sampled)  # [timesteps, batch_sz, loc_dim]
  logll = tf.reduce_sum(logll, 2)
  logll = tf.transpose(logll)  # [batch_sz, timesteps]
  return logll


In [9]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

class GlimpseNet(object):
  """Glimpse network.

  Take glimpse location input and output features for RNN.

  """

  def __init__(self, config, images_ph):
    self.original_size = config.original_size
    self.num_channels = config.num_channels
    self.sensor_size = config.sensor_size
    self.win_size = config.win_size
    self.minRadius = config.minRadius
    self.num_scales = config.num_scales

    self.hg_size = config.hg_size
    self.hl_size = config.hl_size
    self.g_size = config.g_size
    self.loc_dim = config.loc_dim

    self.images_ph = images_ph

    self.init_weights()

  def init_weights(self):
    """ Initialize all the trainable weights."""
    self.w_g0 = weight_variable((self.sensor_size, self.hg_size))
    self.b_g0 = bias_variable((self.hg_size,))
    self.w_l0 = weight_variable((self.loc_dim, self.hl_size))
    self.b_l0 = bias_variable((self.hl_size,))
    self.w_g1 = weight_variable((self.hg_size, self.g_size))
    self.b_g1 = bias_variable((self.g_size,))
    self.w_l1 = weight_variable((self.hl_size, self.g_size))
    self.b_l1 = weight_variable((self.g_size,))

  def get_glimpse(self, loc):
    """Take glimpse on the original images.

    :param loc: 2D tuple locations, values between [-1.0, 1.0]
    :return: glimpse vector
    """
    imgs = tf.reshape(self.images_ph, [
        tf.shape(self.images_ph)[0], self.original_size, self.original_size,
        self.num_channels
    ])

    glimpse_all_scales = []
    for scale in range(1, self.num_scales + 1):
      glimpse_imgs = tf.image.extract_glimpse(imgs,
                                              [self.win_size * scale, self.win_size * scale], loc) # BHWC

      glimpse_imgs = tf.image.resize_bilinear(glimpse_imgs, (self.win_size, self.win_size)) # BHWC
      glimpse_imgs = tf.reshape(glimpse_imgs, [
          tf.shape(loc)[0], self.win_size * self.win_size * self.num_channels
      ]) #(B, H * W * C)

      glimpse_all_scales.append(glimpse_imgs)

    return tf.stack(glimpse_all_scales, axis=1) # (B, H * W * C * S)

  def __call__(self, loc):
    glimpse_input = self.get_glimpse(loc) # (B, H * W * C * S)
    glimpse_input = tf.reshape(glimpse_input,
                               (tf.shape(loc)[0], self.sensor_size))
    g = tf.nn.relu(tf.nn.xw_plus_b(glimpse_input, self.w_g0, self.b_g0))
    g = tf.nn.xw_plus_b(g, self.w_g1, self.b_g1)
    l = tf.nn.relu(tf.nn.xw_plus_b(loc, self.w_l0, self.b_l0))
    l = tf.nn.xw_plus_b(l, self.w_l1, self.b_l1)
    g = tf.nn.relu(g + l)
    return g


class LocNet(object):
  """Location network.

  Take output from other network and produce and sample the next location.

  """

  def __init__(self, config):
    self.loc_dim = config.loc_dim
    self.input_dim = config.cell_output_size
    self.loc_std = config.loc_std
    self._sampling = True

    self.init_weights()

  def init_weights(self):
    self.w = weight_variable((self.input_dim, self.loc_dim))
    self.b = bias_variable((self.loc_dim,))

  def __call__(self, input):
    mean = tf.clip_by_value(tf.nn.xw_plus_b(input, self.w, self.b), -1., 1.)
    mean = tf.stop_gradient(mean)
    if self._sampling:
      loc = mean + tf.random_normal(
          (tf.shape(input)[0], self.loc_dim), stddev=self.loc_std)
      loc = tf.clip_by_value(loc, -1., 1.)
    else:
      loc = mean
    loc = tf.stop_gradient(loc)
    return loc, mean

  @property
  def sampling(self):
    return self._sampling

  @sampling.setter
  def sampling(self, sampling):
    self._sampling = sampling


In [10]:
#import glimpse.py
# placeholders
images_ph = tf.placeholder(tf.float32,
                           [None, config.original_size * config.original_size *
                            config.num_channels])
labels_ph = tf.placeholder(tf.int64, [None])

# Monte Carlo sampling, duplicate M times, see Eqn (2)
images_expanded = tf.tile(images_ph, [config.M, 1])
labels_expanded = tf.tile(labels_ph, [config.M])

# Build the aux nets.
with tf.variable_scope('glimpse_net'):
  # gl = GlimpseNet(config, images_ph)
  gl = GlimpseNet(config, images_expanded)
with tf.variable_scope('loc_net'):
  loc_net = LocNet(config)

# number of examples
# N = tf.shape(images_ph)[0]
N = tf.shape(images_expanded)[0]
init_loc = tf.random_uniform((N, 2), minval=-1, maxval=1)
init_glimpse = gl(init_loc)
# Core network.
lstm_cell = rnn_cell.LSTMCell(config.cell_size, state_is_tuple=True)
init_state = lstm_cell.zero_state(N, tf.float32)
inputs = [init_glimpse]
inputs.extend([0] * (config.num_glimpses))
outputs, _ = seq2seq.rnn_decoder(
    inputs, init_state, lstm_cell, loop_function=get_next_input)

# Time independent baselines
with tf.variable_scope('baseline'):
  w_baseline = weight_variable((config.cell_output_size, 1))
  b_baseline = bias_variable((1,))
baselines = []
for t, output in enumerate(outputs[1:]):
  baseline_t = tf.nn.xw_plus_b(output, w_baseline, b_baseline)
  baseline_t = tf.squeeze(baseline_t)
  baselines.append(baseline_t)
baselines = tf.stack(baselines)  # [timesteps, batch_sz]
baselines = tf.transpose(baselines)  # [batch_sz, timesteps]

# Take the last step only.
output = outputs[-1]
# Build classification network.
with tf.variable_scope('cls'):
  w_logit = weight_variable((config.cell_output_size, config.num_classes))
  b_logit = bias_variable((config.num_classes,))
logits = tf.nn.xw_plus_b(output, w_logit, b_logit)
softmax = tf.nn.softmax(logits)
correct_prediction = tf.equal(tf.argmax(softmax,1), labels_expanded)
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

# average statistics after Monte Carlo sampling "M"
avg_softmax = tf.reshape(softmax, [config.M, -1, config.num_classes])
avg_softmax = tf.reduce_mean(avg_softmax, axis=0) # (B, num_classes)
avg_y_pred = tf.argmax(avg_softmax, axis=1) #(B, )
avg_acc = tf.reduce_mean(tf.cast(tf.equal(avg_y_pred, labels_ph), tf.float32))

# cross-entropy.
xent = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels_expanded)
xent = tf.reduce_mean(xent)

# 0/1 reward.
y_pred = tf.argmax(logits, 1)
reward = tf.cast(tf.equal(y_pred, labels_expanded), tf.float32)
rewards = tf.expand_dims(reward, 1)  # [batch_sz, 1]
rewards = tf.tile(rewards, (1, config.num_glimpses))  # [batch_sz, timesteps]
logll = loglikelihood(loc_mean_arr, sampled_loc_arr, config.loc_std)
advs = rewards - tf.stop_gradient(baselines)
logllratio = tf.reduce_mean(logll * advs)
reward = tf.reduce_mean(reward)

baselines_mse = tf.reduce_mean(tf.square((rewards - baselines)))
var_list = tf.trainable_variables()

# hybrid loss
loss = -logllratio + xent + baselines_mse  # `-` for minimize
grads = tf.gradients(loss, var_list)
grads, _ = tf.clip_by_global_norm(grads, config.max_grad_norm)

# learning rate
global_step = tf.get_variable(
    'global_step', [], initializer=tf.constant_initializer(0), trainable=False)
training_steps_per_epoch = x_train.shape[0] // config.batch_size
starter_learning_rate = config.lr_start
# decay per training epoch
learning_rate = tf.train.exponential_decay(
    starter_learning_rate,
    global_step,
    training_steps_per_epoch,
    config.decay,
    staircase=True)
learning_rate = tf.maximum(learning_rate, config.lr_min)
opt = tf.train.AdamOptimizer(learning_rate)
train_op = opt.apply_gradients(zip(grads, var_list), global_step=global_step)

# tensorboard logging
tf.summary.scalar("loss", loss)
tf.summary.scalar("reward", reward)
tf.summary.scalar("xent", xent)
tf.summary.scalar("baselines_mse", baselines_mse)
tf.summary.scalar("logllratio", logllratio)
tf.summary.scalar("avg_accuracy", avg_acc)
summary_op = tf.summary.merge_all()



Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
The TensorFlow Distributions library has moved to TensorFlow Probability (https://github.com/tensorflow/probability). You should update all references to use `tfp.distributions` instead of `tf.distributions`.
Instructions for updating:
Use tf.cast instead.


In [11]:
# stats
import time
import datetime

In [12]:
saver = tf.train.Saver()

with tf.Session() as sess:
  sess.run(tf.initialize_all_variables())
  writer = tf.summary.FileWriter(logdir=tfLogFile, graph=tf.get_default_graph())
  start___time = time.time()
  for epoch in xrange(num_epochs):
    print("num_epochs: ",epoch)
    start_epoch = time.time()
  
    num_batches = x_train.shape[0] // config.batch_size
    num_samples = num_batches * config.batch_size
    avg_loss = 0.

    for batch in range(num_batches):
      start = batch * config.batch_size
      end = (batch + 1) * config.batch_size
      images, labels = x_train[start:end], y_train[start:end]

      loc_net.samping = True
      avg_acc_val, softmax_val, adv_val, baselines_val, rewards_val, baselines_mse_val, xent_val, logllratio_val, \
          reward_val, loss_val, lr_val, _, summary_val = sess.run(
              [avg_acc, softmax, advs, baselines, rewards, baselines_mse, xent, logllratio,
               reward, loss, learning_rate, train_op, summary_op],
              feed_dict={
                  images_ph: images,
                  labels_ph: labels
              })
      writer.add_summary(summary_val, epoch * num_batches + batch)

      avg_loss += loss_val / num_batches

      if batch and batch % 100 == 0:
        logging.info('epoch {}: batch: {}/{}'.format(epoch, batch, num_batches - 1))
        logging.info('epoch {}: avg_accuracy: {}'.format(epoch, avg_acc_val))
        logging.info('epoch {}: lr = {:3.6f}'.format(epoch, lr_val))
        logging.info(
            'epoch {}: reward = {:3.4f}\tloss = {:3.4f}\txent = {:3.4f}'.format(
                epoch, reward_val, loss_val, xent_val))
        logging.info('llratio = {:3.4f}\tbaselines_mse = {:3.4f}'.format(
            logllratio_val, baselines_mse_val))
        logging.debug('baselines = {}\trewards = {}'.format(baselines_val, rewards_val))

    # if epoch and epoch % training_steps_per_epoch == 0:
    if True: # print each epoch
      # Evaluation
      for dataset in [(x_va, y_va,'va')]:
        num_batches = dataset[0].shape[0] // config.eval_batch_size
        correct_cnt = 0
        num_samples = num_batches * config.eval_batch_size
        loc_net.sampling = True
        for test_step in xrange(num_batches):
          images, labels = dataset[0][test_step * config.eval_batch_size : (test_step+1) * config.eval_batch_size], dataset[1][test_step * config.eval_batch_size : (test_step+1) * config.eval_batch_size]

          avg_y_pred_val = sess.run(avg_y_pred,
                                 feed_dict={
                                     images_ph: images,
                                     labels_ph: labels
                                 })

          correct_cnt += np.sum(avg_y_pred_val == labels)
        acc = correct_cnt / num_samples

        logging.info('epoch {}: valid_accuracy = {}'.format(epoch, acc))
    
    print("time_per_epoch: ",str(time.time() - start_epoch) )
    logging.info('time_per_epoch: {}'.format(time.time() - start_epoch))
    print("time left: ",str(datetime.timedelta(seconds=(time.time() - start___time)/(1+epoch) * (num_epochs-epoch) )))
  logging.info('Training_time = {}'.format(time.time() - start___time))
  for dataset in [(x_test, y_test, 'test')]:
    num_batches = dataset[0].shape[0] // config.eval_batch_size
    correct_cnt = 0
    num_samples = num_batches * config.eval_batch_size
    loc_net.sampling = True
    for test_step in xrange(num_batches):
      images, labels = dataset[0][test_step * config.eval_batch_size: (test_step + 1) * config.eval_batch_size], \
                       dataset[1][test_step * config.eval_batch_size: (test_step + 1) * config.eval_batch_size]

      avg_y_pred_val = sess.run(avg_y_pred,
                                feed_dict={
                                  images_ph: images,
                                  labels_ph: labels
                                })

      correct_cnt += np.sum(avg_y_pred_val == labels)
    acc = correct_cnt / num_samples
    logging.info('test_accuracy = {}'.format(acc))
    
  save_path = saver.save(sess, savedModel_path)
  logging.info('Model saved in file: {}'.format(save_path))
  print('Model saved in file: {}'.format(save_path))
  logging.info('total time = {}'.format(time.time() - start___time))
  print("time: ",time.time() - start___time)
  
  value = str(str(local_device_protos))
  text_tensor = tf.make_tensor_proto(value, dtype=tf.string)
  meta = tf.SummaryMetadata()
  meta.plugin_data.plugin_name = "text"
  summary = tf.Summary()
  summary.value.add(tag="gpu_tag", metadata=meta, tensor=text_tensor)
  writer.add_summary(summary)
  logging.info('gpu = {}'.format(str(local_device_protos)))


Instructions for updating:
Use `tf.global_variables_initializer` instead.
num_epochs:  0
time_per_epoch:  43.8110625743866
time left:  1:13:01.106257
num_epochs:  1
time_per_epoch:  42.51531219482422
time left:  1:11:13.155551
num_epochs:  2
time_per_epoch:  42.76786756515503
time left:  1:10:17.111169
num_epochs:  3
time_per_epoch:  44.9837441444397
time left:  1:10:21.415363
num_epochs:  4
time_per_epoch:  43.57057166099548
time left:  1:09:38.890627
num_epochs:  5
time_per_epoch:  43.03393769264221
time left:  1:08:47.520230
num_epochs:  6
time_per_epoch:  43.233421087265015
time left:  1:08:01.196782
num_epochs:  7
time_per_epoch:  44.02527475357056
time left:  1:07:24.874338
num_epochs:  8
time_per_epoch:  43.946797609329224
time left:  1:06:46.017101
num_epochs:  9
time_per_epoch:  45.61204385757446
time left:  1:06:21.295692
num_epochs:  10
time_per_epoch:  42.81254744529724
time left:  1:05:29.871015
num_epochs:  11
time_per_epoch:  42.71181631088257
time left:  1:04:39.134604


In [13]:
  print('Model saved in file: {}'.format(save_path))

Model saved in file: model-100x100-4glimpse-12x12-4scales-256batch-100epochs.ckpt
