<a href="https://colab.research.google.com/github/JLrumberger/TensorflowTryOuts/blob/master/4_FishNet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Authenticate gcs bucket

In [0]:
import json
import os
import pprint
import tensorflow as tf
import time

use_tpu = True #@param {type:"boolean"}
bucket = 'gcolab' #@param {type:"string"}

assert bucket, 'Must specify an existing GCS bucket name'
print('Using bucket: {}'.format(bucket)) #gcolab

if use_tpu:
    assert 'COLAB_TPU_ADDR' in os.environ, 'Missing TPU; did you request a TPU in Notebook Settings?'

MODEL_DIR = 'gs://{}/{}'.format(bucket, time.strftime('tpuestimator/%Y-%m-%d-%H-%M-%S'))
print('Using model dir: {}'.format(MODEL_DIR))

from google.colab import auth
auth.authenticate_user()

if 'COLAB_TPU_ADDR' in os.environ:
  TF_MASTER = 'grpc://{}'.format(os.environ['COLAB_TPU_ADDR'])
  
  # Upload credentials to TPU.
  with tf.Session(TF_MASTER) as sess:    
    with open('/content/adc.json', 'r') as f:
      auth_info = json.load(f)
    tf.contrib.cloud.configure_gcs(sess, credentials=auth_info)
  # Now credentials are set for all future sessions on this TPU.
else:
  TF_MASTER=''

with tf.Session(TF_MASTER) as session:
  pprint.pprint(session.list_devices())

Using bucket: gcolab
Using model dir: gs://gcolab/tpuestimator/2019-01-24-13-01-07
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 15154662220332540682),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 14132379737731697547),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 8747735079900212106),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2318908220440493821),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 77492806112203606),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 4001567152007306733),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 100740209273723394),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 14292765378049059121),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU

Load data and set parameters

In [0]:
# Load training and eval datasets
import numpy as np
mnist = tf.contrib.learn.datasets.load_dataset("mnist")
train_data = mnist.train.images # Returns np.array
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_data = mnist.test.images # Returns np.array
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

params = {"x_train": {"x": train_data},
          "y_train": train_labels,
          "x_test" : {"x": eval_data},
          "y_test" : eval_labels,
          "iterations_per_loop": 1024, # after each loop the TPU passes back information to host CPU
          "train_batch_size":1024,
          "eval_batch_size":1024,
          "predict_batch_size":1024,
          "epochs": 20}


Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz


#  FishNet Backbone
A little [FishNet](https://papers.nips.cc/paper/7356-fishnet-a-versatile-backbone-for-image-region-and-pixel-level-prediction.pdf) implementation

![alt text](https://raw.githubusercontent.com/kevin-ssy/FishNet/master/head_pic.jpg)

## Upsampling & Refinement Block
.. and the rest of the blocks used

In [0]:
def bottleneck_Resblock(x, filters, is_training,dilation):
    x = tf.layers.batch_normalization(x,training=is_training)
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(x,filters,(1,1),padding='same')    
    x = tf.layers.batch_normalization(x,training=is_training)
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(x,filters,(3,3),padding='same',dilation_rate=dilation)    
    x = tf.layers.batch_normalization(x,training=is_training)
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(x,filters,(1,1),padding='same')    
    return x
  
def resblock(x, filters, is_training):
    x = tf.layers.batch_normalization(x,training=is_training)
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(x,filters,(3,3),padding='same')
    x = tf.layers.batch_normalization(x,training=is_training)
    x = tf.nn.relu(x)
    x = tf.layers.conv2d(x,filters,(3,3),padding='same')
    return x
  
def fishblock(a_prev,a_trans, is_training, k, mode):
    # a_prev contains the feature maps from the previous layer
    # a_trans contains the feature maps transfered from the fishtail
    x_sh = tf.concat([a_prev,a_trans], axis=3)
    n,h,w,c = x_sh.get_shape().as_list()
    m = bottleneck_Resblock(x_sh, c//k, is_training,(1, 1))
    if mode == "UP":
        r = tf.reduce_sum(tf.reshape(x_sh,[n, h, w, c//k, k]), axis=4,keepdims=False)
        out = tf.layers.conv2d_transpose(m+r,c//k,(2,2),(2,2))
    if mode == "DOWN":
        out = tf.nn.max_pool(x_sh+m, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    return out
  
def se_block(x, out_ch, is_training):
    n,h,w,c = x.get_shape()
    squeeze = tf.layers.average_pooling2d(x,[h,w],[h,w],"valid")
    excitation = tf.layers.conv2d(squeeze,out_ch//16,(1,1),padding='same')
    excitation = tf.nn.relu(excitation)
    excitation = tf.layers.conv2d(excitation,out_ch,(1,1),padding='same')
    excitation = tf.nn.sigmoid(excitation)
    excitation = tf.reshape(excitation, [-1,1,1,out_ch])
    scale = x * excitation
    return scale
  
def maxpool2d(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
  
def Fishnet(x,is_training):
    """ original 224->56->28->14->7->1->7->14->28->56->28->14->7->1
        ours 28->14->7->1->7->14->28->14->7->1 for mnist
    """
    filters = 32
    x = tf.reshape(x, shape=[-1, 28, 28, 1])
    # TAIL
    x = tf.layers.conv2d(x, filters=filters, kernel_size=(3,3),padding='same')
    hw_28 = x
    x += resblock(x,filters,is_training)
    x = maxpool2d(x) # pools to 14x14
    hw_14 = x
    x += resblock(x,filters,is_training)
    x = maxpool2d(x) # pools to 7x7
    hw_7 = x
    x += resblock(x,filters,is_training)
    x += se_block(x,32,is_training) 
    # BODY
    x = fishblock(x,hw_7,is_training,2,"UP") # resizes to 14x14
    hw_14_2 = x
    x += resblock(x,filters,is_training)
    x = fishblock(x,hw_14,is_training,2,"UP") # resizes to 28x28
    hw_28_2 = x
    x += resblock(hw_28_2,filters,is_training)
    # HEAD
    x = fishblock(x,hw_28_2,is_training,1,"DOWN") # resizes to 14x14, outputs c(32,32) = 64 channels
    filters = 96
    x = fishblock(x,hw_14_2,is_training,1,"DOWN") # resizes to 7x7, outputs c(64,32) = 96 channels
    x = tf.reshape(x, [-1, 7*7*filters])
    x = tf.layers.dense(x,512,activation='relu')
    out = tf.layers.dense(x,10)
    return out
    

In [0]:
def model_fn(features, labels, mode, params):
  # Specify the model
  logits = Fishnet(x = features["x"],
                  is_training=(mode == tf.estimator.ModeKeys.TRAIN))

  predictions = {
      # Generate predictions (for PREDICT and EVAL mode)
      "classes": tf.argmax(input=logits, axis=1),
      # Add `softmax_tensor` to the graph. It is used for PREDICT and by the
      # `logging_hook`.
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor")
  }

  if mode == tf.estimator.ModeKeys.PREDICT:
    return tf.contrib.tpu.TPUEstimatorSpec(mode=mode, predictions=predictions)

  # Calculate Loss (for both TRAIN and EVAL modes)
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)
  # Configure the Training Op (for TRAIN mode)
  if mode == tf.estimator.ModeKeys.TRAIN:
    learning_rate = tf.train.exponential_decay(0.001,
          #FLAGS.learning_rate,
          tf.train.get_global_step(),
          decay_steps=100000,
          decay_rate=0.96)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
    optimizer = tf.contrib.tpu.CrossShardOptimizer(optimizer)
    # update op for batch_norm layer
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        train_op = optimizer.minimize(loss=loss,
            global_step=tf.train.get_global_step())
    return tf.contrib.tpu.TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op)

  # Add evaluation metrics (for EVAL mode)
  return tf.contrib.tpu.TPUEstimatorSpec(
      mode=mode, loss=loss, eval_metrics=(metric_fn,[labels,logits]))

  
def metric_fn(labels, logits):
    accuracy = tf.metrics.accuracy(
        labels=labels, predictions=tf.argmax(logits, axis=1))
    return {"accuracy": accuracy}

# Create model input functions
def train_input_fn(params):
  # Convert the inputs to a Dataset.
  dataset = tf.data.Dataset.from_tensor_slices((params["x_train"], params["y_train"]))
  # Shuffle, repeat, and batch the examples.
  dataset = dataset.shuffle(1000).repeat()
  dataset = dataset.batch(params["batch_size"], drop_remainder=True)
  return dataset

def eval_input_fn(params):
  # Convert the inputs to a Dataset.
  dataset = tf.data.Dataset.from_tensor_slices((params["x_test"], params["y_test"]))
  # Shuffle, repeat, and batch the examples.
  dataset = dataset.shuffle(1000).repeat()
  dataset = dataset.batch(params["batch_size"], drop_remainder=True)
  return dataset

def predict_input_fn(params):
  # generates MC dropout probabilities
  dataset = tf.data.Dataset.from_tensor_slices(params["x_test"])
  dataset = dataset.repeat(params["nb_MC_samples"])
  dataset = dataset.batch(params["batch_size"])
  return dataset
  
# Create the Estimator
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])

run_config = tf.contrib.tpu.RunConfig(
    cluster=tpu_cluster_resolver,
    model_dir=MODEL_DIR, # google cloud services bucket
    session_config=tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=True),
    tpu_config=tf.contrib.tpu.TPUConfig(params["iterations_per_loop"])
)

estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=model_fn,
    use_tpu=True,
    train_batch_size=params["train_batch_size"],
    eval_batch_size=params["eval_batch_size"],
    predict_batch_size=params["predict_batch_size"],
    config=run_config,
    params=params
)

# Train the model

estimator.train(input_fn=train_input_fn, steps=params["epochs"]*params["train_batch_size"])

estimator.evaluate(input_fn= eval_input_fn, steps=params["eval_steps"])



INFO:tensorflow:Using config: {'_model_dir': 'gs://gcolab/tpuestimator/2019-01-24-13-01-07', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
log_device_placement: true
cluster_def {
  job {
    name: "worker"
    tasks {
      value: "10.80.133.34:8470"
    }
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': None, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f559109f358>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': b'grpc://10.80.133.34:8470', '_evaluation_master': b'grpc://10.80.133.34:8470', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1, '_tpu_config': TPUConfig(iterations_per_loop=1024, n

{'accuracy': 0.9876995, 'global_step': 20480, 'loss': 0.036881298}