<a href="https://colab.research.google.com/github/JLrumberger/TensorflowTryOuts/blob/master/3_DropConnect_and_BCNNs.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Replace DropOut with DropConnect
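My first custom Keras layer: [DropConnect](https://cs.nyu.edu/~wanli/dropc/dropc.pdf), which randomly drops individual weights (connections) instead of whole activations as Dropout does.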


In [0]:
import tensorflow as tf

class DropConnectConv2dLayer(tf.keras.layers.Layer):
  """Stride-1 2-D convolution (no bias) with an optional DropConnect mask on the kernel.

  When `drop_connect` is true, individual kernel weights are zeroed at random
  on every call; the surviving weights keep their original magnitude.

  Args:
    filter_num: number of output channels.
    kernel_size: indexable pair `(height, width)` of the convolution window.
    padding: padding argument forwarded to `tf.nn.conv2d`.
    drop_connect: if true, apply the random weight mask in `call`.
    keep_prob: probability of keeping each individual weight.
  """

  def __init__(self, filter_num, kernel_size, padding, drop_connect, keep_prob):
    super(DropConnectConv2dLayer, self).__init__()
    self.num_outputs = filter_num
    self.kernel_size_h = kernel_size[0]
    self.kernel_size_w = kernel_size[1]
    self.padding = padding
    self.drop_connect = drop_connect
    self.keep_prob = keep_prob

  def build(self, input_shape):
    """Creates the kernel variable; the last entry of `input_shape` is the channel count."""
    if self.built:
      # Guard against creating a second kernel if build() is invoked again.
      return
    in_channels = input_shape[-1]
    # Depending on the TF version this is a Dimension (has `.value`) or a plain int/None.
    in_channels = getattr(in_channels, "value", in_channels)
    if in_channels is None:
      raise ValueError(
          "DropConnectConv2dLayer needs a statically known channel dimension.")
    self.kernel = self.add_weight(name="kernel",
                                  shape=[self.kernel_size_h,
                                         self.kernel_size_w,
                                         int(in_channels),
                                         self.num_outputs])
    # Let the base class record that the layer is built.
    super(DropConnectConv2dLayer, self).build(input_shape)

  def call(self, x):
    """Convolves `x` (NHWC) with the, possibly masked, kernel."""
    # No manual build() here: Keras' __call__ builds the layer from the static
    # input shape before call() runs. The dynamic tf.shape(x) tensor that used
    # to be passed cannot be converted with int() in graph mode.
    if self.drop_connect:
      # tf.nn.dropout rescales the kept weights by 1/keep_prob; multiplying by
      # keep_prob undoes that, leaving a pure 0/1 mask on the weights.
      kernel = tf.nn.dropout(self.kernel, keep_prob=self.keep_prob) * self.keep_prob
    else:
      kernel = self.kernel
    return tf.nn.conv2d(
        input=x,
        filter=kernel,
        strides=(1, 1, 1, 1),
        padding=self.padding,
        use_cudnn_on_gpu=True,
        data_format='NHWC',
        dilations=[1, 1, 1, 1])
  
class DropConnectDenseReluLayer(tf.keras.layers.Layer):
  """Dense layer (no bias) followed by ReLU, with an optional DropConnect mask on the weights.

  Args:
    nodes: number of output units.
    drop_connect: if true, randomly zero individual weights on every call.
    keep_prob: probability of keeping each individual weight.
  """

  def __init__(self, nodes, drop_connect, keep_prob):
    super(DropConnectDenseReluLayer, self).__init__()
    self.num_outputs = nodes
    self.drop_connect = drop_connect
    self.keep_prob = keep_prob

  def build(self, input_shape):
    """Creates the weight matrix; the last entry of `input_shape` is the feature count."""
    if self.built:
      # Guard against creating a second kernel if build() is invoked again.
      return
    in_features = input_shape[-1]
    # Depending on the TF version this is a Dimension (has `.value`) or a plain int/None.
    in_features = getattr(in_features, "value", in_features)
    if in_features is None:
      raise ValueError(
          "DropConnectDenseReluLayer needs a statically known feature dimension.")
    self.kernel = self.add_weight(name="kernel",
                                  shape=[int(in_features),
                                         self.num_outputs])
    # Let the base class record that the layer is built.
    super(DropConnectDenseReluLayer, self).build(input_shape)

  def call(self, x):
    """Returns relu(x @ kernel); `x` is presumably [batch, features] — confirm against callers."""
    # No manual build() here: Keras' __call__ builds the layer from the static
    # input shape before call() runs. The dynamic tf.shape(x) tensor that used
    # to be passed cannot be converted with int() in graph mode.
    if self.drop_connect:
      # tf.nn.dropout rescales the kept weights by 1/keep_prob; multiplying by
      # keep_prob undoes that, leaving a pure 0/1 mask on the weights.
      kernel = tf.nn.dropout(self.kernel, keep_prob=self.keep_prob) * self.keep_prob
    else:
      kernel = self.kernel
    return tf.nn.relu(tf.matmul(x, kernel))

# Bayesian Convolutional Neural Network
This follows [Gal and Ghahramani, 2016](https://arxiv.org/pdf/1506.02142.pdf), but uses DropConnect instead of Dropout as the source of randomness.

In [0]:
import json
import os
import pprint
import tensorflow as tf
import time

use_tpu = True #@param {type:"boolean"}
bucket = 'gcolab' #@param {type:"string"}

# The model directory below is created inside this bucket, so a name is mandatory.
assert bucket, 'Must specify an existing GCS bucket name'
print('Using bucket: {}'.format(bucket)) #gcolab

if use_tpu:
    assert 'COLAB_TPU_ADDR' in os.environ, 'Missing TPU; did you request a TPU in Notebook Settings?'

# Timestamped sub-directory, so each run of this cell gets its own model dir.
run_stamp = time.strftime('tpuestimator/%Y-%m-%d-%H-%M-%S')
MODEL_DIR = 'gs://{}/{}'.format(bucket, run_stamp)
print('Using model dir: {}'.format(MODEL_DIR))

from google.colab import auth
auth.authenticate_user()

# Empty master string when no TPU address is exported by the runtime.
tpu_address = os.environ.get('COLAB_TPU_ADDR')
TF_MASTER = '' if tpu_address is None else 'grpc://{}'.format(tpu_address)

if TF_MASTER:
  # Upload credentials to TPU.
  with tf.Session(TF_MASTER) as sess:
    with open('/content/adc.json', 'r') as f:
      auth_info = json.load(f)
    tf.contrib.cloud.configure_gcs(sess, credentials=auth_info)
  # Now credentials are set for all future sessions on this TPU.

# List the devices visible through the chosen master as a sanity check.
with tf.Session(TF_MASTER) as session:
  pprint.pprint(session.list_devices())

Using bucket: gcolab
Using model dir: gs://gcolab/tpuestimator/2019-01-24-13-01-07
[_DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:CPU:0, CPU, -1, 15154662220332540682),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_CPU:0, XLA_CPU, 17179869184, 14132379737731697547),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:XLA_GPU:0, XLA_GPU, 17179869184, 8747735079900212106),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:0, TPU, 17179869184, 2318908220440493821),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:1, TPU, 17179869184, 77492806112203606),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:2, TPU, 17179869184, 4001567152007306733),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:3, TPU, 17179869184, 100740209273723394),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU:4, TPU, 17179869184, 14292765378049059121),
 _DeviceAttributes(/job:tpu_worker/replica:0/task:0/device:TPU

In [0]:
# Load training and eval datasets
import numpy as np
mnist = tf.contrib.learn.datasets.load_dataset("mnist")

# Images are used exactly as the loader returns them; labels are cast to int32.
train_data, eval_data = mnist.train.images, mnist.test.images
train_labels = np.asarray(mnist.train.labels, dtype=np.int32)
eval_labels = np.asarray(mnist.test.labels, dtype=np.int32)

# Data first ...
params = {
    "x_train": {"x": train_data},
    "y_train": train_labels,
    "x_test": {"x": eval_data},
    "y_test": eval_labels,
}
# ... then the run configuration, all in the same dict.
params.update({
    # after each loop the TPU passes back information to host CPU
    "iterations_per_loop": 1024,
    "train_batch_size": 1024,
    "eval_batch_size": 1024,
    "predict_batch_size": 1024,
    "epochs": 20,
    "eval_steps": 1024,
    "nb_MC_samples": 100,
})


Extracting MNIST-data/train-images-idx3-ubyte.gz
Extracting MNIST-data/train-labels-idx1-ubyte.gz
Extracting MNIST-data/t10k-images-idx3-ubyte.gz
Extracting MNIST-data/t10k-labels-idx1-ubyte.gz


In [0]:
import numpy as np
import os
import tensorflow as tf

def maxpool2d(x):
    """Max-pools `x` with a 2x2 window, stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    stride = [1, 2, 2, 1]
    pooled = tf.nn.max_pool(x, ksize=window, strides=stride, padding='SAME')
    return pooled
  
def resblock(x, filters, is_training, keep_prob=0.8):
    """Residual branch: two (batch-norm -> ReLU -> 3x3 DropConnect conv) stages.

    The skip connection is NOT added here; callers add the result to their input.

    Args:
        x: input tensor fed to the first batch-norm (NHWC, as required by the conv layer).
        filters: number of output channels of both convolutions.
        is_training: forwarded to `tf.layers.batch_normalization(training=...)`.
        keep_prob: probability of keeping each convolution weight; defaults to
            the previously hard-coded 0.8.

    Returns:
        The output tensor of the second convolution.
    """
    # The randomness comes from DropConnect on the conv kernels (not activation
    # dropout); the layers are always created with drop_connect=True.
    for _ in range(2):
        x = tf.layers.batch_normalization(x, training=is_training)
        x = tf.nn.relu(x)
        x = DropConnectConv2dLayer(filter_num=filters,
                                   kernel_size=(3, 3),
                                   padding="SAME",
                                   drop_connect=True,
                                   keep_prob=keep_prob)(x)
    return x


def ResNet(x,is_training):
    """Builds the residual DropConnect network and returns 10-way logits.

    Layout: reshape to 28x28x1 -> 3x3 DropConnect conv -> three stages of three
    residual blocks each, with a 2x2 max-pool between stages -> flatten ->
    DropConnect dense+ReLU (1024 units) -> dense (10 units).

    Args:
        x: input tensor that can be reshaped to [-1, 28, 28, 1].
        is_training: forwarded to every residual block (batch-norm mode).
    """
    filters = 64
    blocks_per_stage = 3
    num_stages = 3

    # Reshape input to a 4D tensor
    net = tf.reshape(x, shape=[-1, 28, 28, 1])
    net = DropConnectConv2dLayer(filter_num=filters,
                                 kernel_size=(3, 3),
                                 padding="SAME",
                                 drop_connect=True,
                                 keep_prob=0.8)(net)

    for stage in range(num_stages):
        if stage > 0:
            # Halve the spatial resolution between stages: 28 -> 14 -> 7.
            net = maxpool2d(net)
        for _ in range(blocks_per_stage):
            net = net + resblock(net, filters, is_training)

    flat = tf.reshape(net, [-1, 7*7*filters])
    hidden = DropConnectDenseReluLayer(nodes=1024, drop_connect=True, keep_prob=0.8)(flat)
    out = tf.layers.dense(hidden, 10)
    return out
  
def model_fn(features, labels, mode, params):
  """Builds the graph for one Estimator mode and wraps it in a TPUEstimatorSpec.

  Args:
    features: dict; only `features["x"]` is read and passed to `ResNet`.
    labels: class labels for the sparse softmax cross-entropy loss (not used
      in PREDICT mode).
    mode: a `tf.estimator.ModeKeys` value.
    params: accepted for the Estimator interface; not read in this function.

  Returns:
    A `tf.contrib.tpu.TPUEstimatorSpec` carrying predictions (PREDICT),
    loss + train_op (TRAIN), or loss + eval_metrics (any other mode).
  """
  mode_keys = tf.estimator.ModeKeys
  training = (mode == mode_keys.TRAIN)

  # Specify the model
  logits = ResNet(x=features["x"], is_training=training)

  # Created in every mode; the softmax op carries the explicit name
  # "softmax_tensor".
  predictions = {
      "classes": tf.argmax(input=logits, axis=1),
      "probabilities": tf.nn.softmax(logits, name="softmax_tensor"),
  }
  if mode == mode_keys.PREDICT:
    return tf.contrib.tpu.TPUEstimatorSpec(mode=mode, predictions=predictions)

  # Loss is shared by TRAIN and EVAL.
  loss = tf.losses.sparse_softmax_cross_entropy(labels=labels, logits=logits)

  if not training:
    # EVAL: metrics are computed by metric_fn from (labels, logits).
    return tf.contrib.tpu.TPUEstimatorSpec(
        mode=mode, loss=loss, eval_metrics=(metric_fn, [labels, logits]))

  # TRAIN: plain SGD with an exponentially decaying learning rate, wrapped
  # in CrossShardOptimizer.
  global_step = tf.train.get_global_step()
  learning_rate = tf.train.exponential_decay(0.001,
                                             global_step,
                                             decay_steps=100000,
                                             decay_rate=0.96)
  sgd = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
  optimizer = tf.contrib.tpu.CrossShardOptimizer(sgd)

  # Run the collected update ops (batch-norm statistics) with every step.
  update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
  with tf.control_dependencies(update_ops):
    train_op = optimizer.minimize(loss=loss, global_step=global_step)
  return tf.contrib.tpu.TPUEstimatorSpec(mode=mode, loss=loss, train_op=train_op)

  
def metric_fn(labels, logits):
    """Returns the evaluation metrics dict: accuracy of argmax(logits) against `labels`."""
    predicted_classes = tf.argmax(logits, axis=1)
    metrics = {
        "accuracy": tf.metrics.accuracy(labels=labels, predictions=predicted_classes),
    }
    return metrics

# Create model input functions
def train_input_fn(params):
  """Training input pipeline: shuffled, endlessly repeated, fixed-size batches.

  Reads `params["x_train"]`, `params["y_train"]` and `params["batch_size"]`.
  NOTE(review): "batch_size" is not a key of the notebook's own params dict;
  it is presumably injected by TPUEstimator — confirm.
  """
  examples = (params["x_train"], params["y_train"])
  batch_size = params["batch_size"]
  return (
      tf.data.Dataset.from_tensor_slices(examples)
      .shuffle(1000)   # shuffle buffer of 1000 elements
      .repeat()        # no argument: repeat without end
      .batch(batch_size, drop_remainder=True)
  )

def eval_input_fn(params):
  """Evaluation input pipeline: shuffled, endlessly repeated, fixed-size batches.

  Reads `params["x_test"]`, `params["y_test"]` and `params["batch_size"]`.
  NOTE(review): "batch_size" is not a key of the notebook's own params dict;
  it is presumably injected by TPUEstimator — confirm.
  """
  examples = (params["x_test"], params["y_test"])
  batch_size = params["batch_size"]
  return (
      tf.data.Dataset.from_tensor_slices(examples)
      .shuffle(1000)   # shuffle buffer of 1000 elements
      .repeat()        # no argument: repeat without end
      .batch(batch_size, drop_remainder=True)
  )

def predict_input_fn(params):
  """Prediction input pipeline for Monte-Carlo sampling.

  The test features are repeated `params["nb_MC_samples"]` times (one full
  pass per stochastic forward pass) and then batched with
  `params["batch_size"]`; the last batch may be smaller.
  """
  mc_passes = params["nb_MC_samples"]
  test_features = tf.data.Dataset.from_tensor_slices(params["x_test"])
  return test_features.repeat(mc_passes).batch(params["batch_size"])
  
# Create the Estimator
tpu_cluster_resolver = tf.contrib.cluster_resolver.TPUClusterResolver(tpu='grpc://' + os.environ['COLAB_TPU_ADDR'])

run_config = tf.contrib.tpu.RunConfig(
    cluster=tpu_cluster_resolver,
    model_dir=MODEL_DIR, # google cloud services bucket
    session_config=tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=True),
    tpu_config=tf.contrib.tpu.TPUConfig(params["iterations_per_loop"])
)

estimator = tf.contrib.tpu.TPUEstimator(
    model_fn=model_fn,
    use_tpu=True,
    train_batch_size=params["train_batch_size"],
    eval_batch_size=params["eval_batch_size"],
    predict_batch_size=params["predict_batch_size"],
    config=run_config,
    params=params
)

# Train the model
# One epoch is one pass over the training set, i.e. num_examples // batch_size
# steps. The previous `epochs * train_batch_size` treated the batch size as
# the number of steps per epoch and trained far longer than `epochs` passes.
steps_per_epoch = len(params["y_train"]) // params["train_batch_size"]
train_steps = params["epochs"] * steps_per_epoch

estimator.train(input_fn=train_input_fn, steps=train_steps)

estimator.evaluate(input_fn= eval_input_fn, steps=params["eval_steps"])

