<a href="https://colab.research.google.com/github/JoyeBright/Semi-supervised-sentiment/blob/PiEnsembling/PiEnsembling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow==1.13.2

Collecting tensorflow==1.13.2
[?25l  Downloading https://files.pythonhosted.org/packages/db/d3/651f95288a6cd9094f7411cdd90ef12a3d01a268009e0e3cd66b5c8d65bd/tensorflow-1.13.2-cp36-cp36m-manylinux1_x86_64.whl (92.6MB)
[K     |████████████████████████████████| 92.6MB 33kB/s 
Collecting tensorflow-estimator<1.14.0rc0,>=1.13.0
[?25l  Downloading https://files.pythonhosted.org/packages/bb/48/13f49fc3fa0fdf916aa1419013bb8f2ad09674c275b4046d5ee669a46873/tensorflow_estimator-1.13.0-py2.py3-none-any.whl (367kB)
[K     |████████████████████████████████| 368kB 32.4MB/s 
Collecting tensorboard<1.14.0,>=1.13.0
[?25l  Downloading https://files.pythonhosted.org/packages/0f/39/bdd75b08a6fba41f098b6cb091b9e8c7a80e1b4d679a581a0ccd17b10373/tensorboard-1.13.1-py3-none-any.whl (3.2MB)
[K     |████████████████████████████████| 3.2MB 42.7MB/s 
Collecting mock>=2.0.0
  Downloading https://files.pythonhosted.org/packages/cd/74/d72daf8dff5b6566db857cfd088907bb0355f5dd2914c4b3ef065c790735/mock-4.0.2-py3-non

In [2]:
import math
import numpy as np 
import tensorflow as tf

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


## **Some Necessary Imports and Modules**

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.layers import base
from tensorflow.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.util.tf_export import tf_export


class _Conv(base.Layer):
    """Rank-generic convolution layer with optional weight normalization and
    mean-only batch normalization.

    Args:
        rank: Number of spatial dimensions of the convolution.
        filters: Number of output channels.
        kernel_size: Int or tuple of `rank` ints (see `utils.normalize_tuple`).
        strides: Int or tuple of `rank` ints.
        padding: Padding mode, normalized with `utils.normalize_padding`.
        data_format: `'channels_last'` or `'channels_first'`.
        dilation_rate: Int or tuple of `rank` ints.
        activation: Optional callable applied to the layer output.
        use_bias: Whether a bias vector is created and added.
        kernel_initializer, kernel_regularizer, kernel_constraint: Forwarded
            to `add_variable` for the `kernel` variable.
        bias_initializer, bias_regularizer, bias_constraint: Forwarded to
            `add_variable` for the `bias` variable.
        activity_regularizer, trainable, name, **kwargs: Forwarded to
            `base.Layer.__init__`.
        weight_norm: If True, the kernel used in the forward pass is
            `g * V / ||V||`, with the norm taken per output filter.
        mean_only_batch_norm: If True, the mean over axis 0 of the
            convolution output is subtracted before bias and activation.
        mean_only_batch_norm_momentum: Decay of the running mean that is
            subtracted when `training` is False.
    """

    def __init__(self, rank,
                 filters,
                 kernel_size,
                 strides=1,
                 padding='valid',
                 data_format='channels_last',
                 dilation_rate=1,
                 activation=None,
                 use_bias=True,
                 kernel_initializer=None,
                 bias_initializer=init_ops.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 weight_norm=True,
                 mean_only_batch_norm=True,
                 mean_only_batch_norm_momentum=0.99,
                 name=None,
                 **kwargs):
        super(_Conv, self).__init__(trainable=trainable, name=name,
                                    activity_regularizer=activity_regularizer,
                                    **kwargs)
        self.rank = rank
        self.filters = filters
        self.kernel_size = utils.normalize_tuple(
            kernel_size, rank, 'kernel_size')
        self.strides = utils.normalize_tuple(strides, rank, 'strides')
        self.padding = utils.normalize_padding(padding)
        self.data_format = utils.normalize_data_format(data_format)
        self.dilation_rate = utils.normalize_tuple(
            dilation_rate, rank, 'dilation_rate')
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
        self.kernel_constraint = kernel_constraint
        self.bias_constraint = bias_constraint
        self.input_spec = base.InputSpec(ndim=self.rank + 2)
        self.weight_norm = weight_norm
        self.mean_only_batch_norm = mean_only_batch_norm
        self.mean_only_batch_norm_momentum = mean_only_batch_norm_momentum

    def build(self, input_shape):
        """Create the layer variables and the convolution op.

        Raises:
            ValueError: If the channel dimension of `input_shape` is undefined.
        """
        input_shape = tensor_shape.TensorShape(input_shape)
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape[channel_axis].value is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = input_shape[channel_axis].value
        kernel_shape = self.kernel_size + (input_dim, self.filters)

        # NOTE: this variable is always created (so the variable list does not
        # depend on `weight_norm`), but the forward pass only reads it when
        # `weight_norm` is False.
        self.kernel = self.add_variable(name='kernel',
                                        shape=kernel_shape,
                                        initializer=self.kernel_initializer,
                                        regularizer=self.kernel_regularizer,
                                        constraint=self.kernel_constraint,
                                        trainable=True,
                                        dtype=self.dtype)

        if self.weight_norm:
            # V uses the layer dtype, like g, so that g * normalize(V) does not
            # mix dtypes when the layer is not float32.
            self.V = self.add_variable(name='V_weight_norm',
                                       shape=kernel_shape,
                                       dtype=self.dtype,
                                       initializer=tf.random_normal_initializer(
                                           0, 0.05),
                                       trainable=True)
            self.g = self.add_variable(name='g_weight_norm',
                                       shape=(self.filters,),
                                       initializer=init_ops.ones_initializer(),
                                       dtype=self.dtype,
                                       trainable=True)
        if self.mean_only_batch_norm:
            # Empty list = "no running mean yet"; replaced by a tensor on the
            # first training call.
            self.batch_norm_running_average = []

        if self.use_bias:
            self.bias = self.add_variable(name='bias',
                                          shape=(self.filters,),
                                          initializer=self.bias_initializer,
                                          regularizer=self.bias_regularizer,
                                          constraint=self.bias_constraint,
                                          trainable=True,
                                          dtype=self.dtype)
        else:
            self.bias = None
        self.input_spec = base.InputSpec(ndim=self.rank + 2,
                                         axes={channel_axis: input_dim})
        self._convolution_op = nn_ops.Convolution(
            input_shape,
            filter_shape=self.kernel.get_shape(),
            dilation_rate=self.dilation_rate,
            strides=self.strides,
            padding=self.padding.upper(),
            data_format=utils.convert_data_format(self.data_format,
                                                  self.rank + 2))
        self.built = True

    def call(self, inputs, training=True):
        """Apply convolution, optional mean-only batch norm, bias and activation.

        Args:
            inputs: Input tensor of rank `self.rank + 2`.
            training: If True, subtract the current batch mean and update the
                running mean; otherwise subtract the stored running mean.

        Returns:
            The output tensor (after `activation` when one is set).
        """
        if self.weight_norm:
            # The kernel has `rank` spatial axes, then input channels, then
            # filters: normalize over everything except the filter axis and
            # broadcast g along the filter axis. For rank 2 this is exactly
            # axes [0, 1, 2] and shape [1, 1, 1, filters].
            norm_axes = list(range(self.rank + 1))
            g_shape = [1] * (self.rank + 1) + [self.filters]
            kernel = tf.reshape(self.g, g_shape) * \
                tf.nn.l2_normalize(self.V, norm_axes)
        else:
            kernel = self.kernel

        outputs = self._convolution_op(inputs, kernel)

        if self.mean_only_batch_norm:
            if training:
                # NOTE(review): the mean is taken over axis 0 only, so one
                # offset is kept per spatial position and channel rather than
                # per channel - confirm this is intended.
                mean = tf.reduce_mean(outputs, axis=0)
                # If first iteration
                if self.batch_norm_running_average == []:
                    self.batch_norm_running_average = mean
                else:
                    self.batch_norm_running_average = self.batch_norm_running_average * \
                        self.mean_only_batch_norm_momentum + mean * \
                        (1-self.mean_only_batch_norm_momentum)
                outputs = outputs - mean
            else:
                outputs = outputs - self.batch_norm_running_average

        if self.use_bias:
            if self.data_format == 'channels_first':
                if self.rank == 1:
                    # nn.bias_add does not accept a 1D input tensor.
                    bias = array_ops.reshape(self.bias, (1, self.filters, 1))
                    outputs += bias
                if self.rank == 2:
                    outputs = nn.bias_add(
                        outputs, self.bias, data_format='NCHW')
                if self.rank == 3:
                    # As of Mar 2017, direct addition is significantly slower than
                    # bias_add when computing gradients. To use bias_add, we collapse Z
                    # and Y into a single dimension to obtain a 4D input tensor.
                    outputs_shape = outputs.shape.as_list()
                    if outputs_shape[0] is None:
                        outputs_shape[0] = -1
                    outputs_4d = array_ops.reshape(outputs,
                                                   [outputs_shape[0], outputs_shape[1],
                                                    outputs_shape[2] *
                                                    outputs_shape[3],
                                                    outputs_shape[4]])
                    outputs_4d = nn.bias_add(
                        outputs_4d, self.bias, data_format='NCHW')
                    outputs = array_ops.reshape(outputs_4d, outputs_shape)
            else:
                outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Return the output `TensorShape` for a given input shape."""
        input_shape = tensor_shape.TensorShape(input_shape).as_list()
        channels_last = self.data_format == 'channels_last'
        # Spatial dims sit between batch and channels (channels_last) or
        # after batch and channels (channels_first).
        space = input_shape[1:-1] if channels_last else input_shape[2:]
        new_space = [
            utils.conv_output_length(
                dim,
                self.kernel_size[i],
                padding=self.padding,
                stride=self.strides[i],
                dilation=self.dilation_rate[i])
            for i, dim in enumerate(space)
        ]
        if channels_last:
            return tensor_shape.TensorShape([input_shape[0]] + new_space +
                                            [self.filters])
        return tensor_shape.TensorShape([input_shape[0], self.filters] +
                                        new_space)


@tf_export('layers.Conv2D')
class Conv2D(_Conv):
    """2-D specialisation of `_Conv` (fixes `rank=2`).

    Every argument is forwarded unchanged to `_Conv.__init__`; extra keyword
    arguments in `**kwargs` are forwarded as well.
    """

    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format='channels_last',
                 dilation_rate=(1, 1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer=None,
                 bias_initializer=init_ops.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 weight_norm=True,
                 mean_only_batch_norm=True,
                 name=None,
                 **kwargs):
        super(Conv2D, self).__init__(
            rank=2,
            # Convolution geometry.
            filters=filters, kernel_size=kernel_size, strides=strides,
            padding=padding, data_format=data_format,
            dilation_rate=dilation_rate,
            # Output post-processing.
            activation=activation, use_bias=use_bias,
            activity_regularizer=activity_regularizer,
            # Kernel variable options.
            kernel_initializer=kernel_initializer,
            kernel_regularizer=kernel_regularizer,
            kernel_constraint=kernel_constraint,
            # Bias variable options.
            bias_initializer=bias_initializer,
            bias_regularizer=bias_regularizer,
            bias_constraint=bias_constraint,
            # Normalization switches.
            weight_norm=weight_norm,
            mean_only_batch_norm=mean_only_batch_norm,
            # Generic layer options.
            trainable=trainable, name=name,
            **kwargs)


@tf_export('layers.conv2d')
def conv2d(inputs,
           filters,
           kernel_size,
           strides=(1, 1),
           padding='valid',
           data_format='channels_last',
           dilation_rate=(1, 1),
           activation=None,
           use_bias=True,
           kernel_initializer=None,
           bias_initializer=init_ops.zeros_initializer(),
           kernel_regularizer=None,
           bias_regularizer=None,
           activity_regularizer=None,
           kernel_constraint=None,
           bias_constraint=None,
           weight_norm=True,
           mean_only_batch_norm=True,
           trainable=True,
           name=None,
           reuse=None):
    """Functional interface: build a `Conv2D` layer and apply it to `inputs`.

    Args:
        inputs: Input tensor; its base dtype becomes the layer dtype.
        filters ... bias_constraint, trainable: Forwarded to `Conv2D`.
        weight_norm: Forwarded to `Conv2D`.
        mean_only_batch_norm: Forwarded to `Conv2D`.
        name: Used both as the layer name and as its variable scope.
        reuse: Forwarded to the layer as `_reuse`.

    Returns:
        The result of `layer.apply(inputs)`.
    """
    layer = Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        trainable=trainable,
        # Forward the normalization switches so the caller's choice is
        # honoured instead of always falling back to the Conv2D defaults.
        weight_norm=weight_norm,
        mean_only_batch_norm=mean_only_batch_norm,
        name=name,
        dtype=inputs.dtype.base_dtype,
        _reuse=reuse,
        _scope=name)
    return layer.apply(inputs)

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import six
from six.moves import xrange  # pylint: disable=redefined-builtin
import numpy as np

import tensorflow as tf

from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.layers import base
from tensorflow.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import standard_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export('layers.Dense')
class Dense(base.Layer):
    """Fully connected layer with optional weight normalization and mean-only
    batch normalization.

    Args:
        units: Number of output units.
        activation: Optional callable applied to the layer output.
        use_bias: Whether a bias vector is created and added.
        kernel_initializer, kernel_regularizer, kernel_constraint: Forwarded
            to `add_variable` for the `kernel` variable.
        bias_initializer, bias_regularizer, bias_constraint: Forwarded to
            `add_variable` for the `bias` variable.
        activity_regularizer, trainable, name, **kwargs: Forwarded to
            `base.Layer.__init__`.
        weight_norm: If True, the output is `(inputs @ V) * g / ||V||`, with
            the norm taken per output unit; `kernel` is then not used.
        mean_only_batch_norm: If True, the mean over axis 0 of the linear
            output is subtracted before bias and activation.
        mean_only_batch_norm_momentum: Decay of the running mean that is
            subtracted when `training` is False.
    """

    def __init__(self, units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer=None,
                 bias_initializer=init_ops.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 weight_norm=True,
                 mean_only_batch_norm=True,
                 mean_only_batch_norm_momentum=0.99,
                 name=None,
                 **kwargs):
        super(Dense, self).__init__(trainable=trainable, name=name,
                                    activity_regularizer=activity_regularizer,
                                    **kwargs)
        self.units = units
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
        self.kernel_constraint = kernel_constraint
        self.bias_constraint = bias_constraint
        self.input_spec = base.InputSpec(min_ndim=2)
        self.weight_norm = weight_norm
        self.mean_only_batch_norm = mean_only_batch_norm
        self.mean_only_batch_norm_momentum = mean_only_batch_norm_momentum

    def build(self, input_shape):
        """Create the layer variables.

        Raises:
            ValueError: If the last dimension of `input_shape` is undefined.
        """
        input_shape = tensor_shape.TensorShape(input_shape)
        if input_shape[-1].value is None:
            raise ValueError('The last dimension of the inputs to `Dense` '
                             'should be defined. Found `None`.')
        input_dim = input_shape[-1].value
        self.input_spec = base.InputSpec(min_ndim=2,
                                         axes={-1: input_dim})
        self.kernel = self.add_variable('kernel',
                                        shape=[input_dim, self.units],
                                        initializer=self.kernel_initializer,
                                        regularizer=self.kernel_regularizer,
                                        constraint=self.kernel_constraint,
                                        dtype=self.dtype,
                                        trainable=True)

        if self.weight_norm:
            # V uses the layer dtype, like g, so the two can be combined
            # without a dtype mismatch when the layer is not float32.
            self.V = self.add_variable(name='V_weight_norm',
                                       shape=[input_dim, self.units],
                                       dtype=self.dtype,
                                       initializer=tf.random_normal_initializer(
                                           0, 0.05),
                                       trainable=True)
            self.g = self.add_variable(name='g_weight_norm',
                                       shape=(self.units,),
                                       initializer=init_ops.ones_initializer(),
                                       dtype=self.dtype,
                                       trainable=True)

        if self.mean_only_batch_norm:
            # Empty list = "no running mean yet"; replaced by a tensor on the
            # first training call.
            self.batch_norm_running_average = []

        if self.use_bias:
            self.bias = self.add_variable('bias',
                                          shape=[self.units, ],
                                          initializer=self.bias_initializer,
                                          regularizer=self.bias_regularizer,
                                          constraint=self.bias_constraint,
                                          dtype=self.dtype,
                                          trainable=True)
        else:
            self.bias = None
        self.built = True

    def call(self, inputs, training=True):
        """Apply the linear map, optional mean-only batch norm, bias and activation.

        Args:
            inputs: Input tensor (converted to the layer dtype).
            training: If True, subtract the current batch mean and update the
                running mean; otherwise subtract the stored running mean.

        Returns:
            The output tensor (after `activation` when one is set).
        """
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
        shape = inputs.get_shape().as_list()

        if self.weight_norm:
            projected = tf.matmul(inputs, self.V)
            # Per-unit scale g / ||V[:, j]||, applied after the matmul.
            scaler = self.g / tf.sqrt(tf.reduce_sum(tf.square(self.V), [0]))
            outputs = tf.reshape(scaler, [1, self.units]) * projected
        elif len(shape) > 2:
            # Broadcasting is required for the inputs.
            outputs = standard_ops.tensordot(inputs, self.kernel,
                                             [[len(shape) - 1], [0]])
            # Reshape the output back to the original ndim of the input.
            if not context.executing_eagerly():
                outputs.set_shape(shape[:-1] + [self.units])
        else:
            outputs = gen_math_ops.mat_mul(inputs, self.kernel)

        if self.mean_only_batch_norm:
            if training:
                mean = tf.reduce_mean(outputs, axis=0)
                # If first iteration
                if self.batch_norm_running_average == []:
                    self.batch_norm_running_average = mean
                else:
                    self.batch_norm_running_average = self.batch_norm_running_average * \
                        self.mean_only_batch_norm_momentum + mean * \
                        (1-self.mean_only_batch_norm_momentum)
                # Centre on every training call, including the very first one
                # (same behaviour as the convolution layer in this notebook).
                outputs = outputs - mean
            else:
                outputs = outputs - self.batch_norm_running_average

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs

    def compute_output_shape(self, input_shape):
        """Return the input shape with its last dimension replaced by `units`.

        Raises:
            ValueError: If the innermost dimension of `input_shape` is undefined.
        """
        input_shape = tensor_shape.TensorShape(input_shape)
        input_shape = input_shape.with_rank_at_least(2)
        if input_shape[-1].value is None:
            raise ValueError(
                'The innermost dimension of input_shape must be defined, but saw: %s'
                % input_shape)
        return input_shape[:-1].concatenate(self.units)


@tf_export('layers.dense')
def dense(
        inputs, units,
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=init_ops.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        weight_norm=True,
        mean_only_batch_norm=True,
        trainable=True,
        name=None,
        reuse=None):
    """Functional interface: build a `Dense` layer and apply it to `inputs`.

    All options are forwarded to `Dense`. `name` doubles as the variable
    scope, `reuse` is passed as `_reuse`, and the layer dtype is taken from
    the base dtype of `inputs`.

    Returns:
        The result of `layer.apply(inputs)`.
    """
    layer_options = dict(
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        trainable=trainable,
        weight_norm=weight_norm,
        mean_only_batch_norm=mean_only_batch_norm,
        name=name,
        dtype=inputs.dtype.base_dtype,
        _scope=name,
        _reuse=reuse,
    )
    dense_layer = Dense(units, **layer_options)
    return dense_layer.apply(inputs)

## **Loss of Pi Ensembling Model**

In [0]:
def pi_model_loss(X_train_labeled, y_train_labeled, X_train_unlabeled,
                  pi_model, unsupervised_weight):
    """Compute the Pi-model loss for one labeled and one unlabeled batch.

    `pi_model` is called twice on each batch. The result is

        softmax_cross_entropy(y_train_labeled, first labeled pass)
        + unsupervised_weight * (MSE between the two labeled passes
                                 + MSE between the two unlabeled passes)

    Args:
        X_train_labeled: Labeled input batch.
        y_train_labeled: Targets for the labeled batch, passed as the
            `onehot_labels` argument of `tf.losses.softmax_cross_entropy`.
        X_train_unlabeled: Unlabeled input batch.
        pi_model: Callable mapping an input batch to predictions.
        unsupervised_weight: Multiplier applied to the consistency terms.

    Returns:
        The combined loss tensor.
    """
    # Two forward passes per batch: labeled first, then unlabeled.
    labeled_first = pi_model(X_train_labeled)
    labeled_second = pi_model(X_train_labeled)
    unlabeled_first = pi_model(X_train_unlabeled)
    unlabeled_second = pi_model(X_train_unlabeled)

    # NOTE(review): tf.losses.softmax_cross_entropy expects unscaled logits.
    # If `pi_model` already ends in a softmax, it is applied twice here -
    # confirm against the model definition.
    supervised_term = tf.losses.softmax_cross_entropy(
        y_train_labeled, labeled_first)

    consistency_term = (
        tf.losses.mean_squared_error(labeled_first, labeled_second) +
        tf.losses.mean_squared_error(unlabeled_first, unlabeled_second))

    return supervised_term + unsupervised_weight * consistency_term

## **Pi Gradient Generation**

In [0]:
def pi_model_gradients(X_train_labeled, y_train_labeled, X_train_unlabeled,
                       pi_model, unsupervised_weight):
    """Evaluate `pi_model_loss` under a gradient tape.

    Returns:
        A `(loss_value, gradients)` pair, where `gradients` is the result of
        differentiating the loss with respect to `pi_model.variables`.
    """
    with tf.GradientTape() as grad_tape:
        loss_value = pi_model_loss(
            X_train_labeled, y_train_labeled, X_train_unlabeled,
            pi_model, unsupervised_weight)

    gradients = grad_tape.gradient(loss_value, pi_model.variables)
    return loss_value, gradients

## **Ramp-up and -down Function**
NB: keep the ramp-up slow in the initial epochs.

In [0]:
def ramp_up_function(epoch, epoch_with_max_rampup=80):
    """Return the ramp-up factor for `epoch`.

    The factor is `exp(-5 * (1 - t)^2)` with
    `t = max(0, epoch) / epoch_with_max_rampup`, and exactly 1.0 once
    `epoch` is no longer below `epoch_with_max_rampup`.
    """
    if not epoch < epoch_with_max_rampup:
        return 1.0

    progress = max(0.0, float(epoch)) / float(epoch_with_max_rampup)
    remaining = 1.0 - progress
    return math.exp(-remaining * remaining * 5.0)

In [0]:
def ramp_down_function(epoch, num_epochs, epoch_with_max_rampdown=50):
    """Return the ramp-down factor for `epoch`.

    The factor is 1.0 until the last `epoch_with_max_rampdown` epochs. From
    `num_epochs - epoch_with_max_rampdown` onwards it is
    `exp(-(0.5 * k)^2 / epoch_with_max_rampdown)`, where `k` is the number of
    epochs elapsed since the ramp-down started.

    Args:
        epoch: Current epoch index.
        num_epochs: Total number of training epochs.
        epoch_with_max_rampdown: Length of the ramp-down window in epochs.
            Defaults to 50; a non-positive value disables the ramp-down.

    Returns:
        A float multiplier.
    """
    if epoch_with_max_rampdown <= 0:
        # No ramp-down window: avoid dividing by zero and keep full scale.
        return 1.0

    rampdown_start = num_epochs - epoch_with_max_rampdown
    if epoch >= rampdown_start:
        ep = (epoch - rampdown_start) * 0.5
        return math.exp(-(ep * ep) / epoch_with_max_rampdown)
    return 1.0

### **Pi-Model Class**

In [0]:
class PiModel(tf.keras.Model):
    """Convolutional classifier used for Pi-model training.

    Architecture, in call order:
      * three 3x3 "same" convolutions with 128 filters, 2x2 max-pool, dropout 0.5
      * three 3x3 "same" convolutions with 256 filters, 2x2 max-pool, dropout 0.5
      * one 3x3 "valid" convolution (512) and two 1x1 convolutions (256, 128)
      * mean over axes 1 and 2, then a 10-unit softmax `Dense` layer

    All convolutions use LeakyReLU(0.1), He-uniform kernel initialization,
    a constant 0.1 bias initializer, weight normalization and mean-only
    batch normalization.
    """

    def __init__(self):
        """Create all sub-layers."""
        super(PiModel, self).__init__()
        self._conv1a = self._make_conv(128, [3, 3], "same")
        self._conv1b = self._make_conv(128, [3, 3], "same")
        self._conv1c = self._make_conv(128, [3, 3], "same")
        self._pool1 = tf.keras.layers.MaxPool2D(
            pool_size=2, strides=2, padding="same")
        self._dropout1 = tf.keras.layers.Dropout(0.5)

        self._conv2a = self._make_conv(256, [3, 3], "same")
        self._conv2b = self._make_conv(256, [3, 3], "same")
        self._conv2c = self._make_conv(256, [3, 3], "same")
        self._pool2 = tf.keras.layers.MaxPool2D(
            pool_size=2, strides=2, padding="same")
        self._dropout2 = tf.keras.layers.Dropout(0.5)

        self._conv3a = self._make_conv(512, [3, 3], "valid")
        self._conv3b = self._make_conv(256, [1, 1], "same")
        self._conv3c = self._make_conv(128, [1, 1], "same")

        self._dense = Dense(units=10, activation=tf.nn.softmax,
                            kernel_initializer=tf.keras.initializers.he_uniform(),
                            bias_initializer=tf.keras.initializers.constant(
                                0.1),
                            weight_norm=True, mean_only_batch_norm=True)

    @staticmethod
    def _make_conv(filters, kernel_size, padding):
        """Build one convolution layer with the settings shared by the whole network."""
        return Conv2D(filters=filters, kernel_size=kernel_size,
                      padding=padding,
                      activation=tf.keras.layers.LeakyReLU(alpha=0.1),
                      kernel_initializer=tf.keras.initializers.he_uniform(),
                      bias_initializer=tf.keras.initializers.constant(0.1),
                      weight_norm=True, mean_only_batch_norm=True)

    def __aditive_gaussian_noise(self, input, std):
        """Return `input` plus zero-mean Gaussian noise with standard deviation `std`."""
        noise = tf.random_normal(shape=tf.shape(
            input), mean=0.0, stddev=std, dtype=tf.float32)
        return input + noise

    def __apply_image_augmentation(self, image):
        """Translate every image in the batch by an independent random shift.

        Each of the two shift components is drawn uniformly from
        {-2, -1, 0, 1, 2}.
        """
        # The static shape is enough here; no need to copy the tensor to the
        # host just to read its dimensions.
        batch_size, height, width = image.shape.as_list()[:3]
        # np.random.randint excludes the upper bound, hence 3 to allow +2.
        random_shifts = np.random.randint(-2, 3, (batch_size, 2))
        random_transformations = tf.contrib.image.translations_to_projective_transforms(
            random_shifts)
        image = tf.contrib.image.transform(image, random_transformations, 'NEAREST',
                                           output_shape=tf.convert_to_tensor([height, width], dtype=np.int32))
        return image

    def call(self, input, training=True):
        """Run the forward pass.

        Args:
            input: Batch of images.
            training: If True, Gaussian noise (std 0.15) and a random
                translation are applied to the input first. The flag is also
                passed on to the convolution, dropout and dense layers.

        Returns:
            The output of the final softmax `Dense` layer.
        """
        if training:
            h = self.__aditive_gaussian_noise(input, 0.15)
            h = self.__apply_image_augmentation(h)
        else:
            h = input

        h = self._conv1a(h, training)
        h = self._conv1b(h, training)
        h = self._conv1c(h, training)
        h = self._pool1(h)
        h = self._dropout1(h, training=training)

        h = self._conv2a(h, training)
        h = self._conv2b(h, training)
        h = self._conv2c(h, training)
        h = self._pool2(h)
        h = self._dropout2(h, training=training)

        h = self._conv3a(h, training)
        h = self._conv3b(h, training)
        h = self._conv3c(h, training)

        # Average Pooling
        h = tf.reduce_mean(h, axis=[1, 2])
        return self._dense(h, training)

## **DataSet Preparation**

In [0]:
from scipy.io import loadmat


In [0]:
import tensorflow as tf

class TfrecordLoader:
    """Build one-shot `tf.data` iterators over the four TFRecord splits.

    The records are read from `labeled_train.tfrecords`,
    `unlabeled_train.tfrecords`, `validation_set.tfrecords` and
    `test_set.tfrecords` inside `dataset_path`. Every example must carry a
    flattened float `image`, an int64 `label` and an int64 `image_index`.
    """

    # Number of records kept in the shuffle buffer of every pipeline.
    _SHUFFLE_BUFFER_SIZE = 10000

    def __init__(self, dataset_path, batch_size, epochs, image_size, num_classes,
                 fraction_of_labeled_per_batch=1.0, fraction_of_unlabeled_per_batch=1.0,
                 shuffle=True):
        """Store the pipeline configuration; no TensorFlow work happens here.

        Args:
            dataset_path: directory containing the four TFRecord files.
            batch_size: nominal number of samples per batch.
            epochs: number of passes over the unlabeled, validation and test
                sets (the labeled set is repeated `epochs * 1000` times).
            image_size: shape each flattened image is reshaped to.
            num_classes: depth of the one-hot encoded labels.
            fraction_of_labeled_per_batch: scales `batch_size` for the labeled
                iterator; must be in (0, 1].
            fraction_of_unlabeled_per_batch: scales `batch_size` for the
                unlabeled iterator.
            shuffle: whether every split is shuffled before batching.

        Raises:
            AssertionError: if `fraction_of_labeled_per_batch` is not in (0, 1].
        """
        assert (fraction_of_labeled_per_batch <=1.0 and fraction_of_labeled_per_batch > 0),"Fraction should be between 0 and 1"
        self._dataset_path = dataset_path
        self._labeled_tfrecord_path = dataset_path + '/labeled_train.tfrecords'
        self._unlabeled_tfrecord_path = dataset_path + '/unlabeled_train.tfrecords'
        self._validation_tfrecord_path = dataset_path + '/validation_set.tfrecords'
        self._test_tfrecord_path = dataset_path + '/test_set.tfrecords'
        self._batch_size = batch_size
        self._epochs = epochs
        self._image_size = image_size
        self._num_classes = num_classes
        self._fraction_of_labeled_per_batch = fraction_of_labeled_per_batch
        self._fraction_of_unlabeled_per_batch = fraction_of_unlabeled_per_batch
        self._shuffle = shuffle

    def _scaled_batch_size(self, fraction):
        """Return the batch size obtained by scaling `batch_size` by `fraction`."""
        if fraction == 1.0:
            return self._batch_size
        return round(self._batch_size * fraction)

    def _make_iterator(self, tfrecord_path, repeat_count, batch_size, parser):
        """Build the shuffle -> repeat -> map -> batch pipeline for one file."""
        dataset = tf.data.TFRecordDataset([tfrecord_path])
        if self._shuffle:
            # reshuffle_each_iteration defaults to True, so every repetition
            # of the file is shuffled anew.
            dataset = dataset.shuffle(self._SHUFFLE_BUFFER_SIZE)
        dataset = dataset.repeat(repeat_count)
        dataset = dataset.map(parser)
        dataset = dataset.batch(batch_size)
        return dataset.make_one_shot_iterator()

    def load_dataset(self):
        """Create the iterators for all four splits.

        Returns:
            A tuple `(train_labeled_iterator, train_unlabeled_iterator,
            validation_iterator, test_iterator)`. Each iterator yields
            `(image, one_hot_label, image_index)` batches.
        """

        def parse_example(sample):
            """Decode one serialized example into (image, one-hot label, index)."""
            # Image index is needed to keep track of the temporal ensembling
            # past predictions without losing the shuffled batches
            keys_to_features = {
                'image': tf.FixedLenFeature(
                    [self._image_size[0]*self._image_size[1]*self._image_size[2]], tf.float32),
                'label': tf.FixedLenFeature([], tf.int64),
                'image_index': tf.FixedLenFeature([], tf.int64)
            }
            parsed_features = tf.parse_single_example(sample, keys_to_features)
            image = tf.reshape(parsed_features['image'], self._image_size)
            # A label outside [0, num_classes), such as -1, becomes an
            # all-zero row.
            label = tf.one_hot(tf.cast(parsed_features['label'], tf.int64), self._num_classes)
            return image, label, tf.cast(parsed_features['image_index'], tf.int64)

        # The labeled set is repeated far more often than the other splits,
        # presumably so the small labeled set never runs out while stepping
        # through the unlabeled data -- TODO confirm the factor of 1000.
        train_labeled_iterator = self._make_iterator(
            self._labeled_tfrecord_path,
            self._epochs*1000,
            self._scaled_batch_size(self._fraction_of_labeled_per_batch),
            parse_example)

        # The unlabeled batch size follows the *unlabeled* fraction; checking
        # the labeled fraction here would ignore the unlabeled one whenever
        # the labeled fraction is left at 1.0.
        train_unlabeled_iterator = self._make_iterator(
            self._unlabeled_tfrecord_path,
            self._epochs,
            self._scaled_batch_size(self._fraction_of_unlabeled_per_batch),
            parse_example)

        validation_iterator = self._make_iterator(
            self._validation_tfrecord_path, self._epochs, self._batch_size,
            parse_example)

        test_iterator = self._make_iterator(
            self._test_tfrecord_path, self._epochs, self._batch_size,
            parse_example)

        return train_labeled_iterator, train_unlabeled_iterator, validation_iterator, test_iterator

In [0]:
import sys
class SvnhLoader:
    """Download SVHN, split it for semi-supervised training and serve it.

    The official training images are divided into a class-balanced labeled
    set, a class-balanced validation set and an unlabeled remainder. Together
    with the official test split they are written as TFRecord files into
    `dataset_path`, from where `load_dataset` reads them back.
    """

    # Constant attributes
    _NUM_TOTAL_SAMPLES = 99289
    _TRAIN_URL = 'http://ufldl.stanford.edu/housenumbers/train_32x32.mat'
    _TEST_URL = 'http://ufldl.stanford.edu/housenumbers/test_32x32.mat'
    _IMAGE_SIZE = [32, 32, 3]
    _NUM_CLASSES = 10
    # Files produced by download_images_and_generate_tf_record.
    _TFRECORD_NAMES = ('labeled_train.tfrecords', 'unlabeled_train.tfrecords',
                       'validation_set.tfrecords', 'test_set.tfrecords')

    def __init__(self, dataset_path, num_train_samples, num_validation_samples,
                 num_labeled_samples, random_seed=666):
        """Store the split configuration; nothing is downloaded here.

        Args:
            dataset_path: directory the TFRecord files are written to and
                read from.
            num_train_samples: number of samples in the training split.
            num_validation_samples: total size of the validation set.
            num_labeled_samples: total size of the labeled training set.
            random_seed: seed of the generator used to shuffle each class.
        """
        self._dataset_path = dataset_path
        self._num_train_samples = num_train_samples
        self._num_test_samples = self._NUM_TOTAL_SAMPLES - self._num_train_samples
        self._num_validation_samples = num_validation_samples
        self._num_labeled_samples = num_labeled_samples
        self._num_unlabeled_train_samples = num_train_samples - \
            num_validation_samples - num_labeled_samples
        self._random_seed = random_seed

    def __normalize_and_prepare_dataset(self, mat_dataset):
        """Standardise every image and flatten it.

        Args:
            mat_dataset: mapping with an image array under 'X' whose last axis
                indexes the samples, and the labels under 'y'.

        Returns:
            `(X, y)`: `X` is a float64 array with one flattened image per row,
            each with zero mean and unit variance; `y` is an int32 vector in
            which label 10 has been replaced by 0.
        """
        # Convert data to numpy array
        X = mat_dataset['X'].astype(np.float64)

        # Original dataset comes with the sample axis last; move it first
        X = X.transpose((3, 0, 1, 2))

        # Convert every image to zero mean and unit variance
        X -= np.mean(X, axis=(1, 2, 3), keepdims=True)
        std = np.mean(X ** 2, axis=(1, 2, 3), keepdims=True) ** 0.5
        # A constant image has zero deviation; dividing by 1 keeps it at zero
        # instead of producing NaNs.
        std[std == 0] = 1.0
        X /= std

        X = X.reshape([X.shape[0], -1])
        y = mat_dataset['y'].flatten().astype(np.int32)
        # Label 10 stands for the digit 0
        y[y == 10] = 0

        return X, y

    def __download_and_extract_dataset(self):
        """Download both .mat files, normalise them and delete the downloads.

        Returns:
            `(train_X, train_y, test_X, test_y)` as produced by the
            normalisation step.
        """
        # `import urllib` alone does not guarantee that the `request`
        # submodule is loaded, so import it explicitly.
        import urllib.request

        filepath_train = self._dataset_path + '/train_32x32.mat'
        print(filepath_train)
        filepath_test = self._dataset_path + '/test_32x32.mat'

        def download_progress(count, block_size, total_size):
            sys.stdout.write('\r>> Downloading %.1f%%' % (
                float(count * block_size) / float(total_size) * 100.0))
            sys.stdout.flush()

        # Download dataset
        urllib.request.urlretrieve(
            self._TRAIN_URL, filepath_train, download_progress)
        urllib.request.urlretrieve(
            self._TEST_URL, filepath_test, download_progress)

        print('\n')

        # Load resultant mat files
        train_data = loadmat(filepath_train)
        test_data = loadmat(filepath_test)

        # Standardise every image (zero mean, unit variance) and flatten it
        train_X, train_y = self.__normalize_and_prepare_dataset(train_data)
        test_X, test_y = self.__normalize_and_prepare_dataset(test_data)

        # Remove mat files
        os.remove(filepath_train)
        os.remove(filepath_test)

        return train_X, train_y, test_X, test_y

    def __generate_tfrecord(self, images, labels, filename):
        """Serialise `images` (and `labels`) into the TFRecord file `filename`.

        Args:
            images: 2-D array holding one flattened image per row.
            labels: sequence with one label per image, or an empty sequence /
                None for unlabeled data, in which case -1 is stored.
            filename: path of the TFRecord file to write.

        Raises:
            ValueError: if labels are given but their count differs from the
                number of images.
        """
        # `labels == []` is ill-defined when labels is a NumPy array, so the
        # unlabeled case is detected by length, once, outside the loop.
        is_unlabeled = labels is None or len(labels) == 0
        if not is_unlabeled and images.shape[0] != len(labels):
            raise ValueError("Images size %d does not match label size %d." %
                             (images.shape[0], len(labels)))

        print('Writing', filename)

        # The context manager closes the file even if serialisation fails.
        with tf.python_io.TFRecordWriter(filename) as writer:
            # Write each image for the tfrecords file
            for index in range(images.shape[0]):
                image = images[index].tolist()

                # If unlabeled dataset label is -1
                current_label = -1 if is_unlabeled else int(labels[index])

                # Image index is needed to keep track of the temporal
                # ensembling past predictions without losing the shuffled
                # batches
                sample = tf.train.Example(features=tf.train.Features(feature={
                    'height': tf.train.Feature(int64_list=tf.train.Int64List(value=[32])),
                    'width': tf.train.Feature(int64_list=tf.train.Int64List(value=[32])),
                    'depth': tf.train.Feature(int64_list=tf.train.Int64List(value=[3])),
                    'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[current_label])),
                    'image': tf.train.Feature(float_list=tf.train.FloatList(value=image)),
                    'image_index': tf.train.Feature(int64_list=tf.train.Int64List(value=[index]))}))
                writer.write(sample.SerializeToString())

    def download_images_and_generate_tf_record(self):
        """Create the four TFRecord files unless all of them already exist.

        Downloads SVHN, builds class-balanced labeled and validation sets from
        the training split, puts the remaining training images in the
        unlabeled set and writes everything to `dataset_path`.
        """
        record_paths = [os.path.join(self._dataset_path, name)
                        for name in self._TFRECORD_NAMES]
        labeled_path, unlabeled_path, validation_path, test_path = record_paths

        # Only skip the work when every record file is present; a directory
        # left behind by an interrupted run must not count as "loaded".
        if all(os.path.exists(path) for path in record_paths):
            return
        os.makedirs(self._dataset_path, exist_ok=True)

        # Download and process dataset
        train_X, train_y, test_X, test_y = self.__download_and_extract_dataset()

        # Use the seed provided
        rng = np.random.RandomState(self._random_seed)

        labeled_per_class = int(self._num_labeled_samples/self._NUM_CLASSES)
        validation_per_class = int(
            self._num_validation_samples/self._NUM_CLASSES)
        validation_end = labeled_per_class + validation_per_class

        # Per-class pieces are collected and concatenated once at the end, so
        # the large unlabeled array is not re-copied for every class.
        labeled_X_parts, labeled_y_parts = [], []
        validation_X_parts, validation_y_parts = [], []
        unlabeled_X_parts = []

        # Randomly shuffle the dataset, and have balanced labeled and
        # validation datasets (avoid having an unbalanced train set that
        # could hurt the results)
        for label in range(self._NUM_CLASSES):
            label_mask = (train_y == label)
            current_label_X = rng.permutation(train_X[label_mask])
            # Every entry equals `label`, so shuffling the labels separately
            # cannot mis-pair them; the call is kept so the random stream, and
            # with it the split for a given seed, stays unchanged.
            current_label_y = rng.permutation(train_y[label_mask])

            # Take care of the labeled train set
            labeled_X_parts.append(current_label_X[:labeled_per_class])
            labeled_y_parts.append(current_label_y[:labeled_per_class])
            # Now let's take care of validation
            validation_X_parts.append(
                current_label_X[labeled_per_class:validation_end])
            validation_y_parts.append(
                current_label_y[labeled_per_class:validation_end])
            # The rest goes to Unlabeled train
            unlabeled_X_parts.append(current_label_X[validation_end:])

        labeled_train_X = np.concatenate(labeled_X_parts, axis=0)
        labeled_train_y = np.concatenate(labeled_y_parts)
        validation_X = np.concatenate(validation_X_parts, axis=0)
        validation_y = np.concatenate(validation_y_parts)
        unlabeled_train_X = np.concatenate(unlabeled_X_parts, axis=0)

        # Print final set shapes
        print("Labeled train shape: ", labeled_train_X.shape)
        print("Unlabeled train shape: ", unlabeled_train_X.shape)
        print("Validation shape: ", validation_X.shape)
        print("Test shape: ", test_X.shape)

        # Write tfrecords to disk
        self.__generate_tfrecord(labeled_train_X, labeled_train_y, labeled_path)
        self.__generate_tfrecord(unlabeled_train_X, [], unlabeled_path)
        self.__generate_tfrecord(validation_X, validation_y, validation_path)
        self.__generate_tfrecord(test_X, test_y, test_path)

    def load_dataset(self, batch_size, epochs, fraction_of_labeled_per_batch=1.0,
                     fraction_of_unlabeled_per_batch=1.0, shuffle=True):
        """Return iterators over the TFRecord files stored in `dataset_path`.

        Args:
            batch_size: nominal number of samples per batch.
            epochs: number of passes the iterators should provide.
            fraction_of_labeled_per_batch: batch-size scale of the labeled set.
            fraction_of_unlabeled_per_batch: batch-size scale of the unlabeled
                set.
            shuffle: whether the splits are shuffled.

        Returns:
            Whatever `TfrecordLoader.load_dataset` returns.
        """
        # Read from the directory the records were written to instead of a
        # hard-coded './data', which only matched by coincidence of the
        # working directory.
        tfrecord_loader = TfrecordLoader(
            self._dataset_path, batch_size, epochs, self._IMAGE_SIZE, self._NUM_CLASSES,
            fraction_of_labeled_per_batch, fraction_of_unlabeled_per_batch, shuffle)
        return tfrecord_loader.load_dataset()

## **Main Function**

In [0]:
import math
import tensorflow.contrib.eager as tfe
tf.enable_eager_execution()
import os
import urllib
from tqdm import tqdm
from tqdm.auto import trange

In [0]:
def main():
    """Train the Pi model on SVHN and evaluate the best checkpoint.

    Downloads and converts the dataset if needed, trains for a fixed number of
    epochs while ramping the learning rate, Adam's beta1 and the unsupervised
    loss weight, saves a checkpoint whenever validation accuracy improves,
    logs summaries for TensorBoard and finally reports the test accuracy of
    the best checkpoint.
    """
    # Constants variables
    NUM_TRAIN_SAMPLES = 73257
    NUM_TEST_SAMPLES = 26032

    # Editable variables
    num_labeled_samples = 1000
    num_validation_samples = 200
    batch_size = 25
    epochs = 300
    max_learning_rate = 0.001
    initial_beta1 = 0.9
    final_beta1 = 0.5
    checkpoint_directory = './checkpoints/PiModel'
    tensorboard_logs_directory = './logs/PiModel'

    # Assign it as tfe.variable since we will change it across epochs
    learning_rate = tfe.Variable(max_learning_rate)
    beta_1 = tfe.Variable(initial_beta1)

    # Download and Save Dataset in Tfrecords/content/data
    loader = SvnhLoader('/content/data', NUM_TRAIN_SAMPLES,
                        num_validation_samples, num_labeled_samples)
    loader.download_images_and_generate_tf_record()
    # Generate data loaders
    train_labeled_iterator, train_unlabeled_iterator, validation_iterator, test_iterator = loader.load_dataset(
        batch_size, epochs)

    batches_per_epoch = int(num_labeled_samples/batch_size)
    batches_per_epoch_val = int(num_validation_samples / batch_size)

    model = PiModel()
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate, beta1=beta_1, beta2=0.990)
    max_unsupervised_weight = 100 * num_labeled_samples / \
        (NUM_TRAIN_SAMPLES - num_validation_samples)

    best_val_accuracy = 0
    # Path returned by the most recent (i.e. best) checkpoint save.
    best_checkpoint_path = None
    global_step = tf.train.get_or_create_global_step()
    writer = tf.contrib.summary.create_file_writer(tensorboard_logs_directory)
    writer.set_as_default()

    for epoch in trange(epochs):

        rampdown_value = ramp_down_function(epoch, epochs)
        rampup_value = ramp_up_function(epoch)

        # The unsupervised term is switched off for the very first epoch.
        if epoch == 0:
            unsupervised_weight = 0
        else:
            unsupervised_weight = max_unsupervised_weight * \
                rampup_value

        learning_rate.assign(rampup_value * rampdown_value * max_learning_rate)
        beta_1.assign(rampdown_value * initial_beta1 +
                      (1.0 - rampdown_value) * final_beta1)

        epoch_loss_avg = tfe.metrics.Mean()
        epoch_accuracy = tfe.metrics.Accuracy()
        epoch_loss_avg_val = tfe.metrics.Mean()
        epoch_accuracy_val = tfe.metrics.Accuracy()

        # Training pass: one labeled and one unlabeled batch per step.
        for batch_nr in trange(batches_per_epoch):
            X_labeled_train, y_labeled_train, _ = train_labeled_iterator.get_next()
            X_unlabeled_train, _, _ = train_unlabeled_iterator.get_next()

            loss_val, grads = pi_model_gradients(X_labeled_train, y_labeled_train, X_unlabeled_train,
                                                 model, unsupervised_weight)

            optimizer.apply_gradients(zip(grads, model.variables),
                                      global_step=global_step)
            epoch_loss_avg(loss_val)
            epoch_accuracy(
                tf.argmax(model(X_labeled_train), 1), tf.argmax(y_labeled_train, 1))

        # Validation pass, run once after the last training batch.
        for batch_val_nr in trange(batches_per_epoch_val):
            X_val, y_val, _ = validation_iterator.get_next()
            y_val_predictions = model(X_val, training=False)

            # NOTE(review): tf.losses.softmax_cross_entropy expects logits,
            # while the model's last layer already applies a softmax --
            # confirm this is intended.
            epoch_loss_avg_val(tf.losses.softmax_cross_entropy(
                y_val, y_val_predictions))
            epoch_accuracy_val(
                tf.argmax(y_val_predictions, 1), tf.argmax(y_val, 1))

        print("Epoch {:03d}/{:03d}: Train Loss: {:9.7f}, Train Accuracy: {:02.6%}, Validation Loss: {:9.7f}, "
              "Validation Accuracy: {:02.6%}, lr={:.9f}, unsupervised weight={:5.3f}, beta1={:.9f}".format(epoch+1,
                                                                                                           epochs,
                                                                                                           epoch_loss_avg.result(),
                                                                                                           epoch_accuracy.result(),
                                                                                                           epoch_loss_avg_val.result(),
                                                                                                           epoch_accuracy_val.result(),
                                                                                                           learning_rate.numpy(),
                                                                                                           unsupervised_weight,
                                                                                                           beta_1.numpy()))

        # If the accuracy of validation improves save a checkpoint Best 85%
        if best_val_accuracy < epoch_accuracy_val.result():
            best_val_accuracy = epoch_accuracy_val.result()
            checkpoint = tfe.Checkpoint(optimizer=optimizer,
                                        model=model,
                                        optimizer_step=global_step)
            # Remember the exact path that was written so it can be restored
            # later without having to look it up again.
            best_checkpoint_path = checkpoint.save(
                file_prefix=checkpoint_directory)

        # Record summaries
        with tf.contrib.summary.record_summaries_every_n_global_steps(1):
            tf.contrib.summary.scalar('Train Loss', epoch_loss_avg.result())
            tf.contrib.summary.scalar(
                'Train Accuracy', epoch_accuracy.result())
            tf.contrib.summary.scalar(
                'Validation Loss', epoch_loss_avg_val.result())
            tf.contrib.summary.scalar(
                'Validation Accuracy', epoch_accuracy_val.result())
            tf.contrib.summary.scalar(
                'Unsupervised Weight', unsupervised_weight)
            tf.contrib.summary.scalar('Learning Rate', learning_rate.numpy())
            tf.contrib.summary.scalar('Ramp Up Function', rampup_value)
            tf.contrib.summary.scalar('Ramp Down Function', rampdown_value)

    print('\nTrain Ended! Best Validation accuracy = {}\n'.format(best_val_accuracy))

    # Load the best model. `checkpoint_directory` is used as a file *prefix*
    # when saving, so the checkpoint state file lives in its parent directory
    # and tf.train.latest_checkpoint(checkpoint_directory) finds nothing;
    # restoring from the saved path actually brings back the best weights.
    if best_checkpoint_path is not None:
        root = tfe.Checkpoint(optimizer=optimizer,
                              model=model,
                              optimizer_step=tf.train.get_or_create_global_step())
        root.restore(best_checkpoint_path)

    # Evaluate on the final test set
    num_test_batches = math.ceil(NUM_TEST_SAMPLES/batch_size)
    test_accuracy = tfe.metrics.Accuracy()
    for test_batch in range(num_test_batches):
        X_test, y_test, _ = test_iterator.get_next()
        y_test_predictions = model(X_test, training=False)
        test_accuracy(tf.argmax(y_test_predictions, 1), tf.argmax(y_test, 1))

    print("Final Test Accuracy: {:.6%}".format(test_accuracy.result()))

## **Run the model**

In [0]:
# Run the whole pipeline: dataset preparation, training and test evaluation.
main()

/content/data/train_32x32.mat
>> Downloading 100.0%

Labeled train shape:  (1000, 3072)
Unlabeled train shape:  (72057, 3072)
Validation shape:  (200, 3072)
Test shape:  (26032, 3072)
Writing /content/data/labeled_train.tfrecords




Writing /content/data/unlabeled_train.tfrecords
Writing /content/data/validation_set.tfrecords
Writing /content/data/test_set.tfrecords


HBox(children=(FloatProgress(value=0.0, max=300.0), HTML(value='')))

HBox(children=(FloatProgress(value=0.0, max=40.0), HTML(value='')))

Instructions for updating:
Use tf.cast instead.
