<a href="https://colab.research.google.com/github/JoyeBright/Semi-supervised-sentiment/blob/PiEnsembling/PiEnsembling.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import math
import numpy as np 
import tensorflow as tf

## **Some Necessary Imports and Modules**

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import tensorflow as tf

from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.layers import base
from tensorflow.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.util.tf_export import tf_export


class _Conv(base.Layer):
    """N-D convolution layer with optional weight normalization and
    mean-only batch normalization.

    This mirrors the structure of a standard convolution layer and adds two
    switches:

    * `weight_norm`: the convolution kernel is computed in `call` as
      ``g * V / ||V||``, where the L2 norm is taken over every kernel axis
      except the last (output-filter) axis.
    * `mean_only_batch_norm`: the mean over axis 0 of the convolution output
      is subtracted. During training the current batch mean is used and an
      exponential moving average of it is kept in
      `batch_norm_running_average`; otherwise the stored average is used.

    Arguments:
      rank: Number of spatial dimensions of the convolution.
      filters: Number of output filters.
      kernel_size: Int or tuple of `rank` ints.
      strides: Int or tuple of `rank` ints.
      padding: Padding mode string, normalized by `utils.normalize_padding`.
      data_format: `'channels_last'` or `'channels_first'`.
      dilation_rate: Int or tuple of `rank` ints.
      activation: Optional callable applied to the output.
      use_bias: Whether a bias vector is created and added.
      kernel_initializer, bias_initializer: Variable initializers.
      kernel_regularizer, bias_regularizer, activity_regularizer:
        Optional regularizers.
      kernel_constraint, bias_constraint: Optional variable constraints.
      trainable: Forwarded to the base layer.
      weight_norm: Enable the weight-normalized kernel described above.
      mean_only_batch_norm: Enable the mean subtraction described above.
      mean_only_batch_norm_momentum: Decay factor of the running mean.
      name: Layer name.
    """

    def __init__(self, rank,
                 filters,
                 kernel_size,
                 strides=1,
                 padding='valid',
                 data_format='channels_last',
                 dilation_rate=1,
                 activation=None,
                 use_bias=True,
                 kernel_initializer=None,
                 bias_initializer=init_ops.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 weight_norm=True,
                 mean_only_batch_norm=True,
                 mean_only_batch_norm_momentum=0.99,
                 name=None,
                 **kwargs):
        super(_Conv, self).__init__(trainable=trainable, name=name,
                                    activity_regularizer=activity_regularizer,
                                    **kwargs)
        self.rank = rank
        self.filters = filters
        self.kernel_size = utils.normalize_tuple(
            kernel_size, rank, 'kernel_size')
        self.strides = utils.normalize_tuple(strides, rank, 'strides')
        self.padding = utils.normalize_padding(padding)
        self.data_format = utils.normalize_data_format(data_format)
        self.dilation_rate = utils.normalize_tuple(
            dilation_rate, rank, 'dilation_rate')
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
        self.kernel_constraint = kernel_constraint
        self.bias_constraint = bias_constraint
        self.input_spec = base.InputSpec(ndim=self.rank + 2)
        self.weight_norm = weight_norm
        self.mean_only_batch_norm = mean_only_batch_norm
        self.mean_only_batch_norm_momentum = mean_only_batch_norm_momentum

    def build(self, input_shape):
        """Create the layer variables and the convolution op.

        Raises:
          ValueError: If the channel dimension of `input_shape` is undefined.
        """
        input_shape = tensor_shape.TensorShape(input_shape)
        if self.data_format == 'channels_first':
            channel_axis = 1
        else:
            channel_axis = -1
        if input_shape[channel_axis].value is None:
            raise ValueError('The channel dimension of the inputs '
                             'should be defined. Found `None`.')
        input_dim = input_shape[channel_axis].value
        kernel_shape = self.kernel_size + (input_dim, self.filters)

        self.kernel = self.add_variable(name='kernel',
                                        shape=kernel_shape,
                                        initializer=self.kernel_initializer,
                                        regularizer=self.kernel_regularizer,
                                        constraint=self.kernel_constraint,
                                        trainable=True,
                                        dtype=self.dtype)

        if self.weight_norm:
            # V carries the direction and g the per-filter scale of the
            # kernel. Both use the layer dtype so their product in `call`
            # is well-typed for layers that are not float32.
            self.V = self.add_variable(name='V_weight_norm',
                                       shape=kernel_shape,
                                       dtype=self.dtype,
                                       initializer=tf.random_normal_initializer(
                                           0, 0.05),
                                       trainable=True)
            self.g = self.add_variable(name='g_weight_norm',
                                       shape=(self.filters,),
                                       initializer=init_ops.ones_initializer(),
                                       dtype=self.dtype,
                                       trainable=True)
        if self.mean_only_batch_norm:
            # An empty list marks "no running mean recorded yet".
            self.batch_norm_running_average = []

        if self.use_bias:
            self.bias = self.add_variable(name='bias',
                                          shape=(self.filters,),
                                          initializer=self.bias_initializer,
                                          regularizer=self.bias_regularizer,
                                          constraint=self.bias_constraint,
                                          trainable=True,
                                          dtype=self.dtype)
        else:
            self.bias = None
        self.input_spec = base.InputSpec(ndim=self.rank + 2,
                                         axes={channel_axis: input_dim})
        self._convolution_op = nn_ops.Convolution(
            input_shape,
            filter_shape=self.kernel.get_shape(),
            dilation_rate=self.dilation_rate,
            strides=self.strides,
            padding=self.padding.upper(),
            data_format=utils.convert_data_format(self.data_format,
                                                  self.rank + 2))
        self.built = True

    def call(self, inputs, training=True):
        """Apply the convolution, optional mean subtraction, bias and
        activation to `inputs`.

        Arguments:
          inputs: Input tensor.
          training: When true, subtract the current batch mean and update the
            running mean; otherwise subtract the stored running mean.
        """
        if self.weight_norm:
            # The kernel has rank + 2 axes and the last one indexes filters.
            # Normalize over all the others and broadcast g along them. For
            # rank 2 this is exactly [0, 1, 2] and [1, 1, 1, filters].
            norm_axes = list(range(self.rank + 1))
            g_shape = [1] * (self.rank + 1) + [self.filters]
            # Note: this rebinds `self.kernel` to the normalized tensor.
            self.kernel = tf.reshape(
                self.g, g_shape)*tf.nn.l2_normalize(self.V, norm_axes)

        outputs = self._convolution_op(inputs, self.kernel)

        if self.mean_only_batch_norm:
            mean = tf.reduce_mean(outputs, reduction_indices=0)
            if training:
                # The placeholder list from `build` means this is the first
                # training call. An explicit type check avoids comparing a
                # tensor against a list.
                if isinstance(self.batch_norm_running_average, list):
                    self.batch_norm_running_average = mean
                else:
                    self.batch_norm_running_average = self.batch_norm_running_average * \
                        self.mean_only_batch_norm_momentum + mean * \
                        (1-self.mean_only_batch_norm_momentum)
                outputs = outputs - mean
            else:
                outputs = outputs - self.batch_norm_running_average

        if self.use_bias:
            if self.data_format == 'channels_first':
                if self.rank == 1:
                    # nn.bias_add does not accept a 1D input tensor.
                    bias = array_ops.reshape(self.bias, (1, self.filters, 1))
                    outputs += bias
                if self.rank == 2:
                    outputs = nn.bias_add(
                        outputs, self.bias, data_format='NCHW')
                if self.rank == 3:
                    # As of Mar 2017, direct addition is significantly slower than
                    # bias_add when computing gradients. To use bias_add, we collapse Z
                    # and Y into a single dimension to obtain a 4D input tensor.
                    outputs_shape = outputs.shape.as_list()
                    if outputs_shape[0] is None:
                        outputs_shape[0] = -1
                    outputs_4d = array_ops.reshape(outputs,
                                                   [outputs_shape[0], outputs_shape[1],
                                                    outputs_shape[2] *
                                                    outputs_shape[3],
                                                    outputs_shape[4]])
                    outputs_4d = nn.bias_add(
                        outputs_4d, self.bias, data_format='NCHW')
                    outputs = array_ops.reshape(outputs_4d, outputs_shape)
            else:
                outputs = nn.bias_add(outputs, self.bias, data_format='NHWC')

        if self.activation is not None:
            return self.activation(outputs)
        return outputs

    def compute_output_shape(self, input_shape):
        """Return the output `TensorShape` for a given `input_shape`."""
        input_shape = tensor_shape.TensorShape(input_shape).as_list()
        channels_last = self.data_format == 'channels_last'
        # Spatial dimensions sit between batch and channels (channels_last)
        # or after batch and channels (channels_first).
        space = input_shape[1:-1] if channels_last else input_shape[2:]
        new_space = [
            utils.conv_output_length(
                dim,
                self.kernel_size[i],
                padding=self.padding,
                stride=self.strides[i],
                dilation=self.dilation_rate[i])
            for i, dim in enumerate(space)]
        if channels_last:
            return tensor_shape.TensorShape([input_shape[0]] + new_space +
                                            [self.filters])
        return tensor_shape.TensorShape([input_shape[0], self.filters] +
                                        new_space)


@tf_export('layers.Conv2D')
class Conv2D(_Conv):
    """Two-dimensional convolution layer.

    A thin specialization of `_Conv` that pins `rank` to 2 and forwards
    every other argument unchanged. See `_Conv` for the meaning of the
    arguments, including `weight_norm` and `mean_only_batch_norm`.
    """

    def __init__(self, filters,
                 kernel_size,
                 strides=(1, 1),
                 padding='valid',
                 data_format='channels_last',
                 dilation_rate=(1, 1),
                 activation=None,
                 use_bias=True,
                 kernel_initializer=None,
                 bias_initializer=init_ops.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 weight_norm=True,
                 mean_only_batch_norm=True,
                 name=None,
                 **kwargs):
        # Collect the explicitly named options first, then layer any extra
        # keyword arguments on top before handing everything to `_Conv`.
        conv_options = dict(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding=padding,
            data_format=data_format,
            dilation_rate=dilation_rate,
            activation=activation,
            use_bias=use_bias,
            kernel_initializer=kernel_initializer,
            bias_initializer=bias_initializer,
            kernel_regularizer=kernel_regularizer,
            bias_regularizer=bias_regularizer,
            activity_regularizer=activity_regularizer,
            kernel_constraint=kernel_constraint,
            bias_constraint=bias_constraint,
            trainable=trainable,
            weight_norm=weight_norm,
            mean_only_batch_norm=mean_only_batch_norm,
            name=name)
        conv_options.update(kwargs)
        super(Conv2D, self).__init__(rank=2, **conv_options)


@tf_export('layers.conv2d')
def conv2d(inputs,
           filters,
           kernel_size,
           strides=(1, 1),
           padding='valid',
           data_format='channels_last',
           dilation_rate=(1, 1),
           activation=None,
           use_bias=True,
           kernel_initializer=None,
           bias_initializer=init_ops.zeros_initializer(),
           kernel_regularizer=None,
           bias_regularizer=None,
           activity_regularizer=None,
           kernel_constraint=None,
           bias_constraint=None,
           weight_norm=True,
           mean_only_batch_norm=True,
           trainable=True,
           name=None,
           reuse=None):
    """Functional interface for the `Conv2D` layer.

    Builds a `Conv2D` layer from the given options and applies it to
    `inputs`.

    Arguments:
      inputs: Input tensor; its base dtype becomes the layer dtype.
      filters, kernel_size, strides, padding, data_format, dilation_rate,
        activation, use_bias, kernel_initializer, bias_initializer,
        kernel_regularizer, bias_regularizer, activity_regularizer,
        kernel_constraint, bias_constraint, trainable: Forwarded to `Conv2D`.
      weight_norm: Forwarded to `Conv2D`.
      mean_only_batch_norm: Forwarded to `Conv2D`.
      name: Layer name, also used as the variable scope.
      reuse: Forwarded to the layer as `_reuse`.

    Returns:
      The result of `layer.apply(inputs)`.
    """
    layer = Conv2D(
        filters=filters,
        kernel_size=kernel_size,
        strides=strides,
        padding=padding,
        data_format=data_format,
        dilation_rate=dilation_rate,
        activation=activation,
        use_bias=use_bias,
        kernel_initializer=kernel_initializer,
        bias_initializer=bias_initializer,
        kernel_regularizer=kernel_regularizer,
        bias_regularizer=bias_regularizer,
        activity_regularizer=activity_regularizer,
        kernel_constraint=kernel_constraint,
        bias_constraint=bias_constraint,
        trainable=trainable,
        # These two switches were previously accepted but never forwarded,
        # so passing False had no effect on the created layer.
        weight_norm=weight_norm,
        mean_only_batch_norm=mean_only_batch_norm,
        name=name,
        dtype=inputs.dtype.base_dtype,
        _reuse=reuse,
        _scope=name)
    return layer.apply(inputs)

In [0]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function


import six
from six.moves import xrange  # pylint: disable=redefined-builtin
import numpy as np

import tensorflow as tf

from tensorflow.python.eager import context
from tensorflow.python.framework import ops
from tensorflow.python.framework import tensor_shape
from tensorflow.python.layers import base
from tensorflow.python.layers import utils
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import init_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import gen_math_ops
from tensorflow.python.ops import nn
from tensorflow.python.ops import nn_ops
from tensorflow.python.ops import standard_ops
from tensorflow.python.util.tf_export import tf_export


@tf_export('layers.Dense')
class Dense(base.Layer):
    """Fully connected layer with optional weight normalization and
    mean-only batch normalization.

    * `weight_norm`: the output is computed from `V` and `g` as
      ``(inputs @ V) * g / ||V||``, with the norm taken over axis 0 of `V`.
      The `kernel` variable is still created but is not used on this path.
    * `mean_only_batch_norm`: the mean over axis 0 of the output is
      subtracted. During training the current batch mean is used and an
      exponential moving average of it is kept in
      `batch_norm_running_average`; otherwise the stored average is used.

    Arguments:
      units: Dimensionality of the output space.
      activation: Optional callable applied to the output.
      use_bias: Whether a bias vector is created and added.
      kernel_initializer, bias_initializer: Variable initializers.
      kernel_regularizer, bias_regularizer, activity_regularizer:
        Optional regularizers.
      kernel_constraint, bias_constraint: Optional variable constraints.
      trainable: Forwarded to the base layer.
      weight_norm: Enable the weight-normalized path described above.
      mean_only_batch_norm: Enable the mean subtraction described above.
      mean_only_batch_norm_momentum: Decay factor of the running mean.
      name: Layer name.
    """

    def __init__(self, units,
                 activation=None,
                 use_bias=True,
                 kernel_initializer=None,
                 bias_initializer=init_ops.zeros_initializer(),
                 kernel_regularizer=None,
                 bias_regularizer=None,
                 activity_regularizer=None,
                 kernel_constraint=None,
                 bias_constraint=None,
                 trainable=True,
                 weight_norm=True,
                 mean_only_batch_norm=True,
                 mean_only_batch_norm_momentum=0.99,
                 name=None,
                 **kwargs):
        super(Dense, self).__init__(trainable=trainable, name=name,
                                    activity_regularizer=activity_regularizer,
                                    **kwargs)
        self.units = units
        self.activation = activation
        self.use_bias = use_bias
        self.kernel_initializer = kernel_initializer
        self.bias_initializer = bias_initializer
        self.kernel_regularizer = kernel_regularizer
        self.bias_regularizer = bias_regularizer
        self.kernel_constraint = kernel_constraint
        self.bias_constraint = bias_constraint
        self.input_spec = base.InputSpec(min_ndim=2)
        self.weight_norm = weight_norm
        self.mean_only_batch_norm = mean_only_batch_norm
        self.mean_only_batch_norm_momentum = mean_only_batch_norm_momentum

    def build(self, input_shape):
        """Create the layer variables.

        Raises:
          ValueError: If the last dimension of `input_shape` is undefined.
        """
        input_shape = tensor_shape.TensorShape(input_shape)
        if input_shape[-1].value is None:
            raise ValueError('The last dimension of the inputs to `Dense` '
                             'should be defined. Found `None`.')
        self.input_spec = base.InputSpec(min_ndim=2,
                                         axes={-1: input_shape[-1].value})
        self.kernel = self.add_variable('kernel',
                                        shape=[
                                            input_shape[-1].value, self.units],
                                        initializer=self.kernel_initializer,
                                        regularizer=self.kernel_regularizer,
                                        constraint=self.kernel_constraint,
                                        dtype=self.dtype,
                                        trainable=True)

        if self.weight_norm:
            # V uses the layer dtype because `call` converts the inputs to
            # `self.dtype` before multiplying them with V.
            self.V = self.add_variable(name='V_weight_norm',
                                       shape=[
                                            input_shape[-1].value, self.units],
                                       dtype=self.dtype,
                                       initializer=tf.random_normal_initializer(
                                           0, 0.05),
                                       trainable=True)
            self.g = self.add_variable(name='g_weight_norm',
                                       shape=(self.units,),
                                       initializer=init_ops.ones_initializer(),
                                       dtype=self.dtype,
                                       trainable=True)

        if self.mean_only_batch_norm:
            # An empty list marks "no running mean recorded yet".
            self.batch_norm_running_average = []

        if self.use_bias:
            self.bias = self.add_variable('bias',
                                          shape=[self.units, ],
                                          initializer=self.bias_initializer,
                                          regularizer=self.bias_regularizer,
                                          constraint=self.bias_constraint,
                                          dtype=self.dtype,
                                          trainable=True)
        else:
            self.bias = None
        self.built = True

    def call(self, inputs, training=True):
        """Apply the dense transform, optional mean subtraction, bias and
        activation to `inputs`.

        Arguments:
          inputs: Input tensor (converted to the layer dtype).
          training: When true, subtract the current batch mean and update the
            running mean; otherwise subtract the stored running mean.
        """
        inputs = ops.convert_to_tensor(inputs, dtype=self.dtype)
        shape = inputs.get_shape().as_list()

        if self.weight_norm:
            # Multiply by the unnormalized direction first, then rescale each
            # output unit by g / ||V||.
            inputs = tf.matmul(inputs, self.V)
            scaler = self.g/tf.sqrt(tf.reduce_sum(tf.square(self.V), [0]))
            outputs = tf.reshape(scaler, [1, self.units])*inputs
        else:
            if len(shape) > 2:
                # Broadcasting is required for the inputs.
                outputs = standard_ops.tensordot(inputs, self.kernel, [[len(shape) - 1],
                                                                       [0]])
                # Reshape the output back to the original ndim of the input.
                if not context.executing_eagerly():
                    output_shape = shape[:-1] + [self.units]
                    outputs.set_shape(output_shape)
            else:
                outputs = gen_math_ops.mat_mul(inputs, self.kernel)

        if self.mean_only_batch_norm:
            mean = tf.reduce_mean(outputs, reduction_indices=0)
            if training:
                # The placeholder list from `build` means this is the first
                # training call. An explicit type check avoids comparing a
                # tensor against a list.
                if isinstance(self.batch_norm_running_average, list):
                    self.batch_norm_running_average = mean
                else:
                    self.batch_norm_running_average = self.batch_norm_running_average * \
                        self.mean_only_batch_norm_momentum + mean * \
                        (1-self.mean_only_batch_norm_momentum)
                # Center on every training step, including the first one,
                # matching the behavior of `_Conv.call`.
                outputs = outputs - mean
            else:
                outputs = outputs - self.batch_norm_running_average

        if self.use_bias:
            outputs = nn.bias_add(outputs, self.bias)
        if self.activation is not None:
            return self.activation(outputs)  # pylint: disable=not-callable
        return outputs

    def compute_output_shape(self, input_shape):
        """Return `input_shape` with its last dimension replaced by `units`.

        Raises:
          ValueError: If the innermost dimension of `input_shape` is
            undefined.
        """
        input_shape = tensor_shape.TensorShape(input_shape)
        input_shape = input_shape.with_rank_at_least(2)
        if input_shape[-1].value is None:
            raise ValueError(
                'The innermost dimension of input_shape must be defined, but saw: %s'
                % input_shape)
        return input_shape[:-1].concatenate(self.units)


@tf_export('layers.dense')
def dense(
        inputs, units,
        activation=None,
        use_bias=True,
        kernel_initializer=None,
        bias_initializer=init_ops.zeros_initializer(),
        kernel_regularizer=None,
        bias_regularizer=None,
        activity_regularizer=None,
        kernel_constraint=None,
        bias_constraint=None,
        weight_norm=True,
        mean_only_batch_norm=True,
        trainable=True,
        name=None,
        reuse=None):
    """Functional interface for the `Dense` layer.

    Constructs a `Dense` layer with the supplied options and applies it to
    `inputs`. The layer dtype is taken from the base dtype of `inputs`;
    `name` doubles as the variable scope and `reuse` is passed as `_reuse`.

    Returns:
      The result of `layer.apply(inputs)`.
    """
    layer_options = {
        'activation': activation,
        'use_bias': use_bias,
        'kernel_initializer': kernel_initializer,
        'bias_initializer': bias_initializer,
        'kernel_regularizer': kernel_regularizer,
        'bias_regularizer': bias_regularizer,
        'activity_regularizer': activity_regularizer,
        'kernel_constraint': kernel_constraint,
        'bias_constraint': bias_constraint,
        'trainable': trainable,
        'weight_norm': weight_norm,
        'mean_only_batch_norm': mean_only_batch_norm,
        'name': name,
        'dtype': inputs.dtype.base_dtype,
        '_scope': name,
        '_reuse': reuse,
    }
    dense_layer = Dense(units, **layer_options)
    return dense_layer.apply(inputs)

**Loss of Pi Ensembling Model**

In [0]:
def pi_model_loss(X_train_labeled, y_train_labeled, X_train_unlabeled,
                  pi_model, unsupervised_weight):
    """Compute the combined Pi-model loss for one batch.

    `pi_model` is evaluated twice on the labeled inputs and twice on the
    unlabeled inputs. The result is

        softmax cross-entropy(y_train_labeled, first labeled pass)
        + unsupervised_weight * (MSE between the two labeled passes
                                 + MSE between the two unlabeled passes)

    Arguments:
      X_train_labeled: Labeled inputs fed to `pi_model`.
      y_train_labeled: Labels passed to `tf.losses.softmax_cross_entropy`.
      X_train_unlabeled: Unlabeled inputs fed to `pi_model`.
      pi_model: Callable producing predictions for a batch of inputs.
        Presumably stochastic (e.g. noise or dropout), so that the two
        passes differ -- confirm against the model definition.
      unsupervised_weight: Scale applied to the consistency term.

    Returns:
      The total loss tensor.
    """
    # Forward passes: labeled pair first, then the unlabeled pair.
    labeled_first = pi_model(X_train_labeled)
    labeled_second = pi_model(X_train_labeled)
    unlabeled_first = pi_model(X_train_unlabeled)
    unlabeled_second = pi_model(X_train_unlabeled)

    # Supervised part uses only the first labeled pass.
    supervised_term = tf.losses.softmax_cross_entropy(
        y_train_labeled, labeled_first)

    # Consistency part compares the two passes of each batch.
    consistency_term = (
        tf.losses.mean_squared_error(labeled_first, labeled_second) +
        tf.losses.mean_squared_error(unlabeled_first, unlabeled_second))

    return supervised_term + unsupervised_weight * consistency_term

## **Pi Gradient Generation**

In [0]:
def pi_model_gradients(X_train_labeled, y_train_labeled, X_train_unlabeled,
                       pi_model, unsupervised_weight):
    """Evaluate the Pi-model loss and its gradients.

    The loss from `pi_model_loss` is recorded under a `tf.GradientTape` and
    differentiated with respect to `pi_model.variables`.

    Returns:
      A tuple `(loss_value, gradients)`.
    """
    with tf.GradientTape() as grad_tape:
        total_loss = pi_model_loss(
            X_train_labeled, y_train_labeled, X_train_unlabeled,
            pi_model, unsupervised_weight)
    gradients = grad_tape.gradient(total_loss, pi_model.variables)
    return total_loss, gradients

## **Ramp-up Function**
NB: keep the ramp-up slow in the initial epochs.

In [0]:
def ramp_up_function(epoch, epoch_with_max_rampup=80):
    """Return the ramp-up coefficient for the given epoch.

    Before `epoch_with_max_rampup` the value is ``exp(-5 * (1 - t)**2)``
    with ``t = max(0, epoch) / epoch_with_max_rampup``; from that epoch
    onwards it is 1.0.

    Arguments:
      epoch: Current epoch number.
      epoch_with_max_rampup: Epoch at which the coefficient reaches 1.0.

    Returns:
      A float coefficient.
    """
    # Past the ramp-up window the coefficient is saturated.
    if not epoch < epoch_with_max_rampup:
        return 1.0

    progress = max(0.0, float(epoch)) / float(epoch_with_max_rampup)
    remaining = 1.0 - progress
    return math.exp(-remaining * remaining * 5.0)