In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.examples.tutorials.mnist import input_data

# Python optimisation variables

  from ._conv import register_converters as _register_converters


In [2]:
from tensorflow.train import AdamOptimizer
#tf.train.AdamOptimizer

In [3]:
def parameter_shape(gradient):
    '''
    Record the parameters original shape
    Input: the return value of optimizer.compute_gradients (many matrices)
    Output: A list of shape
    
    '''
    gradient = np.array(gradient)
    #shape_list record parameter shapes of each layer
    shape_list = []
    for i in range(gradient.shape[0]):
        grad_temp = gradient[i,0].flatten()
        shape_list.append(['Layer_'+str(i+1), gradient[i,0].shape, grad_temp.shape[0]])
        
    return shape_list

In [4]:
def batch_gradient_collector(gradient):
    '''
    Collect the gradient of each batch
    Input: the return value of optimizer.compute_gradients (many matrices)
    Output: the sum of gradients within one epoch as a vector
    
    '''
    gradient = np.array(gradient)
    #shape_list record parameter shapes of each layer
    gradient_vector = []
    for i in range(gradient.shape[0]):
        grad_temp = gradient[i,0].flatten()
        gradient_vector.append(grad_temp)
        
    return np.array(gradient_vector)        

In [36]:
def batch_parameter_collector(gradient):
    '''
    Collect the gradient of each batch
    Input: the return value of optimizer.compute_gradients (many matrices)
    Output: the sum of gradients within one epoch as a vector
    
    '''
    gradient = np.array(gradient)
    #shape_list record parameter shapes of each layer
    parameter_vector = []
    for i in range(gradient.shape[0]):
        grad_temp = gradient[i,1].flatten()
        parameter_vector.append(grad_temp)
        
    return np.array(parameter_vector)

In [14]:
def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name):
    # setup the filter input shape for tf.nn.conv_2d
    conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels,
                      num_filters]

    # initialise weights and bias for the filter
    weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03),
                                      name=name+'_W')
    bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b')

    # setup the convolutional layer operation
    out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='SAME')

    # add the bias
    out_layer += bias

    # apply a ReLU non-linear activation
    out_layer = tf.nn.relu(out_layer)

    # now perform max pooling
    ksize = [1, pool_shape[0], pool_shape[1], 1]
    strides = [1, 2, 2, 1]
    out_layer = tf.nn.max_pool(out_layer, ksize=ksize, strides=strides, 
                               padding='SAME')

    return out_layer

In [5]:
mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [6]:
class AdamOptimizer_Bing(AdamOptimizer):
    def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8,
               use_locking=False, name="Adam"):

        super(AdamOptimizer, self).__init__(use_locking, name)
        self._lr = learning_rate
        self._beta1 = beta1
        self._beta2 = beta2
        self._epsilon = epsilon

        # Tensor versions of the constructor arguments, created in _prepare().
        self._lr_t = None
        self._beta1_t = None
        self._beta2_t = None
        self._epsilon_t = None

        # Created in SparseApply if needed.
        self._updated_lr = None
    
    def minimize(self, loss, global_step=None, var_list=None,
               gate_gradients=1, aggregation_method=None,
               colocate_gradients_with_ops=False, name=None,
               grad_loss=None):
        """
        The same as function minimize, but return the result of compute_gradients
        Created by: Big Bing in 7/28 
        Purpose: To realize parallel computing(communicate gradient)
        """
        grads_and_vars = self.compute_gradients(
            loss, var_list=var_list, gate_gradients=gate_gradients,
            aggregation_method=aggregation_method,
            colocate_gradients_with_ops=colocate_gradients_with_ops,
            grad_loss=grad_loss)[-8:]

        vars_with_grad = [v for g, v in grads_and_vars if g is not None]
        if not vars_with_grad:
          raise ValueError(
              "No gradients provided for any variable, check your graph for ops"
              " that do not support gradients, between variables %s and loss %s." %
              ([str(v) for _, v in grads_and_vars], loss))
        #self.apply_gradients(grads_and_vars, global_step=global_step, name=name)

        return self.apply_gradients(grads_and_vars, global_step=global_step, name=name),grads_and_vars

In [7]:
learning_rate = 0.0001
epochs = 2
batch_size = 50

# declare the training data placeholders
# input x - for 28 x 28 pixels = 784 - this is the flattened image data that is drawn from 
# mnist.train.nextbatch()
x = tf.placeholder(tf.float32, [None, 784])
# dynamically reshape the input
x_shaped = tf.reshape(x, [-1, 28, 28, 1])
# now declare the output data placeholder - 10 digits
y = tf.placeholder(tf.float32, [None, 10])

In [41]:
class CNN():
    def __init__(self):
        self.pred = 0
        self.loss = 0
        self.optimizer_gradient = 0
        self.optimizer = 0
        self.shape_list = 0
        self.init_op = 0
        
        self.t = 0
        self.t_0 = 0
        self.grad = 0
        self.learning_rate = 0.0001
        self.torque = 0
        self.belta = 0
        self.resource = 0
        self.grad_t0 = 0
        self.history = []
        self.w = 0
        self.w_hat = 0
        self.w_t0 = 0
        
    def Rec_from_Agg(self, w_global, torque_global):
        self.w_t0 = w_global
        self.w_hat = w_global
        self.torque = torque_global
    def Snd_to_Agg(self):
        if self.t_0 > 0:
            return w,self.resource, self.belta, self.grad_t0
        else:
            return w,self.resource
    def aa(self):
        self.w = self.w_hat
    
    def Est_Resource(self):
        return self.resource

    def get_coef(self):
        return self.w
    
    def set_coef(self, w_global):
        self.w = w_global
        
    def Est_Belta(self ,X, y):
        grad_global_parameter = np.dot((np.dot(X ,self.w_hat)-y), X)# In time t, the gradient of local loss of global parameters
        self.grad_t0 = grad_global_parameter
        self.belta = np.linalg.norm(self.grad - grad_global_parameter)/np.linalg.norm(self.w - self.w_hat)
        
    def time_record(self):
        self.t_0 = self.t
        
    def CNN_Layer(self):
        x = tf.placeholder(tf.float32, [None, 784])
        # dynamically reshape the input
        x_shaped = tf.reshape(x, [-1, 28, 28, 1])
        # now declare the output data placeholder - 10 digits
        y = tf.placeholder(tf.float32, [None, 10])
        # create some convolutional layers
        layer1 = create_new_conv_layer(x_shaped, 1, 32, [5, 5], [2, 2], name='layer1')
        layer2 = create_new_conv_layer(layer1, 32, 64, [5, 5], [2, 2], name='layer2')

        flattened = tf.reshape(layer2, [-1, 7 * 7 * 64])
        
        # setup some weights and bias values for this layer, then activate with ReLU
        wd1 = tf.Variable(tf.truncated_normal([7 * 7 * 64, 1000], stddev=0.03), name='wd1')
        bd1 = tf.Variable(tf.truncated_normal([1000], stddev=0.01), name='bd1')
        dense_layer1 = tf.matmul(flattened, wd1) + bd1
        dense_layer1 = tf.nn.relu(dense_layer1)

        # another layer with softmax activations
        wd2 = tf.Variable(tf.truncated_normal([1000, 10], stddev=0.03), name='wd2')
        bd2 = tf.Variable(tf.truncated_normal([10], stddev=0.01), name='bd2')
        dense_layer2 = tf.matmul(dense_layer1, wd2) + bd2
        
        pred = tf.nn.softmax(dense_layer2)
        #loss is cross_entropy loss
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=dense_layer2, labels=y))
        
        
        return loss,pred
    
    
    def fit(self, dataset):
        
        x = tf.placeholder(tf.float32, [None, 784])
        # dynamically reshape the input
        x_shaped = tf.reshape(x, [-1, 28, 28, 1])
        # now declare the output data placeholder - 10 digits
        y = tf.placeholder(tf.float32, [None, 10])
        # create some convolutional layers
        layer1 = create_new_conv_layer(x_shaped, 1, 32, [5, 5], [2, 2], name='layer1')
        layer2 = create_new_conv_layer(layer1, 32, 64, [5, 5], [2, 2], name='layer2')

        flattened = tf.reshape(layer2, [-1, 7 * 7 * 64])
        
        # setup some weights and bias values for this layer, then activate with ReLU
        wd1 = tf.Variable(tf.truncated_normal([7 * 7 * 64, 1000], stddev=0.03), name='wd1')
        bd1 = tf.Variable(tf.truncated_normal([1000], stddev=0.01), name='bd1')
        dense_layer1 = tf.matmul(flattened, wd1) + bd1
        dense_layer1 = tf.nn.relu(dense_layer1)

        # another layer with softmax activations
        wd2 = tf.Variable(tf.truncated_normal([1000, 10], stddev=0.03), name='wd2')
        bd2 = tf.Variable(tf.truncated_normal([10], stddev=0.01), name='bd2')
        dense_layer2 = tf.matmul(dense_layer1, wd2) + bd2
        
        y_ = tf.nn.softmax(dense_layer2)
        #loss is cross_entropy loss
        cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=dense_layer2, labels=y))
        
            
            
        #cross_entropy, y_ = self.CNN_Layer()
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        optimizer_gradient = AdamOptimizer_Bing(learning_rate=learning_rate).minimize(cross_entropy)
        optimizer = AdamOptimizer(learning_rate=learning_rate).minimize(cross_entropy)
        init_op = tf.global_variables_initializer()
        grad = []

        # setup the initialisation operator

        with tf.Session() as sess:
            # initialise the variables
            sess.run(init_op)
            total_batch = int(len(dataset.train.labels) / batch_size)
            count = 0
            for epoch in range(self.torque):
                avg_cost = 0
                self.t += 1
                count += 1

                if count < self.torque:
                    for i in range(total_batch):
                        batch_x, batch_y = dataset.train.next_batch(batch_size=50)
                        _,c = sess.run([optimizer,cross_entropy], feed_dict={x: batch_x, y: batch_y})
                        avg_cost += c / total_batch

                elif count == self.torque:
                    '''
                    #self.grad saved for belta computation. 
                    #It denotes in time t(update time), the gradient of local loss of local parameters

                    '''

                    for i in range(total_batch):
                        batch_x, batch_y = dataset.train.next_batch(batch_size=batch_size)
                        g,c = sess.run([optimizer_gradient,cross_entropy], feed_dict={x: batch_x, y: batch_y})
                        gradient_temp = batch_gradient_collector(g[1])
                        grad.append(gradient_temp)
                        avg_cost += c / total_batch
                    self.shape_list = parameter_shape(g[1])
                    #Sum up gradients from each batch
                    self.grad = np.array(grad).sum(axis = 0)
                
                self.w = batch_parameter_collector(g[1])
                test_acc = sess.run(accuracy, feed_dict={x: mnist.test.images, y: mnist.test.labels})   
                self.history.append([avg_cost,test_acc, str(self.t)])


            return self
                            

In [38]:
c = CNN()

In [39]:
c.torque = 2

In [40]:
c.fit(mnist)

UnboundLocalError: local variable 'g' referenced before assignment

In [19]:
c.history

[[0.7495106210898269, 0.9262, '1'], [0.1582654863587496, 0.9691, '2']]

In [35]:
c.grad[2].shape

(51200,)