In [1]:
# -*- coding: utf-8 -*-
"""
Author:-aam35
Analyzing Forgetting in neural networks
"""

import numpy as np
import os
import sys
import tensorflow as tf
import tensorflow.contrib.eager as tfe
from tensorflow.examples.tutorials.mnist import input_data
import time
tf.enable_eager_execution()
tf.executing_eagerly()

# random seed to get the consistent result
tf.random.set_random_seed(42)

## Permuted MNIST
## a training set of 55,000 examples, validation is 5000, and a test set of 10,000 examples
data = input_data.read_data_sets("data/MNIST_data/", one_hot=True)


## parameters
num_tasks_to_run = 10
num_epochs_per_task = 20
minibatch_size = 64
learning_rate = 0.01
num_train = len(data.train.labels)
num_test = len(data.test.labels)

# Generate the tasks specifications as a list of random permutations of the input pixels.
#  permuting the pixels in all images with the same permutation
# for training
train_permutation = []
# for validation
validation_permutation = []
# for test
test_permutation = []
for task in range(num_tasks_to_run):
    ## 28*28 pixels
    train_permutation.append(np.random.RandomState(seed=task*(42)).permutation(data.train.images))
    validation_permutation.append(np.random.RandomState(seed=task*(42)).permutation(data.validation.images))
    test_permutation.append(np.random.RandomState(seed=task*(42)).permutation(data.test.images))

    

#Based on tutorial provided create your MLP model for above problem
#For TF2.0 users Keras can be used for loading trainable variables and dataset.
#You might need google collab to run large scale experiments

Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/MNIST_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting data/MNIST_data/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting data/MNIST_data/t10k-images-idx3-ubyte.gz
Extracting data/MNIST_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.


In [2]:
## model 1
size_input = 784 # MNIST data input (img shape: 28*28)
size_hidden = 256
size_output = 10 # MNIST total classes (0-9 digits)


# Define class to build mlp model
class MLP(object):
    def __init__(self, size_input, size_hidden, size_output, device=None):
        """
        size_input: int, size of input layer
        size_hidden: int, size of hidden layer
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
        """
        self.size_input, self.size_hidden, self.size_output, self.device =\
        size_input, size_hidden, size_output, device
    
        # Initialize weights between input layer and hidden layer
        self.W1 = tfe.Variable(tf.random_normal([self.size_input, self.size_hidden]))
        # Initialize biases for hidden layer
        self.b1 = tfe.Variable(tf.random_normal([1, self.size_hidden]))
        # Initialize weights between hidden layer and output layer
        self.W2 = tfe.Variable(tf.random_normal([self.size_hidden, self.size_output]))
        # Initialize biases for output layer
        self.b2 = tfe.Variable(tf.random_normal([1, self.size_output]))
    

        
        # Define variables to be updated during backpropagation
        self.variables = [self.W1, self.W2, self.b1, self.b2]
        
    
    # prediction
    def forward(self, X):
        """
        forward pass
        X: Tensor, inputs
        """
        if self.device is not None:
            with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
                self.y = self.compute_output(X)
        else:
            self.y = self.compute_output(X)
      
        return self.y
    
    ## loss function
    def loss(self, y_pred, y_true):
        '''
        y_pred - Tensor of shape (batch_size, size_output)
        y_true - Tensor of shape (batch_size, size_output)
        '''
        y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
        y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
        #return tf.losses.mean_squared_error(y_true_tf, y_pred_tf)
        return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred_tf, labels=y_true_tf))
        #return tf.reduce_mean(-tf.reduce_sum(y_true_tf * tf.log(y_pred_tf), reduction_indices=[1]))
        
  
    def backward(self, X_train, y_train):
        """
        backward pass
        """
        # optimizer
        # Test with SGD,Adam, RMSProp
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        with tf.GradientTape() as tape:
            predicted = self.forward(X_train)
            current_loss = self.loss(predicted, y_train)
        grads = tape.gradient(current_loss, self.variables)
        optimizer.apply_gradients(zip(grads, self.variables),
                              global_step=tf.train.get_or_create_global_step())
        
        
    def compute_output(self, X):
        """
        Custom method to obtain output tensor during forward pass
        """
        # Cast X to float32
        X_tf = tf.cast(X, dtype=tf.float32)
        #Remember to normalize your dataset before moving forward
        # Compute values in hidden layer
        what = tf.matmul(X_tf, self.W1) + self.b1
        hhat = tf.nn.relu(what)
        # dropout
        d_hhat = tf.nn.dropout(hhat, 0.3)
        # Compute output
        output = tf.matmul(hhat, self.W2) + self.b2
        #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
        #Second add tf.Softmax(output) and then return this variable
        #print(output)
        return tf.nn.softmax(output)
        #return output

In [3]:
 # Initialize model using CPU
mlp_on_cpu = MLP(size_input, size_hidden, size_output, device='cpu')



time_start = time.time()
Ptest_dataset_images = []
Ptest_dataset_labels = []

# training process
for run in range(num_tasks_to_run):
    # train for 50 epochs for task1
    if run == 0:
        num_epochs_per_task = 30
        #num_epochs_per_task = 10
    else :
        #num_epochs_per_task = 20
        num_epochs_per_task = 10
    for epoch in range(num_epochs_per_task):
        train_ds = tf.data.Dataset.from_tensor_slices((data.train.images, data.train.labels)).map(lambda x, y: (x, tf.cast(y, tf.float32)))\
           .shuffle(buffer_size=1000)\
           .batch(batch_size=minibatch_size)
        loss_total = tfe.Variable(0, dtype=tf.float32)
        for inputs, outputs in train_ds:
            preds = mlp_on_cpu.forward(inputs)
            loss_total = loss_total + mlp_on_cpu.loss(preds, outputs)
            mlp_on_cpu.backward(inputs, outputs)
        print('Number of Epoch = {} - loss:= {:.4f}'.format(epoch + 1, loss_total.numpy() / num_train))
        preds = mlp_on_cpu.compute_output(data.train.images)
        correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(data.train.labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        print ("Training Accuracy = {}".format(accuracy.numpy()))
 
    # accumulate test dataset
    #Ptest_dataset_images.extend(test_permutation[run])
    #Ptest_dataset_labels.extend(data.test.labels)
    
    # test accuracy
    #preds = mlp_on_cpu.compute_output(Ptest_dataset_images)
    preds = mlp_on_cpu.compute_output(data.test.images)

    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(data.test.labels, 1))
    #Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # To keep sizes compatible with model
    print ("Accuracy = {}".format(accuracy.numpy()))

        
time_taken = time.time() - time_start
print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

Instructions for updating:
Colocations handled automatically by placer.
Number of Epoch = 1 - loss:= 0.0363
Training Accuracy = 0.17096363008022308
Number of Epoch = 2 - loss:= 0.0355
Training Accuracy = 0.19650909304618835
Number of Epoch = 3 - loss:= 0.0353
Training Accuracy = 0.20694544911384583
Number of Epoch = 4 - loss:= 0.0352
Training Accuracy = 0.2173818200826645
Number of Epoch = 5 - loss:= 0.0350
Training Accuracy = 0.22594545781612396
Number of Epoch = 6 - loss:= 0.0349
Training Accuracy = 0.23739999532699585
Number of Epoch = 7 - loss:= 0.0347
Training Accuracy = 0.24912726879119873
Number of Epoch = 8 - loss:= 0.0345
Training Accuracy = 0.2635818123817444
Number of Epoch = 9 - loss:= 0.0342
Training Accuracy = 0.28141817450523376
Number of Epoch = 10 - loss:= 0.0339
Training Accuracy = 0.30212727189064026
Number of Epoch = 11 - loss:= 0.0336
Training Accuracy = 0.31778180599212646
Number of Epoch = 12 - loss:= 0.0334
Training Accuracy = 0.33347272872924805
Number of Epoch

KeyboardInterrupt: 

In [11]:
## model 2
size_input = 784 # MNIST data input (img shape: 28*28)
size_hidden_1 = 256
size_hidden_2 = 256
size_output = 10 # MNIST total classes (0-9 digits)


# Define class to build mlp model
class MLP1(object):
    def __init__(self, size_input, size_hidden_1, size_hidden_2, size_output, device=None):
        """
        size_input: int, size of input layer
        size_hidden: int, size of hidden layer
        size_output: int, size of output layer
        device: str or None, either 'cpu' or 'gpu' or None. If None, the device to be used will be decided automatically during Eager Execution
        """
        self.size_input, self.size_hidden_1, self.size_hidden_2, self.size_output, self.device =\
        size_input, size_hidden_1, size_hidden_2, size_output, device
    
        # Initialize weights between input layer and hidden layer1
        self.W1 = tfe.Variable(tf.random_normal([self.size_input, self.size_hidden_1]))
        # Initialize biases for hidden layer
        self.b1 = tfe.Variable(tf.random_normal([1, self.size_hidden_1]))
        # Initialize weights between input layer and hidden layer
        self.W2 = tfe.Variable(tf.random_normal([self.size_hidden_1, self.size_hidden_2]))
        # Initialize biases for hidden layer
        self.b2 = tfe.Variable(tf.random_normal([1, self.size_hidden_2]))
        # Initialize weights between hidden layer and output layer
        self.W3 = tfe.Variable(tf.random_normal([self.size_hidden_2, self.size_output]))
        # Initialize biases for output layer
        self.b3 = tfe.Variable(tf.random_normal([1, self.size_output]))
        

    
        # Define variables to be updated during backpropagation
        self.variables = [self.W1, self.W2, self.W3, self.b1, self.b2, self.b3]
        
    
    # prediction
    def forward(self, X):
        """
        forward pass
        X: Tensor, inputs
        """
        if self.device is not None:
            with tf.device('gpu:0' if self.device=='gpu' else 'cpu'):
                self.y = self.compute_output(X)
        else:
            self.y = self.compute_output(X)
      
        return self.y
    
    
    ## loss function
    def loss(self, y_pred, y_true):
        '''
        y_pred - Tensor of shape (batch_size, size_output)
        y_true - Tensor of shape (batch_size, size_output)
        '''
        y_true_tf = tf.cast(tf.reshape(y_true, (-1, self.size_output)), dtype=tf.float32)
        y_pred_tf = tf.cast(y_pred, dtype=tf.float32)
        #return tf.losses.mean_squared_error(y_true_tf, y_pred_tf)
        return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y_pred_tf, labels=y_true_tf))
        #return tf.reduce_mean(-tf.reduce_sum(y_true_tf * tf.log(y_pred_tf), reduction_indices=[1]))
        
  
    def backward(self, X_train, y_train):
        """
        backward pass
        """
        # optimizer
        # Test with SGD,Adam, RMSProp
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)
        #optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
        with tf.GradientTape() as tape:
            predicted = self.forward(X_train)
            current_loss = self.loss(predicted, y_train)
        grads = tape.gradient(current_loss, self.variables)
        optimizer.apply_gradients(zip(grads, self.variables),
                              global_step=tf.train.get_or_create_global_step())
        
        
    def compute_output(self, X):
        """
        Custom method to obtain output tensor during forward pass
        """
        # Cast X to float32
        X_tf = tf.cast(X, dtype=tf.float32)
        # Remember to normalize your dataset before moving forward
        # Compute values in hidden layer
        what1 = tf.matmul(X_tf, self.W1) + self.b1
        hhat1 = tf.nn.relu(what1)
        # dropout
        d_hhat = tf.nn.dropout(hhat1, 0.3)
        # Compute output
        what2 = tf.matmul(hhat1, self.W2) + self.b2
        hhat2 = tf.nn.relu(what2)
        #d_hhat = tf.nn.dropout(hhat1, 0.3)
        output = tf.matmul(hhat2, self.W3) + self.b3
        #Now consider two things , First look at inbuild loss functions if they work with softmax or not and then change this
        #Second add tf.Softmax(output) and then return this variable
        #print(output)
        return tf.nn.softmax(output)        

In [13]:
 # Initialize model using CPU
mlp_on_cpu = MLP1(size_input, size_hidden_1, size_hidden_2, size_output, device='cpu')



time_start = time.time()
Ptest_dataset_images = []
Ptest_dataset_labels = []

# training process
for run in range(num_tasks_to_run):
    # train for 50 epochs for task1
    if run == 0:
        num_epochs_per_task = 30
        #num_epochs_per_task = 10
    else :
        #num_epochs_per_task = 20
        num_epochs_per_task = 10
    for epoch in range(num_epochs_per_task):
        train_ds = tf.data.Dataset.from_tensor_slices((train_permutation[run], data.train.labels)).map(lambda x, y: (x, tf.cast(y, tf.float32)))\
           .shuffle(buffer_size=1000)\
           .batch(batch_size=minibatch_size)
        loss_total = tfe.Variable(0, dtype=tf.float32)
        for inputs, outputs in train_ds:
            preds = mlp_on_cpu.forward(inputs)
            loss_total = loss_total + mlp_on_cpu.loss(preds, outputs)
            mlp_on_cpu.backward(inputs, outputs)
        print('Number of Epoch = {} - loss:= {:.4f}'.format(epoch + 1, loss_total.numpy() / num_train))
        preds = mlp_on_cpu.compute_output(train_permutation[run])
        correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(data.train.labels, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        print ("Training Accuracy = {}".format(accuracy.numpy()))
 
    # accumulate test dataset
    Ptest_dataset_images.extend(test_permutation[run])
    Ptest_dataset_labels.extend(data.test.labels)
    
    # test accuracy
    preds = mlp_on_cpu.compute_output(Ptest_dataset_images)
    #preds = mlp_on_cpu.compute_output(data.test.images)

    correct_prediction = tf.equal(tf.argmax(preds, 1), tf.argmax(Ptest_dataset_labels, 1))
    #Calculate accuracy
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    # To keep sizes compatible with model
    print ("Accuracy = {}".format(accuracy.numpy()))

        
time_taken = time.time() - time_start
print('\nTotal time taken (in seconds): {:.2f}'.format(time_taken))
#For per epoch_time = Total_Time / Number_of_epochs

Number of Epoch = 1 - loss:= 0.0369
Training Accuracy = 0.10154545307159424
Number of Epoch = 2 - loss:= 0.0369
Training Accuracy = 0.10143636167049408
Number of Epoch = 3 - loss:= 0.0369
Training Accuracy = 0.10152727365493774
Number of Epoch = 4 - loss:= 0.0369
Training Accuracy = 0.1035090908408165
Number of Epoch = 5 - loss:= 0.0369
Training Accuracy = 0.10170909017324448
Number of Epoch = 6 - loss:= 0.0369
Training Accuracy = 0.10221818089485168
Number of Epoch = 7 - loss:= 0.0369
Training Accuracy = 0.10167272388935089
Number of Epoch = 8 - loss:= 0.0369
Training Accuracy = 0.1019272729754448


KeyboardInterrupt: 