In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import math
import os
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.sparsity import keras as sparsity
import matplotlib.pyplot as plt
from tensorflow.keras import models, layers, datasets
from tensorflow.keras import utils
from tensorflow.keras.layers import Dense, Flatten, Reshape, Input, InputLayer
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.initializers import RandomNormal
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from load_MNIST import load_MNIST
from used_func import parameter_count,define_pruning_params,pruning_rounds,prune_network,encode_save_json,decode_json
from model import pruned_nn
import json

Set the pruning strategy, the experiment round and the pruning rate in the next cell before running the rest of the notebook.

In [None]:
#Which round of the experiment that we are experimenting
# we will get the summary statistics of these values to report
#for compututional reasons we will make each round by hand unfortunately

strategy=1
exp_round=5
pruning_percentage=0.2


In [None]:

#Setting random seed
#in order to have reproducible results each round will have specific random seed

print('Starting the experiment...')

np.random.seed(41+exp_round)
tf.random.set_seed(41+exp_round)

## import the summary data to save our experiments result on

#first let's create the folder

data_dirname='Data'

if not os.path.exists(data_dirname):
    os.mkdir(data_dirname)
    print("Directory " , data_dirname ,  " Created ")
else:    
    print("Directory " , data_dirname ,  " already exists")

summary_data='summary_data.csv'

if os.path.exists('{}/{}'.format(data_dirname,summary_data)):
    # import the main Pandas document
    df_main=pd.read_csv('{}/{}'.format(data_dirname,summary_data))
    print('Summary Data Loaded Succesfully')
else:
    df_main=pd.DataFrame()
    print('Summary Data Created Succesfully')

# creating a folder for the original networks weight    
    
orig_dirname='Orig_weights'

if not os.path.exists(orig_dirname):
    os.mkdir(orig_dirname)
    print("Directory " , orig_dirname ,  " Created ")
else:    
    print("Directory " , orig_dirname ,  " already exists")


# loads, normalizes and preprocesses the MNIST dataset

print('Loading the data...')

X_train,y_train,X_test,y_test=load_MNIST()

#creating the validation data out of the training set

X_train, X_val, y_train,y_val=train_test_split(X_train,y_train,test_size=5000,stratify=y_train,random_state=42)

# Assigning some variables for the experiment

batch_size = 60 #according to the paper
epochs = 50 #this is the number of the training iterations in the original paper. However we will use early stopping
#to stop the training
num_classes = y_test.shape[1]

#patience is for the early stopping, if there is no mitigation in the validation loss, training will stop
patience=5
minimum_delta = 0.001

# input image dimensions

img_rows, img_cols = 28, 28


#Number of the units in the layers
first_layer=300
second_layer=100
layer_pruning_rounds=11 #It is possible to modify this, we choose for the 11 rounds

#Target sparsity for iterative pruning process. See the method section our paper
#this is modifiable as well.
target=0.036


#Creating the smaller network sizes

layers_model=list()
layers_model.append((first_layer,second_layer))
for i in range(layer_pruning_rounds):
    first_layer-=np.ceil(first_layer*0.2).astype(int)
    second_layer-=np.ceil(second_layer*0.2).astype(int)
    layers_model.append((first_layer,second_layer))

print('Network sizes for the experiments:')
print(layers_model)


# Create training and testing datasets for Tensorflow GradientTape
# Create training and testing datasets for Tensorflow
train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train))
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val))
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test))

train_dataset = train_dataset.shuffle(buffer_size = 55000, reshuffle_each_iteration = True).batch(batch_size = batch_size, drop_remainder = False)
val_dataset = val_dataset.batch(batch_size=batch_size, drop_remainder=False)
test_dataset = test_dataset.batch(batch_size=batch_size, drop_remainder=False)

layers = tf.keras.layers

# Specify the parameters to be used for layer-wise pruning, NO PRUNING is done here:
# This is necessary because we decided to use same model for every experiments
# so this unpruned network has to be defined
pruning_params_unpruned = define_pruning_params()

# This is necessary for the model to work with Tensorflow-Optimization API
callbacks = [
             sparsity.UpdatePruningStep(),

]


## Initialize the Lenet-300-100 model

#This model will be used to determine _super winning tickets_

print('Original Lenet 300-100 is training...')

lenet_model = pruned_nn(pruning_params_unpruned,first_layer=300,second_layer=100)

lenet_model_stripped = sparsity.strip_pruning(lenet_model)

# Two dictionary is used for every model in the notebook
# One for 1 epoch (1.000 iterations) that we decide early stopping to stop training
# One for 100 iterations to decide validation loss and test accuracy in the work

# Dictionary to hold scalar metrics-
history_lenet = {}

history_lenet['accuracy'] = np.zeros(epochs)
history_lenet['val_accuracy'] = np.zeros(epochs)
history_lenet['loss'] = np.zeros(epochs)
history_lenet['val_loss'] = np.zeros(epochs)

# Dictionary to hold scalar metrics-
history_lenet_batch = {}

history_lenet_batch['accuracy'] = np.zeros(epochs*10)
history_lenet_batch['val_accuracy'] = np.zeros(epochs*10)
history_lenet_batch['loss'] = np.zeros(epochs*10)
history_lenet_batch['val_loss'] = np.zeros(epochs*10)

lenet_model.save_weights("{}/Lenet_300_100_Randomly_Initialized_weights.h5".format(orig_dirname), overwrite=True)

# Instantiate a new neural network model for which, the mask is to be created,


mask_model_lenet = pruned_nn(pruning_params_unpruned,300,100)

# Load weights of the model-
mask_model_lenet.load_weights("{}/Lenet_300_100_Randomly_Initialized_weights.h5".format(orig_dirname))

# Strip the model of its pruning parameters-
mask_model_lenet_stripped = sparsity.strip_pruning(mask_model_lenet)

# In this setup all the values in the masks will be equal to the 1 because there is no pruning

for wts in mask_model_lenet_stripped.trainable_weights:
    wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))

## these tensoflow functions have to be defined again before usage everytime
## also the optimizers and loss function has to be reinitialized to reset them

# Choose an optimizer and loss function for training-
loss_fn = tf.keras.losses.CategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam(0.0012)

# Select metrics to measure the error and the accuracy of the model.

train_loss = tf.keras.metrics.Mean(name = 'train_loss')
train_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'train_accuracy')

val_loss = tf.keras.metrics.Mean(name = 'val_loss')
val_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'val_accuracy')

test_loss = tf.keras.metrics.Mean(name = 'test_loss')
test_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'test_accuracy')



@tf.function
def train_one_step(model, mask_model, optimizer, x, y):
    '''
    def train_step(data, labels):
    Function to compute one step of gradient descent optimization
    '''
    with tf.GradientTape() as tape:
        # Make predictions using defined model-
        y_pred = model(x)

        # Compute loss-
        loss = loss_fn(y, y_pred)

    # Compute gradients wrt defined loss and weights and biases-
    grads = tape.gradient(loss, model.trainable_variables)

    
    
    
    # List to hold element-wise multiplication between-
    # computed gradient and masks-
    grad_mask_mul = []

    # Perform element-wise multiplication between computed gradients and masks-
    for grad_layer, mask in zip(grads, mask_model.trainable_weights):
        grad_mask_mul.append(tf.math.multiply(grad_layer, mask))

    # Apply computed gradients to model's weights and biases-
    # optimizer.apply_gradients(zip(grads, model.trainable_variables))
    # normally we just apply the grads but here we use masked grads
    optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))

    # Compute loss and accuracy-
    train_loss(loss)
    train_accuracy(y, y_pred)



@tf.function
def val_step(model, optimizer, data, labels):
    """
    Function to val model performance
    on validation dataset
    """

    predictions = model(data)
    t_loss = loss_fn(labels, predictions)

    val_loss(t_loss)
    val_accuracy(labels, predictions)

@tf.function
def test_step(model, optimizer, data, labels):
    """
    Function to test model performance
    on testing dataset
    """

    predictions = model(data)
    t_loss = loss_fn(labels, predictions)

    test_loss(t_loss)
    test_accuracy(labels, predictions)


patience_val = np.zeros(patience)

#min_loss and patience_sofar is the variables to manually enable early stopping  

min_loss=10
patience_sofar=0

# these metrics are to enable calculations in every 100 iterations
hunderd_iter=[i*100-1 for i in range(1,11)]
iteration=0

for epoch in range(epochs):

    #to stop training when earlystopping criterion met
    if patience_sofar >= patience:
        print("\n\nEarlyStopping Evoked! Stopping training\n\n")
        break

    # Reset the metrics at the start of the next epoch
    train_loss.reset_states()
    train_accuracy.reset_states()
    #val_loss.reset_states()
    #val_accuracy.reset_states()

    for i, (x,y) in enumerate(train_dataset):
        train_one_step(lenet_model_stripped, mask_model_lenet_stripped, optimizer, x, y)
        if i in hunderd_iter:
            val_loss.reset_states()
            val_accuracy.reset_states()
            for x_t, y_t in val_dataset:
                val_step(lenet_model_stripped, optimizer, x_t, y_t)
            history_lenet_batch['accuracy'][iteration] = train_accuracy.result()
            history_lenet_batch['loss'][iteration] = train_loss.result()
            history_lenet_batch['val_loss'][iteration] = val_loss.result()
            history_lenet_batch['val_accuracy'][iteration] = val_accuracy.result()
            #we are saving the weights in every 100 iteration. So that we can load the weights of the minimum validation
            #loss and evaluate the test accuracy on the test set
            lenet_model_stripped.save_weights("{}/Lenet_300_100_Trained_weights_iter_{}.h5".format(orig_dirname,iteration),
                                              overwrite=True)

            iteration+=1
        else:
            continue


#    for x_t, y_t in val_dataset:
#        val_step(lenet_model_stripped, optimizer, x_t, y_t)


    #Printing the results after every epoch
    template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, val Loss: {3:.4f}, val Accuracy: {4:4f}'

    history_lenet['accuracy'][epoch] = train_accuracy.result()
    history_lenet['loss'][epoch] = train_loss.result()
    history_lenet['val_loss'][epoch] = val_loss.result()
    history_lenet['val_accuracy'][epoch] = val_accuracy.result()

    print(template.format(epoch + 1, 
                          train_loss.result(), train_accuracy.result()*100,
                          val_loss.result(), val_accuracy.result()*100))

    # Count number of non-zero parameters in each layer and in total-

    model_sum_params = 0

    for layer in lenet_model_stripped.trainable_weights:

        model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()

    print("Total number of trainable parameters = {0}\n".format(model_sum_params))

    #for computing the early stopping manually

    if (min_loss-history_lenet['val_loss'][epoch])>minimum_delta:
        min_loss=history_lenet['val_loss'][epoch]
    else:
        patience_sofar+=1



#to clear the zero values from dictionary to have better visualization

for metrics in history_lenet.keys():
        history_lenet[metrics] = np.resize(history_lenet[metrics], new_shape = epoch)
for metrics in history_lenet_batch.keys():
        history_lenet_batch[metrics] = np.resize(history_lenet_batch[metrics], new_shape = iteration)

# Evaluating the minimum validation loss and the corresponding accuracy  after the training
min_val_loss_iter=np.argmin(history_lenet_batch['val_loss'])
history_lenet_batch['min_iter']=int(min_val_loss_iter)

#we are getting the weights in the minimum validation loss
lenet_model_stripped.load_weights("{}/Lenet_300_100_Trained_weights_iter_{}.h5".format(orig_dirname,min_val_loss_iter))

#calculating the test accuracy
test_accuracy.reset_states()
for x_t, y_t in test_dataset:
    test_step(lenet_model_stripped, optimizer, x_t, y_t)
history_lenet_batch['test_accuracy']=float(test_accuracy.result())

print('Training of the original Lenet 300-100 is finished.')

#Experiment loop for the smaller networks starts here

print('Experiment for the smaller network sizes begins...')

for (first_layer,second_layer) in layers_model:
    print(first_layer,second_layer)

    # Instantiate random the NN-
    orig_model = pruned_nn(pruning_params_unpruned,first_layer=first_layer,second_layer=second_layer)

    # Save random initialized weights BEFORE pruning and training of NN
    # and BEFORE the model is STRIPPED-

    #first let's create the folder
    
    #actually we dont have to use different folders for the strategies. So it can be modified.
    dirName='Exp_Strategy_{}'.format(strategy)

    if not os.path.exists(dirName):
        os.mkdir(dirName)
        print("Directory " , dirName ,  " Created ")
    else:    
        print("Directory " , dirName ,  " already exists")

    orig_model.save_weights("{}/MNIST_{}_{}_Randomly_Initialized_weights.h5".format(dirName,first_layer,second_layer), overwrite=True)

    # Strip the pruning wrappers from UNPRUNED model-
    orig_model_stripped = sparsity.strip_pruning(orig_model)


    # Save random uninitialized weights BEFORE pruning of NN using STRIPPED model-
    orig_model_stripped.save_weights("{}/MNIST_{}_{}_Randomly_Initialized_weights_stripped.h5".format(dirName,first_layer,second_layer)
                                     , overwrite=True)

    #these functions calculates the parameters in the network
    orig_model_params=parameter_count('orig_model',orig_model)

    orig_model_stripped_params=parameter_count('orig_model_stripped',orig_model_stripped)

    print('orig model summary:')
    orig_model_stripped.summary()

    ## Funtion that computes the Iterative Pruning Rounds based on the pruning rate and the target sparsity


    prun_rates,num_prun_rounds=pruning_rounds(orig_model_stripped,pruning_percentage=pruning_percentage,target=target)

    ### Lets create a dictionary for the history of the models

    #one for the epochs
    # one for the batches


    
    history_main = {}

    
    for x in range(num_prun_rounds):
        history = {}

        # Neural Network model, scalar metrics-
        history['accuracy'] = np.zeros(shape = epochs)
        history['val_accuracy'] = np.zeros(shape = epochs)
        history['loss'] = np.zeros(shape = epochs)
        history['val_loss'] = np.zeros(shape = epochs)

        # compute % of weights pruned at the end of each iterative pruning round-
        history['percentage_wts_pruned'] = 0
        history['units_first_layer'] = first_layer
        history['units_second_layer'] = second_layer
        history['strategy'] = strategy
        history['patience'] = patience
        history['pruning_rate'] = pruning_percentage

        history_main[x + 1] = history



    history_main_batch = {}

    
    for x in range(num_prun_rounds):
        history = {}

        # Neural Network model, scalar metrics-
        history['accuracy'] = np.zeros(shape = epochs*10)
        history['val_accuracy'] = np.zeros(shape = epochs*10)
        history['loss'] = np.zeros(shape = epochs*10)
        history['val_loss'] = np.zeros(shape = epochs*10)

        # compute % of weights pruned at the end of each iterative pruning round-
        history['percentage_wts_pruned'] = 0
        history['units_first_layer'] = first_layer
        history['units_second_layer'] = second_layer
        history['strategy'] = strategy
        history['patience'] = patience
        history['pruning_rate'] = pruning_percentage

        history_main_batch[x + 1] = history


    ### 2: Train the unpruned Network until it converges

    #We will also use Gradient Tape here to be consistent with the Winning Ticket model

    #Early Stopping criterion has been used for training of the model


    ##these tensoflow functions have to be defined again before usage everytime
    # Choose an optimizer and loss function for training-
    loss_fn = tf.keras.losses.CategoricalCrossentropy()
    optimizer = tf.keras.optimizers.Adam(0.0012)

    # Select metrics to measure the error & accuracy of model.
    # These metrics accumulate the values over epochs and then
    # print the overall result-
    train_loss = tf.keras.metrics.Mean(name = 'train_loss')
    train_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'train_accuracy')

    val_loss = tf.keras.metrics.Mean(name = 'val_loss')
    val_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'val_accuracy')

    test_loss = tf.keras.metrics.Mean(name = 'test_loss')
    test_accuracy = tf.keras.metrics.CategoricalAccuracy(name = 'test_accuracy')

    @tf.function
    def train_one_step(model, mask_model, optimizer, x, y):
        '''
        def train_step(data, labels):
        Function to compute one step of gradient descent optimization
        '''
        with tf.GradientTape() as tape:
            # Make predictions using defined model-
            y_pred = model(x)

            # Compute loss-
            loss = loss_fn(y, y_pred)

        # Compute gradients wrt defined loss and weights and biases-
        grads = tape.gradient(loss, model.trainable_variables)

        # List to hold element-wise multiplication between-
        # computed gradient and masks-
        grad_mask_mul = []

        # Perform element-wise multiplication between computed gradients and masks-
        for grad_layer, mask in zip(grads, mask_model.trainable_weights):
            grad_mask_mul.append(tf.math.multiply(grad_layer, mask))

        # Apply computed gradients to model's weights and biases-
        # optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # normally we just apply the grads but here we use masked grads
        optimizer.apply_gradients(zip(grad_mask_mul, model.trainable_variables))

        # Compute accuracy-
        train_loss(loss)
        train_accuracy(y, y_pred)



    @tf.function
    def val_step(model, optimizer, data, labels):
        """
        Function to val model performance
        on validation dataset
        """

        predictions = model(data)
        t_loss = loss_fn(labels, predictions)

        val_loss(t_loss)
        val_accuracy(labels, predictions)

    @tf.function
    def test_step(model, optimizer, data, labels):
        """
        Function to test model performance
        on testing dataset
        """

        predictions = model(data)
        t_loss = loss_fn(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)


    
    # Instantiate a new neural network model for which, the mask is to be created,

    mask_model_orig = pruned_nn(pruning_params_unpruned,first_layer,second_layer)

    # Load weights of PRUNED model-
    mask_model_orig.load_weights("{}/MNIST_{}_{}_Randomly_Initialized_weights.h5".format(dirName,first_layer,second_layer))

    # Strip the model of its pruning parameters-
    mask_model_orig_stripped = sparsity.strip_pruning(mask_model_orig)

    #for the original models as well we create mask with zeros because there will be no pruning

    for wts in mask_model_orig_stripped.trainable_weights:
        wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))

    # Dictionary to hold scalar metrics-
    history_orig = {}

    history_orig['accuracy'] = np.zeros(epochs)
    history_orig['val_accuracy'] = np.zeros(epochs)
    history_orig['loss'] = np.zeros(epochs)
    history_orig['val_loss'] = np.zeros(epochs)
    history_orig['percentage_wts_pruned'] = 0
    history_orig['units_first_layer'] = first_layer
    history_orig['units_second_layer'] = second_layer
    history_orig['strategy'] = strategy
    history_orig['patience'] = patience
    history_orig['pruning_rate'] = pruning_percentage

    # Dictionary to hold scalar metrics-
    history_orig_batch = {}

    history_orig_batch['accuracy'] = np.zeros(epochs*10)
    history_orig_batch['val_accuracy'] = np.zeros(epochs*10)
    history_orig_batch['loss'] = np.zeros(epochs*10)
    history_orig_batch['val_loss'] = np.zeros(epochs*10)
    history_orig_batch['percentage_wts_pruned'] = 0
    history_orig_batch['units_first_layer'] = first_layer
    history_orig_batch['units_second_layer'] = second_layer
    history_orig_batch['strategy'] = strategy
    history_orig_batch['patience'] = patience
    history_orig_batch['pruning_rate'] = pruning_percentage


    patience_val = np.zeros(patience)

    #min_loss and patience_sofar is the variables to manually enable early stopping  

    min_loss=10
    patience_sofar=0
    #To enable the saving the validation loss in every 100 iteration
    hunderd_iter=[i*100-1 for i in range(1,11)]
    iteration=0

    for epoch in range(epochs):

        #to stop training when earlystopping criterion met
        if patience_sofar >= patience:
            print("\n\nEarlyStopping Evoked! Stopping training\n\n")
            break

        # Reset the metrics at the start of the next epoch
        train_loss.reset_states()
        train_accuracy.reset_states()
        #val_loss.reset_states()
        #val_accuracy.reset_states()

        for i, (x,y) in enumerate(train_dataset):
            train_one_step(orig_model_stripped, mask_model_orig_stripped, optimizer, x, y)
            if i in hunderd_iter:
                val_loss.reset_states()
                val_accuracy.reset_states()
                for x_t, y_t in val_dataset:
                    val_step(orig_model_stripped, optimizer, x_t, y_t)
                history_orig_batch['accuracy'][iteration] = train_accuracy.result()
                history_orig_batch['loss'][iteration] = train_loss.result()
                history_orig_batch['val_loss'][iteration] = val_loss.result()
                history_orig_batch['val_accuracy'][iteration] = val_accuracy.result()
                #Saving the weights of the model in every 100 iteration. So that we can use the weights of the minimum
                #validation loss model to evaluate corresponding test accuracy after the training
                orig_model.save_weights("{}/MNIST_{}_{}_Trained_Weights_Unpruned_iter_{}.h5".format(dirName,first_layer,second_layer,iteration),
                            overwrite = True)

                iteration+=1
            else:
                continue


    #    for x_t, y_t in val_dataset:
    #        val_step(orig_model_stripped, optimizer, x_t, y_t)


        #printing the results after every epoch
        template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, val Loss: {3:.4f}, val Accuracy: {4:4f}'

        history_orig['accuracy'][epoch] = train_accuracy.result()
        history_orig['loss'][epoch] = train_loss.result()
        history_orig['val_loss'][epoch] = val_loss.result()
        history_orig['val_accuracy'][epoch] = val_accuracy.result()

        print(template.format(epoch + 1, 
                              train_loss.result(), train_accuracy.result()*100,
                              val_loss.result(), val_accuracy.result()*100))

        # Count number of non-zero parameters in each layer and in total-

        model_sum_params = 0

        for layer in orig_model_stripped.trainable_weights:

            model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()

        print("Total number of trainable parameters = {0}\n".format(model_sum_params))

        #for computing the early stopping manually

        if (min_loss-history_orig['val_loss'][epoch])>minimum_delta:
            min_loss=history_orig['val_loss'][epoch]
        else:
            patience_sofar+=1



    #to clear the zero values from dictionary to have better visualization


    for metrics in history_orig.keys():
            history_orig[metrics] = np.resize(history_orig[metrics], new_shape = epoch)
    for metrics in history_orig_batch.keys():
            history_orig_batch[metrics] = np.resize(history_orig_batch[metrics], new_shape = iteration)

    #for k,v in history_orig.items():
    #    history_orig[k]=np.where(v==0,np.nan,v)
    min_val_loss_iter=np.argmin(history_orig_batch['val_loss'])
    history_orig_batch['min_iter']=int(min_val_loss_iter)

    #we are getting the weights in the minimum validation loss
    orig_model.load_weights("{}/MNIST_{}_{}_Trained_Weights_Unpruned_iter_{}.h5".format(dirName,first_layer,second_layer,min_val_loss_iter))

    #calculating the test accuracy
    test_accuracy.reset_states()
    for x_t, y_t in test_dataset:
        test_step(orig_model_stripped, optimizer, x_t, y_t)
    history_orig_batch['test_accuracy']=float(test_accuracy.result())


    # We are saving the weights at the minimum validation loss to make pruning in next steps
    orig_model.save_weights("{}/MNIST_{}_{}_Trained_Weights_Unpruned.h5".format(dirName,first_layer,second_layer),
                            overwrite = True)

    # Here our iterative pruning loop start

    for i in range(len(prun_rates)):

        # we need to set the epoch again to inital value
        epoch=50

        #Based on the strategy selected, different pruning strategies will be used
        if strategy==1:
            print("strategy 1 is selected")
        # Instantiate a Neureal Network model to be pruned using parameters from above-
            pruned_model = pruned_nn(pruning_params_unpruned,first_layer,second_layer)

            
            #Loading the weights to be pruned.
            pruned_model.load_weights("{}/MNIST_{}_{}_Trained_Weights_Unpruned.h5".format(dirName,first_layer,second_layer))

            # Strip the pruning wrappers from pruned model-
            pruned_model_stripped = sparsity.strip_pruning(pruned_model)

            # Prun the weights
            prune_network(pruned_model_stripped,pruning_percentage=prun_rates[i])

            # Save weights of PRUNED and Trained model BEFORE stripping-
            pruned_model.save_weights("{}/MNIST_{}_{}_Weights_Pruned.h5".format(dirName,first_layer,second_layer), overwrite = True)

        else:
            print('strategy 2 is selected')
            pruning_params_pruned = define_pruning_params(target=prun_rates[i],end=-1)

            # Instantiate a Neureal Network model to be pruned using parameters from above-
            pruned_model = pruned_nn(pruning_params_pruned,first_layer,second_layer)

            
            #Loading the weights to be pruned
            pruned_model.load_weights("{}/MNIST_{}_{}_Trained_Weights_Unpruned.h5".format(dirName,first_layer,second_layer))

            # Train the NN to be pruned
            history_pruned = pruned_model.fit(
                x = X_train, y = y_train,
                batch_size = batch_size,
                epochs = 4, #since the model is already trained, TF website advises to train only for 4 epochs
                verbose = 1,
                callbacks=callbacks,
                #validation_data = (X_val, y_val),
                shuffle = True
            )


            # Save weights of PRUNED and Trained model BEFORE stripping-
            pruned_model.save_weights("{}/MNIST_{}_{}_Weights_Pruned.h5".format(dirName,first_layer,second_layer), overwrite = True)

            # Strip the pruning wrappers from pruned model-
            pruned_model_stripped = sparsity.strip_pruning(pruned_model)


        # we need to load the original weights to the model before we continue
        #because we need to reset the weights to their initial values

        orig_model_stripped.load_weights("{}/MNIST_{}_{}_Randomly_Initialized_weights_stripped.h5".format(dirName,first_layer,second_layer))

        # Sanity-check: confirm that p = 30% of the weights are actually pruned away from the network-

        #we are subtracting 410 because it is the number of the units in the neural net
        orig_model_stripped_params=parameter_count('orig_model_stripped',orig_model_stripped,verbose=0)
        pruned_model_params=parameter_count('pruned_model',pruned_model,verbose=0)

        print("\n% of weights pruned away = {0:.2f}%\n".format( \
            (orig_model_stripped_params - pruned_model_params) / orig_model_stripped_params * 100))
        history_main[i+1]['percentage_wts_pruned']=np.round((orig_model_stripped_params - pruned_model_params) / orig_model_stripped_params * 100,2)
        history_main_batch[i+1]['percentage_wts_pruned']=np.round((orig_model_stripped_params - pruned_model_params) / orig_model_stripped_params * 100,2)

        # Instantiate a new neural network model for which, the mask is to be created,

        mask_model = pruned_nn(pruning_params_unpruned,first_layer,second_layer)

        # Load weights of PRUNED model-
        mask_model.load_weights("{}/MNIST_{}_{}_Weights_Pruned.h5".format(dirName,first_layer,second_layer))

        # Strip the model of its pruning parameters-
        mask_model_stripped = sparsity.strip_pruning(mask_model)

        # For each layer, for each weight which is 0, leave it, as is.
        # And for weights which survive the pruning,reinitialize it to ONE (1)-

        for wts in mask_model_stripped.trainable_weights:
            wts.assign(tf.where(tf.equal(wts, 0.), 0., 1.))

        mask_model_stripped_params=parameter_count('mask_model_stripped',mask_model_stripped,verbose=0)

        # Instantiate a new neural network model for which, the weights are to be extracted

        winning_ticket_model = pruned_nn(pruning_params_unpruned,first_layer,second_layer)

        # Load weights of PRUNED model-

        winning_ticket_model.load_weights("{}/MNIST_{}_{}_Weights_Pruned.h5".format(dirName,first_layer,second_layer))

        # Strip the model of its pruning parameters-

        winning_ticket_model_stripped = sparsity.strip_pruning(winning_ticket_model)

        # For each layer, for each weight which is 0, leave it, as is. And for weights which survive the pruning,
        # reinitialize it to the value, the model received BEFORE it was trained and pruned-

        for orig_wts, pruned_wts in zip(orig_model_stripped.trainable_weights, winning_ticket_model_stripped.trainable_weights):
            pruned_wts.assign(tf.where(tf.equal(pruned_wts, 0), pruned_wts, orig_wts))

        winning_ticket_stripped_params=parameter_count('winning_ticket_model_stripped',winning_ticket_model_stripped,verbose=0)

        
        # Save the weights (with pruning parameters) extracted to a file-

        winning_ticket_model_stripped.save_weights("{}/MNIST_{}_{}_Winning_Ticket.h5".format(dirName,first_layer,second_layer), overwrite=True)

        #Instantiate a model for iterative pruning-
        model_gt = pruned_nn(pruning_params_unpruned,first_layer,second_layer)

        model_gt_stripped=sparsity.strip_pruning(model_gt)

        # Load weights of winning ticket-
        model_gt_stripped.load_weights("{}/MNIST_{}_{}_Winning_Ticket.h5".format(dirName,first_layer,second_layer))
        
        #printing to see where are we at the training actually.
        #it is not necessary
        print(model_gt_stripped.summary())

        # Loss function and optimizer used for re-training the winning ticket-
        loss_fn = tf.keras.losses.CategoricalCrossentropy()
        optimizer = tf.keras.optimizers.Adam(learning_rate = 0.0012)

        # One running-mean loss tracker per data split-
        train_loss, val_loss, test_loss = (
            tf.keras.metrics.Mean(name = split + '_loss')
            for split in ('train', 'val', 'test')
        )

        # One categorical-accuracy tracker per data split-
        train_accuracy, val_accuracy, test_accuracy = (
            tf.keras.metrics.CategoricalAccuracy(name = split + '_accuracy')
            for split in ('train', 'val', 'test')
        )


        @tf.function
        def train_one_step(model, mask_model, optimizer, x, y):
            '''
            Run one masked optimisation step on a single batch (x, y).

            The loss given by loss_fn is differentiated with respect to
            model.trainable_variables, every gradient is multiplied element-wise
            by the matching entry of mask_model.trainable_weights, and the masked
            gradients are applied through the optimizer. The batch loss and the
            predictions are also accumulated into train_loss / train_accuracy.
            '''
            with tf.GradientTape() as tape:
                # Forward pass and loss for this batch-
                batch_pred = model(x)
                batch_loss = loss_fn(y, batch_pred)

            # Gradients of the loss wrt the weights and biases-
            raw_grads = tape.gradient(batch_loss, model.trainable_variables)

            # Element-wise product of every gradient with its mask-
            masked_grads = [
                tf.math.multiply(layer_grad, layer_mask)
                for layer_grad, layer_mask in zip(raw_grads, mask_model.trainable_weights)
            ]

            # The masked gradients (not the raw ones) are what the optimizer applies-
            optimizer.apply_gradients(zip(masked_grads, model.trainable_variables))

            # Update the running training metrics-
            train_loss(batch_loss)
            train_accuracy(y, batch_pred)

            

        @tf.function
        def val_step(model, optimizer, data, labels):
            """
            Evaluate the model on one validation batch.

            The batch loss and the predictions are accumulated into the
            val_loss / val_accuracy metrics; nothing is returned and the
            optimizer argument is not used.
            """
            batch_pred = model(data)

            val_loss(loss_fn(labels, batch_pred))
            val_accuracy(labels, batch_pred)

        @tf.function
        def test_step(model, optimizer, data, labels):
            """
            Evaluate the model on one test batch.

            The batch loss and the predictions are accumulated into the
            test_loss / test_accuracy metrics; nothing is returned and the
            optimizer argument is not used.
            """
            batch_pred = model(data)

            test_loss(loss_fn(labels, batch_pred))
            test_accuracy(labels, batch_pred)

        ### State for the manual _EarlyStopping_

        # The parameters should be the same as for the original model

        patience_val = np.zeros(patience)

        # min_loss: reference validation loss the next epoch has to beat
        # patience_sofar: epochs counted so far that did not beat it by more than minimum_delta
        min_loss, patience_sofar = 10, 0

        # 0-based batch indices (99, 199, ..., 999) after which the validation
        # set is evaluated, i.e. every 100 iterations within an epoch
        hunderd_iter = list(range(99, 1000, 100))
        iteration = 0


        # Number of epochs that actually ran to completion. The loop variable alone
        # cannot distinguish "stopped before epoch k" from "finished the last epoch",
        # so the histories are trimmed with this counter instead of `epoch`.
        epochs_completed = 0

        for epoch in range(epochs):

            # Stop training once the manual early-stopping criterion is met
            if patience_sofar >= patience:
                print("\n\nEarlyStopping Evoked! Stopping training\n\n")
                break

            # Reset the training metrics at the start of every epoch.
            # The validation metrics are reset at each 100-iteration checkpoint below.
            train_loss.reset_states()
            train_accuracy.reset_states()

            for j, (x,y) in enumerate(train_dataset):
                train_one_step(model_gt_stripped, mask_model_stripped, optimizer, x, y)
                if j in hunderd_iter:
                    # Re-evaluate the whole validation set from scratch
                    val_loss.reset_states()
                    val_accuracy.reset_states()
                    for x_t, y_t in val_dataset:
                        val_step(model_gt_stripped, optimizer, x_t, y_t)
                    history_main_batch[i+1]['accuracy'][iteration] = train_accuracy.result()
                    history_main_batch[i+1]['loss'][iteration] = train_loss.result()
                    history_main_batch[i+1]['val_loss'][iteration] = val_loss.result()
                    history_main_batch[i+1]['val_accuracy'][iteration] = val_accuracy.result()
                    # Save the weights at every checkpoint so the best one can be restored later
                    model_gt.save_weights("{}/MNIST_{}_{}_Trained_Weights_gt_Unpruned_iter_{}.h5".format(dirName,first_layer,second_layer,iteration), overwrite=True)
                    iteration+=1

            # Per-epoch report. The validation numbers are those of the most recent
            # checkpoint, because the validation metrics are only updated there.
            template = 'Epoch {0}, Loss: {1:.4f}, Accuracy: {2:.4f}, val Loss: {3:.4f}, val Accuracy: {4:.4f}'

            history_main[i+1]['accuracy'][epoch] = train_accuracy.result()
            history_main[i+1]['loss'][epoch] = train_loss.result()
            history_main[i+1]['val_loss'][epoch] = val_loss.result()
            history_main[i+1]['val_accuracy'][epoch] = val_accuracy.result()

            print(template.format(epoch + 1, 
                                  train_loss.result(), train_accuracy.result()*100,
                                  val_loss.result(), val_accuracy.result()*100))

            # Count number of non-zero parameters over all trainable weights-

            model_sum_params = 0

            for layer in model_gt_stripped.trainable_weights:

                model_sum_params += tf.math.count_nonzero(layer, axis = None).numpy()

            print("Total number of trainable parameters = {0}\n".format(model_sum_params))

            # Manual early stopping: an epoch only counts as an improvement when the
            # validation loss drops by more than minimum_delta.
            # NOTE(review): patience_sofar is never reset after an improvement, so it
            # counts non-improving epochs in total rather than consecutively - confirm
            # this matches the criterion used for the original model.

            if (min_loss-history_main[i+1]['val_loss'][epoch])>minimum_delta:
                min_loss=history_main[i+1]['val_loss'][epoch]
            else:
                patience_sofar+=1

            epochs_completed += 1


        # Trim the unused (zero) tail of the histories: keep one entry per completed
        # epoch and one entry per evaluated checkpoint

        for metrics in history_main[i+1].keys():
            history_main[i+1][metrics] = np.resize(history_main[i+1][metrics], new_shape = epochs_completed)
        for metrics in history_main_batch[i+1].keys():
            history_main_batch[i+1][metrics] = np.resize(history_main_batch
                                                         [i+1][metrics], new_shape = iteration)

        # Checkpoint index with the lowest recorded validation loss
        round_batch_history = history_main_batch[i+1]
        min_val_loss_iter=np.argmin(round_batch_history['val_loss'])
        round_batch_history['min_iter']=int(min_val_loss_iter)

        # Restore the weights that were saved at that checkpoint
        best_checkpoint_file = "{}/MNIST_{}_{}_Trained_Weights_gt_Unpruned_iter_{}.h5".format(dirName,first_layer,second_layer,min_val_loss_iter)
        model_gt.load_weights(best_checkpoint_file)

        # Test accuracy, accumulated over the whole test set.
        # NOTE(review): the weights are loaded into model_gt while model_gt_stripped is
        # evaluated - presumably both share the same layers via strip_pruning; confirm.
        test_accuracy.reset_states()
        for x_t, y_t in test_dataset:
            test_step(model_gt_stripped, optimizer, x_t, y_t)
        round_batch_history['test_accuracy']=float(test_accuracy.result())


        # Save the weights of the winning ticket trained with GradientTape (model_gt,
        # i.e. the model that still carries the pruning wrappers)-
        final_weights_file = "{}/MNIST_{}_{}_Trained_Weights_Unpruned.h5".format(dirName,first_layer,second_layer)
        model_gt.save_weights(final_weights_file, overwrite=True)

    ## Check, round by round, whether a winning ticket was actually found


    wt=0
    swt=0
    for i in range(1,len(prun_rates)+1):
        print('percentage pruned:{:.3f}'.format(history_main[i]['percentage_wts_pruned'][0]))

        # Reference results of the original network and of the LeNet baseline
        orig_val_loss=min(history_orig_batch['val_loss'])
        round_orig,acc_orig=history_orig_batch['min_iter'],history_orig_batch['test_accuracy']
        lenet_val_loss=min(history_lenet_batch['val_loss'])
        round_lenet,acc_lenet=history_lenet_batch['min_iter'],history_lenet_batch['test_accuracy']

        print('orig min val_loss:{:.3f} iteration:{}, val_accuracy:{:.3f}'.format(orig_val_loss,round_orig,acc_orig))

        # Results of the pruned network of this round
        wt_val_loss=min(history_main_batch[i]['val_loss'])
        round_wt,acc_wt=history_main_batch[i]['min_iter'],history_main_batch[i]['test_accuracy']
        print('wt min val_loss:{:.3f} iteration:{}, val_accuracy:{:.3f}'.format(wt_val_loss,round_wt,acc_wt))

        # Winning ticket: best checkpoint reached no later than the original
        # network AND test accuracy at least as high
        beats_orig = round_wt<=round_orig and acc_wt>=acc_orig
        history_main_batch[i]['winning_ticket'] = 1 if beats_orig else 0
        if beats_orig:
            print('WINNING WICKET!')
            wt+=1
        else:
            print('no winning ticket...')
        print()

        # Super winning ticket: the same comparison against the LeNet baseline
        beats_lenet = round_wt<=round_lenet and acc_wt>=acc_lenet
        history_main_batch[i]['super_winning_ticket'] = 1 if beats_lenet else 0
        if beats_lenet:
            print('SUPER WINNING WICKET!')
            swt+=1

        print()

    # Share of pruning rounds that produced a (super) winning ticket
    n_rounds = len(prun_rates)
    print('percentage of wt:{:.2f}'.format(wt/n_rounds))
    print('percentage of swt:{:.2f}'.format(swt/n_rounds))


    # Folder that collects the raw per-experiment histories
    raw_dir='Raw_Data'

    if os.path.exists(raw_dir):
        print("Directory " , raw_dir ,  " already exists")
    else:
        os.mkdir(raw_dir)
        print("Directory " , raw_dir ,  " Created ")


    # The file name encodes the experiment: strategy, patience, layer sizes and round
    raw_name_parts = (raw_dir,strategy,patience,first_layer,second_layer,exp_round)
    filename='{}/Str_{}_pat_{}_layers_{}_{}_round_{}.json'.format(*raw_name_parts)

    # Write the checkpoint-level histories of all pruning rounds to that file
    encode_save_json(history_main_batch,filename)

    # New one-row dataframe that receives the summary of this experiment

    df=pd.DataFrame()

    # Shallow copy of the checkpoint-level histories
    dic=history_main_batch.copy()


    # Columns taken from the first pruning round: the first element of its
    # entries at key positions 5..9
    first_round=dic[1]
    for i in list(first_round)[5:10]:
        df[i]=[first_round[i][0]]

    # Fraction of pruning rounds flagged as (super) winning ticket
    df['winning_ticket_percentage']=np.mean([dic[k]['winning_ticket'] for k in dic.keys()])
    df['super_winning_ticket_percentage']=np.mean([dic[k]['super_winning_ticket'] for k in dic.keys()])
    df['round']=exp_round

    # Extra columns, only filled when at least one round is a winning ticket
    if df['winning_ticket_percentage'][0]>0.0:
        # One tuple per winning round:
        # (percentage pruned, best checkpoint index * 100, its val loss, test accuracy)
        t=[]
        for i in dic.keys():
            if dic[i]['winning_ticket']!=0:
                best_pos=np.argmin(dic[i]['val_loss'])
                t.append((dic[i]['percentage_wts_pruned'][0],
                          best_pos*100,
                          dic[i]['val_loss'][best_pos],
                          dic[i]['test_accuracy']))

        # Order by validation loss; the first entry is then the best round
        t.sort(key=lambda entry:entry[2])
        best_round=t[0]
        df['min_val_loss_per_pruned']=[best_round[0]]
        df['min_val_loss']=[best_round[2]]
        df['mean_val_loss']=np.mean([entry[2] for entry in t])
        df['min_val_loss_iteration']=[best_round[1]]
        df['mean_val_loss_iteration']=np.mean([entry[1] for entry in t])
        df['min_val_loss_acc']=[best_round[3]]
        df['mean_test_acc']=np.mean([entry[3] for entry in t])



    # Append the row of this experiment to the main summary table.
    # DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
    # pd.concat with its defaults keeps the same row order and index handling.
    df_main=pd.concat([df_main,df])

    # Write the updated summary table back to disk

    df_main.to_csv('{}/{}'.format(data_dirname,summary_data),index_label=False)

# here the loop ends

In [None]:
# Display the summary table as it is stored on disk
summary_csv_path = '{}/{}'.format(data_dirname,summary_data)
pd.read_csv(summary_csv_path)