In [1]:
# Notebook-level experiment configuration; these values are forwarded to run(...) below.
ITERATIONS = 10                    # number of independent repetitions of the sweep
ARCHITECTURE = "VGG"               # run() handles 'ResNet', 'VGG', 'MLP' and 'CNN'
EXPERIMENT_TYPE = "fixed-eps"      # free-form tag that becomes the prefix of the result file names

In [2]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf

from helperfiles import *
import helperfiles.experiment as experiment

In [3]:
from __future__ import absolute_import, division, print_function, unicode_literals
import gc
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd
from tensorflow.keras import layers
import tensorflow_datasets as tfds
from tqdm import tqdm
import matplotlib.pyplot as plt
import pathlib
import os
import random
from secrets import randbelow
import foolbox as fb
from datetime import datetime
from helperfiles.helpers import load_data, initialize_base_model, get_zeros_ratio, train_model, compile_model, bb0_attack, pgd_attack,cw2_attack, plot_hist

# Short alias for tf.data's AUTOTUNE sentinel (lets tf.data choose parallelism /
# buffer sizes at runtime). It is not referenced by the code visible in this cell.
AUTOTUNE = tf.data.experimental.AUTOTUNE



def _save_results(rows, columns, experiment_name):
    """Write the collected result rows to ./final-results/ as a pickle and a CSV file."""
    os.makedirs('./final-results', exist_ok=True)
    results = pd.DataFrame(rows, columns=columns, dtype='object')
    results.to_pickle(f'./final-results/{experiment_name}.pkl')
    results.to_csv(f'./final-results/{experiment_name}.csv', index=False)


def run(structure, method, scope, iterations, architecture, experiment_type):
    """Run a prune -> retrain -> attack sweep and persist one result row per step.

    For each of ``iterations`` repetitions a fresh base model is created with
    ``initialize_base_model``.  The same model is then pruned to the ratios
    0, 1/2, 3/4, ..., 63/64 (compression rates 2**0 .. 2**6), recompiled, retrained,
    evaluated on the test data and attacked.  After every step the rows collected so
    far are checkpointed to ``./final-results/<experiment_name>.pkl`` and ``.csv``.

    Args:
        structure: 'unstructured' or 'structured'.
        method: 'random' or 'magnitude'.
        scope: 'global' or 'local'.
        iterations: number of repetitions of the whole sweep.
        architecture: 'ResNet' or 'VGG' (imagenette data) or 'MLP' or 'CNN' (mnist data).
        experiment_type: free-form tag used as prefix of the experiment name.

    Raises:
        ValueError: if ``architecture`` is unknown or the structure/method/scope
            combination does not name a pruning method.  Both are checked before any
            data is loaded or any model is built.

    Note:
        The persisted frame is rebuilt from the list of rows on every checkpoint, so
        the pickled frame carries a 0..n-1 index.
    """
    if architecture in ('ResNet', 'VGG'):
        dataset_name, evaluate_on_dataset = 'imagenette', True
    elif architecture in ('MLP', 'CNN'):
        dataset_name, evaluate_on_dataset = 'mnist', False
    else:
        raise ValueError(
            f"unknown architecture {architecture!r}; expected 'ResNet', 'VGG', 'MLP' or 'CNN'"
        )

    # The models expose one method per combination, e.g. prune_magnitude_local_unstruct.
    structure_suffix = {'unstructured': 'unstruct', 'structured': 'struct'}.get(structure)
    if method not in ('random', 'magnitude') or scope not in ('global', 'local') or structure_suffix is None:
        raise ValueError("pruning method invalid")
    prune_method_name = f'prune_{method}_{scope}_{structure_suffix}'

    ds_train, ds_test, attack_images, attack_labels = load_data(dataset_name)

    experiment_name = f'{experiment_type}-{architecture}-{method}-{scope}-{structure}'
    cols = ['iteration','experiment_name','structure','method','scope','pruning_ratio','accuracy','loss','pgd_linf','cw_l2','bb_l0', 'total_params', 'params_left']
    rows = []

    # Compression rate 2**k  <=>  pruning ratio 1 - 1/2**k, for k = 0..6.
    pruning_ratios = [1 - 1 / 2 ** exponent for exponent in range(7)]

    model = None
    for j in tqdm(range(iterations)):
        # Drop the previous repetition's model before the next one is built.
        if model is None:
            print('no model to delete')
        else:
            model = None
            gc.collect()
            print('deleted model')
        tf.keras.backend.clear_session()
        tf.compat.v1.reset_default_graph()
        gc.collect()

        model = initialize_base_model(architecture, ds_train, j, experiment_name=experiment_name, lr=1e-3)
        for pruning_ratio in tqdm(pruning_ratios):
            print(f'current pruning ratio is{pruning_ratio}, current iteration is {j}')
            gc.collect()
            getattr(model, prune_method_name)(pruning_ratio)

            # NOTE(review): this first call's result is overwritten after retraining;
            # it is kept only in case get_zeros_ratio has side effects - confirm and drop.
            zeros_ratio, non_zeros, param_count = get_zeros_ratio(model)
            compile_model(architecture, model, lr=1e-3)

            hist = train_model(architecture, ds_train, ds_test, model, to_convergence=True)
            zeros_ratio, non_zeros, param_count = get_zeros_ratio(model)
            if evaluate_on_dataset:
                res = model.evaluate(ds_test, verbose=0)
            else:
                # For MLP/CNN the test data is indexed as (inputs, labels).
                res = model.evaluate(ds_test[0], ds_test[1], verbose=0)
            plot_hist(hist)

            # res[0] is stored as 'loss' and res[1] as 'accuracy' below; the L0 attack
            # is only attempted when the accuracy exceeds 0.40.
            if res[1] > .40:
                bb0_success = bb0_attack(architecture, model, attack_images, attack_labels)
            else:
                bb0_success = 'not successful'
            rows.append({
                'iteration':j,
                'experiment_name':experiment_name,
                'structure':structure,
                'method':method,
                'scope':scope,
                'pruning_ratio':pruning_ratio,
                'accuracy':res[1],
                'loss':res[0],
                'pgd_linf':pgd_attack(architecture, model, attack_images, attack_labels),
                'cw_l2':cw2_attack(architecture, model, attack_images, attack_labels),
                'bb_l0':bb0_success,
                'total_params':param_count,
                'params_left':non_zeros
            })
            # Checkpoint after every step so an interrupted sweep keeps its results.
            _save_results(rows, cols, experiment_name)

    # Also writes the (possibly empty) files when no step was executed.
    _save_results(rows, cols, experiment_name)
   




In [16]:
# Launch the sweep: magnitude-based, per-layer (local), unstructured pruning,
# using the notebook-level configuration constants.
run(
    method='magnitude',
    scope='local',
    structure='unstructured',
    architecture=ARCHITECTURE,
    experiment_type=EXPERIMENT_TYPE,
    iterations=ITERATIONS,
)

  0%|          | 0/10 [00:00<?, ?it/s]

no model to delete


  0%|          | 0/10 [00:17<?, ?it/s]


ResourceExhaustedError:  OOM when allocating tensor with shape[128,512,28,28] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[node vg_g11/batch_normalization_4/FusedBatchNormV3 (defined at <ipython-input-8-0449ba79666c>:178) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
 [Op:__inference_train_function_1221732]

Errors may have originated from an input operation.
Input Source operations connected to node vg_g11/batch_normalization_4/FusedBatchNormV3:
 vg_g11/custom_conv_layer_4/Relu (defined at <ipython-input-8-0449ba79666c>:77)

Function call stack:
train_function


In [8]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd
from tensorflow.keras import layers
import tensorflow_datasets as tfds
from tqdm import tqdm
import matplotlib.pyplot as plt
import pathlib
import os
import random
from secrets import randbelow
import foolbox as fb
from datetime import datetime

import helperfiles.helpers as helpers

# Kernel shapes of all maskable layers, keyed by layer name.
# Conv entries are passed to tf.nn.conv2d as filters, i.e. (height, width, in, out);
# dense entries are (inputs, outputs).
shapes = {
    f'conv_{position}': (3, 3, in_channels, out_channels)
    for position, (in_channels, out_channels) in enumerate(
        [
            (3, 64), (64, 128), (128, 256), (256, 256),
            (256, 512), (512, 512), (512, 512), (512, 512),
        ],
        start=1,
    )
}
shapes.update({
    # 7*7*512 flattened features - assumes 224x224 inputs halved by five poolings; TODO confirm
    'dense_1': (7 * 7 * 512, 4096),
    'dense_2': (4096, 1024),
    'dense_3': (1024, 10),
})

# Masked conv2D with optional bias and ReLU activation
class CustomConvLayer(layers.Layer):
    """2-D convolution whose kernel is multiplied by a binary mask, followed by ReLU.

    Attributes:
        w: trainable kernel of the given ``shape`` (glorot-uniform initialised).
        m: non-trainable mask of the same shape, initialised to ones; the effective
           kernel is ``w * m``, so zeroing mask entries disables those weights.
        b: trainable bias with one entry per output channel (only if ``bias`` is truthy).
        bias: the ``bias`` flag as passed in.
        s, p: stride and padding forwarded to ``tf.nn.conv2d``.
    """

    def __init__(self, shape, bias, stride, padding='SAME'):
        """Create the kernel, its mask and (optionally) the bias.

        Args:
            shape: kernel shape handed to ``tf.nn.conv2d`` as ``filters``.
            bias: whether to add a per-output-channel bias.
            stride: spatial stride, used for both spatial dimensions.
            padding: padding mode forwarded to ``tf.nn.conv2d``.
        """
        super(CustomConvLayer, self).__init__()
        self.bias = bias
        # Keep the creation order w, m, b: VGG11 addresses these arrays by their
        # position in get_weights().
        self.w = self.add_weight(
            shape=shape,
            initializer='glorot_uniform',
            trainable=True,
            name='w'
        )
        self.m = self.add_weight(
            shape=shape,
            initializer='ones',
            trainable=False,
            name='m'
        )
        if self.bias:
            self.b = self.add_weight(
                shape=(shape[-1],),  # a proper 1-D shape tuple rather than a bare int
                initializer='zeros',
                trainable=True,
                name='b'
            )
        self.s = stride
        self.p = padding

    def call(self, inputs):
        """Apply the masked convolution, the optional bias and ReLU to ``inputs``."""
        x = tf.nn.conv2d(inputs, tf.multiply(self.w, self.m), strides=[1, self.s, self.s, 1], padding=self.p,)
        if self.bias:
            x = tf.nn.bias_add(x, self.b)
        return tf.nn.relu(x)
        

#Max Pooling Layer
class CustomPoolLayer(layers.Layer):
    """Max pooling over a square k x k window with a stride equal to the window size."""

    def __init__(self, k=2, padding='SAME'):#padding='VALID'):
        super(CustomPoolLayer, self).__init__()
        self.p = padding
        self.k = k

    def call(self, inputs):
        """Return ``inputs`` max-pooled with window and stride ``[1, k, k, 1]``."""
        window = [1, self.k, self.k, 1]
        return tf.nn.max_pool2d(inputs, ksize=window, strides=list(window), padding=self.p)
    
#Masked Dense Layer with optional Bias
class CustomDenseLayer(layers.Layer):
    """Fully connected layer whose weight matrix is multiplied by a binary mask.

    Attributes:
        w: trainable weight matrix of the given ``shape`` (random-normal initialised).
        m: non-trainable mask of the same shape, initialised to ones; the effective
           weights are ``w * m``.
        b: trainable bias with ``shape[-1]`` entries (only if ``bias`` is truthy).
        bias: the ``bias`` flag as passed in.
        a: activation label, 'relu' or 'softmax'.
    """

    _ACTIVATIONS = ('relu', 'softmax')

    def __init__(self, shape, bias, activation = 'relu'):
        """Create the weights, their mask and (optionally) the bias.

        Args:
            shape: (inputs, outputs) shape of the weight matrix.
            bias: whether to add a bias vector.
            activation: 'relu' or 'softmax' (see ``call`` for what each label applies).

        Raises:
            ValueError: if ``activation`` is neither 'relu' nor 'softmax'.
        """
        super(CustomDenseLayer, self).__init__()
        if activation not in self._ACTIVATIONS:
            raise ValueError(
                f"unsupported activation {activation!r}; expected one of {self._ACTIVATIONS}"
            )
        self.bias = bias
        # Keep the creation order w, m, b: VGG11 addresses these arrays by their
        # position in get_weights().
        self.w = self.add_weight(
            shape = shape,
            initializer='random_normal',
            trainable = True,
            name='w'
        )
        self.m = self.add_weight(
            shape = shape,
            initializer='ones',
            trainable = False,
            name='m'
        )
        if self.bias:
            self.b = self.add_weight(
                shape = (shape[-1],),  # a proper 1-D shape tuple rather than a bare int
                initializer = 'zeros',
                trainable = True,
                name='b'
            )
        self.a = activation

    def call(self, inputs):
        """Return ``activation(inputs @ (w * m) [+ b])``.

        Raises:
            ValueError: if ``self.a`` was changed to an unsupported label after construction.
        """
        x = tf.matmul(inputs, tf.multiply(self.w, self.m))
        if self.bias:
            x = tf.nn.bias_add(x, self.b)
        if self.a == 'relu':
            # NOTE(review): the 'relu' label has always applied tanh here. This is kept
            # so that previously trained models and reported results stay comparable;
            # confirm whether tanh is intended before renaming or changing it.
            return tf.nn.tanh(x)
        if self.a == 'softmax':
            return tf.nn.softmax(x)
        raise ValueError(f"unsupported activation {self.a!r}")
        
class VGG11(tf.keras.Model):
    """VGG-11 style classifier built from maskable conv/dense layers, with pruning methods.

    Every conv and dense kernel ``w`` has a companion mask ``m`` of the same shape
    (see CustomConvLayer / CustomDenseLayer).  The ``prune_*`` methods zero entries of
    both ``w`` and ``m`` and write them back with ``set_weights``; they all return True.

    Pruning granularities ("units"):
        * unstructured - single weights;
        * 'channel'    - one 2-D (height x width) kernel slice per (input, output) pair;
        * 'filter'     - all weights belonging to one output channel;
        * dense rows   - one row of a dense weight matrix.

    ``ratio`` is always the total fraction of units that should be pruned afterwards.
    Magnitude pruning removes the units with the smallest mean absolute weight
    (units that are already zero sort first, so repeated calls with growing ratios
    accumulate).  Random pruning only draws from units whose mask is still non-zero
    and prunes as many as are missing to reach the target.
    """

    # For conv kernels stored as (h, w, in, out): the axis order that moves the pruned
    # unit to the front, and how many leading axes index the unit.
    _UNIT_LAYOUT = {
        'channel': ((2, 3, 0, 1), 2),   # (in, out, h, w) -> in*out units of h*w weights
        'filter': ((3, 2, 0, 1), 1),    # (out, in, h, w) -> out units of in*h*w weights
    }

    def __init__(self):
        super(VGG11, self).__init__()
        # NOTE(review): the hard-coded indices at the end of this constructor presumably
        # rely on get_weights() listing arrays layer by layer in the order the layers
        # are assigned here - confirm before reordering any of these attributes.
        self.conv1 = CustomConvLayer(shapes['conv_1'], False, 1)
        self.maxpool1 = CustomPoolLayer(k=2)
        self.bn1 = layers.BatchNormalization()
        self.conv2 = CustomConvLayer(shapes['conv_2'], False, 1)
        self.maxpool2 = CustomPoolLayer(k=2)
        self.bn2 = layers.BatchNormalization()
        self.conv3 = CustomConvLayer(shapes['conv_3'], False, 1)
        self.bn3 = layers.BatchNormalization()
        self.conv4 = CustomConvLayer(shapes['conv_4'], False, 1)
        self.maxpool3 = CustomPoolLayer(k=2)
        self.bn4 = layers.BatchNormalization()
        self.conv5 = CustomConvLayer(shapes['conv_5'], False, 1)
        self.bn5 = layers.BatchNormalization()
        self.conv6 = CustomConvLayer(shapes['conv_6'], False, 1)
        self.maxpool4 = CustomPoolLayer(k=2)
        self.bn6 = layers.BatchNormalization()
        self.conv7 = CustomConvLayer(shapes['conv_7'], False, 1)
        self.bn7 = layers.BatchNormalization()
        self.conv8 = CustomConvLayer(shapes['conv_8'], False, 1)
        self.maxpool5 = CustomPoolLayer(k=2)
        self.bn8 = layers.BatchNormalization()
        self.dense1 = CustomDenseLayer(shapes['dense_1'], True, 'relu')
        self.dense2 = CustomDenseLayer(shapes['dense_2'], True, 'relu')
        self.dense3 = CustomDenseLayer(shapes['dense_3'], True, 'softmax')
        # Weight-free layer, created once instead of on every forward pass.
        self.flatten = layers.Flatten()
        # Positions of the kernels and of their masks inside get_weights().
        self.conv_layers = [0, 6, 12, 18, 24, 30, 36, 42]
        self.conv_masks = [1, 7, 13, 19, 25, 31, 37, 43]
        self.dense_layers = [48, 51, 54]
        self.dense_masks = [50, 53, 56]

    def call(self, inputs, training=False):
        """Forward pass: eight conv stages (pooling after 1, 2, 4, 6, 8), then three dense layers."""
        x = self.conv1(inputs)
        x = self.maxpool1(x)
        x = self.bn1(x, training=training)
        x = self.conv2(x)
        x = self.maxpool2(x)
        x = self.bn2(x, training=training)
        x = self.conv3(x)
        x = self.bn3(x, training=training)
        x = self.conv4(x)
        x = self.maxpool3(x)
        x = self.bn4(x, training=training)
        x = self.conv5(x)
        x = self.bn5(x, training=training)
        x = self.conv6(x)
        x = self.maxpool4(x)
        x = self.bn6(x, training=training)
        x = self.conv7(x)
        x = self.bn7(x, training=training)
        x = self.conv8(x)
        x = self.maxpool5(x)
        x = self.bn8(x, training=training)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.dense3(x)
        return x

    # ------------------------------------------------------------------ helpers

    def _conv_pairs(self):
        """Return [(kernel_index, mask_index), ...] for the conv layers."""
        return list(zip(self.conv_layers, self.conv_masks))

    def _dense_pairs(self):
        """Return [(kernel_index, mask_index), ...] for the dense layers."""
        return list(zip(self.dense_layers, self.dense_masks))

    @classmethod
    def _to_units(cls, array, unit):
        """Return a writable 2-D copy of ``array`` with one prunable unit per row."""
        if unit == 'weight':
            return array.reshape(-1, 1).copy()
        if array.ndim == 2:
            # Dense weight matrix: every row already is one unit.
            return array.copy()
        axes, leading = cls._UNIT_LAYOUT[unit]
        moved = np.transpose(array, axes)
        unit_count = int(np.prod(moved.shape[:leading]))
        return moved.reshape(unit_count, -1).copy()

    @classmethod
    def _from_units(cls, units, unit, shape):
        """Inverse of ``_to_units``: restore an array of the original ``shape``."""
        if unit == 'weight' or len(shape) == 2:
            return units.reshape(shape)
        axes, _ = cls._UNIT_LAYOUT[unit]
        moved = units.reshape([shape[axis] for axis in axes])
        # argsort of a permutation is its inverse permutation.
        return np.transpose(moved, np.argsort(axes))

    @staticmethod
    def _pick_random(candidates, count):
        """Return up to ``count`` distinct entries of ``candidates``, drawn uniformly.

        The numpy generator is seeded from the ``random`` module so that seeding
        ``random`` still makes the draw reproducible.
        """
        if count <= 0:
            return candidates[:0]
        shuffler = np.random.RandomState(random.getrandbits(32))
        return shuffler.permutation(candidates)[:count]

    def _apply_pruning(self, ratio, pairs, unit, method, scope):
        """Prune ``pairs`` of (kernel, mask) arrays and write the result back.

        Args:
            ratio: total fraction of units that should be pruned afterwards.
            pairs: (kernel_index, mask_index) positions inside ``get_weights()``.
            unit: 'weight', 'channel', 'filter' or 'row'.
            method: 'magnitude' or 'random'.
            scope: 'local' ranks/draws units per layer, 'global' across all ``pairs``.
        """
        weights = self.get_weights()
        pairs = list(pairs)
        groups = [pairs] if scope == 'global' else [[pair] for pair in pairs]
        for group in groups:
            if not group:
                continue
            kernel_shapes = [weights[w_idx].shape for w_idx, _ in group]
            w_units = [self._to_units(weights[w_idx], unit) for w_idx, _ in group]
            m_units = [self._to_units(weights[m_idx], unit) for _, m_idx in group]
            # bounds[k] is the offset of layer k inside the group's concatenated units.
            bounds = np.cumsum([0] + [len(units) for units in w_units])
            total = int(bounds[-1])
            target = int(np.round(ratio * total))
            if method == 'magnitude':
                scores = np.concatenate([np.abs(units).mean(axis=1) for units in w_units])
                doomed = np.argsort(scores)[:target]
            else:
                alive = np.flatnonzero(np.concatenate([units.any(axis=1) for units in m_units]))
                # Only prune what is still missing; never a negative amount.
                doomed = self._pick_random(alive, target - (total - alive.size))
            for pos, (w_idx, m_idx) in enumerate(group):
                in_layer = (doomed >= bounds[pos]) & (doomed < bounds[pos + 1])
                local = doomed[in_layer] - bounds[pos]
                w_units[pos][local] = 0
                m_units[pos][local] = 0
                weights[w_idx] = self._from_units(w_units[pos], unit, kernel_shapes[pos])
                weights[m_idx] = self._from_units(m_units[pos], unit, kernel_shapes[pos])
        self.set_weights(weights)
        return True

    def _check_structure(self, structure):
        """Raise ValueError unless ``structure`` is 'channel' or 'filter'."""
        if structure not in self._UNIT_LAYOUT:
            raise ValueError(f"unknown structure {structure!r}; expected 'channel' or 'filter'")

    # ------------------------------------------------------- unstructured pruning

    def prune_random_local_unstruct(self, ratio):
        """Randomly prune single weights, separately in every conv and dense layer."""
        return self._apply_pruning(ratio, self._conv_pairs() + self._dense_pairs(), 'weight', 'random', 'local')

    def prune_random_global_unstruct(self, ratio):
        """Randomly prune single weights, drawn across all conv and dense layers together."""
        return self._apply_pruning(ratio, self._conv_pairs() + self._dense_pairs(), 'weight', 'random', 'global')

    def prune_magnitude_global_unstruct(self, ratio):
        """Prune the smallest-magnitude weights, ranked across all conv and dense layers."""
        return self._apply_pruning(ratio, self._conv_pairs() + self._dense_pairs(), 'weight', 'magnitude', 'global')

    def prune_magnitude_local_unstruct(self, ratio):
        """Prune the smallest-magnitude weights, ranked separately in every conv and dense layer."""
        return self._apply_pruning(ratio, self._conv_pairs() + self._dense_pairs(), 'weight', 'magnitude', 'local')

    # --------------------------------------------------------- structured pruning

    def prune_random_local_struct(self, ratio, prune_dense_layers=False):
        """Randomly prune 2-D kernel slices ('channels') in every conv layer.

        Args:
            ratio: total fraction of units per layer that should be pruned afterwards.
            prune_dense_layers: if True, rows of every dense weight matrix are pruned
                randomly with the same ratio as well.
        """
        self._apply_pruning(ratio, self._conv_pairs(), 'channel', 'random', 'local')
        if prune_dense_layers:
            self._apply_pruning(ratio, self._dense_pairs(), 'row', 'random', 'local')
        return True

    def prune_random_global_struct(self, ratio, prune_dense_layers=False):
        """Placeholder: random global structured pruning is not implemented.

        Raises:
            Warning: always.  (NotImplementedError would be the idiomatic type; the
                exception type is kept for compatibility with existing callers.)
        """
        raise Warning('Not yet implemented')

    def prune_magnitude_local_struct(self, ratio, structure='channel', prune_dense_layers=False):
        """Prune the smallest-magnitude channels or filters in every conv layer.

        Args:
            ratio: total fraction of units per layer that should be pruned afterwards.
            structure: 'channel' (2-D kernel slices) or 'filter' (whole output channels).
            prune_dense_layers: if True, the smallest-magnitude rows of every dense
                weight matrix are pruned with the same ratio as well.

        Raises:
            ValueError: if ``structure`` is neither 'channel' nor 'filter'.
        """
        self._check_structure(structure)
        self._apply_pruning(ratio, self._conv_pairs(), structure, 'magnitude', 'local')
        if prune_dense_layers:
            self._apply_pruning(ratio, self._dense_pairs(), 'row', 'magnitude', 'local')
        return True

    def prune_magnitude_global_struct(self, ratio, prune_dense_layers=False,structure='channel'
                                     ):
        """Prune the smallest-magnitude channels or filters, ranked across all conv layers.

        Units are ranked by their mean absolute weight so that units of different
        sizes (filters of different layers) are comparable.

        Args:
            ratio: total fraction of conv units that should be pruned afterwards.
            prune_dense_layers: if True, dense rows are additionally ranked across all
                dense layers and pruned with the same ratio.
            structure: 'channel' (2-D kernel slices) or 'filter' (whole output channels).

        Raises:
            ValueError: if ``structure`` is neither 'channel' nor 'filter'.
        """
        self._check_structure(structure)
        self._apply_pruning(ratio, self._conv_pairs(), structure, 'magnitude', 'global')
        if prune_dense_layers:
            self._apply_pruning(ratio, self._dense_pairs(), 'row', 'magnitude', 'global')
        return True

    # ------------------------------------------------------------ index discovery

    def find_layers_and_masks(self):
        """Fill the kernel/mask index lists by inspecting ``get_weights()``.

        Does nothing when ``conv_layers`` is already populated (which is the case with
        the hard-coded indices set in ``__init__``).  Otherwise 4-D arrays whose first
        dimension is not 1 are treated as conv arrays and 2-D arrays as dense arrays;
        an array counts as a mask when its first 100 values are all 0 or 1.
        NOTE(review): a kernel whose first 100 values are all exactly 0 would be
        classified as a mask by this heuristic.
        """
        if len(self.conv_layers) != 0:
            return True
        weights = self.get_weights()
        for i, w in enumerate(weights):
            print(i ,'/', len(weights))
            if w.ndim == 4 and w.shape[0] != 1:
                kernel_indices, mask_indices = self.conv_layers, self.conv_masks
            elif w.ndim == 2:
                kernel_indices, mask_indices = self.dense_layers, self.dense_masks
            else:
                continue
            sample = w.ravel()[:100]
            if np.all((sample == 0) | (sample == 1)):
                mask_indices.append(i)
            else:
                kernel_indices.append(i)
        return True
        

In [9]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd
from tensorflow.keras import layers
import tensorflow_datasets as tfds
from tqdm import tqdm
import matplotlib.pyplot as plt
import pathlib
import os
import random
from secrets import randbelow
import foolbox as fb
from datetime import datetime

import helperfiles.vgg11 as vgg
import helperfiles.resnet as resnet
import helperfiles.mlp as mlp
import helperfiles.cnn as cnn

AUTOTUNE = tf.data.experimental.AUTOTUNE


def load_data(dataset,ratio='100%'):
    """Load and preprocess one of the supported TFDS datasets.

    Args:
        dataset: ``'mnist'`` or ``'imagenette'``.
        ratio: Slice applied to both splits, inserted into the TFDS split
            string as ``train[:{ratio}]`` (e.g. ``'100%'``).

    Returns:
        For ``'mnist'``: ``([x_train, y_train], [x_test, y_test],
        x_test[:1000], y_test[:1000])`` where images are float32 tensors
        reshaped to ``(784, 1)``.

        For ``'imagenette'``: ``(ds_train, ds_test, attack_images,
        attack_labels)`` where the two datasets are batched ``tf.data``
        pipelines of 224x224 float32 images and the attack tensors hold the
        first 256 validation samples.

        ``False`` for any other dataset name.
    """

    def augment(image,label):
        # Training-time augmentation: random horizontal flip, then pad by
        # 60 pixels in each dimension and take a random 224x224 crop.
        image = tf.image.random_flip_left_right(image)
        image = tf.image.resize_with_crop_or_pad(image, 224+60, 224+60) # Add 60 pixels of padding
        image = tf.image.random_crop(image, size=[224,224,3]) # Random crop back to 224x224
        return image,label

    @tf.function
    def load_image(datapoint):
        # `normalize` is resolved when this is called, i.e. after the
        # dataset-specific definition below has been bound.
        input_image, label = normalize(datapoint)
        return input_image, label

    if dataset=='mnist':

        ds, info = tfds.load(name=dataset, with_info=True, split=[f"train[:{ratio}]",f"test[:{ratio}]"])
        ds_train=ds[0]
        ds_test=ds[1]

        def normalize(x):
            y = {'image': tf.image.convert_image_dtype(x['image'], tf.float32), 'label': x['label']}
            y = (tf.reshape(y['image'],(28*28,1)), y['label'])
            return y
        ds_test = list(ds_test.map(load_image))
        ds_train = list(ds_train.map(load_image))

        x_train = tf.convert_to_tensor([sample[0] for sample in ds_train])
        y_train = tf.convert_to_tensor([sample[1] for sample in ds_train])
        x_test = tf.convert_to_tensor([sample[0] for sample in ds_test])
        y_test = tf.convert_to_tensor([sample[1] for sample in ds_test])

        return [x_train, y_train], [x_test, y_test], x_test[:1000], y_test[:1000]

    if dataset=='imagenette':
        ds, info = tfds.load(name=dataset, with_info=True, split=[f"train[:{ratio}]",f"validation[:{ratio}]"])

        ds_train=ds[0]
        ds_test=ds[1]
        def normalize(x):
            y = {'image': tf.image.convert_image_dtype(x['image'], tf.float32), 'label': x['label']}
            y = (tf.image.resize(y['image'], (224,224)), y['label'])
            return y


        num_train_examples= info.splits['train'].num_examples
        BATCH_SIZE = 128

        ds_train = (
            ds_train
            .map(normalize, num_parallel_calls=tf.data.experimental.AUTOTUNE)
            .take(num_train_examples)
            .cache()
            .shuffle(num_train_examples)
            .map(augment, num_parallel_calls=AUTOTUNE)
            .batch(BATCH_SIZE)
            .prefetch(AUTOTUNE)
        )

        ds_test = ds_test.map(
            normalize, )
        ds_test = ds_test.batch(BATCH_SIZE)
        ds_test = ds_test.cache()
        ds_test = ds_test.prefetch(tf.data.experimental.AUTOTUNE)

        # Only the first 256 validation samples are attacked, so stop the
        # pipeline there instead of materialising the whole split in a list
        # and slicing it afterwards.
        attack_set = list(ds[1].map(load_image).take(256))

        attack_images = tf.convert_to_tensor([sample[0] for sample in attack_set])
        attack_labels = tf.convert_to_tensor([sample[1] for sample in attack_set])

        return ds_train, ds_test, attack_images, attack_labels

    return False

def compile_model(architecture, model, lr=1e-3):
    """Compile ``model`` in place for sparse-label classification on logits.

    Args:
        architecture: Not used by this function.
        model: Object exposing a Keras-style ``compile`` method.
        lr: Learning rate for the Adam optimizer.

    Returns:
        Always ``True``.
    """
    logits_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
    adam = tf.keras.optimizers.Adam(learning_rate=lr)
    model.compile(
        loss=logits_loss,
        optimizer=adam,
        metrics=['accuracy'],
        experimental_run_tf_function=True
    )
    return True
    

def initialize_base_model(architecture, ds, index, experiment_name, lr=1e-3, save_weights=False):
    """Build, compile and briefly train (one epoch) a fresh model.

    Args:
        architecture: ``'ResNet'``, ``'VGG'``, ``'CNN'`` or ``'MLP'``.
        ds: Training data. For ``'CNN'`` it is indexed as ``ds[0]`` (inputs)
            and ``ds[1]`` (labels); for ``'ResNet'``/``'VGG'`` it is passed
            to ``fit`` as a whole.
        index: Not used by this function.
        experiment_name: Not used by this function.
        lr: Learning rate for the Adam optimizer.
        save_weights: Not used by this function.

    Returns:
        The compiled model after one epoch of training, or the string
        ``'not implemented yet'`` for ``'MLP'``.

    Raises:
        ValueError: If ``architecture`` is none of the names above
            (previously this surfaced as an UnboundLocalError on ``model``).
    """
    if architecture == 'MLP':
        return 'not implemented yet'
    if architecture not in ('ResNet', 'VGG', 'CNN'):
        raise ValueError(
            f"unknown architecture {architecture!r}; "
            "expected 'ResNet', 'VGG', 'CNN' or 'MLP'"
        )

    is_cnn = architecture == 'CNN'
    if is_cnn:
        model = cnn.CustomConvModel()
    elif architecture == 'ResNet':
        model = resnet.CustomResNetModel()
    else:
        # NOTE(review): VGG11 is referenced as a bare name rather than via the
        # imported `vgg` module; it must already be defined in the notebook
        # namespace - confirm.
        model = VGG11()

    model.compile(
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr),
        metrics=['accuracy'],
        # The CNN is the only architecture compiled with this flag disabled.
        experimental_run_tf_function=not is_cnn
    )

    if is_cnn:
        model.fit(
            x=ds[0],
            y=ds[1],
            batch_size=64,
            epochs=1,
        )
    else:
        model.fit(
            x=ds,
            epochs=1,
        )

    return model



def train_model(architecture, ds_train, ds_test, model, to_convergence=True, epochs=5):
    """Train ``model`` with early stopping and return the Keras history.

    Args:
        architecture: ``'CNN'``, ``'ResNet'`` or ``'VGG'``. Any other value
            trains nothing.
        ds_train: For ``'CNN'`` an ``(inputs, labels)`` pair; otherwise passed
            to ``fit`` as ``x``.
        ds_test: Validation data in the same layout as ``ds_train``.
        model: Compiled model to train.
        to_convergence: When ``True`` the ``epochs`` argument is replaced by a
            large cap (500 for CNN, 200 for ResNet/VGG) and early stopping
            decides when to stop.
        epochs: Number of epochs when ``to_convergence`` is not ``True``.

    Returns:
        The object returned by ``model.fit``, or ``None`` for an
        unsupported ``architecture``.
    """
    if architecture=='CNN':
        if to_convergence == True:
            epochs=500
        callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
        hist = model.fit(
            x=ds_train[0],
            y=ds_train[1],
            batch_size=64,
            epochs=epochs,
            callbacks=[callback],
            validation_data=(ds_test[0], ds_test[1]),
        )
        return hist

    if architecture=='ResNet' or architecture == 'VGG':

        reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
            patience=12,
            monitor='val_loss',
            factor=.3,
            min_lr=9e-5,
            min_delta=0
        )

        early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=15)
        checkpoint_filepath = '/tmp/checkpoint'
        model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
            filepath=checkpoint_filepath,
            save_weights_only=True,
            monitor='val_loss',
            # A loss is better when it is lower; 'max' made save_best_only
            # keep the checkpoint with the highest validation loss.
            mode='min',
            save_best_only=True)

        if to_convergence == True:
            epochs = 200

        hist = model.fit(
            x=ds_train,
            epochs=epochs,
            validation_data=ds_test,
            callbacks=[reduce_lr, early_stopping, model_checkpoint_callback],
        )
        return hist

    return None

def print_time(text=''):
    """Print ``text`` followed by the current local time as ``HH:MM:SS``."""
    stamp = datetime.now().strftime("%H:%M:%S")
    print(text, stamp)

def pgd_attack(architecture, model_to_attack, attack_images, attack_labels):
    """Run an L-inf PGD attack and report the success rate per epsilon.

    Args:
        architecture: ``'CNN'``/``'MLP'`` (single call over all samples) or
            ``'ResNet'``/``'VGG'`` (four batches of 64 samples).
        model_to_attack: Model wrapped in a Foolbox ``TensorFlowModel`` with
            input bounds ``(0, 1)``.
        attack_images: Inputs to perturb.
        attack_labels: Labels belonging to ``attack_images``.

    Returns:
        A list with one success rate per epsilon (successful adversarials
        divided by ``len(attack_labels)``), or ``False`` for an unsupported
        ``architecture``. Epsilons are ``[2, 4, 8, 16, 32] / 255`` for
        CNN/MLP and ``[0.125, 0.25, 0.5, 1, 2, 4] / 255`` for ResNet/VGG.
    """
    print_time(text='starting pgd')
    BATCHSIZE = 64
    fmodel = fb.models.TensorFlowModel(model_to_attack, bounds=(0,1))
    attack = fb.attacks.LinfProjectedGradientDescentAttack()
    if architecture == 'CNN' or architecture == 'MLP':
        adversarials, _, success = attack(
            fmodel,
            attack_images,
            attack_labels,
            epsilons=[x/255 for x in [2,4,8,16,32]]
        )
        del fmodel
        # Fixed: the denominator previously used the undefined name
        # `y_to_attack`, which raised NameError on this branch.
        return [np.count_nonzero(eps_res)/len(attack_labels) for eps_res in success]

    if architecture == 'ResNet'  or architecture == 'VGG':
        strengths = [0.125,0.25,0.5,1,2,4]
        # One list of per-sample success flags for every epsilon.
        res = [[] for _ in strengths]
        # Only the first 4 * BATCHSIZE samples are attacked.
        for i in range(4):
            print_time(text=f'pgd batch {i}')
            adversarials, _, success = attack(
                fmodel,
                attack_images[i*BATCHSIZE:(i+1)*BATCHSIZE],
                attack_labels[i*BATCHSIZE:(i+1)*BATCHSIZE],
                epsilons=[x/255 for x in strengths]
            )
            for j in range(len(strengths)):
                res[j] = res[j]+list(success[j])
        print_time(text='ending pgd')
        del fmodel
        gc.collect()
        return [np.count_nonzero(eps_res)/len(attack_labels) for eps_res in res]
    return False


def cw2_attack(architecture, model_to_attack, attack_images, attack_labels, eps=[100]):
    """Run the Carlini-Wagner L2 attack and measure perturbation sizes.

    Args:
        architecture: ``'CNN'``/``'MLP'`` (single call over all samples) or
            ``'ResNet'``/``'VGG'`` (four batches of 64 samples).
        model_to_attack: Model wrapped in a Foolbox ``TensorFlowModel`` with
            input bounds ``(0, 1)``.
        attack_images: Inputs to perturb.
        attack_labels: Labels belonging to ``attack_images``.
        eps: Epsilons forwarded to the attack. The default list is shared
            between calls but is never mutated here.

    Returns:
        ``(dists, success)`` where ``dists`` holds, per attacked sample, the
        norm of the difference between the input and the adversarial
        produced for the first epsilon. ``success`` is
        ``success.numpy().tolist()`` for CNN/MLP and the concatenation of
        ``list(batch_success)`` over the batches for ResNet/VGG.
        ``False`` is returned for an unsupported ``architecture``.
    """
    print_time(text=f'starting cw')
    BATCHSIZE = 64
    fmodel = fb.models.TensorFlowModel(model_to_attack, bounds=(0,1))
    attack = fb.attacks.L2CarliniWagnerAttack(
        binary_search_steps = 9,
        steps= 5000,
        stepsize = 1,
        confidence = 0,
        initial_const = 100,
        abort_early = True,
    )
    if architecture == 'CNN' or architecture == 'MLP':
        adversarials, _, success = attack(
            fmodel,
            attack_images,
            attack_labels,
            epsilons=eps
        )
        # Fixed: this line previously used the undefined name `x_to_attack`,
        # which raised NameError on this branch.
        dists = [tf.norm(attack_images[i]-adversarials[0][i]).numpy() for i in range(len(attack_images))]
        del fmodel

        return dists, success.numpy().tolist()
    if architecture == 'ResNet'  or architecture == 'VGG':
        success = []
        dists = []
        # Only the first 4 * BATCHSIZE samples are attacked.
        for i in range(4):
            print_time(text=f'cw batch {i}')
            attack_batch = attack_images[i*BATCHSIZE:(i+1)*BATCHSIZE]
            attack_batch_labels = attack_labels[i*BATCHSIZE:(i+1)*BATCHSIZE]
            adversarials, _, batch_success = attack(
                fmodel,
                attack_batch,
                attack_batch_labels,
                epsilons=eps
            )
            success = success + list(batch_success)
            dists = dists + [tf.norm(attack_batch[j]-adversarials[0][j]).numpy() for j in range(len(attack_batch))]
        print_time(text=f'ending cw')
        del fmodel
        gc.collect()
        return dists, success
    return False

def bb0_attack(architecture,model_to_attack, attack_images, attack_labels):
    """Run the L0 Brendel-Bethge attack in four batches.

    The attack is initialised from a Foolbox ``DatasetAttack`` that is fed
    the same four batches of ``attack_images`` that are attacked afterwards.

    Args:
        architecture: ``'CNN'``/``'MLP'`` (batch size 250) or
            ``'ResNet'``/``'VGG'`` (batch size 64).
        model_to_attack: Model wrapped in a Foolbox ``TensorFlowModel`` with
            input bounds ``(0, 1)``.
        attack_images: Inputs to perturb; the first ``4 * batch size`` are used.
        attack_labels: Labels belonging to ``attack_images``.

    Returns:
        ``(dists, success)`` where ``dists`` holds, per attacked sample, the
        number of entries that differ between the input and its adversarial,
        and ``success`` is the concatenation of ``list(batch_success)`` over
        the batches.

    Raises:
        ValueError: If ``architecture`` is not one of the four names above
            (previously this surfaced as an UnboundLocalError on the batch
            size).
    """
    if architecture == 'CNN' or architecture == 'MLP':
        BATCHSIZE = 250
    elif architecture == 'ResNet' or architecture == 'VGG':
        BATCHSIZE = 64
    else:
        raise ValueError(
            f"unknown architecture {architecture!r}; "
            "expected 'CNN', 'MLP', 'ResNet' or 'VGG'"
        )
    N_BATCHES = 4

    print_time(text=f'starting bb0')
    fmodel = fb.models.TensorFlowModel(model_to_attack, bounds=(0,1))

    # create attack that picks adversarials from given dataset of samples
    init_attack = fb.attacks.DatasetAttack()
    for i in range(N_BATCHES):
        init_attack.feed(fmodel, attack_images[i*BATCHSIZE:(i+1)*BATCHSIZE])
    attack = fb.attacks.L0BrendelBethgeAttack(binary_search_steps=30, steps=500,lr_num_decay=30, lr=1e7, init_attack=init_attack)

    success = []
    dists = []
    for i in range(N_BATCHES):
        print_time(text=f'bb0 batch {i}')
        attack_batch = attack_images[i*BATCHSIZE:(i+1)*BATCHSIZE]
        attack_batch_labels = attack_labels[i*BATCHSIZE:(i+1)*BATCHSIZE]
        adversarials, _, batch_success = attack(
            fmodel,
            attack_batch,
            criterion=fb.criteria.Misclassification(attack_batch_labels),
            epsilons=[None]
        )

        success = success + list(batch_success)
        dists = dists + [np.count_nonzero(attack_batch[j]-adversarials[0][j]) for j in range(len(attack_batch))]
    print_time(text=f'ending bb0')
    del fmodel
    gc.collect()
    return dists, success



def convert_from_hwio_to_iohw(weights_hwio):
    """Permute a 4-D tensor's axes from (H, W, I, O) order to (I, O, H, W)."""
    axis_order = [2, 3, 0, 1]
    return tf.transpose(weights_hwio, perm=axis_order)

def convert_from_iohw_to_hwio(weights_iohw):
    """Permute a 4-D tensor's axes from (I, O, H, W) order to (H, W, I, O)."""
    axis_order = [2, 3, 0, 1]
    return tf.transpose(weights_iohw, perm=axis_order)

def convert_from_iohw_to_oihw(weights_iohw):
    """Swap the first two axes: (I, O, H, W) order becomes (O, I, H, W)."""
    axis_order = [1, 0, 2, 3]
    return tf.transpose(weights_iohw, perm=axis_order)

def convert_from_oihw_to_iohw(weights_oihw):
    """Swap the first two axes: (O, I, H, W) order becomes (I, O, H, W)."""
    axis_order = [1, 0, 2, 3]
    return tf.transpose(weights_oihw, perm=axis_order)

def convert_from_hwio_to_oihw(weights_hwio):
    """Permute a 4-D tensor's axes from (H, W, I, O) order to (O, I, H, W)."""
    axis_order = [3, 2, 0, 1]
    return tf.transpose(weights_hwio, perm=axis_order)

def convert_from_oihw_to_hwio(weights_oihw):
    """Permute a 4-D tensor's axes from (O, I, H, W) order to (H, W, I, O)."""
    axis_order = [2, 3, 1, 0]
    return tf.transpose(weights_oihw, perm=axis_order)



def get_zeros_ratio(model, layers_to_examine=None):
    """Count the non-zero entries in a selection of the model's weight arrays.

    Despite the name, the first returned value is the fraction of entries
    that are NOT zero.

    Args:
        model: Object exposing ``get_weights()`` and, when
            ``layers_to_examine`` is omitted, ``dense_masks`` and
            ``conv_masks`` index lists.
        layers_to_examine: Indices into ``model.get_weights()``. Defaults to
            ``model.dense_masks + model.conv_masks``.

    Returns:
        ``(nonzero / total, nonzero, total)`` over all selected arrays.

    Raises:
        ZeroDivisionError: If the selected arrays contain no entries.
    """
    # `is None` instead of `== None`: an ndarray argument would otherwise be
    # compared element-wise and fail the truth test.
    if layers_to_examine is None:
        layers_to_examine = model.dense_masks+model.conv_masks
    weights = model.get_weights()

    # Accumulate the counts per array rather than growing one big array with
    # np.append, which re-copies everything on each iteration.
    nonzero = 0
    total = 0
    for x in layers_to_examine:
        layer_values = np.asarray(weights[x])
        nonzero += int(np.count_nonzero(layer_values))
        total += int(layer_values.size)
    return nonzero/total, nonzero, total

def plot_hist(hist):
    """Plot accuracy, loss and (when recorded) learning-rate curves.

    Args:
        hist: Object with a ``history`` mapping that contains the keys
            ``'accuracy'``, ``'val_accuracy'``, ``'loss'`` and ``'val_loss'``.
            The ``'lr'`` key is optional; its figure is skipped when the key
            is absent instead of raising KeyError after the first two
            figures were already shown.
    """
    history = hist.history

    def _show_curves(keys, title, ylabel, legend=None):
        # Draw one figure with a curve per history key, then display it.
        for key in keys:
            plt.plot(history[key])
        plt.title(title)
        plt.ylabel(ylabel)
        plt.xlabel('epoch')
        if legend is not None:
            plt.legend(legend, loc='upper left')
        plt.show()

    # summarize history for accuracy
    _show_curves(['accuracy', 'val_accuracy'], 'model accuracy', 'accuracy', ['train', 'test'])
    # summarize history for loss
    _show_curves(['loss', 'val_loss'], 'model loss', 'loss', ['train', 'test'])
    # summarize history for lr
    if 'lr' in history:
        _show_curves(['lr'], 'model lr', 'lr')