In [83]:
from __future__ import absolute_import, division, print_function, unicode_literals

import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import pandas as pd
from tensorflow.keras import layers
from tqdm import tqdm
import matplotlib.pyplot as plt
import pathlib
import os

# Short alias for the tf.data AUTOTUNE constant; later cells pass it as
# `num_parallel_calls` and as the prefetch buffer size.
AUTOTUNE = tf.data.experimental.AUTOTUNE
tf.keras.backend.clear_session()  # For easy reset of notebook state.

# Bare last expression: the notebook echoes the installed TensorFlow version.
tf.__version__
#tf.executing_eagerly()

'2.1.0'

# load data

In [84]:
# Load a small slice of imagenette. Because `with_info=True`, `ds` is a pair
# whose first element is the list of requested splits.
ds = tfds.load(
    name="imagenette",
    with_info=True,
    split=["train[:1000]", "validation[:100]"],
)
ds_train, ds_test = ds[0]
assert isinstance(ds_train, tf.data.Dataset)

def normalize(x):
    """Turn one dataset example into an ``(image, label)`` pair.

    Args:
        x: mapping with an ``'image'`` and a ``'label'`` entry.

    Returns:
        Tuple of the image converted to float32 with
        ``tf.image.convert_image_dtype`` and resized to 224x224, and the
        untouched label.
    """
    image = tf.image.convert_image_dtype(x['image'], tf.float32)
    image = tf.image.resize(image, (224,224))
    return (image, x['label'])
    
# Training pipeline: preprocess, cache the preprocessed examples, shuffle
# with a 100-element buffer, batch, then prefetch.
ds_train = (
    ds_train
    .map(normalize)
    .cache()
    .shuffle(100)
    .batch(128)
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# Evaluation pipeline: no shuffling; whole batches are cached.
ds_test = (
    ds_test
    .map(normalize, num_parallel_calls=tf.data.experimental.AUTOTUNE)
    .batch(128)
    .cache()
    .prefetch(tf.data.experimental.AUTOTUNE)
)

# define model

In [85]:
# Kernel shape of every layer, keyed by the layer suffix used in the three
# dictionaries below. Conv entries are 4-D (the pruning helpers in this
# notebook treat them as HWIO); dense entries are [in_features, out_features].
# 7*7*512 is the flattened feature map: 224x224 input after 5 max-pool layers.
_layer_shapes = {
    'conv_1': [3, 3, 3, 64],
    'conv_2': [3, 3, 64, 128],
    'conv_3': [3, 3, 128, 256],
    'conv_4': [3, 3, 256, 256],
    'conv_5': [3, 3, 256, 512],
    'conv_6': [3, 3, 512, 512],
    'conv_7': [3, 3, 512, 512],
    'conv_8': [3, 3, 512, 512],
    'dense_1': [7*7*512, 4096],
    'dense_2': [4096, 1024],
    'dense_3': [1024, 10],
}

# Trainable kernels, Glorot-uniform initialised.
weights = {
    'weights_' + layer_name: tf.Variable(tf.initializers.GlorotUniform()(shape))
    for layer_name, shape in _layer_shapes.items()
}

# Non-trainable pruning masks, same shape as the kernels; all ones = nothing pruned.
masks = {
    'mask_' + layer_name: tf.Variable(tf.ones(shape), trainable=False)
    for layer_name, shape in _layer_shapes.items()
}

# One zero-initialised bias per output channel / unit (last entry of each shape).
biases = {
    'bias_' + layer_name: tf.Variable(tf.zeros([shape[-1]]))
    for layer_name, shape in _layer_shapes.items()
}


In [92]:
#conv2D with bias and relu activation

class CustomConvLayer(layers.Layer):
    """2-D convolution with a pruning mask, followed by bias add and ReLU.

    The kernel is multiplied element-wise by the mask on every forward pass,
    so a zero in the mask removes that weight's contribution.

    Args:
        weights: convolution kernel variable.
        mask: variable of the same shape as ``weights``.
        biases: bias variable added to the convolution output.
        strides: integer stride used for both spatial dimensions.
        padding: padding mode passed to ``tf.nn.conv2d``.
    """

    def __init__(self, weights, mask, biases, strides, padding='SAME'):
        super(CustomConvLayer, self).__init__()
        # Attribute names and assignment order are kept as they were: the
        # pruning helpers in this notebook index model.get_weights() by position.
        self.w = weights
        self.m = mask
        self.b = biases
        self.s = strides
        self.p = padding

    def call(self, inputs):
        """Return ``relu(conv2d(inputs, w * m) + b)``."""
        masked_kernel = tf.multiply(self.w, self.m)
        stride_spec = [1, self.s, self.s, 1]
        conv_out = tf.nn.conv2d(inputs, masked_kernel, strides=stride_spec, padding=self.p)
        return tf.nn.relu(tf.nn.bias_add(conv_out, self.b))
        

# Max pooling layer
class CustomPoolLayer(layers.Layer):
    """Square max-pooling whose stride equals its window size.

    Args:
        k: side length of the pooling window, also used as the stride.
        padding: padding mode passed to ``tf.nn.max_pool2d``.
    """

    def __init__(self, k=2, padding='SAME'):
        super(CustomPoolLayer, self).__init__()
        self.k = k
        self.p = padding

    def call(self, inputs):
        """Max-pool ``inputs`` with a k x k window and stride k."""
        window = [1, self.k, self.k, 1]
        return tf.nn.max_pool2d(inputs, ksize=window, strides=window, padding=self.p)
    
#Dense Layer with Bias
class CustomDenseLayer(layers.Layer):
    """Fully connected layer with a pruning mask, a bias and an activation.

    The kernel is multiplied element-wise by the mask on every forward pass,
    so a zero in the mask removes that weight's contribution.

    Args:
        weights: kernel variable.
        mask: variable of the same shape as ``weights``.
        bias: bias variable added after the matrix product.
        activation: one of ``'relu'``, ``'softmax'`` or ``'sigmoid'``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
            (Previously an unknown name made ``call`` silently return None.)
    """

    # Supported activation names and the op each one maps to.
    _ACTIVATIONS = {
        'relu': tf.nn.relu,
        'softmax': tf.nn.softmax,
        'sigmoid': tf.nn.sigmoid,
    }

    def __init__(self, weights, mask, bias, activation = 'relu'):
        super(CustomDenseLayer, self).__init__()
        if activation not in self._ACTIVATIONS:
            raise ValueError(
                "Unsupported activation %r; expected one of %s"
                % (activation, sorted(self._ACTIVATIONS))
            )
        # Attribute names and assignment order are kept as they were: the
        # pruning helpers in this notebook index model.get_weights() by position.
        self.w = weights
        self.b = bias
        self.a = activation
        self.m = mask

    def call(self, inputs):
        """Return ``activation(inputs @ (w * m) + b)``."""
        x = tf.matmul(inputs, tf.multiply(self.w, self.m))
        x = tf.nn.bias_add(x, self.b)
        return self._ACTIVATIONS[self.a](x)

In [93]:
class VGG11(tf.keras.Model):
    """VGG-style classifier: 8 masked conv layers, 5 max-pools, 3 masked dense layers.

    All kernels, masks and biases are taken from the module-level ``weights``,
    ``masks`` and ``biases`` dictionaries, so every instance shares the same
    variables. The last dense layer applies softmax, i.e. the model outputs
    class probabilities rather than logits.
    """

    def __init__(self):
        super(VGG11, self).__init__()

        def conv(idx):
            # Stride-1 masked convolution wired to the idx-th conv variables.
            key = 'conv_%d' % idx
            return CustomConvLayer(weights['weights_' + key], masks['mask_' + key], biases['bias_' + key], 1)

        def dense(idx, activation):
            # Masked dense layer wired to the idx-th dense variables.
            key = 'dense_%d' % idx
            return CustomDenseLayer(weights['weights_' + key], masks['mask_' + key], biases['bias_' + key], activation)

        self.conv1 = conv(1)
        self.maxpool1 = CustomPoolLayer(k=2)
        self.conv2 = conv(2)
        self.maxpool2 = CustomPoolLayer(k=2)
        self.conv3 = conv(3)
        self.conv4 = conv(4)
        self.maxpool3 = CustomPoolLayer(k=2)
        self.conv5 = conv(5)
        self.conv6 = conv(6)
        self.maxpool4 = CustomPoolLayer(k=2)
        self.conv7 = conv(7)
        self.conv8 = conv(8)
        self.maxpool5 = CustomPoolLayer(k=2)
        # Built once here instead of on every forward pass. Flatten owns no
        # variables, so positions in model.get_weights() are unaffected.
        self.flatten = layers.Flatten()
        self.dense1 = dense(1, 'relu')
        self.dense2 = dense(2, 'relu')
        self.dense3 = dense(3, 'softmax')

    def call(self, inputs, training=False):
        """Run the forward pass.

        Args:
            inputs: batch of images fed to the first convolution.
            training: accepted for Keras compatibility; not used, since no
                layer here behaves differently between training and inference.

        Returns:
            The softmax output of the last dense layer.
        """
        x = inputs
        forward_order = (
            self.conv1, self.maxpool1,
            self.conv2, self.maxpool2,
            self.conv3, self.conv4, self.maxpool3,
            self.conv5, self.conv6, self.maxpool4,
            self.conv7, self.conv8, self.maxpool5,
            self.flatten,
            self.dense1, self.dense2, self.dense3,
        )
        for layer in forward_order:
            x = layer(x)
        return x
        

In [94]:
model = VGG11()
# `learning_rate` is the current argument name; `lr` is only a legacy alias.
sgd = tf.keras.optimizers.SGD(learning_rate=0.001, decay=1e-6, momentum=0.9, nesterov=True)

# The last dense layer of VGG11 already applies softmax, so the loss must be
# told it receives probabilities. With from_logits=True the softmax would be
# applied a second time inside the loss.
model.compile(optimizer=sgd,
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'],
              experimental_run_tf_function=False
             )

# train model

In [None]:
# Initial training run on the unpruned network: 7 passes over ds_train,
# validating on ds_test after each one.
model.fit(ds_train, epochs=7, validation_data=ds_test)

Epoch 1/7


# define helper functions for pruning

In [74]:
def convert_from_hwio_to_iohw(weights_nchw):
    """Move the last two axes of a 4-D tensor to the front.

    For a kernel laid out as (H, W, I, O) this yields (I, O, H, W). Despite
    the parameter name, the argument is the tensor *before* conversion.
    """
    axes_hwio_to_iohw = [2, 3, 0, 1]
    return tf.transpose(weights_nchw, perm=axes_hwio_to_iohw)



def convert_from_iohw_to_hwio(weights_nhwc):
    """Move the first two axes of a 4-D tensor to the back.

    For a kernel laid out as (I, O, H, W) this yields (H, W, I, O). The
    permutation [2, 3, 0, 1] is its own inverse, which is why it matches the
    one used by ``convert_from_hwio_to_iohw``.
    """
    axes_iohw_to_hwio = [2, 3, 0, 1]
    return tf.transpose(weights_nhwc, perm=axes_iohw_to_hwio)

In [77]:
def prune_weights(model, pruning_ratio):
    """Mask out the smallest-magnitude weights of the three dense layers.

    For each dense kernel, the ``pruning_ratio`` fraction of entries with the
    smallest absolute value is located and the corresponding entries of that
    layer's mask are set to 0. Kernels and biases are returned unchanged, and
    mask entries that are already 0 stay 0.

    Args:
        model: object whose ``get_weights()`` returns a list of numpy arrays in
            which the dense kernels sit at positions 24, 27 and 30 and each
            kernel's mask sits two positions after it.
        pruning_ratio: fraction of each kernel to prune, in [0, 1].

    Returns:
        A weight list suitable for ``model.set_weights`` with updated masks.

    Raises:
        ValueError: if ``pruning_ratio`` is outside [0, 1].
    """
    if not 0.0 <= pruning_ratio <= 1.0:
        raise ValueError("pruning_ratio must be within [0, 1], got %r" % (pruning_ratio,))

    # Positions of the dense kernels in model.get_weights(); mask is at +2.
    layer_to_prune = [24, 27, 30]
    weights_to_prune = model.get_weights()

    for index in layer_to_prune:
        kernel = weights_to_prune[index]
        mask = weights_to_prune[index + 2]
        no_of_weights_to_prune = int(kernel.size * pruning_ratio)
        if no_of_weights_to_prune == 0:
            continue
        # Flat indices of the smallest |w|. One vectorised argsort replaces
        # the former pandas round-trip and per-index Python loop.
        smallest = np.argsort(np.abs(kernel), axis=None)[:no_of_weights_to_prune]
        flat_mask = mask.flatten()  # flatten() copies, so `mask` is untouched
        flat_mask[smallest] = 0
        weights_to_prune[index + 2] = flat_mask.reshape(mask.shape)

    return weights_to_prune

In [78]:
def prune_conv_layers(pruning_ratio, target_model=None):
    """Mask out the smallest-magnitude taps of every convolution kernel slice.

    Each conv kernel is treated as HWIO. For every (input channel, output
    channel) pair, the ``pruning_ratio`` fraction of the H*W spatial taps with
    the smallest absolute value is located and the matching mask entries are
    set to 0. Kernels and biases are returned unchanged.

    Args:
        pruning_ratio: fraction of each H x W slice to prune, in [0, 1].
        target_model: object whose ``get_weights()`` returns numpy arrays with
            the conv kernels at positions 0, 3, ..., 21 and each mask two
            positions after its kernel. Defaults to the notebook-level
            ``model`` when omitted, which is the previous behaviour.

    Returns:
        A weight list suitable for ``model.set_weights`` with updated masks.

    Raises:
        ValueError: if ``pruning_ratio`` is outside [0, 1].
    """
    if not 0.0 <= pruning_ratio <= 1.0:
        raise ValueError("pruning_ratio must be within [0, 1], got %r" % (pruning_ratio,))
    if target_model is None:
        target_model = model

    # Positions of the 8 conv kernels in get_weights(). The list used to
    # contain 28 instead of 18, which skipped conv_7 and pointed at a 1-D
    # dense bias instead.
    layer_to_prune = [0, 3, 6, 9, 12, 15, 18, 21]
    # Fetch once: every get_weights() call copies all variables of the model.
    pruned_weights = target_model.get_weights()

    for layer in layer_to_prune:
        kernel = np.asarray(pruned_weights[layer])
        mask = np.asarray(pruned_weights[layer + 2])
        taps = kernel.shape[0] * kernel.shape[1]
        no_of_weights_to_prune = int(taps * pruning_ratio)
        if no_of_weights_to_prune == 0:
            continue
        # Collapse to (H*W, I*O): each column is one flattened H x W slice.
        magnitudes = np.abs(kernel).reshape(taps, -1)
        smallest = np.argsort(magnitudes, axis=0)[:no_of_weights_to_prune]
        flat_mask = mask.reshape(taps, -1).copy()
        # Row indices come from `smallest`; the arange broadcasts so that each
        # column is only zeroed at its own smallest taps.
        flat_mask[smallest, np.arange(flat_mask.shape[1])] = 0
        pruned_weights[layer + 2] = flat_mask.reshape(mask.shape)

    return pruned_weights

# prune model

In [79]:
# Prune -> evaluate -> fine-tune -> evaluate, at increasing sparsity levels.
pruning_ratios = [0.0, .5, 0.8, 0.9]
# NOTE(review): every get_weights() snapshot below copies all kernels, biases
# and masks; three snapshots are kept per ratio, twelve in total.
pre_pruning_weight_archive = []
post_pruning_weight_archive = []
post_fine_tune_weight_archive = []
pre_fine_tune_results = []
post_fine_tune_results = []

for pruning_ratio in tqdm(pruning_ratios):
    pre_pruning_weight_archive.append(model.get_weights())

    # Update the conv masks first, then the dense masks.
    pruned_weights = prune_conv_layers(pruning_ratio)
    model.set_weights(pruned_weights)
    pruned_weights = prune_weights(model, pruning_ratio)
    model.set_weights(pruned_weights)
    post_pruning_weight_archive.append(model.get_weights())

    # Evaluate on the held-out dataset built in the data-loading cell; the
    # previous `x_test` / `y_test` names are not defined in this notebook.
    pre_fine_tune_results.append(model.evaluate(ds_test, verbose=0))

    # One epoch of fine-tuning with the masks in place.
    model.fit(
        x=ds_train,
        epochs=1,
        validation_data=ds_test,
    )
    post_fine_tune_results.append(model.evaluate(ds_test, verbose=0))
    post_fine_tune_weight_archive.append(model.get_weights())

  0%|          | 0/4 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [None]:
def create_data_set(PATH):
    """Build a dataset of ``(image, one_hot_label)`` pairs from an image folder.

    ``PATH`` is expected to contain one sub-directory per class holding
    ``*.JPEG`` files. Class names are taken from the directory entries of
    ``PATH``, excluding ``LICENSE.txt`` and ``.DS_Store``.

    Args:
        PATH: root directory of the split.

    Returns:
        A ``tf.data.Dataset`` yielding the decoded image as float32 resized to
        224x224, and a boolean vector that is True at the position of the
        image's class in the class-name array.
    """
    data_dir = pathlib.Path(PATH)
    CLASS_NAMES = np.array([item.name for item in data_dir.glob('*') if (item.name != "LICENSE.txt" and item.name != '.DS_Store')])
    IMG_HEIGHT = 224
    IMG_WIDTH = 224

    def get_label(file_path):
        # Split the path into its components.
        parts = tf.strings.split(file_path, os.path.sep)
        # The second to last component is the class directory; comparing it
        # with the class-name array gives a one-hot boolean vector.
        return parts[-2] == CLASS_NAMES

    def decode_img(img):
        # Convert the compressed string to a 3-D uint8 tensor.
        img = tf.image.decode_jpeg(img, channels=3)
        # `convert_image_dtype` converts to floats in the [0, 1] range.
        img = tf.image.convert_image_dtype(img, tf.float32)
        # tf.image.resize takes the target size as [height, width].
        return tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])

    def process_path(file_path):
        label = get_label(file_path)
        # Load the raw bytes of the file, then decode them.
        img = decode_img(tf.io.read_file(file_path))
        return img, label

    list_ds = tf.data.Dataset.list_files(str(data_dir/'*/*.JPEG'))
    labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
    return labeled_ds
    

In [None]:
# Extracted imagenette archive inside the local tensorflow_datasets download
# cache. NOTE(review): machine-specific absolute path.
IMAGENETTE_DIR = '/Users/florianmerkle/tensorflow_datasets/downloads/extracted/TAR_GZ.s3_fast-ai-imageclas_imagenetteG9ZQvBaITKiOTw9TfthWmx-Neuhl0366js3YfZzZ3Po.tgz/imagenette'

TRAIN_PATH = IMAGENETTE_DIR + '/train'
VAL_PATH = IMAGENETTE_DIR + '/val/'

train_data = create_data_set(TRAIN_PATH)
val_data = create_data_set(VAL_PATH)

In [None]:
def prepare_for_training(ds, cache=False, shuffle_buffer_size=1000):
    """Optionally cache a dataset, then batch and prefetch it.

    Args:
        ds: dataset to prepare.
        cache: falsy for no caching, a non-empty string to cache to that
            file, any other truthy value to cache in memory.
        shuffle_buffer_size: currently unused; shuffling is disabled.

    Returns:
        The dataset batched with the module-level ``BATCH_SIZE`` and
        prefetched with ``AUTOTUNE``.
    """
    # A file-backed cache is for preprocessing work that does not fit in
    # memory; otherwise the data is loaded once and kept in memory.
    if isinstance(cache, str) and cache:
        ds = ds.cache(cache)
    elif cache:
        ds = ds.cache()

    # repeat(1) yields the data exactly once per iteration (not forever);
    # prefetch lets batches be fetched in the background while training.
    return ds.repeat(1).batch(BATCH_SIZE).prefetch(buffer_size=AUTOTUNE)

In [None]:
# Batch size read by prepare_for_training() at call time, so it must be
# assigned before the two calls below.
BATCH_SIZE = 64
# Batched, prefetched versions of the folder-based datasets (no caching,
# since `cache` is left at its default of False).
train_ds = prepare_for_training(train_data)
val_ds = prepare_for_training(val_data)

In [None]:
# Take a single batch so that images and labels stay paired. Calling
# list(val_ds) twice iterated the whole dataset twice, and because
# Dataset.list_files shuffles by default, the images and the labels could come
# from two different orderings.
# NOTE(review): despite the names, this batch is drawn from val_ds.
x_train, y_train = next(iter(val_ds))
#x_train = tf.expand_dims(x_train, axis=0)
#y_train = tf.expand_dims(y_train, axis=0)
print(y_train[0].shape)

In [None]:
# Fit on the single batch taken above. The sparse loss needs integer class
# ids, so the one-hot rows of y_train are converted inline; the former
# `labels` variable is only defined in a later cell.
model.fit(x=x_train,
          y=np.argmax(y_train, axis=1),
          epochs=5,
          batch_size=4
         )

In [None]:
# Debugging aid: sets the model's run_eagerly attribute after compilation.
# NOTE(review): presumably makes fit/evaluate run eagerly, which is slower —
# confirm and reset before long training runs.
model.run_eagerly = True

In [None]:
# Bare expression: displays the label batch assigned in the earlier cell.
y_train

In [None]:
# Integer class id for every row of the one-hot label batch.
labels = np.argmax(y_train, axis=1)

In [None]:
def show_batch(image_batch, label_batch):
    """Plot up to 25 images of a batch in a 5x5 grid, titled by class name.

    Args:
        image_batch: indexable batch of images accepted by ``plt.imshow``.
        label_batch: indexable batch of label vectors; the argmax of each
            selects the title from the module-level ``CLASS_NAMES``.
    """
    # Cap at the batch size so a batch with fewer than 25 images (e.g. the
    # last batch of a dataset) no longer raises an IndexError.
    images_to_show = min(25, len(image_batch))
    plt.figure(figsize=(10,10))
    for n in range(images_to_show):
        plt.subplot(5,5,n+1)
        plt.imshow(image_batch[n])
        plt.title(CLASS_NAMES[np.argmax(label_batch[n])])
        plt.axis('off')
        

In [None]:
# Pull the first batch from the training dataset and plot it.
# NOTE(review): show_batch reads the global CLASS_NAMES, which is only
# assigned in a later cell of this notebook.
image_batch, label_batch = next(iter(train_ds))

#show_batch(x_train[:25], y_train[:25])
show_batch(image_batch, label_batch)

In [None]:
# Bare expression: displays the class-name array.
# NOTE(review): CLASS_NAMES is assigned in the next cell, so this fails on a fresh kernel.
CLASS_NAMES

In [None]:
# Module-level class names, derived from the entries of the training
# directory the same way create_data_set() derives its local copy.
data_dir = pathlib.Path(TRAIN_PATH)
CLASS_NAMES = np.array([item.name for item in data_dir.glob('*') if (item.name != "LICENSE.txt" and item.name != '.DS_Store')])
# Recursive glob ('**'); create_data_set() matches exactly one directory level ('*/*.JPEG').
image_count = len(list(data_dir.glob('**/*.JPEG')))

In [None]:
# Number of batches needed to see every training image once. The division is
# true division (`from __future__ import division`).
# NOTE(review): np.ceil presumably returns a float here — cast with int()
# before using it as a step count.
STEPS_PER_EPOCH = np.ceil(image_count/BATCH_SIZE)
STEPS_PER_EPOCH

In [None]:
# Bare expression: displays the number of JPEG files found under TRAIN_PATH.
image_count

In [None]:
IMG_PATH = '/Users/florianmerkle/tensorflow_datasets/downloads/extracted/TAR_GZ.s3_fast-ai-imageclas_imagenetteG9ZQvBaITKiOTw9TfthWmx-Neuhl0366js3YfZzZ3Po.tgz/imagenette/train/chain_saw/n03000684_1000.JPEG'
def predict():
    """Run the model on the single image at ``IMG_PATH``.

    The image is preprocessed the same way as the training data: decoded,
    converted to float32 with ``convert_image_dtype`` and resized to 224x224.
    Previously the float conversion was skipped, so the model received pixel
    values on a different scale than it was trained on.

    Returns:
        The output of ``model.predict`` for a batch of one image. It is also
        printed, as before.
    """
    image = tf.io.read_file(IMG_PATH)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, [224, 224])
    # Add the batch dimension the model expects.
    image = tf.expand_dims(image, axis=0)
    predictions = model.predict(image)
    print(predictions)
    return predictions

In [None]:
# Reference model: stock Keras VGG16 with its default settings.
vgg_keras = tf.keras.applications.VGG16()

# VGG16's classification head ends in a softmax, so the loss receives
# probabilities and must not treat them as logits.
vgg_keras.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=0.01),
              loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'],
              experimental_run_tf_function=False
             )

In [None]:
# Predict with the reference VGG16 on the first 10 items of first_b[0].
# NOTE(review): `first_b` is not assigned anywhere in the visible notebook —
# this cell fails on a fresh kernel unless it is defined elsewhere.
preds = vgg_keras.predict(first_b[0][:10])

In [None]:
# Print the layer / parameter overview of the custom VGG11 model.
model.summary()