# Model Compression API

This notebook showcases the API that performs Model Compression via structured channel pruning for both TensorFlow and PyTorch models. Structured pruning performs a one-shot pruning and returns the model with a user-defined sparsity. Re-training is left to the user. For quick reference, developers can read "Pruning Filters for Efficient ConvNets": https://arxiv.org/abs/1608.08710

## Create a TensorFlow Model

In [None]:
# AlexNet
# source https://towardsdatascience.com/implementing-alexnet-cnn-architecture-using-tensorflow-2-0-and-keras-2113e090ad98
# baseline cnn model for AlexNet
from sklearn.model_selection import KFold

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import SGD
from tensorflow.python.framework import type_spec as type_spec_module
import os
import numpy as np
import time

from ode.tf_pruner import TfPruner
from ode.tf_quantizer import TfQuantizer




## Load, Train, and Test Dataset

We define a load_dataset() function, as well as a pre-processor in prep_pixels().
We limit the dataset to only 1000 elements to keep training short; to train on the full dataset, the following statements can be omitted
from the code below.

    trainX = trainX[:1000]
    trainY = trainY[:1000]
    testX = testX[:1000]
    testY = testY[:1000]

In [None]:
def load_dataset(limit=1000):
    """Load MNIST, add a channel axis to the images and one-hot encode labels.

    Args:
        limit: Maximum number of samples kept from each of the train and
            test splits. The default of 1000 keeps training short. Pass
            ``None`` to keep every sample, which has the same effect as
            removing the four truncation statements below.

    Returns:
        A tuple ``(trainX, trainY, testX, testY)`` where the image arrays
        have shape ``(n, 28, 28, 1)`` and the label arrays are the output
        of ``to_categorical``.

    Raises:
        ValueError: If ``limit`` is negative.
    """
    if limit is not None and limit < 0:
        # A negative slice bound would silently drop samples from the end
        # instead of capping the count, so reject it explicitly.
        raise ValueError(f'limit must be None or non-negative, got {limit}')

    # load dataset
    (trainX, trainY), (testX, testY) = mnist.load_data()
    # reshape dataset to have a single channel
    trainX = trainX.reshape((trainX.shape[0], 28, 28, 1))
    testX = testX.reshape((testX.shape[0], 28, 28, 1))
    # one hot encode target values (done before truncating, so the encoding
    # is computed from the complete label arrays)
    trainY = to_categorical(trainY)
    testY = to_categorical(testY)

    # keep at most `limit` samples per array; a bound of None keeps them all
    trainX = trainX[:limit]
    trainY = trainY[:limit]
    testX = testX[:limit]
    testY = testY[:limit]

    print(f'trainX.shape: {trainX.shape}')
    return trainX, trainY, testX, testY

## Defines

In [None]:
# Scale pixels
def prep_pixels(train, test):
    """Convert two image arrays to float32 and divide them by 255.

    Args:
        train: Array of training images (anything exposing ``astype``).
        test: Array of test images (anything exposing ``astype``).

    Returns:
        A ``(train_norm, test_norm)`` tuple holding the float32 copies of
        the inputs, each divided by 255.0.
    """
    def _rescale(pixels):
        # cast first so the division is carried out on float32 values
        return pixels.astype('float32') / 255.0

    return _rescale(train), _rescale(test)


def compile_model(model):
    """Compile ``model`` in place for multi-class classification.

    The model is compiled with an SGD optimizer (learning rate 0.01,
    momentum 0.9), categorical cross-entropy loss and the accuracy metric.

    Args:
        model: The Keras model to compile. It is modified in place and
            nothing is returned.
    """
    # `learning_rate` is the supported keyword; the older `lr` alias is
    # deprecated and is rejected by newer Keras releases.
    opt = SGD(learning_rate=0.01, momentum=0.9)
    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])


def define_model():
    """Build and compile the convolutional network used in this notebook.

    The network has three Conv2D -> BatchNormalization -> ReLU stages with
    32, 64 and 32 filters. 2x2 max pooling follows the first and the third
    stage. The result is flattened and fed to a 100-unit ReLU dense layer
    and a 10-unit softmax output layer. Input shape is (28, 28, 1).

    Returns:
        The Sequential model after it has been passed to compile_model().
    """
    layers = keras.layers
    init = 'he_uniform'

    network = keras.Sequential([
        # stage 1: 32 filters, then downsample
        layers.Conv2D(32, (3, 3), kernel_initializer=init, input_shape=(28, 28, 1)),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        # stage 2: 64 filters, no pooling
        layers.Conv2D(64, (3, 3), kernel_initializer=init),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        # stage 3: 32 filters, then downsample
        layers.Conv2D(32, (3, 3), kernel_initializer=init),
        layers.BatchNormalization(),
        layers.Activation('relu'),
        layers.MaxPooling2D((2, 2)),
        # classifier head
        layers.Flatten(),
        layers.Dense(100, activation='relu', kernel_initializer=init),
        layers.Dense(10, activation='softmax'),
    ])

    # attach optimizer, loss and metrics
    compile_model(network)
    return network



We now proceed to initialize the timer, dataset and model.

In [None]:
def current_milli_time():
    """Return the current ``time.time()`` value as whole milliseconds.

    Returns:
        An ``int``: seconds since the epoch multiplied by 1000 and rounded
        to the nearest integer.
    """
    return int(round(time.time() * 1000))

In [None]:
# Five shuffled folds; the fixed seed keeps the fold assignment repeatable.
kfold = KFold(n_splits=5, shuffle=True, random_state=1)

# Fetch the image/label arrays, then rescale the images with prep_pixels().
train_ds_X, train_ds_Y, test_ds_X, test_ds_Y = load_dataset()
train_ds_X, test_ds_X = prep_pixels(train_ds_X, test_ds_X)

# Build and compile the network that the following cells train.
model = define_model()

We now can train the model as follows:

In [None]:
# Enumerate splits.
# Train indices come from a split of train_ds_X and test indices from a split
# of test_ds_X, so every index is valid for the array it is applied to even
# when the two arrays differ in length. With equal lengths and the fixed
# integer random_state of `kfold`, both splits should yield the same indices
# (see the scikit-learn KFold documentation).
# NOTE: the same `model` instance keeps training across all folds; it is not
# re-created per fold.
fold_pairs = zip(kfold.split(train_ds_X), kfold.split(test_ds_X))
for (train_ix, _), (_, test_ix) in fold_pairs:

    # select rows for train and test
    trainX, trainY = train_ds_X[train_ix], train_ds_Y[train_ix]
    testX, testY = test_ds_X[test_ix], test_ds_Y[test_ix]
    # fit model
    history = model.fit(trainX, trainY, epochs=10, batch_size=32, validation_data=(testX, testY), verbose=0)
    # evaluate model
    _, acc = model.evaluate(testX, testY, verbose=0)

    print('> %.3f' % (acc * 100.0))

    # remember the most recent fold; later cells read these names
    latest_trainX = trainX
    latest_trainY = trainY
    latest_testX = testX
    latest_testY = testY



We keep a subset of the dataset in latest_* for evaluation.

Next, we time the prediction for comparison and print the original model stats as well as summary.

In [None]:
# Accumulate the time (in ms) spent in model.predict, one image per call.
t2 = 0.0
for img in latest_testX:

    # add a leading axis so the single image is passed as a batch of one
    img = np.expand_dims(img, 0)

    t1 = current_milli_time()
    # verbose=0 matches the fit/evaluate calls and keeps progress-bar
    # printing out of the timed region
    prediction = model.predict(img, verbose=0)
    t2 += current_milli_time() - t1

# average time per image
t2 /= float(len(latest_testX))

# `acc` is the accuracy measured on the last fold of the training loop
print('> Original Model Accuracy: %.3f' % (acc * 100.0))
print('> Original Model Inference Time: {}'.format(t2))

model.summary()


Now, we will save the full model as follows:

In [None]:
# Write the trained model to 'mnist_base.h5' in the current working directory.
# NOTE(review): presumably this file is the input to the next tutorial stage; confirm.
model.save('mnist_base.h5')

Now please follow the next stage of the tutorial as described in the learning path.