In [1]:
import tensorflow as tf

from tensorflow.keras.optimizers.experimental import AdamW
from tensorflow import keras
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Flatten, Conv2D, Dropout, BatchNormalization, Activation

import numpy as np
import pandas as pd
import logging
# Only let TensorFlow log messages of severity ERROR and above through
tf.get_logger().setLevel(logging.ERROR)

# windows tensorflow only recognizes CUDA GPU at version 2.10.0
# List the GPU devices TensorFlow can see (the cell output shows the result)
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### Training

In [2]:
# Read the train/test CSV files. In each file the 'label' column becomes the
# label vector and every remaining column is reshaped into 28x28 image arrays.
train_set = pd.read_csv('fashion-mnist_train.csv')
train_labels = train_set['label'].values
train_images = train_set.drop(columns='label').to_numpy().reshape(-1, 28, 28)

test_set = pd.read_csv('fashion-mnist_test.csv')
test_labels = test_set['label'].values
test_images = test_set.drop(columns='label').to_numpy().reshape(-1, 28, 28)

In [3]:
# Show the shapes of the training and test image arrays, one per line
print(train_images.shape, test_images.shape, sep='\n')

(60000, 28, 28)
(10000, 28, 28)


In [4]:
# Convert labels to one-hot vectors
# The label arrays are overwritten in place, so guard on ndim: re-running this
# cell on already-encoded (2-D) labels would otherwise encode them a second
# time and produce a 3-D array.
if train_labels.ndim == 1:
    train_labels = to_categorical(train_labels, num_classes=10)
if test_labels.ndim == 1:
    test_labels = to_categorical(test_labels, num_classes=10)

In [5]:
# Training parameters
EPOCHS = 30        # number of passes over the training set (used by model.fit)
BATCH_SIZE = 128   # samples per gradient step (used by model.fit)
# NOTE(review): alpha is only reported by the print below; the compile() calls
# in this notebook build AdamW() without arguments, so this value is not the
# learning rate actually used unless it is passed to the optimizer - confirm.
alpha = 0.01
print('epochs = {}, batch size = {}, learning rate = {}'.format(EPOCHS, BATCH_SIZE, alpha))

epochs = 30, batch size = 128, learning rate = 0.01


In [6]:
# Model architecture: input batch-norm, two conv -> batch-norm -> ReLU -> dropout
# stages, then a flattened 10-way softmax classifier.
model = Sequential([
    BatchNormalization(input_shape=(28,28,1)),
    Conv2D(32, 3, strides=2, padding='same',
           input_shape=(28,28,1),
           kernel_initializer='he_normal',
           bias_initializer='zeros'),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),
    Conv2D(16, 2, strides=1, padding='same',
           kernel_initializer='he_normal',
           bias_initializer='zeros'),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),
    Flatten(),
    Dense(10, kernel_initializer='glorot_uniform',
          bias_initializer='zeros'),
    Activation('softmax'),
])
# One-hot labels are used in this notebook, hence categorical cross-entropy
model.compile(optimizer=AdamW(),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [7]:
# Start training
%%time
history = model.fit(train_images, train_labels, validation_data=(test_images, test_labels),
                    epochs=EPOCHS, batch_size=BATCH_SIZE, verbose=2, shuffle=True)

Epoch 1/30
469/469 - 8s - loss: 0.5389 - accuracy: 0.8077 - val_loss: 0.3761 - val_accuracy: 0.8673 - 8s/epoch - 18ms/step
Epoch 2/30
469/469 - 3s - loss: 0.3771 - accuracy: 0.8646 - val_loss: 0.3241 - val_accuracy: 0.8855 - 3s/epoch - 5ms/step
Epoch 3/30
469/469 - 2s - loss: 0.3411 - accuracy: 0.8760 - val_loss: 0.2954 - val_accuracy: 0.8968 - 2s/epoch - 5ms/step
Epoch 4/30
469/469 - 2s - loss: 0.3170 - accuracy: 0.8855 - val_loss: 0.2881 - val_accuracy: 0.8989 - 2s/epoch - 5ms/step
Epoch 5/30
469/469 - 2s - loss: 0.3005 - accuracy: 0.8910 - val_loss: 0.2773 - val_accuracy: 0.9004 - 2s/epoch - 5ms/step
Epoch 6/30
469/469 - 3s - loss: 0.2849 - accuracy: 0.8974 - val_loss: 0.2782 - val_accuracy: 0.8987 - 3s/epoch - 5ms/step
Epoch 7/30
469/469 - 2s - loss: 0.2760 - accuracy: 0.8997 - val_loss: 0.2625 - val_accuracy: 0.9059 - 2s/epoch - 5ms/step
Epoch 8/30
469/469 - 3s - loss: 0.2689 - accuracy: 0.9022 - val_loss: 0.2710 - val_accuracy: 0.9032 - 3s/epoch - 6ms/step
Epoch 9/30
469/469 - 3s

In [8]:
# Save the trained model to the 'fashion_mnist_model' path; later cells reload it
# with keras.models.load_model and convert it with TFLiteConverter.from_saved_model
model.save('fashion_mnist_model')



In [9]:
# Load the saved model back from disk and predict a class for every test image:
# the predicted class is the index of the largest score in each output row.
load_model = keras.models.load_model('fashion_mnist_model')
float_model_scores = load_model.predict(test_images)
float_model_result = float_model_scores.argmax(axis=1)
float_model_result.shape



(10000,)

In [10]:
# Print the layer-by-layer summary of the reloaded model
load_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization (BatchN  (None, 28, 28, 1)        4         
 ormalization)                                                   
                                                                 
 conv2d (Conv2D)             (None, 14, 14, 32)        320       
                                                                 
 batch_normalization_1 (Batc  (None, 14, 14, 32)       128       
 hNormalization)                                                 
                                                                 
 activation (Activation)     (None, 14, 14, 32)        0         
                                                                 
 dropout (Dropout)           (None, 14, 14, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 16)        2

### Post-Training Quantization

Naive post-training quantization: the whole saved model is converted to int8, calibrated on a small representative dataset.

In [11]:
def representative_dataset():
    """Yield calibration samples for the TFLite converter.

    Takes at most the first 100 images of ``train_images``, each as its own
    batch of shape (1, 28, 28, 1), and yields every batch as a one-element
    list holding a float32 array.
    """
    calibration_images = train_images.reshape(-1, 28, 28, 1)
    single_image_batches = tf.data.Dataset.from_tensor_slices(calibration_images).batch(1)
    for batch in single_image_batches.take(100):
        yield [np.float32(batch)]

In [12]:
# Quantize with representative int8 range
# Start from the model directory written earlier by model.save(...)
converter = tf.lite.TFLiteConverter.from_saved_model('fashion_mnist_model')
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Calibration samples are produced by the representative_dataset() generator
converter.representative_dataset = representative_dataset
# Restrict the converter to the int8 builtin op set and request int8 tensors
# at the model's input and output
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
# The converted model is kept in memory here and written to disk in a later cell
tflite_model = converter.convert()

In [13]:
# Write the converted post-training-quantized model to disk as raw bytes
with open('fashion_mnist_tflite_model', 'wb') as model_file:
    model_file.write(tflite_model)

In [14]:
# Evaluate with tflite interpreter
interpreter = tf.lite.Interpreter('fashion_mnist_tflite_model')
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()[0]
output_details = interpreter.get_output_details()[0]

input_shape = input_details['shape']
# Quantization parameters of the input tensor: real_value = (q - zero_point) * scale
scale, zero_point = input_details["quantization"]
int8_limits = np.iinfo(np.int8)

# Collect predictions in a list and convert once at the end; np.append inside
# the loop copies the whole array on every iteration and yields float class ids.
int_predictions = []
for image in test_images:
    # Map the raw pixel values onto the int8 input grid. Round to the nearest
    # step and clip to the int8 range before casting: a bare float -> int8 cast
    # truncates toward zero and can wrap around on out-of-range values.
    input_data = np.round(image / scale + zero_point)
    input_data = np.clip(input_data, int8_limits.min, int8_limits.max)
    input_data = input_data.astype(np.int8).reshape(input_shape)

    interpreter.set_tensor(input_details['index'], input_data)
    interpreter.invoke()
    # Batch size is 1, so take the single output row; argmax is unaffected by
    # the output still being in quantized int8 form.
    output_data = interpreter.get_tensor(output_details['index'])[0]
    int_predictions.append(output_data.argmax())
int_model_result = np.array(int_predictions)
int_model_result.shape

(10000,)

### Quantize-Aware Training

Quantization-aware fine-tuning, applied to the dense layers only.

In [15]:
import tensorflow_model_optimization.quantization.keras as tfmot
from keras.models import clone_model

In [16]:
# Mark dense layers to be quantized during training
def apply_dense_quantizer(layer):
    """Clone function that annotates Dense layers for quantization.

    Returns ``tfmot.quantize_annotate_layer(layer)`` when ``layer`` is a
    ``Dense`` instance; any other layer is returned unchanged.
    """
    return tfmot.quantize_annotate_layer(layer) if isinstance(layer, Dense) else layer

In [17]:
# Apply quantize blocks to marked layers
# Step 1: rebuild the reloaded model, passing apply_dense_quantizer as the
# clone function so that Dense layers come back wrapped in a quantize annotation
annotated_model = clone_model(load_model, clone_function=apply_dense_quantizer)
# Step 2: turn the annotated model into the quantization-aware model that is
# compiled and fine-tuned in the next cell
quant_aware_model = tfmot.quantize_apply(annotated_model)
quant_aware_model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 batch_normalization (BatchN  (None, 28, 28, 1)        4         
 ormalization)                                                   
                                                                 
 conv2d (Conv2D)             (None, 14, 14, 32)        320       
                                                                 
 batch_normalization_1 (Batc  (None, 14, 14, 32)       128       
 hNormalization)                                                 
                                                                 
 activation (Activation)     (None, 14, 14, 32)        0         
                                                                 
 dropout (Dropout)           (None, 14, 14, 32)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 14, 14, 16)        2

In [18]:
# Start quantize-aware training, which finetunes model with controlled precision points
%%time
quant_aware_model.compile(loss='categorical_crossentropy', optimizer = AdamW(), metrics =['accuracy'])
history = quant_aware_model.fit(train_images, train_labels, validation_data=(test_images, test_labels),
                                epochs=20, batch_size=128, verbose=2, shuffle=True)

Epoch 1/20
469/469 - 6s - loss: 0.2031 - accuracy: 0.9257 - val_loss: 0.2350 - val_accuracy: 0.9164 - 6s/epoch - 12ms/step
Epoch 2/20
469/469 - 3s - loss: 0.2044 - accuracy: 0.9236 - val_loss: 0.2216 - val_accuracy: 0.9228 - 3s/epoch - 6ms/step
Epoch 3/20
469/469 - 3s - loss: 0.2002 - accuracy: 0.9270 - val_loss: 0.2190 - val_accuracy: 0.9217 - 3s/epoch - 6ms/step
Epoch 4/20
469/469 - 3s - loss: 0.1979 - accuracy: 0.9266 - val_loss: 0.2195 - val_accuracy: 0.9227 - 3s/epoch - 6ms/step
Epoch 5/20
469/469 - 3s - loss: 0.1977 - accuracy: 0.9276 - val_loss: 0.2194 - val_accuracy: 0.9204 - 3s/epoch - 7ms/step
Epoch 6/20
469/469 - 3s - loss: 0.1948 - accuracy: 0.9286 - val_loss: 0.2290 - val_accuracy: 0.9177 - 3s/epoch - 6ms/step
Epoch 7/20
469/469 - 3s - loss: 0.1943 - accuracy: 0.9279 - val_loss: 0.2265 - val_accuracy: 0.9179 - 3s/epoch - 6ms/step
Epoch 8/20
469/469 - 3s - loss: 0.1951 - accuracy: 0.9268 - val_loss: 0.2233 - val_accuracy: 0.9220 - 3s/epoch - 6ms/step
Epoch 9/20
469/469 - 3s

In [24]:
# Apply quantization now that quantize blocks are ready
# Convert directly from the in-memory quantization-aware Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(quant_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
# Calibration samples are produced by the representative_dataset() generator
converter.representative_dataset = representative_dataset
# Restrict the converter to the int8 builtin op set and request int8 tensors
# at the model's input and output
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.inference_input_type = tf.int8
converter.inference_output_type = tf.int8
# NOTE: this rebinds tflite_model, replacing the earlier converted model in memory
tflite_model = converter.convert()



In [25]:
# Write the converted quantization-aware model to disk as raw bytes
with open('fashion_mnist_tflite_qat_model', 'wb') as model_file:
    model_file.write(tflite_model)

In [27]:
interpreter = tf.lite.Interpreter('fashion_mnist_tflite_qat_model')
interpreter.allocate_tensors()

input_shape = input_details['shape']
qat_model_result = np.array([])
for image in test_images:
    input_data = np.int8(input_data.reshape(input_shape))
    interpreter.set_tensor(input_details['index'], input_data)
    interpreter.invoke()
    output_data = interpreter.get_tensor(output_details['index'])[0]
    qat_model_result = np.append(int_model_result, output_data.argmax())
qat_model_result.shape

(10001,)

In [32]:
# Accuracy comparisons
labels = test_labels.argmax(axis=1)
print("Float32:", (np.sum(labels==float_model_result)*100)/len(labels))
print("Naive Post-Training Quantize Int8:", (np.sum(labels==int_model_result)*100)/len(labels))
print("Quantize Aware Finetuning Int8:", (np.sum(labels==qat_model_result[:-1])*100)/len(labels))

Float32: 91.93
Naive Post-Training Quantize Int8: 91.9
Quantize Aware Finetuning Int8: 91.9
