In [1]:
import tempfile
import os
import tensorflow as tf
import numpy as np
from tensorflow import keras
import time

'''
Simple CNN model
'''

# Load MNIST and rescale the pixel values by 1/255 before training.
mnist = keras.datasets.mnist
(train_images, train_labels), (test_images, test_labels) = mnist.load_data()
train_images = train_images / 255.0
test_images = test_images / 255.0

# The InputLayer takes (28, 28) images; Reshape adds the trailing channel axis
# that Conv2D needs. The final Dense layer emits 10 raw logits (no softmax),
# which is why the loss below is built with from_logits=True.
model = keras.Sequential([
    keras.layers.InputLayer(input_shape=(28,28)),
    keras.layers.Reshape(target_shape=(28,28,1)),
    keras.layers.Conv2D(filters=12, kernel_size=(3,3), activation='relu'),
    keras.layers.MaxPooling2D(pool_size=(2,2)),
    keras.layers.Flatten(),
    keras.layers.Dense(10)
])
model.compile(optimizer='adam',
                loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                metrics=['accuracy'])

# perf_counter is the monotonic, high-resolution clock intended for measuring
# elapsed intervals; the wall clock can jump and is coarse on some platforms.
start_time = time.perf_counter()

model.fit(train_images, train_labels, epochs=4, validation_split=0.1)

fit_time = time.perf_counter() - start_time
print(f"Training time: {fit_time:.10f} seconds")

start_time = time.perf_counter()

_, baseline_model_accuracy = model.evaluate(test_images, test_labels, verbose=0)

fit_time = time.perf_counter() - start_time
print(f"Testing time: {fit_time:.10f} seconds")

print('accuracy:', baseline_model_accuracy)

# mkstemp hands back an already-open OS-level descriptor together with the
# path. save_model opens the file by path itself, so close the descriptor
# immediately instead of leaking it for the lifetime of the kernel.
keras_fd, keras_file = tempfile.mkstemp('.h5')
os.close(keras_fd)
tf.keras.models.save_model(model, keras_file, include_optimizer=False)
print('save as', keras_file)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4
Training time: 74.6676349640 seconds
Testing time: 1.4653494358 seconds
accuracy: 0.9763000011444092
save as C:\Users\minku\AppData\Local\Temp\tmpuj1bzjju.h5


  tf.keras.models.save_model(model, keras_file, include_optimizer=False)


In [2]:
'''
Fine-tune the trained model with pruning
'''
import tensorflow_model_optimization as tfmot

prune_low_magnitude = tfmot.sparsity.keras.prune_low_magnitude

# Fine-tuning hyperparameters.
batch_size = 128
epochs = 2
validation_split = 0.1

# Total number of training steps in the fine-tuning run: batches per epoch
# over the non-validation share of the training set, times the epoch count.
# The sparsity schedule below finishes on this step.
num_images = train_images.shape[0] * (1 - validation_split)
steps_per_epoch = np.ceil(num_images / batch_size).astype(np.int32)
end_step = steps_per_epoch * epochs

# Ramp sparsity from 50% to 80% between step 0 and end_step.
sparsity_schedule = tfmot.sparsity.keras.PolynomialDecay(
    initial_sparsity=0.50,
    final_sparsity=0.8,
    begin_step=0,
    end_step=end_step,
)
pruning_params = {'pruning_schedule': sparsity_schedule}

# Wrap the already-trained baseline model for pruning and recompile it with
# the same optimizer, loss and metric as the baseline.
model_for_pruning = prune_low_magnitude(model, **pruning_params)
model_for_pruning.compile(
    optimizer='adam',
    loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=['accuracy'],
)

# UpdatePruningStep advances the pruning step counter during fit().
callbacks = [tfmot.sparsity.keras.UpdatePruningStep()]

# --- timed fine-tuning ---
start_time = time.time()
model_for_pruning.fit(
    train_images,
    train_labels,
    batch_size=batch_size,
    epochs=epochs,
    validation_split=validation_split,
    callbacks=callbacks,
)
fit_time = time.time() - start_time
print(f"Training time: {fit_time:.10f} seconds")

# --- timed evaluation on the held-out test set ---
start_time = time.time()
_, model_for_pruning_accuracy = model_for_pruning.evaluate(
    test_images, test_labels, verbose=0
)
fit_time = time.time() - start_time
print(f"Testing time: {fit_time:.10f} seconds")

print('baseline accuracy:', baseline_model_accuracy)
print('pruning accuracy:', model_for_pruning_accuracy)

Epoch 1/2
Epoch 2/2
Training time: 64.1299629211 seconds
Testing time: 1.4023017883 seconds
baseline accuracy: 0.9763000011444092
pruning accuracy: 0.9735999703407288


In [3]:
'''
Shrink the model 3x using pruning
'''

import os
import zipfile

def get_gzipped_model_size(file):
    """Return the compressed size, in bytes, of `file`.

    Despite the name, the file is written into a zip archive using DEFLATE
    compression (not a .gz file), and the size of that archive is reported.
    The archive is a scratch file that is removed before returning.

    Args:
        file: Path of the file to measure.

    Returns:
        Size in bytes of the zip archive containing `file`.
    """
    # mkstemp returns an open OS-level descriptor plus the path. ZipFile opens
    # the path itself, so close the descriptor right away to avoid leaking it.
    fd, zipped_file = tempfile.mkstemp('.zip')
    os.close(fd)
    try:
        with zipfile.ZipFile(zipped_file, 'w', compression=zipfile.ZIP_DEFLATED) as f:
            f.write(file)
        return os.path.getsize(zipped_file)
    finally:
        # The archive exists only to be measured; do not leave one behind
        # in the temp directory per call.
        os.remove(zipped_file)

# Strip the pruning wrappers to obtain a plain Keras model for export.
model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)

# mkstemp returns an open OS-level descriptor plus the path. save_model opens
# the path itself, so close the descriptor immediately instead of leaking it.
pruned_keras_fd, pruned_keras_file = tempfile.mkstemp('.h5')
os.close(pruned_keras_fd)
tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)
print('Save pruned Keras model to:', pruned_keras_file)

converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
pruned_tflite_model = converter.convert()

# Write through the descriptor mkstemp already opened rather than opening the
# path a second time; the with-block closes it.
pruned_tflite_fd, pruned_tflite_file = tempfile.mkstemp('.tflite')
with os.fdopen(pruned_tflite_fd, 'wb') as f:
    f.write(pruned_tflite_model)
print('Save pruned TFLite model to:', pruned_tflite_file)

print("gzip baseline Keras model size: %.2f bytes" % (get_gzipped_model_size(keras_file)))  # baseline (unpruned) model
print("gzip pruned Keras model size: %.2f bytes" % (get_gzipped_model_size(pruned_keras_file)))  # model after pruning
print("gzip pruned TFlite model size: %.2f bytes" % (get_gzipped_model_size(pruned_tflite_file)))  # pruned model in TFLite format


Save pruned Keras model to: C:\Users\minku\AppData\Local\Temp\tmp9rif_ehq.h5


  tf.keras.models.save_model(model_for_export, pruned_keras_file, include_optimizer=False)


INFO:tensorflow:Assets written to: C:\Users\minku\AppData\Local\Temp\tmpo66tn70_\assets


INFO:tensorflow:Assets written to: C:\Users\minku\AppData\Local\Temp\tmpo66tn70_\assets


Save pruned TFLite model to: C:\Users\minku\AppData\Local\Temp\tmpvvp782kd.tflite
gzip baseline Keras model size: 78283.00 bytes
gzip pruned Keras model size: 25872.00 bytes
gzip pruned TFlite model size: 25015.00 bytes


In [4]:
'''
Shrink the model 10x using pruning and quantization
'''
# Convert the stripped, pruned model again, this time with the converter's
# default optimizations enabled.
converter = tf.lite.TFLiteConverter.from_keras_model(model_for_export)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
quantized_and_pruned_tflite_model = converter.convert()

# mkstemp returns an open OS-level descriptor plus the path. Write through
# that descriptor (closed by the with-block) instead of discarding it and
# reopening the path, which leaked the descriptor.
quantized_fd, quantized_and_pruned_tflite_file = tempfile.mkstemp('.tflite')
with os.fdopen(quantized_fd, 'wb') as f:
    f.write(quantized_and_pruned_tflite_model)

print('quantized and pruned TFLite model save to:', quantized_and_pruned_tflite_file)
print("gzip baseline Keras model size: %.2f bytes" % (get_gzipped_model_size(keras_file)))
print("gzipped quantized and pruned TFlite model size: %.2f bytes" % (get_gzipped_model_size(quantized_and_pruned_tflite_file)))

INFO:tensorflow:Assets written to: C:\Users\minku\AppData\Local\Temp\tmpep4sfyhl\assets


INFO:tensorflow:Assets written to: C:\Users\minku\AppData\Local\Temp\tmpep4sfyhl\assets


quantized and pruned TFLite model save to: C:\Users\minku\AppData\Local\Temp\tmp939kvh5u.tflite
gzip baseline Keras model size: 78283.00 bytes
gzipped quantized and pruned TFlite model size: 8224.00 bytes


In [6]:
import numpy as np

def evaluate_model(interpreter, images=None, labels=None):
    """Measure the classification accuracy of an interpreter, one image at a time.

    Each image is given a leading batch axis of size 1, cast to float32 and fed
    to the interpreter's first input. The predicted class is the argmax over
    row 0 of the interpreter's first output.

    Args:
        interpreter: Object exposing get_input_details, get_output_details,
            set_tensor, invoke and tensor. This function does not allocate
            tensors, so the caller must have done so already.
        images: Iterable of input images. Defaults to the notebook-level
            `test_images` when omitted.
        labels: Ground-truth class ids aligned with `images`. Defaults to the
            notebook-level `test_labels` when omitted.

    Returns:
        Fraction of images whose predicted class equals its label.
    """
    # Fall back to the notebook globals so existing one-argument calls keep
    # working unchanged.
    if images is None:
        images = test_images
    if labels is None:
        labels = test_labels

    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    prediction_digits = []
    for i, image in enumerate(images):
        if i % 1000 == 0:
            print('Evaluated on {n} results so far.\n'.format(n=i))
        batch = np.expand_dims(image, axis=0).astype(np.float32)
        interpreter.set_tensor(input_index, batch)
        interpreter.invoke()
        # tensor() returns a callable; call it and reduce straight away so no
        # reference to the interpreter's output buffer outlives this iteration.
        output = interpreter.tensor(output_index)
        prediction_digits.append(np.argmax(output()[0]))

    return (np.array(prediction_digits) == np.asarray(labels)).mean()

# Build an interpreter directly from the in-memory quantized + pruned model
# bytes and allocate its tensors before running inference.
interpreter = tf.lite.Interpreter(
    model_content=quantized_and_pruned_tflite_model,
)
interpreter.allocate_tensors()

# --- timed evaluation over the test set ---
start_time = time.time()
test_accuracy = evaluate_model(interpreter)
fit_time = time.time() - start_time
print(f"Testing time: {fit_time:.10f} seconds")

# Compare the pruned Keras model against its quantized TFLite counterpart.
print(
    'after pruning model accuracy:',
    model_for_pruning_accuracy,
)
print(
    'after pruning and quantization model accuracy:',
    test_accuracy,
)

Evaluated on 0 results so far.

Evaluated on 1000 results so far.

Evaluated on 2000 results so far.

Evaluated on 3000 results so far.

Evaluated on 4000 results so far.

Evaluated on 5000 results so far.

Evaluated on 6000 results so far.

Evaluated on 7000 results so far.

Evaluated on 8000 results so far.

Evaluated on 9000 results so far.

Testing time: 2.6164758205 seconds
after pruning model accuracy: 0.9735999703407288
after pruning and quantization model accuracy: 0.9735
