### 0 使用GPU进行模型训练

In [1]:
import os

# Choose which GPU(s) TensorFlow is allowed to see.
# This assignment has to run BEFORE TensorFlow enumerates devices: CUDA reads
# CUDA_VISIBLE_DEVICES once, at initialization, so setting it after
# tf.config.experimental.list_physical_devices() has been called is too late
# to take effect.
# os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"  # "0,1" -> use GPUs number 0 and 1
os.environ["CUDA_VISIBLE_DEVICES"] = "0"  # "0" -> use GPU number 0 only

import keras
from keras.datasets import mnist
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.python.client import device_lib

# Optional environment diagnostics -- uncomment when debugging the GPU setup.
# print('tensorflow version:',tf.__version__)  # TensorFlow version
# print('===================================================')
# print('cuda available:',tf.test.is_built_with_cuda())  # was TensorFlow built with CUDA?
# print('===================================================')
# print(tf.test.is_gpu_available())  # do the CUDA / TensorFlow-GPU / cuDNN versions match?
# print('===================================================')

# GPUs visible to TensorFlow (already restricted by CUDA_VISIBLE_DEVICES above).
gpus = tf.config.experimental.list_physical_devices(device_type='GPU')
# print(gpus)

# Detailed information about every local device:
# print(device_lib.list_local_devices())

# From here on the code below runs on the GPU; utilisation can be checked in
# the OS task manager (Performance tab).

Using TensorFlow backend.


### 1 数据预处理

In [2]:

# Load MNIST (cached locally under the name "mnist.npz") and report the raw shapes.
(train_images, train_labels), (test_images, test_labels) = mnist.load_data("mnist.npz")
print('训练集', train_images.shape, train_labels.shape)
print('测试集', test_images.shape, test_labels.shape)

# Image geometry: height (rows), width (columns), channels.
img_row, img_col, channel = 28, 28, 1

# Add the trailing channel axis -> (N, 28, 28, 1), convert to float32 and
# scale the pixel values from [0, 255] down to [0, 1].
train_images = train_images.reshape(-1, img_row, img_col, channel).astype("float32") / 255
test_images = test_images.reshape(-1, img_row, img_col, channel).astype("float32") / 255

训练集 (60000, 28, 28) (60000,)
测试集 (10000, 28, 28) (10000,)


### 2 构建LeNet-5模型并训练

In [3]:
# Build the network: two conv + max-pool stages followed by two dense layers.
# (A BatchNormalization layer could optionally be inserted after each conv layer.)
model = models.Sequential([
    layers.Conv2D(8, (3, 3), strides=(1, 1), padding='same', activation='relu',
                  input_shape=(28, 28, 1), name='conv1'),
    layers.MaxPooling2D(pool_size=(2, 2), name='pool1'),
    layers.Conv2D(16, (3, 3), strides=(1, 1), padding='same', activation='relu',
                  name='conv2'),
    layers.MaxPooling2D(pool_size=(2, 2), name='pool2'),
    layers.Flatten(),
    layers.Dense(128, activation='relu', name='fc1'),
    layers.Dense(10, activation='softmax', name='fc2'),
])

# Compile with Adam; the labels are integer class ids, hence the sparse loss.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Train for 5 epochs, validating against the test split after every epoch.
model.fit(
    train_images,
    train_labels,
    epochs=5,
    batch_size=20,
    validation_data=(test_images, test_labels),
)



Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x1bb60577908>

### 3 保存LeNet-5模型为h5格式

In [4]:
# Print the layer-by-layer summary of the trained model ...
model.summary()
# ... then persist it to disk in HDF5 (.h5) format.
model.save(filepath='lenet_GPU.h5')

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1 (Conv2D)               (None, 28, 28, 8)         80        
_________________________________________________________________
pool1 (MaxPooling2D)         (None, 14, 14, 8)         0         
_________________________________________________________________
conv2 (Conv2D)               (None, 14, 14, 16)        1168      
_________________________________________________________________
pool2 (MaxPooling2D)         (None, 7, 7, 16)          0         
_________________________________________________________________
flatten (Flatten)            (None, 784)               0         
_________________________________________________________________
fc1 (Dense)                  (None, 128)               100480    
_________________________________________________________________
fc2 (Dense)                  (None, 10)                1

### 4 将h5格式的模型转换为tflite格式

In [5]:

# Reload the saved Keras model and convert it to a float TFLite flatbuffer.
keras_model = tf.keras.models.load_model('lenet_GPU.h5')
converter = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops.
  tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops.
]
tflite_model = converter.convert()

# Write the flatbuffer inside a context manager so the file handle is closed
# (and the data flushed) deterministically instead of being left to the
# garbage collector.
tflite_name = "lenet_GPU.tflite"
with open(tflite_name, "wb") as tflite_file:
    tflite_bytes_written = tflite_file.write(tflite_model)

# Show the size of the written model in bytes as the cell output.
tflite_bytes_written

414744

In [6]:
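# TFLite is a framework for deploying machine-learning models on mobile and edge devices. Quantizing a model's weights and biases to 8-bit integers can shrink the model considerably, which improves its performance and efficiency.
# However, when a model is quantized to int8, bias values may exceed 127: biases are learned during training and can have a wider distribution than the weights.
# Forcing the biases to be at most 127 could therefore cause accuracy loss or degraded performance; this is why TFLite's integer quantization scheme stores biases as int32 rather than clamping them to int8.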

### 5 将h5模型量化为int8的tflite模型

量化策略：DEFAULT

In [6]:
# Load the saved Keras model.
keras_model = tf.keras.models.load_model('lenet_GPU.h5')
converter_quant = tf.lite.TFLiteConverter.from_keras_model(keras_model)
converter_quant.target_spec.supported_ops = [
  tf.lite.OpsSet.TFLITE_BUILTINS, # enable TensorFlow Lite ops.
  tf.lite.OpsSet.SELECT_TF_OPS # enable TensorFlow ops.
  ]
# set the optimization parameters for TensorFlow Lite conversion
converter_quant.optimizations = [tf.lite.Optimize.DEFAULT]

# convert the model to TensorFlow Lite format with float32 activations and int8 weights
quanitfied_defult_model = converter_quant.convert()

# Save the converted model. The context manager guarantees the file handle is
# closed (and the data flushed) instead of leaving it to the garbage collector.
quanitfied_defult_name = "lenet_GPU_quanitfied.tflite"
with open(quanitfied_defult_name, "wb") as quantized_file:
    quantized_bytes_written = quantized_file.write(quanitfied_defult_model)

# Show the size of the written model in bytes as the cell output.
quantized_bytes_written

106704

### 6 模型验证

In [8]:
def evaluate_model(interpreter_path, images=None, labels=None, input_scale=255):
    """Run a TFLite model over a labelled image set and print its top-1 accuracy.

    Args:
        interpreter_path: Path of the ``.tflite`` model file to load.
        images: Array of input images indexed along its first axis. Defaults
            to the notebook-level ``test_images``.
        labels: Integer class label for each image. Defaults to the
            notebook-level ``test_labels``.
        input_scale: Factor every image is multiplied by before it is fed to
            the interpreter. The default of 255 keeps the original behaviour
            of this notebook.
            NOTE(review): the model in this notebook is trained on images
            scaled to [0, 1], so 255 feeds it un-normalized pixel values;
            pass ``input_scale=1`` to evaluate with the training-time
            preprocessing -- confirm which one is intended.

    Raises:
        ValueError: If ``images`` is empty or ``labels`` has a different length.
    """
    if images is None:
        images = test_images
    if labels is None:
        labels = test_labels

    # Validate up front: an empty set would otherwise end in a ZeroDivisionError.
    image_count = images.shape[0]
    if image_count == 0:
        raise ValueError("cannot evaluate a model on an empty image set")
    if len(labels) != image_count:
        raise ValueError(
            "images and labels differ in length: {} vs {}".format(image_count, len(labels))
        )

    # Load the model and allocate its tensors.
    interpreter = tf.lite.Interpreter(model_path=interpreter_path)
    interpreter.allocate_tensors()

    # Look up the input/output tensor handles once, outside the loop.
    input_detail = interpreter.get_input_details()[0]
    input_index = input_detail['index']
    input_shape = input_detail['shape']
    output_index = interpreter.get_output_details()[0]['index']

    acc_count = 0
    for image, expected_label in zip(images, labels):
        # Reshape one sample to the model's input shape and feed it as float32.
        sample = (image.reshape(input_shape) * input_scale).astype("float32")
        interpreter.set_tensor(input_index, sample)
        interpreter.invoke()
        predicted_label = np.argmax(interpreter.get_tensor(output_index))
        if predicted_label == expected_label:
            acc_count += 1
    print("test_images accuracy is {:.2%}".format(acc_count / image_count))


In [9]:
# Accuracy of the float (non-quantized) TFLite model.
print('tflite')
evaluate_model(interpreter_path='lenet_GPU.tflite')

tflite
test_images accuracy is 98.98%


In [10]:

# Accuracy of the quantized (Optimize.DEFAULT) TFLite model.
print('tflite_quanitfied')
evaluate_model(interpreter_path='lenet_GPU_quanitfied.tflite')

tflite_quanitfied
test_images accuracy is 98.99%
