In [1]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_model_optimization as tfmot
import pandas as pd
import time
from collections import OrderedDict

# Fashion-MNIST: the split sizes are printed below for reference.
(train_images, train_labels), (test_images, test_labels) = tf.keras.datasets.fashion_mnist.load_data()
print(train_images.shape)
print(train_labels.shape)
print(test_images.shape)
print(test_labels.shape)
# Scale pixel values to [0, 1] and keep them as float32. Dividing by 255.0
# alone yields float64 arrays, which doubles the resident size of the dataset;
# the later TFLite cells cast each image to float32 anyway.
train_images = (train_images / 255.0).astype(np.float32)
test_images = (test_images / 255.0).astype(np.float32)

2023-04-05 02:29:08.389289: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


(60000, 28, 28)
(60000,)
(10000, 28, 28)
(10000,)


In [2]:
# List the devices TensorFlow can see; the bare expression is shown as the cell output.
tf.config.list_physical_devices()

2023-04-05 02:29:15.044765: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-05 02:29:15.066612: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-05 02:29:15.066673: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.


[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'),
 PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
def _build_functional_model(learning_rate : float, regularization_rate : float = None,
                            regularized_slots : tuple = ()) -> tf.keras.Model:
    """Build and compile the CNN shared by every ``get_functional_model*`` variant.

    Architecture: three ``Conv2D`` (32x5, 64x5, 96x3, ReLU) + ``MaxPool2D(2, 2)``
    stages, ``Flatten`` and a 10-way softmax ``Dense`` layer named
    ``"dense_last"``. The model is compiled with Adam, sparse categorical
    cross-entropy and the ``accuracy`` metric.

    Args:
        learning_rate: Learning rate passed to the Adam optimizer.
        regularization_rate: L2 factor used for every regularizer that is
            attached. Ignored when ``regularized_slots`` is empty.
        regularized_slots: Layer keyword names (``'kernel_regularizer'``,
            ``'bias_regularizer'``, ``'activity_regularizer'``) that receive an
            ``L2(regularization_rate)`` on each Conv2D layer and on the final
            Dense layer.

    Returns:
        The compiled ``tf.keras.Model``.
    """
    def regularizers() -> dict:
        # A fresh L2 instance per slot and per layer; nothing is shared between layers.
        return {slot: tf.keras.regularizers.L2(regularization_rate) for slot in regularized_slots}

    input_layer = tf.keras.layers.Input(shape = (28, 28, 1))
    x = input_layer
    for filters, kernel_size in ((32, 5), (64, 5), (96, 3)):
        x = tf.keras.layers.Conv2D(filters, kernel_size, use_bias = True, activation = 'relu',
                                   **regularizers())(x)
        x = tf.keras.layers.MaxPool2D(pool_size = 2, strides = 2)(x)
    x = tf.keras.layers.Flatten()(x)
    dense_out = tf.keras.layers.Dense(10, activation = 'softmax', name = "dense_last",
                                      **regularizers())(x)

    model = tf.keras.models.Model(inputs = input_layer, outputs = dense_out)
    opt = tf.keras.optimizers.Adam(learning_rate = learning_rate)

    model.compile(optimizer = opt,
        loss = 'sparse_categorical_crossentropy',
        metrics = ['accuracy'])
    return model

def get_functional_model(learning_rate : float) -> tf.keras.Model:
    """Return the compiled CNN without any regularization."""
    return _build_functional_model(learning_rate)

def get_functional_model_kernel_regularization(learning_rate : float, regularization_rate : float) -> tf.keras.Model:
    """Return the compiled CNN with L2 regularization on every kernel."""
    return _build_functional_model(learning_rate, regularization_rate,
                                   ('kernel_regularizer',))

def get_functional_model_kernel_bias_regularization(learning_rate : float, regularization_rate : float) -> tf.keras.Model:
    """Return the compiled CNN with L2 regularization on every kernel and bias."""
    return _build_functional_model(learning_rate, regularization_rate,
                                   ('kernel_regularizer', 'bias_regularizer'))

def get_functional_model_all_regularization(learning_rate : float, regularization_rate : float) -> tf.keras.Model:
    """Return the compiled CNN with L2 kernel, bias and activity regularization."""
    return _build_functional_model(learning_rate, regularization_rate,
                                   ('kernel_regularizer', 'bias_regularizer', 'activity_regularizer'))

In [None]:
# model = get_functional_model(0.01)
# model.summary()

In [None]:
# loss, acc = model.evaluate(test_images, test_labels)
# print('Test accuracy : ', "{:0.2%}".format(acc))


In [4]:
# Hyper-parameter grid for the sweep below.
# Learning rates: 5 log-spaced values from 1e-2 down to 1e-5.
learning_rates = np.logspace(start = -2, stop = -5, num = 5, base = 10)
# L2 factors: 4 log-spaced values from 1e-2 down to 1e-4.
regularization_rates = np.logspace(start = -2, stop = -4, num = 4)
# Wider alternative kept for reference:
# batch_sizes = np.arange(10, 250 + 1, 40)
batch_sizes = np.array((150, 200))
epochs = 10
# Reference timestamp; get_performance_data reports the time elapsed since it.
last_time = time.time()
def get_performance_data(model: tf.keras.Model, batch_size : int, model_type : str, regularization_rate : float = None) -> dict:
    """Train ``model`` on the training split, evaluate it on the test split and
    return one record describing the run.

    Reads the notebook globals ``train_images``, ``train_labels``,
    ``test_images``, ``test_labels`` and ``epochs``, and updates the global
    ``last_time`` used to report the time elapsed since the previous call.

    Args:
        model: Compiled Keras model to train.
        batch_size: Batch size passed to ``model.fit``.
        model_type: Free-form label stored in the record (e.g. "normal_model").
        regularization_rate: L2 factor the model was built with, or ``None``
            for the unregularized model. Only recorded, never applied here.

    Returns:
        A dict with the keys ``name``, ``type``, ``learning_rate``,
        ``batch_size``, ``regularization_rate``, the per-epoch lists
        ``train_loss``, ``train_accuracy``, ``validation_loss``,
        ``validation_accuracy``, and the scalars ``test_loss`` and
        ``test_accuracy``.
    """
    global last_time
    # Public accessor instead of the private ``_learning_rate`` attribute.
    learning_rate = model.optimizer.learning_rate.numpy()

    now = time.time()
    # The elapsed time gets its own " - " separated field; the message used to
    # run it into the previous one (e.g. "Nonetime 1.10").
    print(model.name + " - " + model_type + " - " + str(batch_size) + " - " + str(learning_rate) + " - " + str(regularization_rate) + " - time " + str(now - last_time))
    last_time = now

    train_log : tf.keras.callbacks.History
    train_log = model.fit(train_images, train_labels,
        batch_size = batch_size,
        epochs = epochs,
        validation_split = 0.1,  # hold out the last 10% of the training data
        verbose = 0)
    test_loss, test_acc = model.evaluate(test_images, test_labels)

    history = train_log.history
    return {
        'name': model.name,
        'type': model_type,
        'learning_rate': learning_rate,
        'batch_size': batch_size,
        'regularization_rate': regularization_rate,
        'train_loss': history['loss'],
        'train_accuracy': history['accuracy'],
        'validation_loss': history['val_loss'],
        'validation_accuracy': history['val_accuracy'],
        'test_loss': test_loss,
        'test_accuracy': test_acc,
    }

# Regularized variants, trained in this order for every regularization rate.
regularized_builders = [
    ("model_kernel_reg", get_functional_model_kernel_regularization),
    ("model_kernel_bias_reg", get_functional_model_kernel_bias_regularization),
    ("model_all_reg", get_functional_model_all_regularization),
]

# One record per trained model:
# 2 batch sizes x 5 learning rates x (1 plain + 4 rates x 3 regularized variants).
performance_data = []
for batch_size in batch_sizes:
    for learning_rate in learning_rates:
        # Reset Keras' global state before every build so it does not grow over
        # the whole sweep. Side effect: auto-generated model names restart, so
        # the recorded 'name' is the same for every run; use 'type' and the
        # hyper-parameters to identify a run.
        tf.keras.backend.clear_session()
        model = get_functional_model(learning_rate)
        performance_data.append(get_performance_data(model, batch_size, "normal_model"))

        for regularization_rate in regularization_rates:
            for model_type, build_model in regularized_builders:
                # Build each variant right before it is trained so only one
                # regularized model is alive at a time (previously all three
                # were created up front).
                tf.keras.backend.clear_session()
                regularized_model = build_model(learning_rate, regularization_rate)
                performance_data.append(get_performance_data(regularized_model, batch_size, model_type, regularization_rate))

# data = pd.DataFrame(performance_data)

2023-04-05 02:29:23.918551: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-05 02:29:23.918654: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-05 02:29:23.918685: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-05 02:29:24.757214: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-05 02:29:24.757356: I tensorflow/compile

model - normal_model - 150 - 0.01 - Nonetime 1.1077699661254883


2023-04-05 02:29:25.102203: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 169344000 exceeds 10% of free system memory.
2023-04-05 02:29:25.235442: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 169344000 exceeds 10% of free system memory.


: 

: 

In [None]:
# Reload previously exported sweep results from disk.
# NOTE(review): the export below is commented out, so 'Test_File_2.csv' must
# already exist next to the notebook for this cell to run.
# print(performance_data)
# data = pd.DataFrame(performance_data)
# data.to_csv('Test_File_2.csv')
data2 = pd.read_csv('Test_File_2.csv')
# print(data2)
# Rows as plain Python lists, one per CSV row.
# NOTE(review): presumably the per-epoch list columns come back as strings
# after the CSV round trip - confirm before using them numerically.
performance_data2 = data2.values.tolist()
# print(performance_data2)

In [None]:
# loss, acc = model.evaluate(test_images, test_labels)
# print('Test accuracy : ', "{:0.2%}".format(acc))

In [None]:
# Destination of the baseline (float) model; the save itself is disabled.
save_dir = "./logs/"
save_path = f"{save_dir}model_official_1"
# model.save(save_path)

In [None]:
# Evaluate the baseline model on the test split.
# The path points at "model_official_1", but loading is disabled below.
save_dir = "./logs/"
save_path = save_dir + "model_official_1"
# model = tf.keras.models.load_model(save_path)
# NOTE(review): with the load commented out, `model` is whatever an earlier
# cell left in the kernel (the last plain model built by the sweep, if that
# cell ran) - confirm this is the model intended for quantization.
loss, acc = model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(acc))

In [None]:
# Wrap `model` for quantization-aware training and recompile it.
# The optimizer is given as the string 'adam', i.e. with Keras' default
# settings rather than the learning rate `model` was compiled with.
q_aware_model = tfmot.quantization.keras.quantize_model(model)
q_aware_model.compile(optimizer = 'adam', 
    loss = 'sparse_categorical_crossentropy', 
    metrics = ['accuracy'])
q_aware_model.summary()

In [None]:
# Test-set loss/accuracy of the quantization-aware model; in cell order this
# runs before the fine-tuning `fit` call.
q_aware_test_loss, q_aware_test_acc = q_aware_model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(q_aware_test_acc))

In [None]:
# Fine-tune the quantization-aware model; 10% of the training data is held out
# for validation. Only one epoch is run here (the 15-epoch setting is disabled).
train_log = q_aware_model.fit(train_images, train_labels,
    batch_size = 128,
    # epochs = 15,
    epochs = 1,
    validation_split = 0.1)

In [None]:
# Test-set loss/accuracy of the quantization-aware model; in cell order this
# runs after the fine-tuning `fit` call and overwrites the earlier values.
q_aware_test_loss, q_aware_test_acc = q_aware_model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(q_aware_test_acc))

In [None]:
# Destination of the quantization-aware model; the save itself is disabled.
save_dir = "./logs/"
save_path = f"{save_dir}model_q4_func"
# q_aware_model.save(save_path)

In [None]:
# Load the quantization-aware model from disk and re-evaluate it.
# NOTE(review): the matching `q_aware_model.save(save_path)` is commented out
# in the previous cell, so this only works if the model was saved in an
# earlier session - confirm the directory exists.
save_dir = "./logs/"
save_path = save_dir + "model_q4_func"
q_aware_model : tf.keras.Model
# Loading happens inside tfmot's quantize_scope().
with tfmot.quantization.keras.quantize_scope():
    q_aware_model = tf.keras.models.load_model(save_path)
q_aware_test_loss, q_aware_test_acc = q_aware_model.evaluate(test_images, test_labels)
print('Test accuracy : ', "{:0.2%}".format(q_aware_test_acc))

In [None]:
# Print each layer's index, its name and the number of variables it owns.
for i, layer in enumerate(q_aware_model.layers):
    print("Layer : ", i, layer.name, " - params : ", len(layer.variables))

In [None]:
# Collect, for every quantized weight of the quantization-aware model:
#   * the fake-quantized (quantize -> dequantize) tensor from the layer's own
#     quantizer, and the integer codes obtained by rescaling it with max_var;
#   * the same two quantities recomputed with TensorFlow's fake_quant ops
#     (per-channel for conv layers, per-tensor for dense layers).
bit_width = 8
quantized_and_dequantized = OrderedDict()
quantized = OrderedDict()
new_quantized_and_dequantized = OrderedDict()
new_quantized = OrderedDict()
layer_index_list = []   # index of the wrapping layer, one entry per weight
key_index_list = []     # weight key, aligned with layer_index_list

layer : tfmot.quantization.keras.QuantizeWrapperV2
for i, layer in enumerate(q_aware_model.layers):
    quantizer : tfmot.quantization.keras.quantizers.Quantizer
    weight : tf.Variable
    # Only layers exposing `_weight_vars` carry quantized weights.
    if not hasattr(layer, '_weight_vars'):
        continue

    for weight, quantizer, quantizer_vars in layer._weight_vars:
        min_var = quantizer_vars['min_var']
        max_var = quantizer_vars['max_var']

        # Weight name without its last two characters.
        key = weight.name[:-2]
        layer_index_list.append(i)
        key_index_list.append(key)

        fake_quantized = quantizer(inputs = weight, training = False, weights = quantizer_vars)
        quantized_and_dequantized[key] = fake_quantized
        quantized[key] = np.round(fake_quantized / max_var * (2**(bit_width-1)-1))

        # Pick the matching TensorFlow fake-quant op from the layer name.
        if "conv2d" in layer.name:
            fake_quant_op = tf.quantization.fake_quant_with_min_max_vars_per_channel
        elif "dense" in layer.name:
            fake_quant_op = tf.quantization.fake_quant_with_min_max_vars
        else:
            fake_quant_op = None

        if fake_quant_op is not None:
            recomputed = fake_quant_op(weight, min_var, max_var, bit_width, narrow_range = True, name = "New_quantized_" + str(i))
            new_quantized_and_dequantized[key] = recomputed
            new_quantized[key] = np.round(recomputed / max_var * (2**(bit_width-1)-1))

# Show one slice of the integer codes per weight: [:, :, 0, 0] for non-dense
# weights and [:, 0] for dense ones. The same slices of
# quantized_and_dequantized / new_quantized_and_dequantized / new_quantized can
# be printed for comparison.
for key, integer_codes in quantized.items():
    print(key)
    if "dense" in key:
        print(integer_codes[:,0])
    else:
        print(integer_codes[:,:,0,0])

In [None]:
def self_quantize_function(input, min_var, max_var, bits, narrow_range = False):
    """Hand-written fake quantization (quantize then dequantize) of ``input``.

    The range ``[min_var, max_var]`` is divided into ``2**bits - 1`` steps
    (``2**bits - 2`` when ``narrow_range`` is set). The range is then shifted so
    its lower bound is a multiple of the step, ``input`` is clamped to that
    adjusted range, and every value is rounded to the nearest multiple of the
    step.

    Args:
        input: Values to quantize (array-like).
        min_var: Lower end of the quantization range; scalar, or an array that
            broadcasts against ``input``.
        max_var: Upper end of the quantization range; same shape rules as
            ``min_var``.
        bits: Bit width of the quantized representation.
        narrow_range: Use ``2**bits - 2`` steps instead of ``2**bits - 1``.

    Returns:
        The clamped, quantized-and-dequantized values, broadcast to the shape
        of ``input``.
    """
    steps = 2**bits - 2 if narrow_range else 2**bits - 1
    scale = (max_var - min_var) / steps

    # Shift the range so that its lower bound is a multiple of `scale`.
    min_adj = scale * np.round(min_var / scale)
    max_adj = max_var + min_adj - min_var

    # Saturate out-of-range values. The adjusted bounds used to be computed but
    # never applied, so such values were rounded without being clamped.
    clamped = np.clip(np.asarray(input), np.asarray(min_adj), np.asarray(max_adj))
    return scale * np.round(clamped / scale)

# Re-quantize every collected weight with self_quantize_function and print the
# same slice of integer codes as the cell above, for a side-by-side comparison.
for idx, layer_index in enumerate(layer_index_list):
    wrapped_layer = q_aware_model.layers[layer_index]
    weight_key = key_index_list[idx]

    # Range variables of this weight: non-trainable variables whose name
    # contains the weight key; the min/max ones are picked by name.
    m_vars = {variable.name: variable for variable in wrapped_layer.non_trainable_variables if weight_key in variable.name}
    min_key = [name for name in m_vars if "min" in name][0]
    max_key = [name for name in m_vars if "max" in name][0]
    min_var = m_vars[min_key]
    max_var = m_vars[max_key]

    # Select the kernel by name; fall back to the first trainable variable,
    # which is what the previous hard-coded index 0 always used.
    trainable = wrapped_layer.trainable_variables
    kernel_var = next((variable for variable in trainable if "kernel" in variable.name), trainable[0])

    self_quantized_and_dequantized = self_quantize_function(kernel_var, min_var, max_var, bit_width, narrow_range = True)
    # Rescale by max_var to get integer codes (same formula as the previous cell).
    self_quantized = np.round(self_quantized_and_dequantized / max_var * (2**(bit_width - 1) - 1))

    print(weight_key)
    if "dense" not in weight_key:
        print(self_quantized[:,:,0,0])
    else:
        print(self_quantized[:,0])

In [None]:
# Inspect the quantization configuration of one layer of the quantization-aware
# model; `l` is the index into q_aware_model.layers.
l = 2
print(q_aware_model.layers[l].quantize_config.get_config())
print(q_aware_model.layers[l].quantize_config.activation_quantizer)

In [None]:
# Conversion to TF Lite model
# The converter is built from the in-memory quantization-aware Keras model and
# run with the DEFAULT optimization set.
converter = tf.lite.TFLiteConverter.from_keras_model(q_aware_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]

# Result of convert(); later passed as `model_content` to tf.lite.Interpreter.
quantized_tflite_model = converter.convert()

In [None]:
def evaluate_model(interpreter: "tf.lite.Interpreter", images = None, labels = None):
    """Return the classification accuracy of a TFLite interpreter.

    Each image is fed to the interpreter on its own, as a float32 tensor with a
    leading batch axis and a trailing channel axis. The predicted class is the
    argmax of the first output tensor.

    Args:
        interpreter: Interpreter on which ``allocate_tensors()`` has already
            been called.
        images: Iterable of 2-D images. Defaults to the notebook global
            ``test_images``.
        labels: Ground-truth class indices aligned with ``images``. Defaults
            to the notebook global ``test_labels``.

    Returns:
        Fraction of images whose predicted class equals its label.

    Raises:
        ValueError: If the number of labels differs from the number of images.
    """
    if images is None:
        images = test_images
    if labels is None:
        labels = test_labels
    labels = np.asarray(labels)

    input_index = interpreter.get_input_details()[0]["index"]
    output_index = interpreter.get_output_details()[0]["index"]

    # Run a prediction on every image.
    predictions = []
    for i, image in enumerate(images):
        if i % 1000 == 0:
            print('Evaluated on {n} results so far.'.format(n=i))
        # Pre-processing: add the batch and channel dimensions, then convert
        # once to float32 to match the model's input format.
        batch = np.expand_dims(np.expand_dims(image, axis = 0), axis = 3).astype(np.float32)
        interpreter.set_tensor(input_index, batch)

        # Run inference.
        interpreter.invoke()

        # Post-processing: drop the batch dimension and take the most likely class.
        output = interpreter.tensor(output_index)
        predictions.append(np.argmax(output()[0]))

    print('\n')
    predictions = np.array(predictions)
    # Without this check a length mismatch would silently compare as "not equal".
    if predictions.shape != labels.shape:
        raise ValueError("Got {} predictions for {} labels.".format(predictions.shape[0], labels.shape))
    # Compare predictions with ground truth labels to calculate accuracy.
    return (predictions == labels).mean()

In [None]:
# Create an interpreter from the in-memory TFLite model and allocate its tensors.
interpreter = tf.lite.Interpreter(model_content = quantized_tflite_model)
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
print("Input Shape: ", input_details[0]['shape'])
print("Output Shape: ", output_details[0]['shape'])

# Accuracy of the TFLite model over the test images (one inference per image).
test_accuracy = evaluate_model(interpreter)

# Compare with the accuracy last measured on the quantization-aware Keras model.
print('Quant TFLite test_accuracy:', test_accuracy)
print('Quant TF test accuracy:', q_aware_test_acc)

In [None]:
# Destination of the converted TFLite flatbuffer; writing it is disabled.
save_dir = "./logs/"
quant_file = 'quant_model_q4_func.tflite'
save_path = f"{save_dir}{quant_file}"
# with open(save_path, 'wb') as f:
#   f.write(quantized_tflite_model)

In [None]:
# Run the TFLite interpreter on one test image and report its prediction.
ind_index = 10
# Add the batch axis (front) and the channel axis (position 3), as float32.
test_image = np.expand_dims(
    np.expand_dims(test_images[ind_index], axis = 0).astype(np.float32),
    axis = 3,
).astype(np.float32)

interpreter.set_tensor(input_details[0]['index'], test_image)
interpreter.invoke()
output_array = interpreter.get_tensor(output_details[0]['index'])

# First row of the output holds the per-class scores for the single image.
digit = np.argmax(output_array[0])
probability = max(output_array[0])
print("Input index : ", test_labels[ind_index])
print("Output index : ", digit, "{0:.2%}".format(probability))
