<a href="https://colab.research.google.com/github/Metal2/ISALab1/blob/main/Script_AutoQKeras.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries etc


In [None]:
# Mount Google Drive at /content/drive; the model and search-result paths used
# further down in this notebook point into drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Install QKeras straight from its GitHub repository. No tag or commit is
# given, so the installed code can differ between runs of this notebook.
!pip install git+https://github.com/google/qkeras.git

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting git+https://github.com/google/qkeras.git
  Cloning https://github.com/google/qkeras.git to /tmp/pip-req-build-wo4yv9jm
  Running command git clone -q https://github.com/google/qkeras.git /tmp/pip-req-build-wo4yv9jm


In [None]:
import sys
import warnings
warnings.filterwarnings("ignore")

import json
import pprint
import numpy as np
import six
import tempfile
import tensorflow.compat.v2 as tf
# V2 Behavior is necessary to use TF2 APIs before TF2 is default TF version internally.
tf.enable_v2_behavior()
from tensorflow.keras.optimizers import *
import time
import os
from os.path import exists
from contextlib import redirect_stdout


import qkeras
from qkeras.autoqkeras import *
from qkeras import *
from qkeras.utils import *
from qkeras.qtools import run_qtools
from qkeras.qtools import settings as qtools_settings

from tensorflow.keras.utils import to_categorical
import tensorflow_datasets as tfds


In [None]:
def get_data(dataset_name, fast=False):
  """Loads a tfds image dataset as scaled, mean-centred, one-hot numpy arrays.

  Args:
    dataset_name: dataset name handed to `tfds.load`.
    fast: accepted but not used by this function.

  Returns:
    ((x_train, y_train), (x_test, y_test)). Images are float32, divided by
    256 and shifted by the mean of the training images; labels are one-hot
    matrices with `max(train label) + 1` columns.
  """
  # batch_size=-1 requests each split as a single full batch.
  train_split = tfds.load(name=dataset_name, split="train", batch_size=-1)
  test_split = tfds.load(name=dataset_name, split="test", batch_size=-1)

  train_arrays = tfds.as_numpy(train_split)
  x_train = train_arrays["image"].astype(np.float32)
  y_train = train_arrays["label"]

  test_arrays = tfds.as_numpy(test_split)
  x_test = test_arrays["image"].astype(np.float32)
  y_test = test_arrays["label"]

  # Rank-3 image arrays have no channel axis: append one of size 1.
  if x_train.ndim == 3:
    x_train = np.expand_dims(x_train, axis=-1)
    x_test = np.expand_dims(x_test, axis=-1)

  # Scale in place so no second copy of the images is allocated.
  for images in (x_train, x_test):
    images /= 256.0

  # Mean over the sample axis of the training set only; the same offset is
  # removed from both splits.
  x_mean = np.mean(x_train, axis=0)
  for images in (x_train, x_test):
    images -= x_mean

  # Labels are taken to run from 0 to max, so the class count is max + 1.
  nb_classes = y_train.max() + 1
  y_train = to_categorical(y_train, nb_classes)
  y_test = to_categorical(y_test, nb_classes)

  for images, split_name in ((x_train, "train"), (x_test, "test")):
    print(images.shape[0], split_name + " samples")
  return (x_train, y_train), (x_test, y_test)


In [None]:
from tensorflow.keras.initializers import *
from tensorflow.keras.layers import *
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import *

class ConvBlockNetwork(object):
  """Builder for a network made of convolutional blocks plus a softmax head.

  The constructor only stores the configuration; `build()` creates the Keras
  `Model`. Every layer gets an explicit name derived from its block/layer
  position (e.g. "conv2d_0", "bn_1_2", "mp_1_2").
  """

  def __init__(
      self,
      shape,
      nb_classes,
      kernel_size,
      filters,
      dropout_rate=0.0,
      with_maxpooling=True,
      with_batchnorm=True,
      kernel_initializer="he_normal",
      bias_initializer="zeros",
      use_separable=False,
      use_xnornet_trick=False,
      all_conv=False
  ):
    """Stores the network configuration.

    Args:
      shape: shape of inputs.
      nb_classes: number of output classes.
      kernel_size: kernel_size of network.
      filters: one entry per block. A scalar entry gives a block with a
        single conv layer; a list entry gives one conv layer per element.
      dropout_rate: a Dropout layer is appended to every block if > 0.
      with_maxpooling: if true, every block ends with 2x2 max pooling;
        otherwise the last conv of each block uses stride 2 instead.
      with_batchnorm: add BatchNormalization after the convolutions (the
        Conv2D layers then drop their bias).
      kernel_initializer: kernel_initializer.
      bias_initializer: bias and beta initializer.
      use_separable: if "dsp", do conv's 1x3 + 3x1. If "mobilenet",
        use MobileNet separable convolution. If False or "none", perform single
        conv layer.
      use_xnornet_trick: with max pooling enabled, move the bn+act of the
        last conv of each block to after the max pool.
      all_conv: if true, the classifier is a 1x1 convolution instead of a
        Dense layer.
    """
    self.shape = shape
    self.nb_classes = nb_classes
    self.kernel_size = kernel_size
    self.filters = filters
    self.dropout_rate = dropout_rate
    self.with_maxpooling = with_maxpooling
    self.with_batchnorm = with_batchnorm
    self.kernel_initializer = kernel_initializer
    self.bias_initializer = bias_initializer
    self.use_separable = use_separable
    self.use_xnornet_trick = use_xnornet_trick
    self.all_conv = all_conv

  def build(self):
    """Builds and returns the Keras `Model` described by the stored settings."""
    x = x_in = Input(self.shape, name="input")
    # i indexes the blocks, j the conv layers inside a block, k the kernels
    # that make up one conv layer (two for "dsp", otherwise one).
    for i in range(len(self.filters)):
      # The block index only appears in layer names when there are several blocks.
      if len(self.filters) > 1:
        name_suffix_list = [str(i)]
      else:
        name_suffix_list = []
      # Normalize a scalar entry to a one-element list of filter counts.
      if not isinstance(self.filters[i], list):
        filters = [self.filters[i]]
      else:
        filters = self.filters[i]
      for j in range(len(filters)):
        # Likewise the layer index is only added for multi-layer blocks.
        if len(filters) > 1:
          name_suffix = "_".join(name_suffix_list + [str(j)])
        else:
          name_suffix = "_".join(name_suffix_list)
        if self.use_separable == "dsp":
          kernels = [(1, self.kernel_size), (self.kernel_size, 1)]
        else:
          kernels = [(self.kernel_size, self.kernel_size)]
        for k, kernel in enumerate(kernels):
          strides = 1
          # Without max pooling, the very last convolution of the block
          # downsamples with stride 2.
          if (
              not self.with_maxpooling and j == len(filters)-1 and
              k == len(kernels)-1
          ):
            strides = 2
          # Layer-name suffixes: "dsp" and the depthwise part of "mobilenet"
          # are prefixed with the kernel dimensions, the mobilenet pointwise
          # conv with "11".
          if self.use_separable == "dsp":
            kernel_suffix = (
                "".join([str(k) for k in kernel]) + "_" + name_suffix)
          elif self.use_separable == "mobilenet":
            depth_suffix = (
                "".join([str(k) for k in kernel]) + "_" + name_suffix)
            kernel_suffix = "11_" + name_suffix
          else:
            kernel_suffix = name_suffix
          if self.use_separable == "mobilenet":
            # Depthwise conv (+ optional bn) + relu; it carries the stride, and
            # the Conv2D below then becomes a 1x1 conv with stride 1.
            x = DepthwiseConv2D(
                kernel,
                padding="same", strides=strides,
                use_bias=False,
                name="conv2d_dw_" + depth_suffix)(x)
            if self.with_batchnorm:
              x = BatchNormalization(name="conv2d_dw_bn_" + depth_suffix)(x)
            x = Activation("relu", name="conv2d_dw_act_" + depth_suffix)(x)
            kernel = (1, 1)
            strides = 1
          x = Conv2D(
              filters[j], kernel,
              strides=strides, use_bias=not self.with_batchnorm,
              padding="same",
              kernel_initializer=self.kernel_initializer,
              bias_initializer=self.bias_initializer,
              name="conv2d_" + kernel_suffix)(x)
          # With max pooling + xnornet trick, the last conv of the block gets
          # no bn/relu here; they are added after the pooling layer below.
          if not (
              self.with_maxpooling and self.use_xnornet_trick and
              j == len(filters)-1 and k == len(kernels)-1
          ):
            if self.with_batchnorm:
              x = BatchNormalization(
                  beta_initializer=self.bias_initializer,
                  name="bn_" + kernel_suffix)(x)
            x = Activation("relu", name="act_" + kernel_suffix)(x)
      # name_suffix still holds the value from the block's last conv layer,
      # so the pooling/dropout layers are named after that layer.
      if self.with_maxpooling:
        x = MaxPooling2D(2, 2, name="mp_" + name_suffix)(x)
        # this is a trick from xnornet to enable full binary or ternary
        # networks to be after maxpooling.
        # Note: this BatchNormalization is added even if with_batchnorm is False.
        if self.use_xnornet_trick:
          x = BatchNormalization(
              beta_initializer=self.bias_initializer,
              name="mp_bn_" + name_suffix)(x)
          x = Activation("relu", name="mp_act_" + name_suffix)(x)
      if self.dropout_rate > 0:
        x = Dropout(self.dropout_rate, name="drop_" + name_suffix)(x)

    # Classifier head. The layer names ("flatten", "dense", "softmax") are the
    # same in both variants; only their order and the type of "dense" differ.
    if not self.all_conv:
      x = Flatten(name="flatten")(x)
      x = Dense(
          self.nb_classes,
          kernel_initializer=self.kernel_initializer,
          bias_initializer=self.bias_initializer,
          name="dense")(x)
      x = Activation("softmax", name="softmax")(x)
    else:
      x = Conv2D(
          self.nb_classes, 1, strides=1, padding="same",
          kernel_initializer=self.kernel_initializer,
          bias_initializer=self.bias_initializer,
          name="dense")(x)
      x = Activation("softmax", name="softmax")(x)
      x = Flatten(name="flatten")(x)

    model = Model(inputs=[x_in], outputs=[x])

    return model


def get_model(dataset):
  """Returns a model for the demo of AutoQKeras.

  Args:
    dataset: one of "mnist", "fashion_mnist", "cifar10" or "cifar100".

  Returns:
    The Keras model produced by `ConvBlockNetwork(...).build()` for that
    dataset. Its summary is printed before it is returned.

  Raises:
    ValueError: if `dataset` is not one of the supported names. Without this
      check an unknown name only failed later with an UnboundLocalError.
  """
  # Settings shared by every architecture.
  common = dict(
      kernel_size=3,
      dropout_rate=0.2,
      with_batchnorm=True,
      kernel_initializer="he_uniform",
      bias_initializer="zeros",
  )
  # Extra settings of the MobileNet-style variants (everything except mnist).
  separable = dict(
      with_maxpooling=True,
      use_separable="mobilenet",
      use_xnornet_trick=True,
  )
  architectures = {
      "mnist": dict(
          shape=(28, 28, 1),
          nb_classes=10,
          filters=[16, 32, 48, 64, 128],
          with_maxpooling=False,
      ),
      "fashion_mnist": dict(
          shape=(28, 28, 1),
          nb_classes=10,
          filters=[16, [32]*3, [64]*3],
          **separable
      ),
      "cifar10": dict(
          shape=(32, 32, 3),
          nb_classes=10,
          filters=[16, [32]*3, [64]*3, [128]*3],
          **separable
      ),
      "cifar100": dict(
          shape=(32, 32, 3),
          nb_classes=100,
          filters=[16, [32]*3, [64]*3, [128]*3, [256]*3],
          **separable
      ),
  }

  if dataset not in architectures:
    raise ValueError(
        "Unsupported dataset {!r}; expected one of {}".format(
            dataset, sorted(architectures)))

  model = ConvBlockNetwork(**common, **architectures[dataset]).build()

  model.summary()

  return model

In [None]:
# Dataset used by the whole notebook. get_model() only has architectures for
# "mnist", "fashion_mnist", "cifar10" and "cifar100".
DATASET = "mnist"
# Scaled, mean-centred images and one-hot labels for both splits.
(x_train, y_train), (x_test, y_test) = get_data(DATASET)

[1mDownloading and preparing dataset 11.06 MiB (download: 11.06 MiB, generated: 21.00 MiB, total: 32.06 MiB) to ~/tensorflow_datasets/mnist/3.0.1...[0m


Dl Completed...:   0%|          | 0/4 [00:00<?, ? file/s]

[1mDataset mnist downloaded and prepared to ~/tensorflow_datasets/mnist/3.0.1. Subsequent calls will reuse this data.[0m
60000 train samples
10000 test samples


In [None]:
# Print every device TensorFlow reports; `physical_devices` is read again by
# the TPU check that selects the distribution strategy.
physical_devices = tf.config.list_physical_devices()
for d in physical_devices:
  print(d)

PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')
PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')


In [None]:
# True when at least one of the devices listed above is a TPU.
has_tpus = np.any([d.device_type == "TPU" for d in physical_devices])

if has_tpus:
  TPU_WORKER = 'local'

  resolver = tf.distribute.cluster_resolver.TPUClusterResolver(
      tpu=TPU_WORKER, job_name='tpu_worker')
  # TPU_WORKER is fixed to 'local' above, so this remote-cluster connection is
  # never made as the cell is written.
  if TPU_WORKER != 'local':
    tf.config.experimental_connect_to_cluster(resolver, protocol='grpc+loas')
  tf.tpu.experimental.initialize_tpu_system(resolver)
  strategy = tf.distribute.experimental.TPUStrategy(resolver)
  print('Number of devices: {}'.format(strategy.num_replicas_in_sync))

  cur_strategy = strategy
else:
  # No TPU found: keep whatever strategy TensorFlow currently has active.
  cur_strategy = tf.distribute.get_strategy()

In [None]:
#USE QTOOLS TO COMPUTE ENERGY ESTIMATE
# q = run_qtools.QTools(model,process,source_quantizers,is_inference,weights_path,keras_quantizer,keras_accumulator,for_reference)
# energy_dict = q.pe(weights_on_memory,activations_on_memory,min_sram_size,rd_wr_on_io)
# ENERGY X LAYER ---> energy_profile = q.extract_energy_profile(qtools_settings.cfg.include_energy, energy_dict)
# TOT ENERGY -----> total_energy = q.extract_energy_sum(qtools_settings.cfg.include_energy, energy_dict)
def calculate_energy_cost(my_model, print_per_layer=False, for_reference=False):
    """Estimates the energy of `my_model` with QTools and prints the total.

    Args:
      my_model: model handed to `run_qtools.QTools`.
      print_per_layer: if true, pretty-print the per-layer energy profile
        before the total.
      for_reference: forwarded to QTools; asks for the baseline (reference)
        energy instead of the energy of the quantized data types.

    Returns:
      The QTools energy sum divided by 1000; the printed message labels it
      as nJ (so the raw sum is presumably in pJ -- confirm against QTools).
    """
    qtools_options = dict(
        # "horowitz" is the 45nm process from M. Horowitz, "1.1 Computing's
        # energy problem (and what we can do about it)", ISSCC 2014,
        # pp. 10-14, doi: 10.1109/ISSCC.2014.6757323.
        process="horowitz",
        # Model inputs are images with 8 bits per pixel.
        source_quantizers=[quantized_bits(8, 0, 1)],
        is_inference=False,
        # No weight file is supplied.
        weights_path=None,
        # Data type assumed for weights/biases of un-quantized Keras layers.
        keras_quantizer="fp32",
        # Data type assumed for the MAC of un-quantized Keras layers.
        keras_accumulator="fp32",
        for_reference=for_reference,
    )
    estimator = run_qtools.QTools(my_model, **qtools_options)

    # Energy of the derived data type map: parameters and activations both
    # live in SRAM (sized as 16MB, expressed in bits), with no DRAM->SRAM
    # transfers accounted for.
    energy_dict = estimator.pe(
        weights_on_memory="sram",
        activations_on_memory="sram",
        min_sram_size=8 * 16 * 1024 * 1024,
        rd_wr_on_io=False,
    )

    # The same inclusion rule is used for the per-layer profile and the sum.
    include_rule = qtools_settings.cfg.include_energy
    energy_profile = estimator.extract_energy_profile(include_rule, energy_dict)
    total_energy = (
        estimator.extract_energy_sum(include_rule, energy_dict) / 1000.0)

    if print_per_layer:
        pprint.pprint(energy_profile)
        print("\n\n")

    print("Total energy: {:.2f} nJ".format(total_energy))
    return total_energy


  

In [None]:
#generate a file that sums up the main characteristics of the model
def model_summary_file(model,model_path,train_time,search_time,history,energy):
    """Writes `<model_path>/summary.txt` describing a trained model.

    The file holds the Keras summary, the QKeras quantization summary and
    statistics, followed by timing, accuracy and energy lines.

    Args:
      model: model whose summaries are printed.
      model_path: existing folder that receives summary.txt.
      train_time: value written as the total training time.
      search_time: value written as the total search time.
      history: object whose `.history` dict has 'acc' and 'val_acc' lists;
        the last entry of each is reported.
      energy: value written as the energy, labelled nJ.
    """
    report_path = model_path+"/summary.txt"
    # The summary helpers print to stdout, so stdout is pointed at the file
    # while they run; the remaining lines are written to the file directly.
    with open(report_path, 'w') as f, redirect_stdout(f):
        model.summary()
        print_qmodel_summary(model)
        qkeras.print_qstats(model)
        f.write("Total training time: {ttime}\n".format(ttime=train_time))
        f.write("Total search time: {stime}\n".format(stime=search_time))
        final_acc = history.history['acc'][-1]
        f.write("Acc: {acc}\n".format(acc=final_acc))
        final_val_acc = history.history['val_acc'][-1]
        f.write("Val_Acc: {vacc}\n".format(vacc=final_val_acc))
        f.write("Energy: {e} nJ\n".format(e=energy))


# Unquantized reference model

In [None]:
# Build the unquantized reference network for DATASET inside the
# distribution-strategy scope.
with cur_strategy.scope():
  umodel = get_model(DATASET) 
  # Empty mapping, later passed to AutoQKeras as its custom_objects argument.
  custom_objects = {}

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_0 (Conv2D)           (None, 14, 14, 16)        144       
                                                                 
 bn_0 (BatchNormalization)   (None, 14, 14, 16)        64        
                                                                 
 act_0 (Activation)          (None, 14, 14, 16)        0         
                                                                 
 drop_0 (Dropout)            (None, 14, 14, 16)        0         
                                                                 
 conv2d_1 (Conv2D)           (None, 7, 7, 32)          4608      
                                                                 
 bn_1 (BatchNormalization)   (None, 7, 7, 32)          128   

In [None]:
# Folder (relative to the working directory) holding the SavedModel of the
# unquantized reference network.
umodel_path = 'drive/MyDrive/Tesi/models/unquantized'

with cur_strategy.scope():

  if exists(umodel_path + '/saved_model.pb'):
    # A previous run already trained and saved the model: reuse it.
    # Note that `history`, `uftime` and `umodel_total_energy` are NOT defined
    # on this path.
    print("model already exists")   #debug
    # tf.keras is used explicitly; a bare `keras` name is only available here
    # if one of the star imports happens to export it.
    umodel = tf.keras.models.load_model(umodel_path)

  else:
    print("model not found, creating new model")
    # `learning_rate` is the supported spelling; `lr` is a deprecated alias.
    optimizer = Adam(learning_rate=0.02)
    umodel.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["acc"])
    t1 = time.time()
    # Validation only runs every 5th epoch (validation_freq=5).
    history = umodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test), validation_freq=5)
    t2 = time.time()
    uftime = t2 - t1  # total fit time of the unquantized model, in seconds
    # SavedModel export creates: assets, keras_metadata.pb, saved_model.pb
    # (architecture + training config) and variables (weights).
    umodel.save(umodel_path)
    umodel_total_energy = calculate_energy_cost(umodel, True, for_reference=True)
    # Save the model summary next to the model; search time is 0 because no
    # AutoQKeras search was run for the reference model.
    model_summary_file(umodel, umodel_path, uftime, 0, history, umodel_total_energy)


model already exists


# AUTOQKERAS_ENERGY

In [None]:
#quantization config and limit
def _quantizer_choices(template):
    """Returns {quantizer string: bit width} for all candidates of a template.

    For each total width in (4, 8, 16) one entry is generated per
    integer-bit count from 0 up to and including that width, in increasing
    order. `template` is a format string with `{bits}` and `{integer}` fields.
    """
    return {
        template.format(bits=bits, integer=integer): bits
        for bits in (4, 8, 16)
        for integer in range(bits + 1)
    }


# Weights, biases and linear activations share the same candidate strings;
# relu activations use their own quantizer.
_BITS_TEMPLATE = "quantized_bits({bits},{integer},1,alpha=1.0)"
_RELU_TEMPLATE = "quantized_relu({bits},{integer})"

# Search space: for every tensor kind, the quantizers AutoQKeras may pick,
# each mapped to the number of bits it uses.
quantization_config = {
    "kernel": _quantizer_choices(_BITS_TEMPLATE),
    "bias": _quantizer_choices(_BITS_TEMPLATE),
    "activation": _quantizer_choices(_RELU_TEMPLATE),
    "linear": _quantizer_choices(_BITS_TEMPLATE),
}


# Largest bit width allowed per layer type. Three-entry lists go with layers
# that have kernel, bias and activation (presumably in that order -- confirm
# against the AutoQKeras documentation); BatchNormalization has no limit entry.
_MAX_BITS = 16
limit = {
    "Dense": [_MAX_BITS] * 3,
    "Conv2D": [_MAX_BITS] * 3,
    "DepthwiseConv2D": [_MAX_BITS] * 3,
    "Activation": [_MAX_BITS],
    "BatchNormalization": [],
}

In [None]:
#Optimization goal and autoqkeras search parameters

# Energy-driven optimization target shared by the three searches below.
goal = {
    "type": "energy",
    "params": dict(
        delta_p=5.0,
        delta_n=5.0,
        rate=2.0,
        stress=1.0,
        process='horowitz',
        parameters_on_memory=['sram', 'sram'],
        activations_on_memory=['sram', 'sram'],
        rd_wr_on_io=[False, False],
        min_sram_size=[0, 0],
        source_quantizers=['int8'],
        reference_internal='fp16',
        reference_accumulator='fp16',
    ),
}


def _energy_search_config(output_dir, mode, tuner_options):
  """Returns the AutoQKeras settings for one energy-goal search.

  Args:
    output_dir: folder where the tuner stores its trials.
    mode: tuner type, one of "random", "bayesian" or "hyperband".
    tuner_options: tuner-specific entries appended after the shared ones.
  """
  config = {
    "output_dir": output_dir,
    "goal": goal,
    "quantization_config": quantization_config,
    "learning_rate_optimizer": False,
    "transfer_weights": False,
    "mode": mode,
    "seed": 42,
    "limit": limit,
    "tune_filters": "none",  # layer, block or none
    "tune_filters_exceptions": "^dense",
    "distribution_strategy": cur_strategy,  # tpu, gpu etc
    # Layers to quantize: every layer of umodel except the first and the
    # last one.
    "layer_indexes": range(1, len(umodel.layers) - 1),
  }
  config.update(tuner_options)
  return config


#random keras tuner search
energy_random_filepath = 'drive/MyDrive/Tesi/models/QE/Random/Autoqkeras_trials'
run_config_random = _energy_search_config(
    energy_random_filepath, "random", {"max_trials": 20})

#bayesian keras tuner search
energy_bayesian_filepath = 'drive/MyDrive/Tesi/models/QE/Bayesian/Autoqkeras_trials'
run_config_bayesian = _energy_search_config(
    energy_bayesian_filepath, "bayesian", {"max_trials": 20})

#hyperband keras tuner search
energy_hyperband_filepath = 'drive/MyDrive/Tesi/models/QE/Hyperband/Autoqkeras_trials'
run_config_hyper = _energy_search_config(
    energy_hyperband_filepath,
    "hyperband",
    {
      "max_epochs": 50,
      # reduction factor for the number of epochs and number of models for
      # each bracket
      "factor": 7,
      # number of times to iterate over the full Hyperband algorithm, one
      # iteration = max_epochs * (math.log(max_epochs, factor) ** 2)
      "hyperband_iterations": 1,
    })

#debug, checks which layers we are quantizing
for search_config in (run_config_random, run_config_bayesian, run_config_hyper):
  print("quantizing layers:", [umodel.layers[i].name for i in search_config["layer_indexes"]])

quantizing layers: ['conv2d_0', 'bn_0', 'act_0', 'drop_0', 'conv2d_1', 'bn_1', 'act_1', 'drop_1', 'conv2d_2', 'bn_2', 'act_2', 'drop_2', 'conv2d_3', 'bn_3', 'act_3', 'drop_3', 'conv2d_4', 'bn_4', 'act_4', 'drop_4', 'flatten', 'dense']
quantizing layers: ['conv2d_0', 'bn_0', 'act_0', 'drop_0', 'conv2d_1', 'bn_1', 'act_1', 'drop_1', 'conv2d_2', 'bn_2', 'act_2', 'drop_2', 'conv2d_3', 'bn_3', 'act_3', 'drop_3', 'conv2d_4', 'bn_4', 'act_4', 'drop_4', 'flatten', 'dense']
quantizing layers: ['conv2d_0', 'bn_0', 'act_0', 'drop_0', 'conv2d_1', 'bn_1', 'act_1', 'drop_1', 'conv2d_2', 'bn_2', 'act_2', 'drop_2', 'conv2d_3', 'bn_3', 'act_3', 'drop_3', 'conv2d_4', 'bn_4', 'act_4', 'drop_4', 'flatten', 'dense']


In [None]:
#model list (3 elements: random,bayesian,hyperband) + path list for weights
# Every list below has one slot per search strategy, always in the order
# random, bayesian, hyperband.
_QE_ROOT = './drive/MyDrive/Tesi/models/QE/'
_STRATEGY_FOLDERS = ('Random', 'Bayesian', 'Hyperband')

#final model path
model_path = [_QE_ROOT + folder for folder in _STRATEGY_FOLDERS]
random_model_path, bayesian_model_path, hyperband_model_path = model_path

#weights of the best model found by each search
weight_path = [folder + '/tmp/weights.h5' for folder in model_path]
random_model_tmp_path, bayesian_model_tmp_path, hyperband_model_tmp_path = weight_path

#checkpoint path (c_/b_/h_ = random/bayesian/hyperband)
checkpoint_filepath = [folder + '/tmp/checkpoint' for folder in model_path]
c_random, b_random, h_random = checkpoint_filepath

#placeholders, replaced by the best model of each search
QEmodel = [0] * len(_STRATEGY_FOLDERS)

#tot search time list
tot_time_energy = [0] * len(_STRATEGY_FOLDERS)


In [None]:
#QE RANDOM

#autoqkeras treats quantization as a hyperparameter search in Keras Tuner

# overwrite=False makes Keras Tuner resume the search from the trials already
# stored in the output directory instead of starting over.
QE_R = AutoQKeras(umodel, metrics=["acc"], custom_objects=custom_objects, **run_config_random, overwrite=False)

t_start_energy = time.time()
QE_R.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)
t_end_energy = time.time()
tot_time_energy[0] = t_end_energy - t_start_energy  # total AutoQKeras search time, in seconds


# Build an actual quantized model from the best quantization found by the
# search and store its weights for the final training.
QEmodel[0] = QE_R.get_best_model()
# Create the target folder first: writing the .h5 file fails when its parent
# directory does not exist yet.
os.makedirs(os.path.dirname(weight_path[0]), exist_ok=True)
QEmodel[0].save_weights(weight_path[0])

In [None]:
#QE BAYESIAN

# overwrite=False makes Keras Tuner resume the search from the trials already
# stored in the output directory instead of starting over.
QE_B = AutoQKeras(umodel, metrics=["acc"], custom_objects=custom_objects, **run_config_bayesian, overwrite=False)

t_start_energy = time.time()
QE_B.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)
t_end_energy = time.time()
tot_time_energy[1] = t_end_energy - t_start_energy  # total search time, in seconds

# Build the best quantized model and store its weights for the final training.
QEmodel[1] = QE_B.get_best_model()
# Create the target folder first: writing the .h5 file fails when its parent
# directory does not exist yet.
os.makedirs(os.path.dirname(weight_path[1]), exist_ok=True)
QEmodel[1].save_weights(weight_path[1])

In [None]:
#QE HYPER

# Stop a trial once the training loss has not improved for 3 epochs.
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)

# overwrite=False makes Keras Tuner resume the search from the trials already
# stored in the output directory instead of starting over.
QE_H = AutoQKeras(umodel, metrics=["acc"], custom_objects=custom_objects, **run_config_hyper, overwrite=False)

t_start_energy = time.time()
QE_H.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20, callbacks = [callback])
t_end_energy = time.time()
tot_time_energy[2] = t_end_energy - t_start_energy  # total search time, in seconds

# Build the best quantized model and store its weights for the final training.
QEmodel[2] = QE_H.get_best_model()
# Create the target folder first: writing the .h5 file fails when its parent
# directory does not exist yet.
os.makedirs(os.path.dirname(weight_path[2]), exist_ok=True)
QEmodel[2].save_weights(weight_path[2])

In [None]:
#Train Autoqkeras model
n_batch = 4096
n_epoch = 200

# Retrain the best model of each search (0=random, 1=bayesian, 2=hyperband)
# from the weights saved after the search.
for x in range(3):
  model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath[x],
    save_weights_only=False,
    # The model is compiled with metrics=["acc"], so the validation metric is
    # logged as "val_acc"; monitoring "val_accuracy" would never find a value.
    monitor='val_acc',
    mode='max',
    # An integer save_freq counts batches, not samples, and n_batch*5 batches
    # is more than the whole run, so no checkpoint was ever written. Checking
    # at the end of every epoch saves whenever a validated epoch (every 5th,
    # see validation_freq below) improves on the best val_acc so far.
    save_freq='epoch',
    save_best_only=True)

  QEmodel[x].load_weights(weight_path[x])
  path = model_path[x]
  with cur_strategy.scope():
    # `learning_rate` is the supported spelling; `lr` is a deprecated alias.
    optimizer = Adam(learning_rate=0.02)
    QEmodel[x].compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["acc"])
    start = time.time()
    QE_history = QEmodel[x].fit(x_train, y_train, epochs=n_epoch, batch_size=n_batch, validation_data=(x_test, y_test), validation_freq= 5, callbacks=[model_checkpoint_callback])
    end = time.time()
    ftimeE = end-start  # fit time in seconds
    # Save the model as it is after the last epoch, plus its summary file.
    # The best-val_acc version is the one kept in checkpoint_filepath[x].
    QEmodel[x].save(path)
    energy= calculate_energy_cost(QEmodel[x])
    model_summary_file(QEmodel[x],path,ftimeE,tot_time_energy[x],QE_history,energy)
  

# AUTOQKERAS_BIT


In [None]:
#Optimization goal and autoqkeras search parameters

# Bit-width ("bits") optimization goal shared by all three tuner modes below.
goalB = {
    "type": "bits",
    "params": {
        "delta_p": 5.0,
        "delta_n": 5.0,
        "rate": 2.0,
        "stress": 1.0,
        "input_bits": 8,
        "output_bits": 8,
        "ref_bits": 16,
        "config": {"default": ["parameters", "activations"]},
    },
}

# Keras Tuner trial directories, one per search mode.
bit_random_filepath = 'drive/MyDrive/Tesi/models/QB/Random/Autoqkeras_trials'
bit_bayesian_filepath = 'drive/MyDrive/Tesi/models/QB/Bayesian/Autoqkeras_trials'
bit_hyperband_filepath = 'drive/MyDrive/Tesi/models/QB/Hyperband/Autoqkeras_trials'


def _bit_search_config(output_dir, mode, **tuner_options):
  """Build an AutoQKeras run configuration for the bit-width goal.

  Args:
    output_dir: directory where the tuner stores its trials.
    mode: tuner mode, one of "random", "bayesian" or "hyperband".
    **tuner_options: mode-specific tuner settings appended after the shared
      entries (e.g. max_trials, or max_epochs/factor/hyperband_iterations).

  Returns:
    A new dict holding the shared settings followed by `tuner_options`.
  """
  config = {
    "output_dir": output_dir,
    "goal": goalB,
    "quantization_config": quantization_config,
    "learning_rate_optimizer": False,
    "transfer_weights": False,
    "mode": mode,
    "seed": 42,
    "limit": limit,
    "tune_filters": "none",  # layer, block or none
    "tune_filters_exceptions": "^dense",
    "distribution_strategy": cur_strategy,  # tpu, gpu etc
    # Layers to quantize: every layer except the first (input) and the last
    # (softmax); flatten and dense are included, as the debug print confirms.
    "layer_indexes": range(1, len(umodel.layers) - 1),
  }
  config.update(tuner_options)
  return config


# random keras tuner search
run_config_randomB = _bit_search_config(bit_random_filepath, "random", max_trials=20)

# bayesian keras tuner search
run_config_bayesianB = _bit_search_config(bit_bayesian_filepath, "bayesian", max_trials=20)

# hyperband keras tuner search
run_config_hyperB = _bit_search_config(
  bit_hyperband_filepath,
  "hyperband",
  max_epochs=50,
  # reduction factor for the number of epochs and number of models per bracket
  factor=3,
  # number of times to iterate over the full Hyperband algorithm,
  # one iteration = max_epochs * (math.log(max_epochs, factor) ** 2)
  hyperband_iterations=1,
)

# debug: show which layers each configuration is going to quantize
for search_config in (run_config_randomB, run_config_bayesianB, run_config_hyperB):
  print("quantizing layers:", [umodel.layers[i].name for i in search_config["layer_indexes"]])

quantizing layers: ['conv2d_0', 'bn_0', 'act_0', 'drop_0', 'conv2d_1', 'bn_1', 'act_1', 'drop_1', 'conv2d_2', 'bn_2', 'act_2', 'drop_2', 'conv2d_3', 'bn_3', 'act_3', 'drop_3', 'conv2d_4', 'bn_4', 'act_4', 'drop_4', 'flatten', 'dense']
quantizing layers: ['conv2d_0', 'bn_0', 'act_0', 'drop_0', 'conv2d_1', 'bn_1', 'act_1', 'drop_1', 'conv2d_2', 'bn_2', 'act_2', 'drop_2', 'conv2d_3', 'bn_3', 'act_3', 'drop_3', 'conv2d_4', 'bn_4', 'act_4', 'drop_4', 'flatten', 'dense']
quantizing layers: ['conv2d_0', 'bn_0', 'act_0', 'drop_0', 'conv2d_1', 'bn_1', 'act_1', 'drop_1', 'conv2d_2', 'bn_2', 'act_2', 'drop_2', 'conv2d_3', 'bn_3', 'act_3', 'drop_3', 'conv2d_4', 'bn_4', 'act_4', 'drop_4', 'flatten', 'dense']


In [None]:
# Per-search bookkeeping for the bit-goal runs. Every list below has three
# slots in the same order: random, bayesian, hyperband.

# best model returned by each search (0 until the search cell has run)
QBmodel = [0] * 3

# final model directories
model_pathB = [
    './drive/MyDrive/Tesi/models/QB/' + mode_name
    for mode_name in ('Random', 'Bayesian', 'Hyperband')
]
random_model_pathB, bayesian_model_pathB, hyperband_model_pathB = model_pathB

# temporary weight files written right after each search
weight_pathB = [model_dir + '/tmp/weights.h5' for model_dir in model_pathB]
random_model_tmp_pathB, bayesian_model_tmp_pathB, hyperband_model_tmp_pathB = weight_pathB

# checkpoint paths used while training (suffix R / B / H per search mode)
checkpoint_filepathB = [
    './drive/MyDrive/Tesi/tmp/checkpointB_' + mode_tag
    for mode_tag in 'RBH'
]

# total search time of each run
tot_time_bit = [0] * 3


In [None]:
#QB RANDOM
# Bit-goal AutoQKeras search using the random tuner configuration.

# Stop a trial when val_acc has not improved by at least 0.02 for 3 epochs.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_acc',
    min_delta=0.02,
    patience=3,
    verbose=1,
)

# overwrite=False lets Keras Tuner resume the search from its last iteration.
QB_R = AutoQKeras(
    umodel,
    metrics=["acc"],
    custom_objects=custom_objects,
    overwrite=False,
    **run_config_randomB,
)

t_start_bit = time.time()
QB_R.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=1024,
    epochs=20,
    callbacks=[callback],
)
t_end_bit = time.time()
tot_time_bit[0] = t_end_bit - t_start_bit  # total autoqkeras search time

# Build the quantized model from the best search result and store its weights
# so the training cell can reload them from weight_pathB[0].
QBmodel[0] = QB_R.get_best_model()
QBmodel[0].save_weights(weight_pathB[0])

Limit configuration:{"Dense": [16, 16, 16], "Conv2D": [16, 16, 16], "DepthwiseConv2D": [16, 16, 16], "Activation": [16], "BatchNormalization": []}


KeyboardInterrupt: ignored

In [None]:
#QB BAYESIAN
# Bit-goal AutoQKeras search using the Bayesian tuner configuration.

# Stop a trial when val_acc has not improved by at least 0.02 for 3 epochs.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_acc',
    min_delta=0.02,
    patience=3,
    verbose=1,
)

QB_B = AutoQKeras(
    umodel,
    metrics=["acc"],
    custom_objects=custom_objects,
    overwrite=False,
    **run_config_bayesianB,
)

t_start_bit = time.time()
QB_B.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=1024,
    epochs=20,
    callbacks=[callback],
)
t_end_bit = time.time()
tot_time_bit[1] = t_end_bit - t_start_bit  # total autoqkeras search time

# Keep the best model found by the search and store its weights so the
# training cell can reload them from weight_pathB[1].
QBmodel[1] = QB_B.get_best_model()
QBmodel[1].save_weights(weight_pathB[1])

Trial 20 Complete [00h 00m 13s]
val_score: 1.076310634613037

Best val_score So Far: 1.077683925628662
Total elapsed time: 00h 07m 53s
learning_rate: 0.019999999552965164
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_0 (QConv2D)          (None, 14, 14, 16)        144       
                                                                 
 bn_0 (QBatchNormalization)  (None, 14, 14, 16)        64        
                                                                 
 act_0 (QActivation)         (None, 14, 14, 16)        0         
                                                                 
 drop_0 (Dropout)            (None, 14, 14, 16)        0         
                                                                 
 conv2d_1 (QConv2D)   

In [None]:
#QB HYPER
# Bit-goal AutoQKeras search using the Hyperband tuner configuration.

# Stop a trial when val_acc has not improved by at least 0.02 for 3 epochs.
callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_acc',
    min_delta=0.02,
    patience=3,
    verbose=1,
)

QB_H = AutoQKeras(
    umodel,
    metrics=["acc"],
    custom_objects=custom_objects,
    overwrite=False,
    **run_config_hyperB,
)

t_start_bit = time.time()
QB_H.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    batch_size=1024,
    epochs=20,
    callbacks=[callback],
)
t_end_bit = time.time()
tot_time_bit[2] = t_end_bit - t_start_bit  # total autoqkeras search time

# Keep the best model found by the search and store its weights so the
# training cell can reload them from weight_pathB[2].
QBmodel[2] = QB_H.get_best_model()
QBmodel[2].save_weights(weight_pathB[2])

Trial 90 Complete [00h 00m 11s]
val_score: 1.0392191410064697

Best val_score So Far: 1.0458661317825317
Total elapsed time: 00h 17m 40s
learning_rate: 0.019999999552965164
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input (InputLayer)          [(None, 28, 28, 1)]       0         
                                                                 
 conv2d_0 (QConv2D)          (None, 14, 14, 16)        144       
                                                                 
 bn_0 (QBatchNormalization)  (None, 14, 14, 16)        64        
                                                                 
 act_0 (QActivation)         (None, 14, 14, 16)        0         
                                                                 
 drop_0 (Dropout)            (None, 14, 14, 16)        0         
                                                                 
 conv2d_1 (QConv2D) 

In [None]:
# Fine-tune the bit-optimized models found by the searches above, then save
# each trained model together with its summary file.
n_batch = 4096  # training batch size
n_epoch = 200   # upper bound on training epochs

# Stop training when val_acc has not improved by at least 0.02 for 3 checks.
# val_acc is only produced on validation epochs (validation_freq=5 below), so
# the patience is effectively counted in validation rounds.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_acc', patience=3,min_delta=0.02,verbose=1)

# TODO: resuming training from the latest checkpoint is not implemented yet.

# Only the bayesian (1) and hyperband (2) models are trained here; slot 0
# (random search) is skipped - presumably because that search cell was
# interrupted and QBmodel[0] was never built. NOTE(review): confirm.
for x in [1,2]:
  # Keep a checkpoint of the best model according to validation accuracy.
  # - The model is compiled with metrics=["acc"], so Keras logs the validation
  #   metric as 'val_acc'; monitoring 'val_accuracy' would never find a value.
  # - save_freq must be 'epoch': an integer save_freq is counted in training
  #   batches and validation metrics are not available at batch granularity.
  model_checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepathB[x],
    save_weights_only=False,
    monitor='val_acc',
    mode='max',
    save_freq='epoch',
    save_best_only=True)

  # Start from the weights saved right after the AutoQKeras search.
  QBmodel[x].load_weights(weight_pathB[x])
  path = model_pathB[x]

  with cur_strategy.scope():
    # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
    optimizer = Adam(learning_rate=0.02)
    QBmodel[x].compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["acc"])
    startB = time.time()
    QB_history = QBmodel[x].fit(x_train, y_train, epochs=n_epoch, batch_size=n_batch, validation_data=(x_test, y_test), validation_freq= 5, callbacks=[model_checkpoint_callback,callback])
    endB = time.time()
    ftimeB = endB-startB  # wall-clock training time in seconds
    # Save the QB model as it is at the end of training (the best-by-val_acc
    # snapshot lives at checkpoint_filepathB[x]) and write the summary file.
    QBmodel[x].save(path)
    energyB= calculate_energy_cost(QBmodel[x])
    model_summary_file(QBmodel[x],path,ftimeB,tot_time_bit[x],QB_history,energyB)
  

Epoch 1/200



Epoch 2/200



Epoch 3/200



Epoch 4/200



Epoch 5/200
Epoch 6/200



Epoch 7/200



Epoch 8/200



Epoch 9/200



Epoch 10/200
Epoch 11/200



Epoch 12/200



Epoch 13/200



Epoch 14/200



Epoch 15/200
Epoch 16/200



Epoch 17/200



Epoch 18/200



Epoch 19/200



Epoch 20/200
Epoch 21/200



Epoch 22/200



Epoch 23/200



Epoch 24/200



Epoch 25/200
Epoch 25: early stopping




Total energy: 160.74 nJ


Instructions for updating:
Use ref() instead.


Epoch 1/200



Epoch 2/200



Epoch 3/200



Epoch 4/200



Epoch 5/200
Epoch 6/200



Epoch 7/200



Epoch 8/200



Epoch 9/200



Epoch 10/200
Epoch 11/200



Epoch 12/200



Epoch 13/200



Epoch 14/200



Epoch 15/200
Epoch 16/200



Epoch 17/200



Epoch 18/200



Epoch 19/200



Epoch 20/200
Epoch 21/200



Epoch 22/200



Epoch 23/200



Epoch 24/200



Epoch 25/200
Epoch 26/200



Epoch 27/200



Epoch 28/200



Epoch 29/200



Epoch 30/200
Epoch 30: early stopping




Total energy: 271.52 nJ


# TEST BLOCKS

In [None]:

#quantization config and limit

# Quantizer string templates. {bits} is the total bit width and {integer} is
# the quantizer's second argument (named `integer` in QKeras).
_BITS_TEMPLATE = "quantized_bits({bits},{integer},1,alpha=1.0)"
_RELU_TEMPLATE = "quantized_relu({bits},{integer})"


def _quantizer_choices(template, bit_widths=(4, 8, 16)):
  """Enumerate the quantizer candidates for one quantization_config group.

  Args:
    template: format string with `{bits}` and `{integer}` placeholders.
    bit_widths: total bit widths to enumerate, in output order.

  Returns:
    A new dict mapping each quantizer string to its bit width. For every
    width, `integer` runs from 0 up to and including the width.
  """
  return {
      template.format(bits=bits, integer=integer): bits
      for bits in bit_widths
      for integer in range(bits + 1)
  }


# Search space offered to AutoQKeras: one independent candidate table per group.
quantization_config = {
    "kernel": _quantizer_choices(_BITS_TEMPLATE),
    "bias": _quantizer_choices(_BITS_TEMPLATE),
    "activation": _quantizer_choices(_RELU_TEMPLATE),
    "linear": _quantizer_choices(_BITS_TEMPLATE),
}


# Per-layer-type limit configuration handed to AutoQKeras.
limit = dict(
    Dense=[16, 16, 16],
    Conv2D=[16, 16, 16],
    DepthwiseConv2D=[16, 16, 16],
    Activation=[16],
    BatchNormalization=[],
)

# Energy optimization goal and its parameters.
goal = dict(
    type="energy",
    params=dict(
        delta_p=5.0,
        delta_n=5.0,
        rate=2.0,
        stress=1.0,
        process="horowitz",
        parameters_on_memory=["sram", "sram"],
        activations_on_memory=["sram", "sram"],
        rd_wr_on_io=[False, False],
        min_sram_size=[0, 0],
        source_quantizers=["int8"],
        reference_internal="fp16",
        reference_accumulator="fp16",
    ),
)



# Block-wise (AutoQKerasScheduler) test run with the energy goal.
energy_test = 'drive/MyDrive/Tesi/models/QE/test'

run_config_test = dict(
  output_dir=energy_test,
  goal=goal,
  quantization_config=quantization_config,
  learning_rate_optimizer=False,
  transfer_weights=False,
  mode="bayesian",  # can be random, bayesian, hyperband
  seed=42,
  limit=limit,
  tune_filters="none",  # layer, block or none
  tune_filters_exceptions="^dense",
  distribution_strategy=cur_strategy,  # tpu, gpu etc
  # Layers to quantize: every layer except the first (input) and the last one.
  layer_indexes=range(1, len(umodel.layers) - 1),
  max_trials=10,

  #TEST BLOCKS
  # Regular expressions matched against layer names; each pattern is one block.
  blocks=["^.*_0$", "^.*_1$", "^.*_2$", "^.*_3$", "^.*_4$", "^dense"],
  # "cost" schedules the blocks by decreasing cost size first
  schedule_block="cost",
)

# Show the layer names so the block patterns can be checked against them.
pprint.pprint([layer.name for layer in umodel.layers])

# Scheduler run with debug=True.
autoqk = AutoQKerasScheduler(
  umodel,
  metrics=["acc"],
  custom_objects=custom_objects,
  debug=True,
  **run_config_test,
)
autoqk.fit(
  x_train,
  y_train,
  validation_data=(x_test, y_test),
  batch_size=1024,
  epochs=20,
)






['input',
 'conv2d_0',
 'bn_0',
 'act_0',
 'drop_0',
 'conv2d_1',
 'bn_1',
 'act_1',
 'drop_1',
 'conv2d_2',
 'bn_2',
 'act_2',
 'drop_2',
 'conv2d_3',
 'bn_3',
 'act_3',
 'drop_3',
 'conv2d_4',
 'bn_4',
 'act_4',
 'drop_4',
 'flatten',
 'dense',
 'softmax']
... block cost: 339050 / 1008381
... adjusting max_trials for this block to 10
Pattern 0 is : {'conv2d_1': [16, 16, 16], 'bn_1': [], 'act_1': [16]}
... block cost: 332853 / 1008381
... adjusting max_trials for this block to 10
Pattern 1 is : {'conv2d_2': [16, 16, 16], 'bn_2': [], 'act_2': [16]}
... block cost: 168812 / 1008381
... adjusting max_trials for this block to 10
Pattern 2 is : {'conv2d_3': [16, 16, 16], 'bn_3': [], 'act_3': [16]}
... block cost: 123198 / 1008381
... adjusting max_trials for this block to 10
Pattern 3 is : {'conv2d_4': [16, 16, 16], 'bn_4': [], 'act_4': [16]}
... block cost: 42490 / 1008381
... adjusting max_trials for this block to 10
Pattern 4 is : {'conv2d_0': [16, 16, 16], 'bn_0': [], 'act_0': [16]}
..

In [None]:
# Run the block-wise AutoQKeras search for real (no debug flag) and time it.
autoqk = AutoQKerasScheduler(umodel, metrics=["acc"], custom_objects=custom_objects, **run_config_test)
start = time.time()
autoqk.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=1024, epochs=20)
end = time.time()
tot_search = end - start  # total search time in seconds

tmp_path =  './drive/MyDrive/Tesi/models/QE/test/weights.h5'

# Persist the weights of the best model found by the scheduler.
qmodel = autoqk.get_best_model()
qmodel.save_weights(tmp_path)


# Reload the saved weights and fine-tune the quantized model.
qmodel.load_weights(tmp_path)
with cur_strategy.scope():
  # `learning_rate` is the supported argument name; `lr` is a deprecated alias.
  optimizer = Adam(learning_rate=0.02)
  qmodel.compile(optimizer=optimizer, loss="categorical_crossentropy", metrics=["acc"])
  qmodel.fit(x_train, y_train, epochs=200, batch_size=4096, validation_data=(x_test, y_test))

Trial 10 Complete [00h 00m 33s]
val_score: 1.1332590579986572

Best val_score So Far: 1.133856177330017
Total elapsed time: 00h 06m 12s
Results summary
Results in ./drive/MyDrive/Tesi/models/QE/test_1/5
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x7fd2bd2f5150>
Trial summary
Hyperparameters:
dense_kernel_quantizer: quantized_bits(4,0,1,alpha=1.0)
dense_bias_quantizer: quantized_bits(4,0,1,alpha=1.0)
Score: 1.133856177330017
Trial summary
Hyperparameters:
dense_kernel_quantizer: quantized_bits(16,5,1,alpha=1.0)
dense_bias_quantizer: quantized_bits(4,4,1,alpha=1.0)
Score: 1.1332590579986572
Trial summary
Hyperparameters:
dense_kernel_quantizer: quantized_bits(16,1,1,alpha=1.0)
dense_bias_quantizer: quantized_bits(16,16,1,alpha=1.0)
Score: 1.133127212524414
Trial summary
Hyperparameters:
dense_kernel_quantizer: quantized_bits(8,3,1,alpha=1.0)
dense_bias_quantizer: quantized_bits(16,16,1,alpha=1.0)
Score: 1.132880687713623
Trial summary
Hyperparameters:
dense_



stats: delta_p=0.05 delta_n=0.05 rate=2.0 trial_size=135382 reference_size=1008381
       delta=14.48%
Total Cost Reduction:
       135382 vs 1008381 (-86.57%)
conv2d_0             f=16 quantized_bits(16,6,1,alpha=1.0) 
bn_0                 QBN, mean=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
act_0                quantized_relu(4,1)
conv2d_1             f=32 quantized_bits(16,6,1,alpha=1.0) 
bn_1                 QBN, mean=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0.]
act_1                quantized_relu(4,1)
conv2d_2             f=48 quantized_bits(4,0,1,alpha=1.0) 
bn_2                 QBN, mean=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
act_2                quantized_relu(8,8)
conv2d_3             f=64 quantized_bits(4,0,1,alpha=1.0) 
bn_3                 QBN, mean=[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0

In [None]:
# Display the elapsed wall-clock time of the scheduler search, in seconds
# (difference of two time.time() readings taken around autoqk.fit).
tot_search

2235.9604432582855