# Install packages + check hardware

In [None]:
! pip install tensorflow==2.9.0

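Restart the runtime after the TensorFlow install above so that the pinned version (2.9.0) is the one imported by the following cells.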

In [None]:
!pip install tensorflow_model_optimization

In [None]:
from google.colab import drive
import tensorflow as tf
import tensorflow_model_optimization as tfmot

# Mount Google Drive at /content/drive; later cells read and write model files under that path.
drive.mount('/content/drive')
# Confirm that the pinned TensorFlow build is the one actually loaded.
print("tensorflow version: ", tf.__version__) # expected: 2.9.0
# List the GPUs attached to this runtime.
!nvidia-smi -L

# Post-training quantization

## Quantization.py

In [None]:
import tensorflow as tf

class Contrastive_Loss_2(tf.keras.losses.Loss):
  """Blend of a Euclidean-distance term and a temperature-scaled cosine contrastive term.

  `call(z1, z2)` expects two rank-2 tensors of identical shape (B, E); row i of
  `z1` and row i of `z2` form a positive pair. The returned scalar is
  `(1 - rate) * mean_distance + rate * mean_contrastive`.

  Args:
    temperature: divisor applied to every cosine similarity before `exp`.
    rate: weight of the contrastive term; the distance term is weighted `1 - rate`.
    name: loss name forwarded to `tf.keras.losses.Loss`.
    **kwargs: forwarded to `tf.keras.losses.Loss` (e.g. `reduction`).
  """
  def __init__(self, temperature=0.5, rate=0.5, name='Contrastive_Loss_2', **kwargs):
    super(Contrastive_Loss_2, self).__init__(name=name, **kwargs)
    self.temperature   = temperature
    self.rate          = rate
    # Keras' CosineSimilarity loss yields the NEGATIVE cosine similarity, hence the
    # leading minus sign on every use in `call`.
    self.cosine_sim    = tf.keras.losses.CosineSimilarity(axis=-1, reduction=tf.keras.losses.Reduction.NONE)

  def get_config(self):
    """Return the base config plus `temperature` and `rate`, so a saved loss is rebuilt with the same settings."""
    config = super(Contrastive_Loss_2, self).get_config()
    config.update({'temperature': self.temperature, 'rate': self.rate})
    return config

  def call(self, z1, z2):
    """Compute the combined loss for one batch of paired embeddings.

    Args:
      z1: tensor of shape (B, E) with a statically known batch size.
      z2: tensor of the same shape as `z1`.

    Returns:
      Scalar tensor: weighted sum of the mean pair distance and the mean contrastive loss.

    Raises:
      The error raised by `tf.debugging.check_numerics` when one of the three
      intermediate scalars is NaN or Inf.
    """
    batch_size, _ = z1.shape                                            # static shape; unpacking also enforces rank 2

    # --- Euclidean distance between the two members of each positive pair ---
    difference    = z1 - z2                                             # (B, E)
    squared_norm  = tf.reduce_sum(tf.square(difference), axis=1)        # (B)
    distance      = tf.sqrt(squared_norm + 1e-8)                        # (B)   epsilon keeps the sqrt gradient finite at 0
    mean_distance = tf.reduce_mean(distance)                            # scalar
    # The tensor is checked directly: no `.numpy()` host copy, and no failure on symbolic tensors.
    tf.debugging.check_numerics(mean_distance, 'Distance contains NaN values.')

    # --- Cosine-similarity contrastive term ---
    z = tf.concat((z1, z2), 0)                                          # (2B, E)

    sim_ij      = - self.cosine_sim(z[:batch_size], z[batch_size:])     # (B)  similarity of pair (z1_i, z2_i)
    sim_ji      = - self.cosine_sim(z[batch_size:], z[:batch_size])     # (B)  same pairs, seen from z2
    sim_pos     = tf.concat((sim_ij,sim_ji), axis=0)                    # (2B) one positive similarity per row of z
    numerator   = tf.math.exp(sim_pos / self.temperature)               # (2B)

    sim_neg     = - self.cosine_sim(tf.expand_dims(z, 1), z)            # (2B,1,E) vs (2B,E) broadcast -> (2B, 2B)
    mask        = 1 - tf.eye(2*batch_size, dtype=tf.float32)            # (2B, 2B) zero on the diagonal: drop self-similarity
    sim_neg     = mask * tf.math.exp(sim_neg / self.temperature)        # (2B, 2B)
    denominator = tf.math.reduce_sum(sim_neg, axis=-1)                  # (2B)

    # The small epsilons keep the ratio and the log finite.
    mean_cosine_similarity = tf.reduce_mean(- tf.math.log((numerator + 1e-11) / (denominator + 1e-11)))       # scalar
    tf.debugging.check_numerics(mean_cosine_similarity, 'Cosine contains NaN values.')

    # --- Weighted combination of the two terms ---
    total_loss = (1-self.rate)*mean_distance + self.rate*mean_cosine_similarity
    tf.debugging.check_numerics(total_loss, 'Total contains NaN values.')
    return total_loss

In [None]:
import tensorflow as tf
import os

# Source Keras model and destination TFLite file; each path is defined once and reused below.
best_model_path  = '/content/drive/MyDrive/RSIC/NWPU-RESISC45/effb0_567_con_2loss_2/contrastive_model.h5'
quant_model_path = '/content/drive/MyDrive/RSIC/NWPU-RESISC45/effb0_567_con_2loss_2/tflite_model.tflite'

best_model = tf.keras.models.load_model(filepath=best_model_path,
                                        custom_objects={'Contrastive_Loss_2': Contrastive_Loss_2})

# Optimize.DEFAULT with no representative dataset: per the TFLite post-training
# quantization guide this is dynamic-range quantization (8-bit weights).
converter = tf.lite.TFLiteConverter.from_keras_model(best_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

# Write the converted flatbuffer; the context manager guarantees the handle is
# flushed and closed before the file size is read below.
with open(quant_model_path, "wb") as tflite_file:
  tflite_file.write(tflite_quant_model)

# File sizes in MiB: original model first, quantized model second.
print( os.path.getsize(best_model_path) / float(2**20))
print( os.path.getsize(quant_model_path) / float(2**20))

## Evaluate_tflite_model.py

In [None]:
import tensorflow as tf
from datetime import datetime


# Restrict TensorFlow to the first GPU when at least one is attached.
physical_devices = tf.config.list_physical_devices('GPU')
if len(physical_devices) > 0:
    first_gpu = physical_devices[0]
    tf.config.set_visible_devices(first_gpu, 'GPU')

# To run on CPU instead, hide every GPU:
# tf.config.set_visible_devices([], 'GPU')

def Accuracy(y_true, y_pred):
  """Count the rows whose predicted class equals the true class.

  Args:
    y_true: array-like of per-class scores / one-hot labels, classes on the last axis.
    y_pred: array-like of per-class scores with the same shape as `y_true`.

  Returns:
    The number of matching rows as a NumPy float32 scalar (a count, not a ratio;
    the caller divides by the dataset size).

  Raises:
    ValueError: if `y_true` and `y_pred` do not have the same shape.
  """
  y_true = tf.convert_to_tensor(y_true)
  y_pred = tf.convert_to_tensor(y_pred)
  # Validate BEFORE reducing: after argmax the class axis is gone, so a
  # class-count mismatch could no longer be detected.
  if y_true.shape != y_pred.shape:
    raise ValueError('Something error in Acc calculation')
  true_idx = tf.argmax(y_true, axis=-1) # (B)
  pred_idx = tf.argmax(y_pred, axis=-1) # (B)
  correct  = tf.cast(true_idx == pred_idx, tf.float32)
  accuracy = tf.reduce_sum(correct)
  return accuracy.numpy()

TEST_DIR         = '/content/dataset2/test/'
# NOTE(review): TestGenerator is not defined in this notebook; it must already exist in the kernel.
test_generator   = TestGenerator(img_dir=TEST_DIR)
quant_model_path = '/content/drive/MyDrive/RSIC/NWPU-RESISC45/effb0_567_con_2loss_2/tflite_model.tflite'
interpreter = tf.lite.Interpreter(model_path=quant_model_path) # Load the TFLite model.

# Resize the model input to a batch of 45 images, addressing the tensor by the
# index the interpreter reports rather than assuming it is 0.
input_index = interpreter.get_input_details()[0]['index']
interpreter.resize_tensor_input(input_index, [45, 256, 256, 3])
interpreter.allocate_tensors() # Allocate memory for input and output tensors.

# Tensor details do not change between invocations: look them up once, after allocation.
input_details  = interpreter.get_input_details()
output_details = interpreter.get_output_details()
output_index   = output_details[0]['index']

start_test   = datetime.now()
test_acc     = 0
for n_batch_test in range(560):
  x_test, y_true, n_imgs = test_generator.get_batch(n_batch_test)
  interpreter.set_tensor(input_index, x_test)
  interpreter.invoke()
  y_pred         = interpreter.get_tensor(output_index)
  test_acc      += Accuracy(y_true, y_pred)   # running count of correct predictions

# 560 batches * 45 images = 25200 test images.
test_acc /= 25200
print('# test accuray: ', test_acc, '  and time needed for test: ', datetime.now()-start_test)

In [None]:
import numpy as np  # required by the uint8 cast below; not imported anywhere else in the notebook
from PIL import Image

# Show the last image of the most recent test batch (`x_test` is left over from the evaluation loop).
# NOTE(review): assumes pixel values are already in the 0-255 range; multiply by 255 first if they are in 0-1.
img = Image.fromarray((x_test[-1].numpy()).astype(np.uint8), 'RGB') # *255
img.show()

# Network pruning

## Load_pre_trained_model.py

In [None]:
import tensorflow as tf
import os

class Contrastive_Loss_2(tf.keras.losses.Loss):
  """Blend of a Euclidean-distance term and a temperature-scaled cosine contrastive term.

  `call(z1, z2)` expects two rank-2 tensors of identical shape (B, E); row i of
  `z1` and row i of `z2` form a positive pair. The returned scalar is
  `(1 - rate) * mean_distance + rate * mean_contrastive`.

  Args:
    temperature: divisor applied to every cosine similarity before `exp`.
    rate: weight of the contrastive term; the distance term is weighted `1 - rate`.
    name: loss name forwarded to `tf.keras.losses.Loss`.
    **kwargs: forwarded to `tf.keras.losses.Loss` (e.g. `reduction`).
  """
  def __init__(self, temperature=0.5, rate=0.5, name='Contrastive_Loss_2', **kwargs):
    super(Contrastive_Loss_2, self).__init__(name=name, **kwargs)
    self.temperature   = temperature
    self.rate          = rate
    # Keras' CosineSimilarity loss yields the NEGATIVE cosine similarity, hence the
    # leading minus sign on every use in `call`.
    self.cosine_sim    = tf.keras.losses.CosineSimilarity(axis=-1, reduction=tf.keras.losses.Reduction.NONE)

  def get_config(self):
    """Return the base config plus `temperature` and `rate`, so a saved loss is rebuilt with the same settings."""
    config = super(Contrastive_Loss_2, self).get_config()
    config.update({'temperature': self.temperature, 'rate': self.rate})
    return config

  def call(self, z1, z2):
    """Compute the combined loss for one batch of paired embeddings.

    Args:
      z1: tensor of shape (B, E) with a statically known batch size.
      z2: tensor of the same shape as `z1`.

    Returns:
      Scalar tensor: weighted sum of the mean pair distance and the mean contrastive loss.

    Raises:
      The error raised by `tf.debugging.check_numerics` when one of the three
      intermediate scalars is NaN or Inf.
    """
    batch_size, _ = z1.shape                                            # static shape; unpacking also enforces rank 2

    # --- Euclidean distance between the two members of each positive pair ---
    difference    = z1 - z2                                             # (B, E)
    squared_norm  = tf.reduce_sum(tf.square(difference), axis=1)        # (B)
    distance      = tf.sqrt(squared_norm + 1e-8)                        # (B)   epsilon keeps the sqrt gradient finite at 0
    mean_distance = tf.reduce_mean(distance)                            # scalar
    # The tensor is checked directly: no `.numpy()` host copy, and no failure on symbolic tensors.
    tf.debugging.check_numerics(mean_distance, 'Distance contains NaN values.')

    # --- Cosine-similarity contrastive term ---
    z = tf.concat((z1, z2), 0)                                          # (2B, E)

    sim_ij      = - self.cosine_sim(z[:batch_size], z[batch_size:])     # (B)  similarity of pair (z1_i, z2_i)
    sim_ji      = - self.cosine_sim(z[batch_size:], z[:batch_size])     # (B)  same pairs, seen from z2
    sim_pos     = tf.concat((sim_ij,sim_ji), axis=0)                    # (2B) one positive similarity per row of z
    numerator   = tf.math.exp(sim_pos / self.temperature)               # (2B)

    sim_neg     = - self.cosine_sim(tf.expand_dims(z, 1), z)            # (2B,1,E) vs (2B,E) broadcast -> (2B, 2B)
    mask        = 1 - tf.eye(2*batch_size, dtype=tf.float32)            # (2B, 2B) zero on the diagonal: drop self-similarity
    sim_neg     = mask * tf.math.exp(sim_neg / self.temperature)        # (2B, 2B)
    denominator = tf.math.reduce_sum(sim_neg, axis=-1)                  # (2B)

    # The small epsilons keep the ratio and the log finite.
    mean_cosine_similarity = tf.reduce_mean(- tf.math.log((numerator + 1e-11) / (denominator + 1e-11)))       # scalar
    tf.debugging.check_numerics(mean_cosine_similarity, 'Cosine contains NaN values.')

    # --- Weighted combination of the two terms ---
    total_loss = (1-self.rate)*mean_distance + self.rate*mean_cosine_similarity
    tf.debugging.check_numerics(total_loss, 'Total contains NaN values.')
    return total_loss
    
# Restore the trained classifier whose weights seed the pruning model built later.
# The custom loss class has to be registered so Keras can deserialize it.
pre_trained_model_path = '/content/drive/MyDrive/RSIC/NWPU-RESISC45/effb0_567_con_2loss_2/contrastive_model.h5'
pre_trained_model = tf.keras.models.load_model(
    filepath=pre_trained_model_path,
    custom_objects={'Contrastive_Loss_2': Contrastive_Loss_2},
)

## Load_or_create_model_for_pruning.py

In [None]:
import os
import tensorflow_model_optimization as tfmot
import tensorflow as tf
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg19 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D, GlobalAveragePooling1D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling1D
from tensorflow.keras.layers import Conv2D, Activation, Dropout, Flatten, Input, Dense, MultiHeadAttention
from tensorflow.keras.layers import Add, Average, Concatenate,Reshape, multiply, Permute, Lambda
from tensorflow.keras import initializers, regularizers
from tensorflow.keras.activations import sigmoid


class Contrastive_Loss_2(tf.keras.losses.Loss):
  """Blend of a Euclidean-distance term and a temperature-scaled cosine contrastive term.

  `call(z1, z2)` expects two rank-2 tensors of identical shape (B, E); row i of
  `z1` and row i of `z2` form a positive pair. The returned scalar is
  `(1 - rate) * mean_distance + rate * mean_contrastive`.

  Args:
    temperature: divisor applied to every cosine similarity before `exp`.
    rate: weight of the contrastive term; the distance term is weighted `1 - rate`.
    name: loss name forwarded to `tf.keras.losses.Loss`.
    **kwargs: forwarded to `tf.keras.losses.Loss` (e.g. `reduction`).
  """
  def __init__(self, temperature=0.5, rate=0.5, name='Contrastive_Loss_2', **kwargs):
    super(Contrastive_Loss_2, self).__init__(name=name, **kwargs)
    self.temperature   = temperature
    self.rate          = rate
    # Keras' CosineSimilarity loss yields the NEGATIVE cosine similarity, hence the
    # leading minus sign on every use in `call`.
    self.cosine_sim    = tf.keras.losses.CosineSimilarity(axis=-1, reduction=tf.keras.losses.Reduction.NONE)

  def get_config(self):
    """Return the base config plus `temperature` and `rate`, so a saved loss is rebuilt with the same settings."""
    config = super(Contrastive_Loss_2, self).get_config()
    config.update({'temperature': self.temperature, 'rate': self.rate})
    return config

  def call(self, z1, z2):
    """Compute the combined loss for one batch of paired embeddings.

    Args:
      z1: tensor of shape (B, E) with a statically known batch size.
      z2: tensor of the same shape as `z1`.

    Returns:
      Scalar tensor: weighted sum of the mean pair distance and the mean contrastive loss.

    Raises:
      The error raised by `tf.debugging.check_numerics` when one of the three
      intermediate scalars is NaN or Inf.
    """
    batch_size, _ = z1.shape                                            # static shape; unpacking also enforces rank 2

    # --- Euclidean distance between the two members of each positive pair ---
    difference    = z1 - z2                                             # (B, E)
    squared_norm  = tf.reduce_sum(tf.square(difference), axis=1)        # (B)
    distance      = tf.sqrt(squared_norm + 1e-8)                        # (B)   epsilon keeps the sqrt gradient finite at 0
    mean_distance = tf.reduce_mean(distance)                            # scalar
    # The tensor is checked directly: no `.numpy()` host copy, and no failure on symbolic tensors.
    tf.debugging.check_numerics(mean_distance, 'Distance contains NaN values.')

    # --- Cosine-similarity contrastive term ---
    z = tf.concat((z1, z2), 0)                                          # (2B, E)

    sim_ij      = - self.cosine_sim(z[:batch_size], z[batch_size:])     # (B)  similarity of pair (z1_i, z2_i)
    sim_ji      = - self.cosine_sim(z[batch_size:], z[:batch_size])     # (B)  same pairs, seen from z2
    sim_pos     = tf.concat((sim_ij,sim_ji), axis=0)                    # (2B) one positive similarity per row of z
    numerator   = tf.math.exp(sim_pos / self.temperature)               # (2B)

    sim_neg     = - self.cosine_sim(tf.expand_dims(z, 1), z)            # (2B,1,E) vs (2B,E) broadcast -> (2B, 2B)
    mask        = 1 - tf.eye(2*batch_size, dtype=tf.float32)            # (2B, 2B) zero on the diagonal: drop self-similarity
    sim_neg     = mask * tf.math.exp(sim_neg / self.temperature)        # (2B, 2B)
    denominator = tf.math.reduce_sum(sim_neg, axis=-1)                  # (2B)

    # The small epsilons keep the ratio and the log finite.
    mean_cosine_similarity = tf.reduce_mean(- tf.math.log((numerator + 1e-11) / (denominator + 1e-11)))       # scalar
    tf.debugging.check_numerics(mean_cosine_similarity, 'Cosine contains NaN values.')

    # --- Weighted combination of the two terms ---
    total_loss = (1-self.rate)*mean_distance + self.rate*mean_cosine_similarity
    tf.debugging.check_numerics(total_loss, 'Total contains NaN values.')
    return total_loss

def trippleAttention(x): # x: feature map, 8x8xc per sample
  """Gate a feature map with three self-attention branches, average them and pool.

  Each branch collapses one axis of `x` with mean + max, runs the result through
  its own MultiHeadAttention layer (query and value are the same tensor), squashes
  it with a sigmoid, reshapes it so it broadcasts against `x`, and multiplies it
  into `x`. The three gated maps are averaged and globally average-pooled.

  Every call creates three new MultiHeadAttention layers, so weights are not
  shared between calls. The spatial size is hard-coded to 8x8 by the Reshape
  targets below.

  Args:
    x: Keras tensor; assumed channels-last (batch, 8, 8, c) - TODO confirm layout.

  Returns:
    Keras tensor with one value per channel (batch, c).
  """
  c = x.shape[-1]
  # Branch 1: collapse the channel axis -> one 8x8 spatial gate shared by all channels.
  tl1 = tf.math.reduce_mean(x, axis=-1) + tf.math.reduce_max(x, axis=-1)# 8x8
  tl1 = MultiHeadAttention(num_heads=16, key_dim=8)(tl1, tl1)           # 8x8
  tl1 = sigmoid(tl1)            # 8x8
  tl1 = Reshape((8,8,1))(tl1)   # 8x8x1
  tl1 = x * tl1                 # 8x8xc * 8x8x1 -> 8x8xc
  # Branch 2: collapse axis -2 (labelled "width" by the author) -> gate broadcast along that axis.
  tl2 = tf.math.reduce_mean(x, axis=-2) + tf.math.reduce_max(x, axis=-2)# 8xc
  tl2 = MultiHeadAttention(num_heads=16, key_dim=8)(tl2, tl2)          # 8xc
  tl2 = sigmoid(tl2)            # 8xc
  tl2 = Reshape((8,1,c))(tl2)   # 8x1xc
  tl2 = x * tl2                 # 8x8xc * 8x1xc -> 8x8xc
  # Branch 3: collapse axis -3 (labelled "height" by the author) -> gate broadcast along that axis.
  tl3 = tf.math.reduce_mean(x, axis=-3) + tf.math.reduce_max(x, axis=-3)# 8xc
  tl3 = MultiHeadAttention(num_heads=16, key_dim=8)(tl3, tl3)          # 8xc
  tl3 = sigmoid(tl3)            # 8xc
  tl3 = Reshape((1,8,c))(tl3)   # 1x8xc
  tl3 = x * tl3                 # 8x8xc * 1x8xc -> 8x8xc
  # Element-wise mean of the three gated maps, then pool away the spatial axes.
  t = Average()([tl1, tl2, tl3]) # 8x8xc
  t = GlobalAveragePooling2D(keepdims=False)(t) # c
  return t # c

def get_model_architecture():
  """Build the 45-class classifier: EfficientNetB0 backbone, three attention heads, dense top.

  Features are tapped from the backbone output and from the layers named
  'block6d_add' and 'block5c_add'. The two intermediate taps are projected to
  1280 filters by a Conv2D, each of the three maps is reduced to a vector by
  `trippleAttention`, and the vectors are summed before the dense classifier
  (Dense 512 relu -> Dropout 0.2 -> Dense 45 softmax).

  The backbone is created with ImageNet weights; `get_cls_model` later replaces
  all weights through `set_weights`, which is positional, so the order in which
  layers are created here must not change.

  Returns:
    An uncompiled `tf.keras.Model` taking 256x256x3 images and producing 45 softmax scores.
  """
  base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(256,256,3))

  # Three feature taps, deepest first.
  block7_x = base_model.output                           
  block6_x = base_model.get_layer('block6d_add').output  
  block5_x = base_model.get_layer('block5c_add').output  
  
  # Project both intermediate taps to 1280 filters. trippleAttention reshapes to 8x8, so all
  # three maps must be 8x8 here; the stride-2 conv presumably halves block5 to 8x8 - TODO confirm.
  block6_x = Conv2D(filters=1280, kernel_size=1, strides=1)(block6_x) 
  block5_x = Conv2D(filters=1280, kernel_size=2, strides=2)(block5_x) 
  
  # Each call builds its own attention layers and returns one vector per sample.
  block7_x = trippleAttention(block7_x)
  block6_x = trippleAttention(block6_x)
  block5_x = trippleAttention(block5_x)
  
  # Fuse the three scales by element-wise addition.
  x = block5_x + block6_x + block7_x
  x = Dense(512, 
                    activation='relu',
                    kernel_initializer=initializers.TruncatedNormal(mean=0.0,stddev=0.1),
                    kernel_regularizer=regularizers.l2(1e-5),
                    bias_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.1),
                    bias_regularizer=regularizers.l2(1e-5)
                    )(x)
  x = Dropout(0.2)(x)
  # One softmax score per class (45 classes).
  predictions = Dense(45, 
                        activation='softmax',
                        kernel_initializer=initializers.TruncatedNormal(mean=0.0,stddev=0.1),
                        kernel_regularizer=regularizers.l2(1e-5),
                        bias_initializer=initializers.TruncatedNormal(mean=0.0, stddev=0.1),
                        bias_regularizer=regularizers.l2(1e-5)
                        )(x)
  return Model(base_model.input, predictions)

# create full model
def get_cls_model(pre_trained_model):
  """Build a fresh network with `get_model_architecture()` and copy every weight of `pre_trained_model` into it.

  The copy is positional (`get_weights` -> `set_weights`), so both models must
  list their weights in the same order and with the same shapes.
  """
  cls_model = get_model_architecture()
  source_weights = pre_trained_model.get_weights()
  cls_model.set_weights(source_weights)
  return cls_model

def apply_pruning_to_dense(layer):
  """Clone function for `tf.keras.models.clone_model`: wrap selected layer types for magnitude pruning.

  Dense, Conv2D, DepthwiseConv2D, MultiHeadAttention and BatchNormalization
  layers are returned wrapped by `prune_low_magnitude`; any other layer is
  returned unchanged. Sparsity follows a PolynomialDecay schedule from 0.50 to
  0.75 between step 0 and `end_step`.
  """
  batch_size = 15
  epochs     = 27
  end_step   = (6300 / batch_size) * epochs   # batches per epoch times number of epochs
  schedule   = tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0.50, final_sparsity=0.75, begin_step=0, end_step=end_step)

  wrapped_types = (
      tf.keras.layers.Dense,
      tf.keras.layers.Conv2D,
      tf.keras.layers.DepthwiseConv2D,
      tf.keras.layers.MultiHeadAttention,
      tf.keras.layers.BatchNormalization,
  )
  if not isinstance(layer, wrapped_types):
    return layer
  return tfmot.sparsity.keras.prune_low_magnitude(layer, pruning_schedule=schedule)

# Checkpoint written by the fine-tuning loop.
pruned_model_path = '/content/drive/MyDrive/RSIC/NWPU-RESISC45/effb0_pruned/pruning_model.h5'

checkpoint_found = os.path.exists(pruned_model_path)
if checkpoint_found:
  print('loading model !')
  model_for_pruning = tf.keras.models.load_model( filepath=pruned_model_path,
                                      custom_objects={'Contrastive_Loss_2': Contrastive_Loss_2})
else:
  print('creating model !')
  model_for_pruning = get_cls_model(pre_trained_model)

# The checkpoint is saved from `strip_pruning` output, i.e. without pruning wrappers
# and without compile state, so a reloaded model needs the same preparation as a
# freshly created one. A model that already carries an optimizer is left untouched.
# NOTE(review): re-wrapping restarts the sparsity schedule at step 0.
if getattr(model_for_pruning, 'optimizer', None) is None:
  model_for_pruning = tf.keras.models.clone_model(model_for_pruning, clone_function=apply_pruning_to_dense,)

  # `prune_low_magnitude` requires a recompile.
  opt = tf.keras.optimizers.Adam(learning_rate=5e-6)
  # The two losses are looked up by index (loss[0], loss[1]) in the custom training loop.
  model_for_pruning.compile(optimizer=opt, loss=[tf.keras.losses.CategoricalCrossentropy(),Contrastive_Loss_2()], metrics=[])
  # model_for_pruning.summary()

if checkpoint_found:
  print(model_for_pruning.loss)
  print(model_for_pruning.optimizer.learning_rate)


## Fine_tune_pruned_model.py

In [None]:
import random
from datetime import datetime

def Accuracy(y_true, y_pred):
  """Return how many rows of `y_pred` have the same argmax as the matching row of `y_true`.

  The result is a count (NumPy float32 scalar), not a ratio. A ValueError is
  raised, after printing a short notice, when the two inputs differ in shape.
  """
  shapes_match = y_true.shape == y_pred.shape
  if not shapes_match:
    print('Error metric !')
    raise ValueError('Something error in Acc calculation')
  true_idx = tf.argmax(y_true, axis=-1) # (B)
  pred_idx = tf.argmax(y_pred, axis=-1) # (B)
  hits = tf.cast(true_idx == pred_idx, tf.float32)
  return tf.reduce_sum(hits).numpy()

def lr_schedule(epoch, lr):
  """Step-wise learning rate for fine-tuning; the incoming `lr` is ignored.

  epochs 0-4 -> 2e-6, 5-13 -> 1e-6, 14-21 -> 6e-7, 22 and later -> 1e-7.
  """
  # (exclusive upper epoch bound, learning rate), in ascending order
  steps = ((5, 2e-6), (14, 1e-6), (22, 6e-7))
  for upper_bound, rate in steps:
    if epoch < upper_bound:
      return rate
  return 1e-7

# --- Dataset locations ---
TRAIN_DIR       = '/content/dataset2/train/'
TEST_DIR        = '/content/dataset2/test/'

# --- Output locations on Drive: the checkpoint lives inside the log directory ---
stored_dir      = '/content/drive/MyDrive/RSIC/NWPU-RESISC45/effb0_pruned'
best_model_file = os.path.join(stored_dir, 'pruning_model.h5')

# --- Training hyper-parameters and bookkeeping ---
BATCH_SIZE      = 15  # 3 or 5 or 9 or 15 or 45
alpha           = 0.7 # tuning parameter
current_epoch   = 0   # first epoch of the loop
old_test_acc    = 0   # best test accuracy seen so far

# --- Batch providers ---
train_generator = TrainGenerator(img_dir=TRAIN_DIR, batch_size=BATCH_SIZE)
test_generator  = TestGenerator(img_dir=TEST_DIR)

# --- Pruning callback, driven by hand because the training loop is custom ---
unused_arg = -1   # placeholder passed for the callback's `batch` argument
step_callback = tfmot.sparsity.keras.UpdatePruningStep()
step_callback.set_model(model_for_pruning)
step_callback.on_train_begin()

# Custom fine-tuning loop: one pass over the training batches, one full test pass,
# then a checkpoint whenever test accuracy improves.
for epoch in range(current_epoch, 27):
  print('\n\n ==================== Epoch: ', epoch,'======================')
  s = datetime.now()
  train_acc = 0
  epoch_loss = 0
  # NOTE(review): `hypara` is not defined in this notebook; it must already exist in the kernel.
  label_dict = hypara().label_dict
  class_list = list(label_dict.keys())
  random.shuffle(class_list)   # new class order every epoch

  model_for_pruning.optimizer.learning_rate = lr_schedule(epoch, model_for_pruning.optimizer.learning_rate.numpy())
  print(' *** learning rate: ', model_for_pruning.optimizer.learning_rate)
  print('-------- training ---------')
  for i in range(int(45 / BATCH_SIZE)):          # consecutive groups of BATCH_SIZE classes
    for n_batch_train in range(140):
      # Per the author: returns 2 batches of images, each batch containing B images from B classes.
      x_train, y_true_train = train_generator.get_batch(idx_num=n_batch_train, class_list=class_list[i*BATCH_SIZE:(i+1)*BATCH_SIZE], is_aug=True)

      step_callback.on_train_batch_begin(batch=unused_arg) # run pruning callback
      with tf.GradientTape() as tape:
        # training=True is required for the pruning wrappers to update their masks
        # (see the tfmot custom-training-loop guide); it also puts Dropout and
        # BatchNormalization into training mode.
        y_pred_train = model_for_pruning(x_train, training=True)
        loss_1       = model_for_pruning.loss[0](y_true_train, y_pred_train)                            # categorical cross entropy
        loss_2       = model_for_pruning.loss[1](y_pred_train[:BATCH_SIZE], y_pred_train[BATCH_SIZE:])  # euclid distance + cosine similarity
        loss         = alpha*loss_1 + (1-alpha)*loss_2                                                  # total loss

      # Gradients and the optimizer step are taken outside the tape context so
      # the tape does not record them.
      grads = tape.gradient(loss, model_for_pruning.trainable_variables)
      tf.debugging.check_numerics(grads[0], 'grad contains NaN values.')
      model_for_pruning.optimizer.apply_gradients(zip(grads, model_for_pruning.trainable_variables))

      train_acc  += Accuracy(y_true_train, y_pred_train)   # running count of correct predictions
      epoch_loss += loss

  train_acc /= 6300*2
  print('# epoch loss:  ', epoch_loss.numpy(), '; epoch acc: ', train_acc)

  print("------ testing -------")
  if epoch >= 0:   # raise the bound to skip evaluation during the first epochs
    start_test   = datetime.now()
    test_acc     = 0
    for n_batch_test in range(560):
      x_test, y_true_test, n_imgs = test_generator.get_batch(n_batch_test)
      y_pred_test         = model_for_pruning(x_test, training=False)
      test_acc           += Accuracy(y_true_test, y_pred_test)

    test_acc /= 25200
    print('# test accuray: ', test_acc, '  and time needed for test: ', datetime.now()-start_test)
    # Save the model whenever the test accuracy improves.
    if (test_acc > old_test_acc):
      old_test_acc = test_acc
      # NOTE(review): strip_pruning presumably returns an uncompiled clone, so the
      # file would hold no optimizer state despite include_optimizer=True - confirm.
      model_for_export = tfmot.sparsity.keras.strip_pruning(model_for_pruning)
      tf.keras.models.save_model(model_for_export, best_model_file, include_optimizer=True)
      print('Save model completed')
      with open(os.path.join(stored_dir,"prunning_log.txt"), "a") as text_file:
        text_file.write("Save best model at Epoch: {}; Accuracy: {}\n".format(epoch, old_test_acc))

  with open(os.path.join(stored_dir,"prunning_log.txt"), "a") as text_file:
    text_file.write("Epoch: {}; lr: {}; Train accuracy: {}\n".format(epoch, model_for_pruning.optimizer.learning_rate.numpy(), train_acc))

  step_callback.on_epoch_end(batch=unused_arg) # run pruning callback
  print('# epoch training time: ', datetime.now()-s, '\n')


# Test quantization + pruning

In [None]:
import tensorflow as tf

class Contrastive_Loss_2(tf.keras.losses.Loss):
  """Blend of a Euclidean-distance term and a temperature-scaled cosine contrastive term.

  `call(z1, z2)` expects two rank-2 tensors of identical shape (B, E); row i of
  `z1` and row i of `z2` form a positive pair. The returned scalar is
  `(1 - rate) * mean_distance + rate * mean_contrastive`.

  Args:
    temperature: divisor applied to every cosine similarity before `exp`.
    rate: weight of the contrastive term; the distance term is weighted `1 - rate`.
    name: loss name forwarded to `tf.keras.losses.Loss`.
    **kwargs: forwarded to `tf.keras.losses.Loss` (e.g. `reduction`).
  """
  def __init__(self, temperature=0.5, rate=0.5, name='Contrastive_Loss_2', **kwargs):
    super(Contrastive_Loss_2, self).__init__(name=name, **kwargs)
    self.temperature   = temperature
    self.rate          = rate
    # Keras' CosineSimilarity loss yields the NEGATIVE cosine similarity, hence the
    # leading minus sign on every use in `call`.
    self.cosine_sim    = tf.keras.losses.CosineSimilarity(axis=-1, reduction=tf.keras.losses.Reduction.NONE)

  def get_config(self):
    """Return the base config plus `temperature` and `rate`, so a saved loss is rebuilt with the same settings."""
    config = super(Contrastive_Loss_2, self).get_config()
    config.update({'temperature': self.temperature, 'rate': self.rate})
    return config

  def call(self, z1, z2):
    """Compute the combined loss for one batch of paired embeddings.

    Args:
      z1: tensor of shape (B, E) with a statically known batch size.
      z2: tensor of the same shape as `z1`.

    Returns:
      Scalar tensor: weighted sum of the mean pair distance and the mean contrastive loss.

    Raises:
      The error raised by `tf.debugging.check_numerics` when one of the three
      intermediate scalars is NaN or Inf.
    """
    batch_size, _ = z1.shape                                            # static shape; unpacking also enforces rank 2

    # --- Euclidean distance between the two members of each positive pair ---
    difference    = z1 - z2                                             # (B, E)
    squared_norm  = tf.reduce_sum(tf.square(difference), axis=1)        # (B)
    distance      = tf.sqrt(squared_norm + 1e-8)                        # (B)   epsilon keeps the sqrt gradient finite at 0
    mean_distance = tf.reduce_mean(distance)                            # scalar
    # The tensor is checked directly: no `.numpy()` host copy, and no failure on symbolic tensors.
    tf.debugging.check_numerics(mean_distance, 'Distance contains NaN values.')

    # --- Cosine-similarity contrastive term ---
    z = tf.concat((z1, z2), 0)                                          # (2B, E)

    sim_ij      = - self.cosine_sim(z[:batch_size], z[batch_size:])     # (B)  similarity of pair (z1_i, z2_i)
    sim_ji      = - self.cosine_sim(z[batch_size:], z[:batch_size])     # (B)  same pairs, seen from z2
    sim_pos     = tf.concat((sim_ij,sim_ji), axis=0)                    # (2B) one positive similarity per row of z
    numerator   = tf.math.exp(sim_pos / self.temperature)               # (2B)

    sim_neg     = - self.cosine_sim(tf.expand_dims(z, 1), z)            # (2B,1,E) vs (2B,E) broadcast -> (2B, 2B)
    mask        = 1 - tf.eye(2*batch_size, dtype=tf.float32)            # (2B, 2B) zero on the diagonal: drop self-similarity
    sim_neg     = mask * tf.math.exp(sim_neg / self.temperature)        # (2B, 2B)
    denominator = tf.math.reduce_sum(sim_neg, axis=-1)                  # (2B)

    # The small epsilons keep the ratio and the log finite.
    mean_cosine_similarity = tf.reduce_mean(- tf.math.log((numerator + 1e-11) / (denominator + 1e-11)))       # scalar
    tf.debugging.check_numerics(mean_cosine_similarity, 'Cosine contains NaN values.')

    # --- Weighted combination of the two terms ---
    total_loss = (1-self.rate)*mean_distance + self.rate*mean_cosine_similarity
    tf.debugging.check_numerics(total_loss, 'Total contains NaN values.')
    return total_loss

In [None]:
import tensorflow as tf
import os

# Source (pruned) Keras model and destination TFLite file; each path is defined once and reused below.
best_model_path  = '/content/drive/MyDrive/RSIC/NWPU-RESISC45/effb0_pruned/pruning_model.h5'
quant_model_path = '/content/drive/MyDrive/RSIC/NWPU-RESISC45/effb0_pruned/tflite_pruned_model.tflite'

best_model = tf.keras.models.load_model(filepath=best_model_path,
                                        custom_objects={'Contrastive_Loss_2': Contrastive_Loss_2})

# Optimize.DEFAULT with no representative dataset: per the TFLite post-training
# quantization guide this is dynamic-range quantization (8-bit weights).
converter = tf.lite.TFLiteConverter.from_keras_model(best_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_prune_model = converter.convert()

# Write the converted flatbuffer; the context manager guarantees the handle is
# flushed and closed before the file size is read below.
with open(quant_model_path, "wb") as tflite_file:
  tflite_file.write(tflite_quant_prune_model)

# File sizes in MiB: pruned Keras model first, pruned + quantized TFLite model second.
print( os.path.getsize(best_model_path) / float(2**20))
print( os.path.getsize(quant_model_path) / float(2**20))