In [None]:
'''
Mount Drive
To search for decoder.py in mounted drive, set path
Install tensorflow-model-optimization
'''
import sys
from google.colab import drive
drive.mount('/content/drive')
sys.path.append('####') # Add path
! pip install tensorflow-model-optimization

Mounted at /content/drive
Collecting tensorflow-model-optimization
[?25l  Downloading https://files.pythonhosted.org/packages/55/38/4fd48ea1bfcb0b6e36d949025200426fe9c3a8bfae029f0973d85518fa5a/tensorflow_model_optimization-0.5.0-py2.py3-none-any.whl (172kB)
[K     |████████████████████████████████| 174kB 5.9MB/s 
Installing collected packages: tensorflow-model-optimization
Successfully installed tensorflow-model-optimization-0.5.0


In [None]:
import numpy as np
import pickle as pk
from decoder import *
import tensorflow_model_optimization as tfmot
import matplotlib.pyplot as plt
from keras.models import load_model
import tensorflow as tf
import time

In [None]:
'''
Define folderpath for further operations
'''
# Root folder of the project on the mounted Drive. Later cells build paths by
# plain string concatenation (e.g. FOLDERPATH+'models-v2/...'), so the value
# must end with a path separator.
FOLDERPATH = '####' # Add path

In [None]:
'''
GLOBAL QUANTIZATION FUNCTIONS
-----------------------------
1. Save quant model
2. Get model size
'''
def save_quant(model,inpshape,modeldir,ty,seqlen=34):
  '''
  Save `model` in TensorFlow SavedModel format with a fixed-shape signature.

  The model call is wrapped in a tf.function and traced for a batch of one, so
  the exported signature takes two tensors of shape [1, inpshape] and
  [1, seqlen].

  Arguments:
    model    -- two-input model; model.inputs[0] and model.inputs[1] supply the dtypes
    inpshape -- width of the first input
    modeldir -- name of the output directory
    ty       -- sub-folder of FOLDERPATH+'models-v2/' to save into
    seqlen   -- width of the second input; defaults to 34, the value that was
                previously hard-coded
  '''
  run_model = tf.function(lambda x: model(x))
  input_specs = [
      tf.TensorSpec([1, inpshape], model.inputs[0].dtype),
      tf.TensorSpec([1, seqlen], model.inputs[1].dtype),
  ]
  concrete_func = run_model.get_concrete_function(input_specs)
  model.save(FOLDERPATH+'models-v2/'+ty+'/'+modeldir, save_format="tf", signatures=concrete_func)

def get_quant_model_size(model):
  '''
  Print the size of a serialized model in units of 2**20 bytes.

  Arguments:
    model -- the serialized model as a bytes-like object (the cells in this
             notebook pass the result of converter.convert())

  The size is read directly from the in-memory buffer. Writing the buffer to a
  temporary file first, as was done before, left the descriptor returned by
  tempfile.mkstemp open and the file on disk after every call.
  '''
  # memoryview(...).nbytes is the number of bytes a file write of `model` would produce.
  size_in_bytes = memoryview(model).nbytes
  print("Quantized model in Mb:", size_in_bytes / float(2**20))

### QUANTIZATION AWARE TRAINING

In [None]:
'''
FUNCTION QUANTIZATION AWARE TRAINING
------------------------------------
1. Initialize a model
2. Load weights for better accuracy (optional but recommended)
3. Annotate layers to quantize
4. Compile model
'''
def get_quant_model(featuresname,lw=True,modelname=None):
    '''
    Build and compile a quantization-aware version of the captioning model.

    Arguments:
      featuresname -- base name (without '.pkl') of the photo-features file
                      under FOLDERPATH+'dataset/'
      lw           -- when true, load baseline weights into the model before
                      it is annotated for quantization
      modelname    -- weights file under FOLDERPATH+'models-v2/baselines/';
                      required when lw is true

    Returns:
      (quant_aware_model, train_data), where train_data is the tuple
      (train_descriptions, train_features, tokenizer, maxlength, vocab_size).

    Raises:
      ValueError -- if lw is true but no modelname was given.
    '''
    # Check the arguments before any data is loaded: with the default
    # arguments the weights path could not be built (str + None).
    if lw and modelname is None:
        raise ValueError("modelname must be given when lw is True")

    # load training dataset
    filename = FOLDERPATH+'dataset/Flickr8k_text/Flickr_8k.trainImages.txt'
    train = load_set(filename)
    train_descriptions = load_clean_descriptions(FOLDERPATH+'dataset/descriptions.txt', train)
    train_features = load_photo_features(FOLDERPATH+'dataset/{:}.pkl'.format(featuresname), train)
    tokenizer = create_tokenizer(train_descriptions)
    vocab_size = len(tokenizer.word_index) + 1
    maxlength = max_length(train_descriptions)
    train_data = (train_descriptions, train_features, tokenizer, maxlength, vocab_size)

    # LOAD MODEL FOR BETTER ACCURACY
    # The feature width is read from the first stored feature array.
    inputshape = list(train_features.values())[0].shape[1]
    model = define_model(vocab_size,maxlength,inputshape)
    if lw:
        model.load_weights(FOLDERPATH+'models-v2/baselines/'+modelname)

    # INITIALIZE QUANTIZATION
    # Only Dense and Dropout layers are annotated; every other layer is cloned unchanged.
    quantized_layer_types = (tf.keras.layers.Dense, tf.keras.layers.Dropout)

    def apply_quantization_to_dense(layer):
        if isinstance(layer, quantized_layer_types):
            return tfmot.quantization.keras.quantize_annotate_layer(layer)
        return layer

    annotated_model = tf.keras.models.clone_model(model,clone_function=apply_quantization_to_dense)
    quant_aware_model = tfmot.quantization.keras.quantize_apply(annotated_model)
    quant_aware_model.summary()

    quant_aware_model.compile(optimizer='adam',loss='categorical_crossentropy')

    return quant_aware_model, train_data

In [None]:
'''
QUANTIZATION AWARE TRAINING
'''
featuresname = 'features_ResNetP50Q'
baselinemodelname = 'pruned50quant-ResNet-baseline-LSTM.h5'
lw = True
if not lw:
    baselinemodelname = None
epochs = 10
quant_aware_model, train_data = get_quant_model(featuresname,lw,baselinemodelname)
# One training step per entry in the training descriptions (train_data[0]).
steps = len(train_data[0])
for i in range(1,epochs+1):
    # A new generator is created for each epoch; train_data holds its five
    # arguments in call order (descriptions, features, tokenizer, maxlength, vocab_size).
    generator = data_generator(*train_data)
    # Model.fit accepts generators directly; Model.fit_generator is deprecated in TF 2.x.
    quant_aware_model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1)

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_3 (InputLayer)            [(None, 100)]        0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            [(None, 34)]         0                                            
__________________________________________________________________________________________________
quantize_layer_1 (QuantizeLayer (None, 100)          3           input_3[0][0]                    
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 34, 256)      1940224     input_4[0][0]                    
_______________________________________________________________________________________

In [None]:
'''
SAVE MODEL
'''
# Export the trained quantization-aware model with save_quant into the
# 'quant-aware-training' sub-folder, under the directory name given by modelname.
modelname = 'pruned50quant-ResNet-quantized-LSTM'
# Width of one photo-feature array; train_data[1] is the features mapping
# returned by get_quant_model.
inputshape = list(train_data[1].values())[0].shape[1]
save_quant(quant_aware_model,inputshape,modelname,ty='quant-aware-training')

INFO:tensorflow:Assets written to: /content/drive/MyDrive/Fall20/10617_IDL/Project/models-v2/quant-aware-training/pruned50quant-ResNet-quantized-LSTM/assets


INFO:tensorflow:Assets written to: /content/drive/MyDrive/Fall20/10617_IDL/Project/models-v2/quant-aware-training/pruned50quant-ResNet-quantized-LSTM/assets


### EVALUATION FOR TENSORFLOW LITE MODELS

In [None]:
# generate a description for an image
def generate_desc_interpreter(interpreter, tokenizer, photo, maxlength, inp_ind, out_ind):
    '''
    Greedily decode a caption for one photo with a TFLite interpreter.

    Starting from 'startseq', the current text is tokenized, padded to
    maxlength and fed to the interpreter together with the photo features.
    The highest-scoring word is appended until 'endseq' is produced, the
    predicted id has no word, or maxlength steps have been run.

    Arguments:
      interpreter -- TFLite interpreter; this function does not call allocate_tensors
      tokenizer   -- provides texts_to_sequences and is passed to word_for_id
      photo       -- photo feature array; cast to float32 before use
      maxlength   -- padded sequence length and maximum number of decoding steps
      inp_ind     -- [photo_input_index, sequence_input_index]
      out_ind     -- index of the output tensor

    Returns:
      The caption string, including 'startseq' (and 'endseq' when it was produced).
    '''
    # The photo features are the same at every step, so cast them once.
    photo = photo.astype(np.float32)
    in_text = 'startseq'
    for _ in range(maxlength):
        sequence = tokenizer.texts_to_sequences([in_text])[0]
        sequence = pad_sequences([sequence], maxlen=maxlength).astype(np.float32)
        interpreter.set_tensor(inp_ind[0], photo)
        interpreter.set_tensor(inp_ind[1], sequence)
        interpreter.invoke()
        # get_tensor returns a copy of the output, so no view into the
        # interpreter's buffers is held when invoke() runs again.
        scores = interpreter.get_tensor(out_ind)[0]
        word = word_for_id(argmax(scores), tokenizer)
        if word is None:
            break
        in_text += ' ' + word
        if word == 'endseq':
            break
    return in_text

def evaluate_interpreter(interpreter, descriptions, photos, tokenizer, maxlength):
    '''
    Caption every photo with the interpreter and print corpus BLEU-1..4.

    Arguments:
      interpreter  -- TFLite interpreter with two inputs and one output
      descriptions -- mapping of photo key -> list of reference caption strings
      photos       -- mapping of photo key -> photo feature array
      tokenizer    -- tokenizer forwarded to generate_desc_interpreter
      maxlength    -- padded caption length; also used to tell the two inputs apart

    Returns:
      None; the four scores are printed.
    '''
    # The two inputs may be listed in either order. The one whose second
    # dimension equals the padded caption length is the word-sequence input;
    # the other one receives the photo features.
    input_details = interpreter.get_input_details()
    if input_details[0]['shape'][1] == maxlength:
        sequence_input, photo_input = input_details[0], input_details[1]
    else:
        photo_input, sequence_input = input_details[0], input_details[1]
    # Order expected by generate_desc_interpreter: [photo, sequence].
    input_index = [photo_input["index"], sequence_input["index"]]
    output_index = interpreter.get_output_details()[0]["index"]

    actual, predicted = list(), list()
    for key, desc_list in descriptions.items():
        yhat = generate_desc_interpreter(interpreter, tokenizer, photos[key], maxlength, input_index, output_index)
        references = [d.split() for d in desc_list]
        actual.append(references)
        predicted.append(yhat.split())

    print('BLEU-1: %f' % corpus_bleu(actual, predicted, weights=(1.0, 0, 0, 0)))
    print('BLEU-2: %f' % corpus_bleu(actual, predicted, weights=(0.5, 0.5, 0, 0)))
    # NOTE(review): these weights sum to 0.9, not 1. Left unchanged so scores
    # stay comparable with earlier runs; confirm whether (1/3, 1/3, 1/3, 0) was intended.
    print('BLEU-3: %f' % corpus_bleu(actual, predicted, weights=(0.3, 0.3, 0.3, 0)))
    print('BLEU-4: %f' % corpus_bleu(actual, predicted, weights=(0.25, 0.25, 0.25, 0.25)))

def evaluate_call_interpreter(interpreter,feat_fname):
    '''
    Evaluate a TFLite interpreter on the test split and print the time taken.

    Arguments:
      interpreter -- TFLite interpreter forwarded to evaluate_interpreter
      feat_fname  -- base name (without '.pkl') of the photo-features file
                     under FOLDERPATH+'dataset/'

    Returns:
      None; evaluate_interpreter prints the BLEU scores and this function
      prints the elapsed time in minutes.
    '''
    text_dir = FOLDERPATH+'dataset/Flickr8k_text/'
    descriptions_file = FOLDERPATH+'dataset/descriptions.txt'

    # The tokenizer and the padded caption length are rebuilt from the
    # training captions.
    train = load_set(text_dir+'Flickr_8k.trainImages.txt')
    train_descriptions = load_clean_descriptions(descriptions_file, train)
    tokenizer = create_tokenizer(train_descriptions)
    maxlength = max_length(train_descriptions)

    # load test set
    test = load_set(text_dir+'Flickr_8k.testImages.txt')
    test_descriptions = load_clean_descriptions(descriptions_file, test)
    test_features = load_photo_features(FOLDERPATH+'dataset/{:}.pkl'.format(feat_fname), test)

    # evaluate model; only the evaluation itself is timed, not the data loading
    start = time.time()
    evaluate_interpreter(interpreter, test_descriptions, test_features, tokenizer, maxlength)
    print("\nTime taken: ", (time.time()-start)/60)

#### VGG16 RESULTS

##### BASELINE VGG QUANTIZED LSTM

In [None]:
# Convert the SavedModel from 'models-v2/quant-aware-training/' to TFLite,
# print BLEU scores and elapsed minutes via evaluate_call_interpreter, then
# print the size of the converted model.
# NOTE(review): converter.optimizations is never set, so the converter is
# presumably not applying TFLite quantization here - confirm this is intended.
modelname = 'baseline-VGG16-quantized-LSTM'
featuresname = 'features_VGG'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/quant-aware-training/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.521598
BLEU-2: 0.280835
BLEU-3: 0.186640
BLEU-4: 0.082050

Time taken:  1.724222469329834
Quantized model in Mb: 21.09545135498047


##### QUANTIZED VGG QUANTIZED LSTM

In [None]:
# Same evaluation as the other quant-aware-training cells, for the model saved
# as 'quantized-VGG16-quantized-LSTM' with the 'features_VGGQ' photo features.
# NOTE(review): converter.optimizations is never set, so the converter is
# presumably not applying TFLite quantization here - confirm this is intended.
modelname = 'quantized-VGG16-quantized-LSTM'
featuresname = 'features_VGGQ'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/quant-aware-training/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.568242
BLEU-2: 0.304966
BLEU-3: 0.199659
BLEU-4: 0.084858

Time taken:  1.6369754473368328
Quantized model in Mb: 21.09540557861328


##### PRUNED50 VGG QUANTIZED LSTM

In [None]:
# Same evaluation as the other quant-aware-training cells, for the model saved
# as 'pruned50-VGG16-quantized-LSTM' with the 'features_VGGP50' photo features.
# NOTE(review): converter.optimizations is never set, so the converter is
# presumably not applying TFLite quantization here - confirm this is intended.
modelname = 'pruned50-VGG16-quantized-LSTM'
featuresname = 'features_VGGP50'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/quant-aware-training/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.523818
BLEU-2: 0.223779
BLEU-3: 0.137816
BLEU-4: 0.054964

Time taken:  1.43951096534729
Quantized model in Mb: 17.345420837402344


##### PRUNED50QUANT VGG QUANTIZED LSTM

In [None]:
# Same evaluation as the other quant-aware-training cells, for the model saved
# as 'pruned50quant-VGG16-quantized-LSTM' with the 'features_VGGP50Q' photo features.
# NOTE(review): converter.optimizations is never set, so the converter is
# presumably not applying TFLite quantization here - confirm this is intended.
modelname = 'pruned50quant-VGG16-quantized-LSTM'
featuresname = 'features_VGGP50Q'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/quant-aware-training/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.459181
BLEU-2: 0.187004
BLEU-3: 0.110588
BLEU-4: 0.039175

Time taken:  1.7875096837679545
Quantized model in Mb: 17.193092346191406


#### RESNET RESULTS

##### BASELINE RESNET QUANTIZED LSTM

In [None]:
# Same evaluation as the other quant-aware-training cells, for the model saved
# as 'baseline-ResNet-quantized-LSTM' with the 'features_ResNet' photo features.
# NOTE(review): converter.optimizations is never set, so the converter is
# presumably not applying TFLite quantization here - confirm this is intended.
modelname = 'baseline-ResNet-quantized-LSTM'
featuresname = 'features_ResNet'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/quant-aware-training/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.503329
BLEU-2: 0.262821
BLEU-3: 0.178434
BLEU-4: 0.079824

Time taken:  1.653984014193217
Quantized model in Mb: 19.095436096191406


##### QUANTIZED RESNET QUANTIZED LSTM


In [None]:
# Same evaluation as the other quant-aware-training cells, for the model saved
# as 'quantized-ResNet-quantized-LSTM' with the 'features_ResNetQ' photo features.
# NOTE(review): converter.optimizations is never set, so the converter is
# presumably not applying TFLite quantization here - confirm this is intended.
modelname = 'quantized-ResNet-quantized-LSTM'
featuresname = 'features_ResNetQ'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/quant-aware-training/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.483891
BLEU-2: 0.248940
BLEU-3: 0.167475
BLEU-4: 0.072523

Time taken:  1.7938211917877198
Quantized model in Mb: 21.095436096191406


##### PRUNED50 RESNET QUANTIZED LSTM

In [None]:
# Same evaluation as the other quant-aware-training cells, for the model saved
# as 'pruned50-ResNet-quantized-LSTM' with the 'features_ResNetP50' photo features.
# NOTE(review): converter.optimizations is never set, so the converter is
# presumably not applying TFLite quantization here - confirm this is intended.
modelname = 'pruned50-ResNet-quantized-LSTM'
featuresname = 'features_ResNetP50'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/quant-aware-training/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.514513
BLEU-2: 0.252351
BLEU-3: 0.149940
BLEU-4: 0.062642

Time taken:  1.020120088259379
Quantized model in Mb: 18.07196807861328


##### PRUNED50QUANT RESNET QUANTIZED LSTM

In [None]:
# Same evaluation as the other quant-aware-training cells, for the model saved
# as 'pruned50quant-ResNet-quantized-LSTM' with the 'features_ResNetP50Q' photo features.
# NOTE(review): converter.optimizations is never set, so the converter is
# presumably not applying TFLite quantization here - confirm this is intended.
modelname = 'pruned50quant-ResNet-quantized-LSTM'
featuresname = 'features_ResNetP50Q'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/quant-aware-training/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.442108
BLEU-2: 0.165824
BLEU-3: 0.091904
BLEU-4: 0.031607

Time taken:  1.329277722040812
Quantized model in Mb: 17.193092346191406


### POST TRAINING QUANTIZATION

In [None]:
'''
QUANTIZE POST TRAINING
'''
# Load a baseline .h5 model and re-export it with save_quant into the
# 'post-training-quant' sub-folder, where the converter cells below read it.
# NOTE(review): nothing in this cell changes the weights or enables
# quantization; it only re-saves the baseline - confirm where the
# post-training quantization step is meant to happen.
baselinemodelname = 'pruned50-VGG16-baseline-LSTM'
baseline_model = load_model(FOLDERPATH+'models-v2/baselines/'+baselinemodelname+'.h5')
# Width of the model's first input, passed on as the first signature shape.
inputshape = baseline_model.input[0].shape[1]
save_quant(baseline_model,inputshape,modeldir='pruned50-VGG16-quantized-LSTM',ty='post-training-quant')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: /content/drive/MyDrive/Fall20/10617_IDL/Project/models-v2/post-training-quant/pruned50-VGG16-quantized-LSTM/assets


##### BASELINE VGG QUANTIZED LSTM (post training)

In [None]:
# Convert the SavedModel from 'models-v2/post-training-quant/' to TFLite,
# print BLEU scores and elapsed minutes via evaluate_call_interpreter, then
# print the size of the converted model.
# NOTE(review): converter.optimizations is never set, so this conversion
# presumably applies no post-training quantization - confirm this is intended.
modelname = 'baseline-VGG16-quantized-LSTM'
featuresname = 'features_VGG'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/post-training-quant/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

BLEU-1: 0.230242
BLEU-2: 0.077129
BLEU-3: 0.043668
BLEU-4: 0.018815

Time taken:  0.3262763977050781
Quantized model in Mb: 21.09308624267578


##### QUANTIZED VGG QUANTIZED LSTM (post training)

In [None]:
# Same evaluation as the other post-training cells, for the model saved as
# 'quantized-VGG16-quantized-LSTM' with the 'features_VGGQ' photo features.
# NOTE(review): converter.optimizations is never set, so this conversion
# presumably applies no post-training quantization - confirm this is intended.
modelname = 'quantized-VGG16-quantized-LSTM'
featuresname = 'features_VGGQ'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/post-training-quant/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


BLEU-1: 0.050313
BLEU-2: 0.050313
BLEU-3: 0.050313
BLEU-4: 0.050313

Time taken:  0.11966217358907064
Quantized model in Mb: 21.09308624267578


##### BASELINE RESNET QUANTIZED LSTM (post training)

In [None]:
# Same evaluation as the other post-training cells, for the model saved as
# 'baseline-ResNet-quantized-LSTM' with the 'features_ResNet' photo features.
# NOTE(review): converter.optimizations is never set, so this conversion
# presumably applies no post-training quantization - confirm this is intended.
modelname = 'baseline-ResNet-quantized-LSTM'
featuresname = 'features_ResNet'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/post-training-quant/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

Corpus/Sentence contains 0 counts of 3-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


BLEU-1: 0.051701
BLEU-2: 0.002298
BLEU-3: 0.007991
BLEU-4: 0.010913

Time taken:  0.1322492003440857
Quantized model in Mb: 19.093082427978516


##### QUANTIZED RESNET QUANTIZED LSTM (post training)

In [None]:
# Same evaluation as the other post-training cells, for the model saved as
# 'quantized-ResNet-quantized-LSTM' with the 'features_ResNetQ' photo features.
# NOTE(review): converter.optimizations is never set, so this conversion
# presumably applies no post-training quantization - confirm this is intended.
modelname = 'quantized-ResNet-quantized-LSTM'
featuresname = 'features_ResNetQ'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/post-training-quant/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


BLEU-1: 0.050313
BLEU-2: 0.050313
BLEU-3: 0.050313
BLEU-4: 0.050313

Time taken:  0.12165356874465942
Quantized model in Mb: 21.09308624267578


##### PRUNED50 VGG QUANTIZED LSTM (post training)

In [None]:
# Same evaluation as the other post-training cells, for the model saved as
# 'pruned50-VGG16-quantized-LSTM' with the 'features_VGGP50' photo features.
# NOTE(review): converter.optimizations is never set, so this conversion
# presumably applies no post-training quantization - confirm this is intended.
modelname = 'pruned50-VGG16-quantized-LSTM'
featuresname = 'features_VGGP50'
converter = tf.lite.TFLiteConverter.from_saved_model(FOLDERPATH+'models-v2/post-training-quant/'+modelname)
tflite_model = converter.convert()
interpreter = tf.lite.Interpreter(model_content=tflite_model)
interpreter.allocate_tensors()
evaluate_call_interpreter(interpreter,featuresname)
get_quant_model_size(tflite_model)

OSError: ignored