# MAGNITUDE BASED PRUNING FOR LSTM BASED DECODER
1. Prune Training
2. Prune Evaluation

In [None]:
'''
Mount Drive
Add the Drive folder that contains decoder.py to sys.path so it can be imported
Install tensorflow-model-optimization
'''
import sys
from google.colab import drive
# Mount Google Drive at /content/drive so the files stored there are accessible.
drive.mount('/content/drive')
# Placeholder: replace '####' with the Drive folder that contains decoder.py,
# so that the later `from decoder import *` can find the module.
sys.path.append('####') # Add path
! pip install tensorflow-model-optimization

Mounted at /content/drive
Collecting tensorflow-model-optimization
  Downloading https://files.pythonhosted.org/packages/55/38/4fd48ea1bfcb0b6e36d949025200426fe9c3a8bfae029f0973d85518fa5a/tensorflow_model_optimization-0.5.0-py2.py3-none-any.whl (172kB)
     |████████████████████████████████| 174kB 12.5MB/s 
Installing collected packages: tensorflow-model-optimization
Successfully installed tensorflow-model-optimization-0.5.0


In [None]:
import numpy as np
import pickle as pk
from decoder import *
import tensorflow_model_optimization as tfmot
import matplotlib.pyplot as plt
from keras.models import load_model
import tensorflow as tf

import tempfile
import time
import warnings
warnings.filterwarnings('ignore')

In [None]:
'''
Define folderpath for further operations
'''
# Base folder for the dataset and the saved models. Most cells build paths as
# FOLDERPATH+'dataset/...' or FOLDERPATH+'models-v2/...', so the value is
# expected to end with a path separator ('/').
FOLDERPATH = '####' # Add path

## PRUNE TRAINING

In [None]:
'''
BUILD PRUNING MODEL FUNCTION
----------------------------
Run this cell to define the build_pruning_model() function.
The function takes a model, a begin sparsity and a final sparsity
as inputs and returns the model wrapped for pruning. Re-train
the wrapped model to recover model generalization.
'''
def build_pruning_model(model,bsp,fsp,end_step=5000):
  '''
  Wrap a model for magnitude-based pruning and compile it.

  model    : Keras model to wrap with prune_low_magnitude
  bsp      : initial sparsity of the PolynomialDecay schedule
  fsp      : final sparsity of the PolynomialDecay schedule
  end_step : training step at which the schedule ends (default 5000,
             the value that was previously hard-coded)

  Returns the wrapped model, compiled with the 'adam' optimizer and
  the 'categorical_crossentropy' loss.
  '''
  # The schedule always starts at step 0; only its end is configurable.
  schedule = tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=bsp,
                                                  final_sparsity=fsp,
                                                  begin_step=0,
                                                  end_step=end_step)
  model_for_pruning = tfmot.sparsity.keras.prune_low_magnitude(model, pruning_schedule=schedule)
  model_for_pruning.compile(optimizer='adam',
                            loss='categorical_crossentropy')

  return model_for_pruning


'''
ALTERNATIVE LAYER WISE PRUNING
------------------------------
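Wraps layers one at a time so that selected layers can be left unpruned.
NOTE: this header originally said the Embedding layer is not wrapped, but the
code below skips Add layers instead - confirm which one is intended.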
'''
def apply_pruning_to_dense(layer):
  '''
  Clone function used by build_pruning_model_layers(): returns Add layers
  unchanged and wraps every other layer with prune_low_magnitude, using a
  PolynomialDecay schedule from 0 to 0.8 sparsity over steps 0 to 5000.

  NOTE(review): the notebook text introducing this function mentions the
  Embedding layer, while the check below is on Add - confirm which is intended.
  '''
  # Guard clause: Add layers are passed through without a pruning wrapper.
  if isinstance(layer, tf.keras.layers.Add):
    return layer

  schedule = tfmot.sparsity.keras.PolynomialDecay(initial_sparsity=0,
                                                  final_sparsity=0.8,
                                                  begin_step=0,
                                                  end_step=5000)
  return tfmot.sparsity.keras.prune_low_magnitude(layer, pruning_schedule=schedule)

def build_pruning_model_layers(base_model):
  '''
  Clone base_model, passing every layer through apply_pruning_to_dense(),
  and compile the clone with the 'adam' optimizer and the
  'categorical_crossentropy' loss. Returns the compiled clone.
  '''
  wrapped = tf.keras.models.clone_model(base_model, clone_function=apply_pruning_to_dense)
  wrapped.compile(loss='categorical_crossentropy', optimizer='adam')
  return wrapped

In [None]:
'''
LOAD BASELINE MODEL
'''
# Built like every other path in this notebook: the other cells append
# 'dataset/...' and 'models-v2/...' directly, so FOLDERPATH is expected to end
# with '/' and no extra leading slash is added here.
baseline_model = load_model(FOLDERPATH+'models-v2/baselines/pruned50-ResNet-baseline-LSTM.h5')

In [None]:
'''
BUILD PRUNED MODEL, HYPER-PARAMETERS AND CALLBACKS
--------------------------------------------------
Set begin sparsity, final sparsity and number of epochs for re-training.
'''
# Begin/final sparsity of the pruning schedule and number of re-training epochs.
bsp, fsp = 0, 0.50
epochs = 5

pruned_model = build_pruning_model(baseline_model,bsp,fsp)
# pruned_model = build_pruning_model_layers(baseline_model)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
pruned_model.summary()

# Callbacks handed to training: UpdatePruningStep must accompany a
# pruning-wrapped model, PruningSummaries logs to a temporary directory.
logdir = tempfile.mkdtemp()
callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep(),
  tfmot.sparsity.keras.PruningSummaries(log_dir=logdir),
]

Instructions for updating:
Please use `layer.add_weight` method instead.
Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 34)]         0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 1000)]       0                                            
__________________________________________________________________________________________________
prune_low_magnitude_embedding ( (None, 34, 256)      3880450     input_2[0][0]                    
__________________________________________________________________________________________________
prune_low_magnitude_dropout (Pr (None, 1000)         1           input_1[0][0]                    
______________

In [None]:
'''
PRUNE TRAINING
--------------
Define featuresname, load training set and re-train model
featuresname: name of the encoder features list file
'''
featuresname = 'features_ResNetP50'

# load training dataset
filename = FOLDERPATH+'dataset/Flickr8k_text/Flickr_8k.trainImages.txt'
train = load_set(filename)
train_descriptions = load_clean_descriptions(FOLDERPATH+'dataset/descriptions.txt', train)
train_features = load_photo_features(FOLDERPATH+'dataset/{:}.pkl'.format(featuresname), train)
tokenizer = create_tokenizer(train_descriptions)
vocab_size = len(tokenizer.word_index) + 1
maxlength = max_length(train_descriptions)

# train the model, running the epochs manually (optional checkpointing is
# left commented out below)
# modelname = 'baseline-VGG16-pruned80-LSTM'
steps = len(train_descriptions)
for i in range(1,epochs+1):
  # a fresh generator is created for every epoch
  generator = data_generator(train_descriptions, train_features, tokenizer, maxlength, vocab_size)
  # Model.fit accepts generators directly; fit_generator is deprecated.
  # `history` is overwritten each iteration, so it only holds the last epoch.
  history = pruned_model.fit(generator, epochs=1, steps_per_epoch=steps, verbose=1, callbacks=callbacks)
  # if i%5 == 0:
  #   pruned_model.save(FOLDERPATH+'models-v2/'+modelname+'_ep_' + str(i) + '.h5')

Instructions for updating:
Please use Model.fit, which supports generators.
Instructions for updating:
use `tf.profiler.experimental.stop` instead.


In [None]:
'''
STRIP FOR EXPORT MODEL AND SAVE
-------------------------------
Before saving, encode the following in the file name:
1. Which model was used
2. The pruning begin and final sparsity (bsp and fsp)
3. The decoder architecture
'''
modelname = 'pruned50-ResNet-pruned50-LSTM'
# strip_pruning removes the prune_low_magnitude wrappers before export.
striped_model = tfmot.sparsity.keras.strip_pruning(pruned_model)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (that would add a stray "None" line to the output).
striped_model.summary()
striped_model.save(FOLDERPATH+'models-v2/pruned/'+modelname+'.h5')

# '''
# SAVE HISTORY AS WELL
# '''
# pk.dump(history,open(FOLDERPATH+'model-history/'+modelname+'.pkl','wb'))

Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 34)]         0                                            
__________________________________________________________________________________________________
input_1 (InputLayer)            [(None, 1000)]       0                                            
__________________________________________________________________________________________________
embedding (Embedding)           (None, 34, 256)      1940224     input_2[0][0]                    
__________________________________________________________________________________________________
dropout (Dropout)               (None, 1000)         0           input_1[0][0]                    
_______________________________________________________________________________________

## PRUNE EVALUATION


In [None]:
'''
EVALUATE THE PRUNED MODEL
-------------------------
Provide the model and the name of the features list file (without '.pkl').
The function prints the BLEU scores and the time taken, in minutes.
'''

def evaluate_call(model,feat_fname):
  '''
  Evaluate a model on the Flickr8k test image list and report the time taken.

  model      : model handed to evaluate_model()
  feat_fname : name (without '.pkl') of the encoder features file located
               in FOLDERPATH+'dataset/'

  The tokenizer and the maximum caption length are rebuilt from the training
  descriptions on every call. Prints whatever evaluate_model() reports,
  followed by the elapsed time in minutes. Returns None.
  '''
  descriptions_path = FOLDERPATH+'dataset/descriptions.txt'

  # load training dataset (6K); only needed for the tokenizer and max length
  train = load_set(FOLDERPATH+'dataset/Flickr8k_text/Flickr_8k.trainImages.txt')
  train_descriptions = load_clean_descriptions(descriptions_path, train)
  tokenizer = create_tokenizer(train_descriptions)
  maxlength = max_length(train_descriptions)

  # load test set
  test = load_set(FOLDERPATH+'dataset/Flickr8k_text/Flickr_8k.testImages.txt')
  test_descriptions = load_clean_descriptions(descriptions_path, test)
  test_features = load_photo_features(FOLDERPATH+'dataset/{:}.pkl'.format(feat_fname), test)

  # evaluate model; perf_counter() is the clock intended for measuring intervals
  start = time.perf_counter()
  evaluate_model(model, test_descriptions, test_features, tokenizer, maxlength)
  # elapsed seconds are converted to minutes before printing
  print("\nTime taken: ", (time.perf_counter()-start)/60)

### VGG16 Results

##### BASELINE VGG16 AND 50% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_VGG'
modelname = 'baseline-VGG16-pruned50-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.534086
BLEU-2: 0.281099
BLEU-3: 0.181940
BLEU-4: 0.076307

Time taken:  5.583005964756012


##### BASELINE VGG16 AND 80% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_VGG'
modelname = 'baseline-VGG16-pruned80-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.599606
BLEU-2: 0.299564
BLEU-3: 0.169109
BLEU-4: 0.070046

Time taken:  4.449543042977651


##### BASELINE VGG16 AND 90% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_VGG'
modelname = 'baseline-VGG16-pruned90-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.521724
BLEU-2: 0.238366
BLEU-3: 0.396328
BLEU-4: 0.450047

Time taken:  3.842176659901937


##### QUANTIZED VGG16 AND 50% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_VGGQ'
modelname = 'quantized-VGG16-pruned50-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.502071
BLEU-2: 0.260290
BLEU-3: 0.177183
BLEU-4: 0.078390

Time taken:  6.2438020745913185


##### QUANTIZED VGG16 AND 80% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_VGGQ'
modelname = 'quantized-VGG16-pruned80-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.518027
BLEU-2: 0.254723
BLEU-3: 0.132197
BLEU-4: 0.049560

Time taken:  4.9500048716863


##### QUANTIZED VGG16 AND 90% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_VGGQ'
modelname = 'quantized-VGG16-pruned90-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.557672
BLEU-2: 0.254117
BLEU-3: 0.146251
BLEU-4: 0.060697

Time taken:  4.359605813026429


##### PRUNED50 VGG16 AND 50% PRUNED LSTM

In [None]:
# Encoder features file and saved model evaluated in this cell; this model is
# read from the 'models-v2/pruned/' sub-folder.
featuresname = 'features_VGGP50'
modelname = 'pruned50-VGG16-pruned50-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/pruned/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.418220
BLEU-2: 0.169478
BLEU-3: 0.107443
BLEU-4: 0.043284

Time taken:  6.213016577561697


### RESNET Results

##### BASELINE RESNET AND 50% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_ResNet'
modelname = 'baseline-ResNet-pruned50-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.514461
BLEU-2: 0.267497
BLEU-3: 0.186613
BLEU-4: 0.082742

Time taken:  5.7076051791508995


##### BASELINE RESNET AND *80*% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_ResNet'
modelname = 'baseline-ResNet-pruned80-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.509572
BLEU-2: 0.244919
BLEU-3: 0.145254
BLEU-4: 0.062147

Time taken:  5.000856161117554


##### BASELINE RESNET AND *90*% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_ResNet'
modelname = 'baseline-ResNet-pruned90-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.540337
BLEU-2: 0.262965
BLEU-3: 0.133826
BLEU-4: 0.049221

Time taken:  5.124466558297475


##### QUANTIZED RESNET AND *50*% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_ResNetQ'
modelname = 'quantized-ResNet-pruned50-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.524480
BLEU-2: 0.277066
BLEU-3: 0.191862
BLEU-4: 0.091881

Time taken:  6.653377254803975


##### QUANTIZED RESNET AND *80*% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_ResNetQ'
modelname = 'quantized-ResNet-pruned80-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.529209
BLEU-2: 0.259698
BLEU-3: 0.166630
BLEU-4: 0.077764

Time taken:  5.359142843882243


##### QUANTIZED RESNET AND *90*% PRUNED LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Load model, call evaluate
'''
# Encoder features file and saved model evaluated in this cell.
featuresname = 'features_ResNetQ'
modelname = 'quantized-ResNet-pruned90-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.427553
BLEU-2: 0.176371
BLEU-3: 0.110494
BLEU-4: 0.040035

Time taken:  5.916788101196289


##### PRUNED50 RESNET AND 50% PRUNED LSTM

In [None]:
# Encoder features file and saved model evaluated in this cell; this model is
# read from the 'models-v2/pruned/' sub-folder.
featuresname = 'features_ResNetP50'
modelname = 'pruned50-ResNet-pruned50-LSTM.h5'
model_eval = load_model(FOLDERPATH+'models-v2/pruned/'+modelname)
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')
evaluate_call(model_eval, featuresname)

BLEU-1: 0.418220
BLEU-2: 0.169478
BLEU-3: 0.107443
BLEU-4: 0.043284

Time taken:  4.798878228664398


### ROUGH