# DECODER TRAINING FOR CNN-LSTM BASED IMAGE CAPTIONING


In [None]:
# Mount Drive
# -----------
# Mount Google Drive inside the Colab runtime, then extend the module search
# path so that decoder.py stored on the mounted drive can be imported.
import sys

from google.colab import drive

drive.mount('/content/drive')

# Add path: replace the placeholder with the drive folder holding decoder.py
sys.path.append('####')

Mounted at /content/drive


In [None]:
import tensorflow as tf
import matplotlib.pyplot as plt
from decoder import *

import time

In [None]:
'''
Define folderpath for further operations

Base folder for the dataset, feature and model files used by the cells
below. Those cells build paths by plain string concatenation
(e.g. FOLDERPATH+'dataset/...'), so the value must end with a path
separator.
'''
FOLDERPATH = '####' # Add path

# TRAINING

In [None]:
'''
DECODER TRAINING
----------------
Train the caption decoder on pre-extracted CNN encoder features and save a
checkpoint every 5th epoch under FOLDERPATH+'models-v2/baselines/'.

featuresname: which cnn encoder feature list (basename, without .pkl, of the
              features file under FOLDERPATH+'dataset/')
'''

featuresname = 'features_ResNetP50Q'

# Load the training split listed in Flickr_8k.trainImages.txt together with
# its descriptions and encoder features.
filename = FOLDERPATH+'dataset/Flickr8k_text/Flickr_8k.trainImages.txt'
train = load_set(filename)
train_descriptions = load_clean_descriptions(FOLDERPATH+'dataset/descriptions.txt', train)
train_features = load_photo_features(FOLDERPATH+'dataset/{:}.pkl'.format(featuresname), train)
tokenizer = create_tokenizer(train_descriptions)
# NOTE(review): the +1 presumably accounts for a reserved index 0 -- confirm
# against create_tokenizer in decoder.py.
vocab_size = len(tokenizer.word_index) + 1
maxlength = max_length(train_descriptions)

# define the model
# The encoder feature width is read from the second axis of any one entry;
# next(iter(...)) avoids materialising the whole values list just for that.
inputshape = next(iter(train_features.values())).shape[1]
model = define_model(vocab_size, maxlength, inputshape)
# summary() prints the table itself and returns None, so it must not be
# wrapped in print() (that would emit a stray "None" line).
model.summary()
epochs = 10
steps = len(train_descriptions)

# Resume training
# model = load_model(FOLDERPATH+'models-v2/baselines/pruned50-VGG16-baseline-LSTM_ep_10.h5')

# train loop
modelname = 'pruned50quant-ResNet-baseline-LSTM'
# Per-epoch metric dicts; `history` alone is overwritten on every iteration
# and would only ever describe the final epoch.
histories = []
for i in range(1, epochs+1):
    # A fresh generator is created for every single-epoch fit call.
    generator = data_generator(train_descriptions, train_features, tokenizer, maxlength, vocab_size)
    # NOTE(review): fit_generator is deprecated in recent TensorFlow releases
    # in favour of fit(); kept here because the yield format of
    # data_generator is not visible from this notebook.
    history = model.fit_generator(generator, epochs=1, steps_per_epoch=steps, verbose=1)
    histories.append(history.history)
    # Checkpoint every 5th epoch.
    if i % 5 == 0:
        model.save(FOLDERPATH+'models-v2/baselines/'+modelname+'_ep_' + str(i) + '.h5')

# SAVE HISTORY (disabled): uncomment to persist the per-epoch metrics.
# from pickle import dump
# with open('../models-history/'+modelname+'.pkl', 'wb') as f:
#     dump(histories, f)

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 34)]         0                                            
__________________________________________________________________________________________________
input_3 (InputLayer)            [(None, 100)]        0                                            
__________________________________________________________________________________________________
embedding_1 (Embedding)         (None, 34, 256)      1940224     input_4[0][0]                    
__________________________________________________________________________________________________
dropout_2 (Dropout)             (None, 100)          0           input_3[0][0]                    
_______________________________________________________________________________________

# EVALUATION

In [None]:
def evaluate_call(model, feat_fname):
    '''
    EVALUATE THE TRAINED MODEL
    --------------------------
    Rebuild the tokenizer and maximum caption length from the training
    descriptions, load the test split and run evaluate_model on it.

    model: the decoder model to evaluate
    feat_fname: features list file name (basename, without .pkl, of the
                features file under FOLDERPATH+'dataset/')

    Prints the time taken in minutes; the BLEU scores are expected to be
    printed by evaluate_model itself. Returns None.
    '''
    descriptions_path = FOLDERPATH+'dataset/descriptions.txt'

    # load training dataset (6K): only needed to rebuild the tokenizer and
    # the maximum description length that are passed to evaluate_model
    train = load_set(FOLDERPATH+'dataset/Flickr8k_text/Flickr_8k.trainImages.txt')
    train_descriptions = load_clean_descriptions(descriptions_path, train)
    tokenizer = create_tokenizer(train_descriptions)
    maxlength = max_length(train_descriptions)

    # load test set
    test = load_set(FOLDERPATH+'dataset/Flickr8k_text/Flickr_8k.testImages.txt')
    test_descriptions = load_clean_descriptions(descriptions_path, test)
    test_features = load_photo_features(FOLDERPATH+'dataset/{:}.pkl'.format(feat_fname), test)

    # evaluate model; perf_counter is monotonic, so the elapsed time cannot
    # be skewed by wall-clock adjustments during the run
    start = time.perf_counter()
    evaluate_model(model, test_descriptions, test_features, tokenizer, maxlength)
    print("\nTime taken: ", (time.perf_counter()-start)/60)

#### VGG16 RESULTS


##### BASELINE VGG16 AND BASELINE LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Baseline VGG16 features with the baseline LSTM decoder:
load the saved model, then call evaluate_call on the matching features.
'''
featuresname = 'features_VGG'
modelname = 'baseline-VGG16-baseline-LSTM.h5'

# Saved model lives directly under models-v2/
model_eval = load_model('{}models-v2/{}'.format(FOLDERPATH, modelname))
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')

evaluate_call(model=model_eval, feat_fname=featuresname)

BLEU-1: 0.527181
BLEU-2: 0.280733
BLEU-3: 0.188469
BLEU-4: 0.083586

Time taken:  5.68393585284551


##### QUANTIZED VGG16 AND BASELINE LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Quantized VGG16 features with the baseline LSTM decoder:
load the saved model, then call evaluate_call on the matching features.
'''
featuresname = 'features_VGGQ'
modelname = 'quantized-VGG16-baseline-LSTM.h5'

# Saved model lives directly under models-v2/
model_eval = load_model('{}models-v2/{}'.format(FOLDERPATH, modelname))
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')

evaluate_call(model=model_eval, feat_fname=featuresname)

BLEU-1: 0.527145
BLEU-2: 0.277764
BLEU-3: 0.182242
BLEU-4: 0.078872

Time taken:  5.333242078622182


##### PRUNED50 VGG16 AND BASELINE LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Pruned50 VGG16 features with the baseline LSTM decoder:
load the saved model, then call evaluate_call on the matching features.
'''
featuresname = 'features_VGGP50'
modelname = 'pruned50-VGG16-baseline-LSTM.h5'

# Saved model lives under models-v2/baselines/
model_eval = load_model('{}models-v2/baselines/{}'.format(FOLDERPATH, modelname))
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')

evaluate_call(model=model_eval, feat_fname=featuresname)

BLEU-1: 0.418220
BLEU-2: 0.169478
BLEU-3: 0.107443
BLEU-4: 0.043284

Time taken:  5.790212110678355


##### PRUNED50QUANT VGG16 AND BASELINE LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Pruned50 + quantized VGG16 features with the baseline LSTM decoder:
load the saved model, then call evaluate_call on the matching features.
'''
featuresname = 'features_VGGP50Q'
modelname = 'pruned50quant-VGG16-baseline-LSTM.h5'

# Saved model lives under models-v2/baselines/
model_eval = load_model('{}models-v2/baselines/{}'.format(FOLDERPATH, modelname))
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')

evaluate_call(model=model_eval, feat_fname=featuresname)

BLEU-1: 0.504851
BLEU-2: 0.246832
BLEU-3: 0.152256
BLEU-4: 0.069298

Time taken:  5.908193639914194


#### RESNET RESULTS

##### BASELINE RESNET AND BASELINE LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Baseline ResNet features with the baseline LSTM decoder:
load the saved model, then call evaluate_call on the matching features.
'''
featuresname = 'features_ResNet'
modelname = 'baseline-ResNet-baseline-LSTM.h5'

# Saved model lives directly under models-v2/
model_eval = load_model('{}models-v2/{}'.format(FOLDERPATH, modelname))
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')

evaluate_call(model=model_eval, feat_fname=featuresname)

BLEU-1: 0.510045
BLEU-2: 0.265207
BLEU-3: 0.181706
BLEU-4: 0.083025

Time taken:  5.910696216424307


##### QUANTIZED RESNET AND BASELINE LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Quantized ResNet features with the baseline LSTM decoder:
load the saved model, then call evaluate_call on the matching features.
'''
featuresname = 'features_ResNetQ'
modelname = 'quantized-ResNet-baseline-LSTM.h5'

# Saved model lives directly under models-v2/
model_eval = load_model('{}models-v2/{}'.format(FOLDERPATH, modelname))
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')

evaluate_call(model=model_eval, feat_fname=featuresname)

BLEU-1: 0.546315
BLEU-2: 0.299104
BLEU-3: 0.210328
BLEU-4: 0.100582

Time taken:  5.879957898457845


##### PRUNED50 RESNET AND BASELINE LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Pruned50 ResNet features with the baseline LSTM decoder:
load the saved model, then call evaluate_call on the matching features.
'''
# NOTE(review): the BLEU scores recorded below this cell are identical, to
# every digit, to those recorded for the pruned50 VGG16 run -- verify that
# the intended model and features file were actually used for this output.
featuresname = 'features_ResNetP50'
modelname = 'pruned50-ResNet-baseline-LSTM.h5'

# Saved model lives under models-v2/baselines/
model_eval = load_model('{}models-v2/baselines/{}'.format(FOLDERPATH, modelname))
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')

evaluate_call(model=model_eval, feat_fname=featuresname)

BLEU-1: 0.418220
BLEU-2: 0.169478
BLEU-3: 0.107443
BLEU-4: 0.043284

Time taken:  6.262433421611786


##### PRUNED50QUANT RESNET AND BASELINE LSTM

In [None]:
'''
EVALUATE MODEL
--------------
Pruned50 + quantized ResNet features with the baseline LSTM decoder:
load the saved model, then call evaluate_call on the matching features.
'''
featuresname = 'features_ResNetP50Q'
modelname = 'pruned50quant-ResNet-baseline-LSTM.h5'

# Saved model lives under models-v2/baselines/
model_eval = load_model('{}models-v2/baselines/{}'.format(FOLDERPATH, modelname))
model_eval.compile(loss='categorical_crossentropy', optimizer='adam')

evaluate_call(model=model_eval, feat_fname=featuresname)

BLEU-1: 0.501118
BLEU-2: 0.240574
BLEU-3: 0.145263
BLEU-4: 0.065242

Time taken:  6.214895681540171


#### ROUGH